├── .clang-format
├── .github
    └── workflows
    │   └── run_tests.yml
├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cmake
    └── CPM.cmake
├── include
    └── math_approx
    │   ├── math_approx.hpp
    │   └── src
    │       ├── basic_math.hpp
    │       ├── hyperbolic_trig_approx.hpp
    │       ├── inverse_hyperbolic_trig_approx.hpp
    │       ├── inverse_trig_approx.hpp
    │       ├── log_approx.hpp
    │       ├── polylogarithm_approx.hpp
    │       ├── pow_approx.hpp
    │       ├── sigmoid_approx.hpp
    │       ├── trig_approx.hpp
    │       └── wright_omega_approx.hpp
├── test
    ├── CMakeLists.txt
    └── src
    │   ├── hyperbolic_trig_approx_test.cpp
    │   ├── inverse_hyperbolic_trig_approx_test.cpp
    │   ├── inverse_trig_approx_test.cpp
    │   ├── log_approx_test.cpp
    │   ├── polylog_approx_test.cpp
    │   ├── pow_approx_test.cpp
    │   ├── reference
    │       ├── polylogarithm.hpp
    │       ├── sincospi.hpp
    │       └── toms917.hpp
    │   ├── sigmoid_approx_test.cpp
    │   ├── test_helpers.hpp
    │   ├── trig_approx_test.cpp
    │   ├── trig_turns_approx_test.cpp
    │   └── wright_omega_approx_test.cpp
└── tools
    ├── CMakeLists.txt
    ├── bench
        ├── CMakeLists.txt
        ├── hyperbolic_trig_bench.cpp
        ├── inverse_hyperbolic_trig_bench.cpp
        ├── inverse_trig_bench.cpp
        ├── log_bench.cpp
        ├── polylog_bench.cpp
        ├── pow_bench.cpp
        ├── sigmoid_bench.cpp
        ├── trig_bench.cpp
        ├── trig_turns_bench.cpp
        └── wright_omega_bench.cpp
    └── plotter
        ├── CMakeLists.txt
        └── plotter.cpp


/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | AccessModifierOffset: -4
 3 | AlignAfterOpenBracket: Align
 4 | AlignConsecutiveAssignments: false
 5 | AlignConsecutiveDeclarations: false
 6 | AlignEscapedNewlines: Left
 7 | AlignOperands: Align
 8 | AlignTrailingComments: false
 9 | AllowAllParametersOfDeclarationOnNextLine: false
10 | AllowShortBlocksOnASingleLine: Never
11 | AllowShortCaseLabelsOnASingleLine: false
12 | AllowShortFunctionsOnASingleLine: All
13 | AllowShortIfStatementsOnASingleLine: Never
14 | AllowShortLoopsOnASingleLine: false
15 | AlwaysBreakAfterDefinitionReturnType: None
16 | AlwaysBreakAfterReturnType: None
17 | AlwaysBreakBeforeMultilineStrings: false
18 | AlwaysBreakTemplateDeclarations: Yes
19 | BinPackArguments: false
20 | BinPackParameters: false
21 | BreakAfterJavaFieldAnnotations: false
22 | BreakBeforeBinaryOperators: NonAssignment
23 | BreakBeforeBraces: Allman
24 | BreakBeforeTernaryOperators: true
25 | BreakConstructorInitializersBeforeComma: false
26 | BreakStringLiterals: false
27 | ColumnLimit: 0
28 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
29 | ConstructorInitializerIndentWidth: 4
30 | ContinuationIndentWidth: 4
31 | Cpp11BracedListStyle: false
32 | DerivePointerAlignment: false
33 | DisableFormat: false
34 | ExperimentalAutoDetectBinPacking: false
35 | ForEachMacros: ['forEachXmlChildElement']
36 | IndentCaseLabels: true
37 | IndentWidth: 4
38 | IndentWrappedFunctionNames: true
39 | KeepEmptyLinesAtTheStartOfBlocks: false
40 | Language: Cpp
41 | MaxEmptyLinesToKeep: 1
42 | NamespaceIndentation: Inner
43 | PointerAlignment: Left
44 | ReflowComments: false
45 | SortIncludes: true
46 | SpaceAfterCStyleCast: true
47 | SpaceAfterLogicalNot: true
48 | SpaceBeforeAssignmentOperators: true
49 | SpaceBeforeCpp11BracedList: true
50 | SpaceBeforeParens: NonEmptyParentheses
51 | SpaceInEmptyParentheses: false
52 | SpaceBeforeInheritanceColon: true
53 | SpacesInAngles: false
54 | SpacesInCStyleCastParentheses: false
55 | SpacesInContainerLiterals: true
56 | SpacesInParentheses: false
57 | SpacesInSquareBrackets: false
58 | Standard: "c++17"
59 | TabWidth: 4
60 | UseTab: Never
61 | ---
62 | Language: ObjC
63 | BasedOnStyle: Chromium
64 | AlignTrailingComments: true
65 | BreakBeforeBraces: Allman
66 | ColumnLimit: 0
67 | IndentWidth: 4
68 | KeepEmptyLinesAtTheStartOfBlocks: false
69 | ObjCSpaceAfterProperty: true
70 | ObjCSpaceBeforeProtocolList: true
71 | PointerAlignment: Left
72 | SpacesBeforeTrailingComments: 1
73 | TabWidth: 4
74 | UseTab: Never
75 | ...
76 | 


--------------------------------------------------------------------------------
/.github/workflows/run_tests.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 | 
11 |   workflow_dispatch:
12 | 
13 | 
14 | jobs:
15 |   build_and_test:
16 |     name: "Run tests on ${{ matrix.os }}"
17 |     runs-on: ${{ matrix.os }}
18 |     strategy:
19 |       fail-fast: false # show all errors for each platform (vs. cancel jobs on error)
20 |       matrix:
21 |         os: [ubuntu-latest, macos-13, windows-latest]
22 | 
23 |     steps:
24 | #      - name: Install Linux Deps
25 | #        if: runner.os == 'Linux'
26 | #        run: |
27 | #          sudo apt-get update
28 | #          sudo apt install libasound2-dev libcurl4-openssl-dev libx11-dev libxinerama-dev libxext-dev libfreetype6-dev libwebkit2gtk-4.0-dev libglu1-mesa-dev libsamplerate-dev
29 | 
30 |       - name: Get latest CMake
31 |         uses: lukka/get-cmake@latest
32 | 
33 |       - name: Checkout code
34 |         uses: actions/checkout@v4 # upgraded from the deprecated v2 release
35 | 
36 |       - name: CMake Configure
37 |         run: cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo
38 | 
39 |       - name: CMake Build
40 |         run: cmake --build build --config RelWithDebInfo --parallel
41 | 
42 |       - name: CMake Test
43 |         run: |
44 |           ctest --test-dir build -C RelWithDebInfo --show-only
45 |           ctest --test-dir build -C RelWithDebInfo -j2 --output-on-failure
46 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea/
 2 | .vscode/
 3 | .focus-config
 4 | *.raddbg_project
 5 | 
 6 | build*/
 7 | *.zip
 8 | 
 9 | .DS_Store
10 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.18)
 2 | project(math_approx VERSION 1.0.0 LANGUAGES CXX)
 3 | string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${PROJECT_SOURCE_DIR}" PROJECT_IS_TOP_LEVEL) # project() only defines this variable from CMake 3.21 on
 4 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") # this project's own cmake/ dir, even when consumed via add_subdirectory()
 5 | 
 6 | if(PROJECT_IS_TOP_LEVEL)
 7 |     include(CPM)
 8 |     CPMAddPackage(
 9 |         NAME xsimd
10 |         GIT_REPOSITORY https://github.com/xtensor-stack/xsimd
11 |         GIT_TAG master
12 |     )
13 | endif()
14 | 
15 | add_library(math_approx INTERFACE)
16 | target_include_directories(math_approx INTERFACE include)
17 | if(MSVC)
18 |     target_compile_definitions(math_approx INTERFACE _USE_MATH_DEFINES=1)
19 |     if(CMAKE_CXX_COMPILER_ID STREQUAL Clang)
20 |         target_compile_options(math_approx INTERFACE -msse4.2)
21 |     else()
22 |         target_compile_options(math_approx INTERFACE /arch:SSE4.2)
23 |     endif()
24 | else()
25 |     target_compile_options(math_approx INTERFACE -msse4.2 -Wno-unused-command-line-argument)
26 | endif()
27 | 
28 | if (TARGET xsimd)
29 |     message(STATUS "math_approx -- Linking with XSIMD...")
30 |     target_link_libraries(math_approx INTERFACE xsimd)
31 |     target_compile_definitions(math_approx INTERFACE MATH_APPROX_XSIMD_TARGET=1)
32 | endif()
33 | 
34 | if(PROJECT_IS_TOP_LEVEL)
35 |     include(CTest)
36 |     add_subdirectory(test)
37 |     add_subdirectory(tools)
38 | endif()
39 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2024, jatinchowdhury18
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # math_approx
  2 | 
  3 | [![Test](https://github.com/Chowdhury-DSP/math_approx/actions/workflows/run_tests.yml/badge.svg)](https://github.com/Chowdhury-DSP/math_approx/actions/workflows/run_tests.yml)
  4 | 
  5 | 
  6 | `math_approx` is a C++ library for math approximations.
  7 | 
  8 | Currently supported:
  9 | 
 10 | - sin/cos/tan
 11 | - sin(2*pi*x)/cos(2*pi*x)
 12 | - arcsin/arccos/arctan
 13 | - exp/exp2/exp10/expm1
 14 | - log/log2/log10/log1p
 15 | - sinh/cosh/tanh
 16 | - arcsinh/arccosh/arctanh
 17 | - [Sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function)
 18 | - [Wright-Omega function](https://en.wikipedia.org/wiki/Wright_omega_function)
 19 | - [Dilogarithm function](https://en.wikipedia.org/wiki/Dilogarithm)
 20 | 
 21 | At the moment, most of these implementations have been "good enough"
 22 | for my own use cases (both in terms of performance and accuracy). That
 23 | said, I definitely believe that it's possible to achieve better results
 24 | for many of these functions. If you have ideas for improving these
 25 | approximations, either by:
 26 | - Modifying an approximation to achieve better accuracy with the same (or similar) performance
 27 | - Modifying an approximation to achieve better performance with the same (or similar) accuracy
 28 | 
 29 | then please get in touch with a GitHub issue or pull request!
 30 | 
 31 | ## Usage
 32 | 
 33 | ### With CMake
 34 | 
 35 | `math_approx` is set up as a CMake `INTERFACE` library. To use it as
 36 | such, you'll need to add the following to your `CMakeLists.txt` file:
 37 | 
 38 | ```cmake
 39 | add_subdirectory(math_approx)
 40 | target_link_libraries(<your_target> math_approx)
 41 | ```
 42 | 
 43 | And then in your C++ code, you can use the approximations like so:
 44 | 
 45 | ```cpp
 46 | #include <math_approx/math_approx.hpp>
 47 | 
 48 | constexpr auto sin_half = math_approx::sin<5> (0.5f);
 49 | ```
 50 | 
 51 | ### Without CMake
 52 | 
 53 | To use `math_approx` without CMake, you'll need to add
 54 | `/path/to/repo/include` to your include path. If you're
 55 | compiling your program with MSVC, you may also need to
 56 | add the pre-processor definition `_USE_MATH_DEFINES`.
 57 | 
 58 | ## A few other thoughts
 59 | 
 60 | ### Accuracy vs. Performance
 61 | 
 62 | Most of the methods in this library are provided with template
 63 | arguments which control the "order" of the approximation. The
 64 | "order" typically refers to the order of a polynomial used in
 65 | the approximation. In general, higher-order approximations will
 66 | achieve greater accuracy, while taking longer to compute.
 67 | 
 68 | ### Showing My Work
 69 | 
 70 | Since the approximations in this library are primarily based on
 71 | polynomial approximations, I've tried to provide the details
 72 | for how those polynomials were derived, by providing a zipped
 73 | folder containing the Mathematica notebooks that were used to
 74 | derive the polynomials. Since not everyone has access to
 75 | Mathematica, the folder also contains a PDF version of each
 76 | notebook. At the moment, I'm planning to upload an updated
 77 | copy of the zipped folder with each release of the library,
 78 | but if I can think of a better method of distribution, that
 79 | doesn't involve adding the notebook files to the repository
 80 | directly, I'll do that instead.
 81 | 
 82 | ### Measuring Accuracy
 83 | 
 84 | This library uses three approaches for measuring accuracy:
 85 | - Absolute error (`Error = |actual - approx|`)
 86 | - Relative error (`Error = |(actual - approx) / actual|`)
 87 | - [ULP Distance](https://en.wikipedia.org/wiki/Unit_in_the_last_place#:~:text=In%20computer%20science%20and%20numerical,of%20accuracy%20in%20numeric%20calculations.)
 88 | 
 89 | ### Single vs. Double Precision
 90 | 
 91 | At the moment, the approximations in this library have been
 92 | primarily tested with single-precision floating-point numbers.
 93 | It is possible (maybe even likely) that most of the approximations
 94 | do not achieve sufficient accuracy for double-precision computations.
 95 | 
 96 | ### C++ Standard
 97 | 
 98 | The library has been mostly developed and tested with C++20, with
 99 | a little bit of effort to provide compatibility with C++17.
100 | Personally, I would rather not extend support to C++14 or earlier.
101 | 
102 | ### SIMD
103 | 
104 | These approximations are intended to work for both scalar floating-point
105 | data types, as well as SIMD floating-point data types. At the moment,
106 | the library is set up to be compatible with the [XSIMD library](https://github.com/xtensor-stack/xsimd).
107 | That said, I would like to make it as easy as possible to use this
108 | library with other SIMD libraries (or matrix math libraries), so if
109 | anyone has some suggestions, please let me know!
110 | 
111 | ### Constexpr
112 | 
113 | The majority of the approximations in this library are implemented
114 | so as to be constexpr-compatible. That said, there are some
115 | approximations that are only constexpr if the compiler supports
116 | `std::bit_cast` (typically C++20 and later), and some that cannot
117 | be made constexpr because they depend on `std::sqrt`. If someone
118 | knows of any portable constexpr-compatible implementations of these
119 | methods, I would be happy to add them to the library!
120 | 
121 | ## License
122 | 
123 | `math_approx` is open source, and is licensed under the
124 | BSD 3-clause license.
125 | 
126 | Enjoy!
127 | 


--------------------------------------------------------------------------------
/cmake/CPM.cmake:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: MIT
 2 | #
 3 | # SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors
 4 | 
 5 | set(CPM_DOWNLOAD_VERSION 0.40.2)
 6 | set(CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d")
 7 | 
 8 | if(CPM_SOURCE_CACHE)
 9 |     set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
10 | elseif(DEFINED ENV{CPM_SOURCE_CACHE})
11 |     set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
12 | else()
13 |     set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
14 | endif()
15 | 
16 | # Expand relative path. This is important if the provided path contains a tilde (~)
17 | get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
18 | 
19 | file(DOWNLOAD
20 |         https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
21 |         ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
22 | )
23 | 
24 | include(${CPM_DOWNLOAD_LOCATION})
25 | 


--------------------------------------------------------------------------------
/include/math_approx/math_approx.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | namespace math_approx
 4 | {
 5 | }
 6 | 
 7 | #include "src/basic_math.hpp"
 8 | 
 9 | #include "src/trig_approx.hpp"
10 | #include "src/inverse_trig_approx.hpp"
11 | #include "src/pow_approx.hpp"
12 | #include "src/log_approx.hpp"
13 | #include "src/hyperbolic_trig_approx.hpp"
14 | #include "src/inverse_hyperbolic_trig_approx.hpp"
15 | #include "src/sigmoid_approx.hpp"
16 | #include "src/wright_omega_approx.hpp"
17 | #include "src/polylogarithm_approx.hpp"
18 | 


--------------------------------------------------------------------------------
/include/math_approx/src/basic_math.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | // If MATH_APPROX_XSIMD_TARGET is not defined
  4 | // the user can still use XSIMD by manually including
  5 | // it before including the math_approx header.
  6 | #if MATH_APPROX_XSIMD_TARGET
  7 | #include <xsimd/xsimd.hpp>
  8 | #endif
  9 | 
 10 | #if ! defined(XSIMD_HPP)
 11 | #include <cmath>
 12 | #endif
 13 | #include <algorithm>
 14 | #include <bit>
 15 | #include <cstring>
 16 | #include <type_traits>
 17 | namespace math_approx
 18 | {
 19 | template <typename T>
 20 | struct scalar_of // primary template: a type without a specialization is its own scalar type
 21 | {
 22 |     using type = T;
 23 | };
 24 | 
 25 | /**
 26 |  * Shorthand for typename scalar_of<T>::type: T itself when T is a scalar floating-point type,
 27 |  * and the corresponding scalar (element) type when T is a SIMD floating-point type.
 28 |  */
 29 | template <typename T>
 30 | using scalar_of_t = typename scalar_of<T>::type;
 31 | 
 32 | /** Reciprocal square root for scalar types: returns 1 / sqrt(x). */
 33 | template <typename T>
 34 | T rsqrt (T x)
 35 | {
 36 |     // @TODO: find a way to make this function constexpr
 37 |     // A plain square root followed by a divide (sqrtss + divss) measured a bit faster than
 38 |     // the rsqrtss estimate with a Newton-Raphson refinement that is kept below for reference.
 39 |     const auto root = std::sqrt (x);
 40 |     return static_cast<T> (1) / root;
 41 |     // fast inverse square root (using rsqrtss hardware instruction), plus one Newton-Raphson iteration
 42 |     //    auto r = xsimd::rsqrt (xsimd::broadcast (x)).get (0);
 43 |     //    x *= r;
 44 |     //    x *= r;
 45 |     //    x += -3.0f;
 46 |     //    r *= -0.5f;
 47 |     //    return x * r;
 48 | }
 49 | 
 50 | /** Function interface for the ternary operator: returns t when q is true, otherwise f. Usable in constant expressions. */
 51 | template <typename T>
 52 | constexpr T select (bool q, T t, T f)
 53 | {
 54 |     return q ? t : f;
 55 | }
 56 | 
 57 | #if defined(XSIMD_HPP)
 58 | template <typename T>
 59 | struct scalar_of<xsimd::batch<T>> // specialization: the scalar type of an xsimd batch is its element type T
 60 | {
 61 |     using type = T;
 62 | };
 63 | 
 64 | /** Reciprocal square root for XSIMD batches: xsimd::rsqrt estimate plus one Newton-Raphson refinement. */
 65 | template <typename T>
 66 | xsimd::batch<T> rsqrt (xsimd::batch<T> x)
 67 | {
 68 |     using S = scalar_of_t<T>;
 69 |     const auto estimate = xsimd::rsqrt (x);
 70 |     x = (x * estimate) * estimate; // x * r^2
 71 |     // Newton-Raphson step: r' = (x * r^2 - 3) * (-0.5 * r)
 72 |     const auto residual = x + static_cast<S> (-3);
 73 |     const auto scaled_estimate = estimate * static_cast<S> (-0.5);
 74 |     return residual * scaled_estimate;
 75 | }
 76 | 
 77 | /** XSIMD overload of select(): forwards the mask q and the batches t, f to xsimd::select. */
 78 | template <typename T>
 79 | xsimd::batch<T> select (xsimd::batch_bool<T> q, xsimd::batch<T> t, xsimd::batch<T> f)
 80 | {
 81 |     return xsimd::select (q, t, f);
 82 | }
 83 | #endif
 84 | 
 85 | #if ! __cpp_lib_bit_cast
 86 | // Requirements for the bit_cast fallback: equal sizes, and both types trivially copyable.
 87 | template <typename From, typename To>
 88 | using is_bitwise_castable = std::integral_constant<bool,
 89 |                                                    (sizeof (From) == sizeof (To)) && std::is_trivially_copyable<From>::value && std::is_trivially_copyable<To>::value>;
 90 | 
 91 | // Fallback used when std::bit_cast is unavailable. Not constexpr: compiler support is needed for bitwise copy with constexpr.
 92 | template <typename To, typename From>
 93 | inline typename std::enable_if<is_bitwise_castable<From, To>::value, To>::type bit_cast (const From& from) noexcept
 94 | {
 95 |     union U
 96 |     {
 97 |         U() {} // user-provided, so U can be constructed even when To has no default constructor
 98 |         char storage[sizeof (To)] {};
 99 |         typename std::remove_const<To>::type dest;
100 |     } u; // instead of To dest; because To doesn't require DefaultConstructible.
101 |     std::memcpy (&u.dest, &from, sizeof from); // copy the object representation of `from` into dest
102 |     return u.dest;
103 | }
104 | #else
105 | using std::bit_cast;
106 | #endif
107 | } // namespace math_approx
108 | 


--------------------------------------------------------------------------------
/include/math_approx/src/hyperbolic_trig_approx.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "pow_approx.hpp"
  4 | 
  5 | namespace math_approx
  6 | {
  7 | // ref: https://en.wikipedia.org/wiki/Hyperbolic_functions#Definitions
  8 | // sinh = (e^(2x) - 1) / (2e^x), cosh = (e^(2x) + 1) / (2e^x)
  9 | // let B = e^x, then sinh = (B^2 - 1) / (2B), cosh = (B^2 + 1) / (2B)
 10 | // simplifying, we get: sinh = 0.5 (B - 1/B), cosh = 0.5 (B + 1/B)
 11 | 
 12 | /** Approximation of sinh(x) as 0.5 * e^x - 0.5 / e^x, with e^x supplied by exp<order> (x). */
 13 | template <int order, typename T>
 14 | constexpr T sinh (T x)
 15 | {
 16 |     using S = scalar_of_t<T>;
 17 |     const auto exp_x = exp<order> (x);
 18 |     const auto half_recip = static_cast<S> (0.5) / exp_x; // 0.5 * e^(-x)
 19 |     const auto half_exp = exp_x * static_cast<S> (0.5); // 0.5 * e^x
 20 |     return half_exp - half_recip;
 21 | }
 22 | 
 23 | /** Approximation of cosh(x) as 0.5 * e^x + 0.5 / e^x, with e^x supplied by exp<order> (x). */
 24 | template <int order, typename T>
 25 | constexpr T cosh (T x)
 26 | {
 27 |     using S = scalar_of_t<T>;
 28 |     const auto exp_x = exp<order> (x);
 29 |     const auto half_recip = static_cast<S> (0.5) / exp_x; // 0.5 * e^(-x)
 30 |     const auto half_exp = exp_x * static_cast<S> (0.5); // 0.5 * e^x
 31 |     return half_exp + half_recip;
 32 | }
 33 | 
 34 | /**
 35 |  * Simultaneous approximation of sinh(x) and cosh(x),
 36 |  * sharing a single exp<order> (x) evaluation between the two.
 37 |  *
 38 |  * Returns a std::pair holding { sinh(x), cosh(x) }.
 39 |  */
 40 | template <int order, typename T>
 41 | constexpr auto sinh_cosh (T x)
 42 | {
 43 |     using S = scalar_of_t<T>;
 44 |     const auto exp_x = exp<order> (x);
 45 |     const auto half_recip = static_cast<S> (0.5) / exp_x; // 0.5 * e^(-x)
 46 |     const auto half_exp = exp_x * static_cast<S> (0.5); // 0.5 * e^x
 47 | 
 48 |     const auto sinh_x = half_exp - half_recip;
 49 |     const auto cosh_x = half_exp + half_recip;
 50 | 
 51 |     return std::make_pair (sinh_x, cosh_x);
 52 | }
 53 | 
 54 | namespace tanh_detail
 55 | {
 56 |     // See notebooks/tanh_approx.nb for the derivation of these polynomials
 57 |     // Each tanh_poly_N evaluates an odd polynomial of degree N as x * q(x^2), with q in Horner form.
 58 |     template <typename T>
 59 |     constexpr T tanh_poly_11 (T x)
 60 |     {
 61 |         using S = scalar_of_t<T>;
 62 |         const auto x_sq = x * x;
 63 |         const auto y_9_11 = (S) 2.63661358122e-6 + (S) 3.33765558362e-8 * x_sq; // coefficients of the x^9 and x^11 terms
 64 |         const auto y_7_9_11 = (S) 0.000199027336899 + y_9_11 * x_sq;
 65 |         const auto y_5_7_9_11 = (S) 0.00833223857843 + y_7_9_11 * x_sq;
 66 |         const auto y_3_5_7_9_11 = (S) 0.166667159320 + y_5_7_9_11 * x_sq;
 67 |         const auto y_1_3_5_7_9_11 = (S) 1 + y_3_5_7_9_11 * x_sq; // the linear coefficient is exactly 1
 68 |         return x * y_1_3_5_7_9_11;
 69 |     }
 70 |     // Degree-9 polynomial (terms x, x^3, ..., x^9).
 71 |     template <typename T>
 72 |     constexpr T tanh_poly_9 (T x)
 73 |     {
 74 |         using S = scalar_of_t<T>;
 75 |         const auto x_sq = x * x;
 76 |         const auto y_7_9 = (S) 0.000192218110330 + (S) 3.54808622170e-6 * x_sq;
 77 |         const auto y_5_7_9 = (S) 0.00834777254865 + y_7_9 * x_sq;
 78 |         const auto y_3_5_7_9 = (S) 0.166658873283 + y_5_7_9 * x_sq;
 79 |         const auto y_1_3_5_7_9 = (S) 1 + y_3_5_7_9 * x_sq;
 80 |         return x * y_1_3_5_7_9;
 81 |     }
 82 |     // Degree-7 polynomial (terms x, x^3, x^5, x^7).
 83 |     template <typename T>
 84 |     constexpr T tanh_poly_7 (T x)
 85 |     {
 86 |         using S = scalar_of_t<T>;
 87 |         const auto x_sq = x * x;
 88 |         const auto y_5_7 = (S) 0.00818199927912 + (S) 0.000243153287690 * x_sq;
 89 |         const auto y_3_5_7 = (S) 0.166769941467 + y_5_7 * x_sq;
 90 |         const auto y_1_3_5_7 = (S) 1 + y_3_5_7 * x_sq;
 91 |         return x * y_1_3_5_7;
 92 |     }
 93 |     // Degree-5 polynomial (terms x, x^3, x^5).
 94 |     template <typename T>
 95 |     constexpr T tanh_poly_5 (T x)
 96 |     {
 97 |         using S = scalar_of_t<T>;
 98 |         const auto x_sq = x * x;
 99 |         const auto y_3_5 = (S) 0.165326984031 + (S) 0.00970240200826 * x_sq;
100 |         const auto y_1_3_5 = (S) 1 + y_3_5 * x_sq;
101 |         return x * y_1_3_5;
102 |     }
103 |     // Degree-3 polynomial (terms x, x^3).
104 |     template <typename T>
105 |     constexpr T tanh_poly_3 (T x)
106 |     {
107 |         using S = scalar_of_t<T>;
108 |         const auto x_sq = x * x;
109 |         const auto y_1_3 = (S) 1 + (S) 0.183428244899 * x_sq;
110 |         return x * y_1_3;
111 |     }
112 | } // namespace tanh_detail
113 | 
114 | /**
115 |  * Approximation of tanh(x), using tanh(x) ≈ p(x) / sqrt(p(x)^2 + 1),
116 |  * where p(x) is an odd polynomial fit to minimize the maximum relative error.
117 |  */
118 | template <int order, typename T>
119 | T tanh (T x)
120 | {
121 |     static_assert (order % 2 == 1 && order <= 11 && order >= 3, "Order must be an odd number within [3, 11]");
122 |     // pick the polynomial p(x) matching the requested order at compile time
123 |     T x_poly {};
124 |     if constexpr (order == 11)
125 |         x_poly = tanh_detail::tanh_poly_11 (x);
126 |     else if constexpr (order == 9)
127 |         x_poly = tanh_detail::tanh_poly_9 (x);
128 |     else if constexpr (order == 7)
129 |         x_poly = tanh_detail::tanh_poly_7 (x);
130 |     else if constexpr (order == 5)
131 |         x_poly = tanh_detail::tanh_poly_5 (x);
132 |     else if constexpr (order == 3)
133 |         x_poly = tanh_detail::tanh_poly_3 (x);
134 |     // p / sqrt(p^2 + 1), with the reciprocal square root supplied by rsqrt()
135 |     using S = scalar_of_t<T>;
136 |     return x_poly * rsqrt (x_poly * x_poly + (S) 1);
137 | }
138 | } // namespace math_approx
139 | 


--------------------------------------------------------------------------------
/include/math_approx/src/inverse_hyperbolic_trig_approx.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "basic_math.hpp"
  4 | #include "log_approx.hpp"
  5 | 
  6 | namespace math_approx
  7 | {
  8 | struct AsinhLog2Provider
  9 | {
 10 |     // for polynomial derivations, see notebooks/asinh_approx.nb
 11 |     // asinh() passes this struct as the last template argument of log<>().
 12 |     /** approximation for log2(x), optimized on the range [1, 2], to be used within an asinh(x) computation */
 13 |     template <typename T, int order, bool /*C1_continuous*/> // the bool parameter is unnamed and unused here
 14 |     static constexpr T log2_approx (T x)
 15 |     {
 16 |         static_assert (order >= 3 && order <= 5);
 17 |         using S = scalar_of_t<T>;
 18 |         // each branch evaluates a degree-`order` polynomial in x, pairing coefficients as (c_k + c_{k+1} * x)
 19 |         const auto x_sq = x * x;
 20 |         if constexpr (order == 3)
 21 |         {
 22 |             const auto x_2_3 = (S) -1.21535595794871 + (S) 0.194363894384581 * x;
 23 |             const auto x_0_1 = (S) -2.26452854958994 + (S) 3.28552061315407 * x;
 24 |             return x_0_1 + x_2_3 * x_sq;
 25 |         }
 26 |         else if constexpr (order == 4)
 27 |         {
 28 |             const auto x_3_4 = (S) 0.770443387059628 + (S) -0.102652345633016 * x;
 29 |             const auto x_1_2 = (S) 4.33013912645867 + (S) -2.39448588379361 * x;
 30 |             const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
 31 |             return (S) -2.60344428409168 + x_1_2_3_4 * x; // constant term plus x * (terms 1..4)
 32 |         }
 33 |         else if constexpr (order == 5)
 34 |         {
 35 |             const auto x_4_5 = (S) -0.511946284688366 + (S) 0.0578217518982235 * x;
 36 |             const auto x_2_3 = (S) -3.94632584968643 + (S) 1.90796087279737 * x;
 37 |             const auto x_0_1 = (S) -2.87748189127908 + (S) 5.36997140095829 * x;
 38 |             const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
 39 |             return x_0_1 + x_2_3_4_5 * x_sq;
 40 |         }
 41 |         else
 42 |         {
 43 |             return {}; // never selected for an order accepted by the static_assert above
 44 |         }
 45 |     }
 46 | };
 47 | 
 48 | /**
 49 |  * Approximates asinh(x) over the full range via the identity
 50 |  * asinh(|x|) = log(|x| + sqrt(x^2 + 1)), restoring the sign of x afterwards.
 51 |  *
 52 |  * For orders above 5, the order-5 logarithm is refined with one
 53 |  * Newton-Raphson iteration. In most cases the extra accuracy is not
 54 |  * worth the extra cost (when compared to the performance and
 55 |  * accuracy achieved by the STL implementation).
 56 |  */
 57 | template <int order, typename T>
 58 | constexpr T asinh (T x)
 59 | {
 60 |     using S = scalar_of_t<T>;
 61 |     using std::abs, std::sqrt;
 62 | #if defined(XSIMD_HPP)
 63 |     using xsimd::abs, xsimd::sqrt;
 64 | #endif
 65 |     // sign of x as a value of type T: +1, -1 or 0
 66 |     const auto negative_or_zero = select (x < static_cast<S> (0), static_cast<T> (static_cast<S> (-1)), static_cast<T> (static_cast<S> (0)));
 67 |     const auto x_sign = select (x > static_cast<S> (0), static_cast<T> (static_cast<S> (1)), negative_or_zero);
 68 |     const auto x_abs = abs (x);
 69 |     const auto log_arg = x_abs + sqrt (x_abs * x_abs + static_cast<S> (1));
 70 |     auto result = log<pow_detail::BaseE<S>, std::min (order, 5), false, AsinhLog2Provider> (log_arg);
 71 | 
 72 |     if constexpr (order > 5)
 73 |     {
 74 |         const auto exp_result = math_approx::exp<order - 1> (result);
 75 |         result -= (exp_result - log_arg) / exp_result; // Newton-Raphson step on exp(result) - log_arg
 76 |     }
 77 | 
 78 |     return x_sign * result;
 79 | }
 80 | 
 81 | /**
 82 |  * Approximates acosh(x) via the identity
 83 |  * acosh(x) = log(x + sqrt(x^2 - 1)), evaluated with log<order>().
 84 |  */
 85 | template <int order, typename T>
 86 | constexpr T acosh (T x)
 87 | {
 88 |     using S = scalar_of_t<T>;
 89 |     using std::sqrt;
 90 | #if defined(XSIMD_HPP)
 91 |     using xsimd::sqrt;
 92 | #endif
 93 | 
 94 |     const auto root = sqrt (x * x - static_cast<S> (1));
 95 |     return log<order> (x + root);
 96 | }
 97 | 
 98 | /**
 99 |  * Approximates atanh(x) via the identity
100 |  * atanh(x) = (1/2) log((1 + x) / (1 - x)).
101 |  */
102 | template <int order, typename T>
103 | constexpr T atanh (T x)
104 | {
105 |     const auto one = static_cast<scalar_of_t<T>> (1);
106 |     return static_cast<scalar_of_t<T>> (0.5) * log<order> ((one + x) / (one - x));
107 | }
108 | } // namespace math_approx
109 | 


--------------------------------------------------------------------------------
/include/math_approx/src/inverse_trig_approx.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "basic_math.hpp"
  4 | 
  5 | namespace math_approx
  6 | {
  7 | namespace inv_trig_detail
  8 | {
  9 |     // for polynomial derivations, see notebooks/asin_acos_approx.nb
 10 | 
 11 |     template <int order, typename T> // order selects one of the polynomial fits below
 12 |     constexpr T asin_kernel (T x)
 13 |     {
 14 |         using S = scalar_of_t<T>;
 15 |         static_assert (order >= 1 && order <= 4);
 16 |         // Each branch evaluates a polynomial of degree `order` in x, written in nested (Horner) form.
 17 |         if constexpr (order == 1)
 18 |         {
 19 |             return (S) 0.16443531037029196495 + x * (S) 0.097419577664394046979;
 20 |         }
 21 |         else if constexpr (order == 2)
 22 |         {
 23 |             return (S) 0.16687742065041710759 + x * ((S) 0.070980446338571381859 + x * (S) 0.066682760821292624831);
 24 |         }
 25 |         else if constexpr (order == 3)
 26 |         {
 27 |             return (S) 0.16665080061757006624 + x * ((S) 0.075508850204912977833 + x * ((S) 0.039376231206556484843 + x * (S) 0.051275338699694958389));
 28 |         }
 29 |         else if constexpr (order == 4)
 30 |         {
 31 |             return (S) 0.16666803275183153521 + x * ((S) 0.074936964020844071266 + x * ((S) 0.045640288439217274741 + x * ((S) 0.023435504410713306478 + x * (S) 0.043323710842752508055)));
 32 |         }
 33 |         else
 34 |         {
 35 |             return {}; // never selected for an order accepted by the static_assert above
 36 |         }
 37 |     }
 38 | 
 39 |     template <int order, typename T> // order selects one of the polynomial fits below
 40 |     constexpr T acos_kernel (T x)
 41 |     {
 42 |         using S = scalar_of_t<T>;
 43 |         static_assert (order >= 1 && order <= 5);
 44 |         // Each branch evaluates a polynomial of degree `order` in x, written in nested (Horner) form.
 45 |         if constexpr (order == 1)
 46 |         {
 47 |             return (S) 0.061454830783555181029 + x * (S) 0.50934149601134137697;
 48 |         }
 49 |         else if constexpr (order == 2)
 50 |         {
 51 |             return (S) 0.18188825560430002537 + x * ((S) -0.092825628092384385170 + x * (S) 0.48173369928298098719);
 52 |         }
 53 |         else if constexpr (order == 3)
 54 |         {
 55 |             return (S) 0.16480511788348814473 + x * ((S) 0.11286070199090997290 + x * ((S) -0.18795205899643871450 + x * (S) 0.48108256591693704385));
 56 |         }
 57 |         else if constexpr (order == 4)
 58 |         {
 59 |             return (S) 0.16687235373875186628 + x * ((S) 0.068412956842158992310 + x * ((S) 0.11466969910945928879 + x * ((S) -0.27433862418620241774 + x * (S) 0.49517994129072917531)));
 60 |         }
 61 |         else if constexpr (order == 5)
 62 |         {
 63 |             return (S) 0.16664924406383360700 + x * ((S) 0.075837825275592588015 + x * ((S) 0.030665158374004904823 + x * ((S) 0.13572846625592635550 + x * ((S) -0.34609357317006372856 + x * (S) 0.50800920599560273061))));
 64 |         }
 65 |         else
 66 |         {
 67 |             return {}; // never selected for an order accepted by the static_assert above
 68 |         }
 69 |     }
 70 | 
 71 |     // for polynomial derivations, see notebooks/arctan_approx.nb
 72 | 
    /**
     * Evaluates the rational function (numerator / denominator) that
     * math_approx::atan() uses as its core approximation.
     *
     * math_approx::atan() calls this with |x| when |x| <= 1 and with 1 / |x|
     * otherwise. Accepted orders are 4 to 7 (enforced by the static_assert);
     * orders 5 and 6 evaluate the same expression.
     *
     * @tparam order selects the coefficient set, in [4, 7]
     * @param x      point at which the approximation is evaluated
     * @return       the value of the rational approximation at x
     */
    template <int order, typename T>
    constexpr T atan_kernel (T x)
    {
        using S = scalar_of_t<T>;
        static_assert (order >= 4 && order <= 7);

        if constexpr (order == 4)
        {
            // (x + a x^2) / (1 + b x + c x^2)
            const auto x_sq = x * x;
            const auto num = x + x_sq * (S) 0.498001992540;
            const auto den = (S) 1 + x * (S) 0.481844539675 + x_sq * (S) 0.425470835319;
            return num / den;
        }
        else if constexpr (order == 5 || order == 6)
        {
            // (x + x^2 (a + b x)) / (1 + c x + d x^2)
            const auto x_sq = x * x;
            const auto num = (S) 0.177801521472 + x * (S) 0.116983970701;
            const auto den = (S) 1 + x * (S) 0.174763903018 + x_sq * (S) 0.473808187566;
            return (x + x_sq * num) / den;
        }
        else if constexpr (order == 7)
        {
            // (x + x^2 (a + b x + c x^2)) / (1 + d x + e x^2 + f x^3)
            const auto x_sq = x * x;
            const auto num = (S) 0.274959104817 + (S) 0.351814748865 * x + (S) -0.0395798531406 * x_sq;
            const auto den = (S) 1 + x * ((S) 0.275079063405 + x * ((S) 0.683311392128 + x * (S) 0.0624877111229));
            return (x + x_sq * num) / den;
        }
        else
        {
            // every order accepted by the static_assert is handled above
            return {};
        }
    }
105 | } // namespace inv_trig_detail
106 | 
107 | /**
108 |  * Approximation of asin(x) using asin(x) ≈ p(x^2) * x^3 + x for x in [0, 0.5],
109 |  * and asin(x) ≈ pi/2 - p((1-x)/2) * ((1-x)/2)^3/2 + ((1-x)/2)^1/2 for x in [0.5, 1],
110 |  * where p(x) is a polynomial fit to achieve the minimum absolute error.
111 |  */
112 | template <int order, typename T>
113 | T asin (T x)
114 | {
115 |     using S = scalar_of_t<T>;
116 | 
117 |     using std::abs, std::sqrt;
118 | #if defined(XSIMD_HPP)
119 |     using xsimd::abs, xsimd::sqrt;
120 | #endif
121 | 
122 |     const auto abs_x = abs (x);
123 | 
124 |     const auto reflect = abs_x > (S) 0.5;
125 |     auto z0 = select (reflect, (S) 0.5 * ((S) 1 - abs_x), abs_x * abs_x);
126 | 
127 |     auto x2 = select (reflect, sqrt (z0), abs_x);
128 |     auto z1 = inv_trig_detail::asin_kernel<order> (z0);
129 | 
130 |     auto z2 = z1 * (z0 * x2) + x2;
131 |     auto res = select (reflect, (S) M_PI_2 - (z2 + z2), z2);
132 |     return select (x > (S) 0, res, -res);
133 | }
134 | 
135 | /**
136 |  * Approximation of acos(x) using the same approach as asin(x),
137 |  * but with a different polynomial fit.
138 |  */
139 | template <int order, typename T>
140 | T acos (T x)
141 | {
142 |     using S = scalar_of_t<T>;
143 | 
144 |     using std::abs, std::sqrt;
145 | #if defined(XSIMD_HPP)
146 |     using xsimd::abs, xsimd::sqrt;
147 | #endif
148 | 
149 |     const auto abs_x = abs (x);
150 | 
151 |     const auto reflect = abs_x > (S) 0.5;
152 |     auto z0 = select (reflect, (S) 0.5 * ((S) 1 - abs_x), abs_x * abs_x);
153 | 
154 |     auto x2 = select (reflect, sqrt (z0), abs_x);
155 |     auto z1 = inv_trig_detail::acos_kernel<order> (z0);
156 | 
157 |     auto z2 = z1 * (z0 * x2) + x2;
158 |     auto res = select (reflect, (S) M_PI_2 - (z2 + z2), z2);
159 |     return (S) M_PI_2 - select (x > (S) 0, res, -res);
160 | }
161 | 
162 | /**
163 |  * Approximation of atan(x) using a polynomial approximation of arctan(x) on [0, 1],
164 |  * and arctan(x) = pi/2 - arctan(1/x) for x > 1.
165 |  */
166 | template <int order, typename T>
167 | T atan (T x)
168 | {
169 |     using S = scalar_of_t<T>;
170 | 
171 |     using std::abs, std::sqrt;
172 | #if defined(XSIMD_HPP)
173 |     using xsimd::abs, xsimd::sqrt;
174 | #endif
175 | 
176 |     const auto abs_x = abs (x);
177 |     const auto reflect = abs_x > (S) 1;
178 | 
179 |     const auto z = select (reflect, (S) 1 / abs_x, abs_x);
180 |     const auto atan_01 = inv_trig_detail::atan_kernel<order> (z);
181 | 
182 |     const auto res = select (reflect, (S) M_PI_2 - atan_01, atan_01);
183 |     return select (x > (S) 0, res, -res);
184 | }
185 | } // namespace math_approx
186 | 


--------------------------------------------------------------------------------
/include/math_approx/src/log_approx.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "basic_math.hpp"
  4 | #include "pow_approx.hpp"
  5 | 
  6 | namespace math_approx
  7 | {
  8 | namespace log_detail
  9 | {
    /**
     * Default provider of the log2 polynomial used by the log() overloads,
     * which receive it through their Log2ProviderType template parameter and
     * call Log2ProviderType::log2_approx<T, order, C1_continuous>().
     */
    struct Log2Provider
    {
        // for polynomial derivations, see notebooks/log_approx.nb

        /**
         * approximation for log2(x), optimized on the range [1, 2]
         *
         * Evaluates a polynomial of degree `order` (3 to 6, enforced by the
         * static_assert). Coefficients are grouped in pairs: a local named
         * x_a_b holds (c_a + c_b * x), and the pairs are combined through
         * powers of x_sq = x * x.
         *
         * C1_continuous selects a second, separately tabulated coefficient set.
         * NOTE(review): presumably that set is fitted so the first derivative
         * is continuous where neighbouring octaves meet — confirm against the
         * notebook.
         */
        template <typename T, int order, bool C1_continuous>
        static constexpr T log2_approx (T x)
        {
            static_assert (order >= 3 && order <= 6);
            using S = scalar_of_t<T>;

            const auto x_sq = x * x;
            if constexpr (C1_continuous)
            {
                if constexpr (order == 3)
                {
                    const auto x_2_3 = (S) -1.09886528622 + (S) 0.164042561333 * x;
                    const auto x_0_1 = (S) -2.21347520444 + (S) 3.14829792933 * x;
                    return x_0_1 + x_2_3 * x_sq;
                }
                else if constexpr (order == 4)
                {
                    const auto x_3_4 = (S) 0.671618567027 + (S) -0.0845960009489 * x;
                    const auto x_1_2 = (S) 4.16344994072 + (S) -2.19861329856 * x;
                    const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
                    return (S) -2.55185920824 + x_1_2_3_4 * x;
                }
                else if constexpr (order == 5)
                {
                    const auto x_4_5 = (S) -0.432338320780 + (S) 0.0464481811023 * x;
                    const auto x_2_3 = (S) -3.65368350361 + (S) 1.68976432066 * x;
                    const auto x_0_1 = (S) -2.82807214111 + (S) 5.17788146374 * x;
                    const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
                    return x_0_1 + x_2_3_4_5 * x_sq;
                }
                else if constexpr (order == 6)
                {
                    const auto x_5_6 = (S) 0.284794437502 + (S) -0.0265448504094 * x;
                    const auto x_3_4 = (S) 3.38542517475 + (S) -1.31007090775 * x;
                    const auto x_1_2 = (S) 6.19242937536 + (S) -5.46521465640 * x;
                    const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
                    const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
                    return (S) -3.06081857306 + x_1_2_3_4_5_6 * x;
                }
                else
                {
                    // every order accepted by the static_assert is handled above
                    return {};
                }
            }
            else
            {
                if constexpr (order == 3)
                {
                    const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x;
                    const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x;
                    return x_0_1 + x_2_3 * x_sq;
                }
                else if constexpr (order == 4)
                {
                    const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x;
                    const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x;
                    const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
                    return (S) -2.51982743265 + x_1_2_3_4 * x;
                }
                else if constexpr (order == 5)
                {
                    const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x;
                    const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x;
                    const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x;
                    const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
                    return x_0_1 + x_2_3_4_5 * x_sq;
                }
                else if constexpr (order == 6)
                {
                    const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x;
                    const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x;
                    const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x;
                    const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
                    const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
                    return (S) -3.04376925958 + x_1_2_3_4_5_6 * x;
                }
                else
                {
                    // every order accepted by the static_assert is handled above
                    return {};
                }
            }
        }
    };
 98 | }
 99 | 
100 | #if defined(__GNUC__)
101 | #pragma GCC diagnostic push
102 | #pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning
103 | #pragma GCC diagnostic ignored "-Wuninitialized"
104 | #endif
105 | 
106 | /** approximation for log(Base, x) (32-bit) */
107 | template <typename Base, int order, bool C1_continuous, typename Log2ProviderType = log_detail::Log2Provider>
108 | constexpr float log (float x)
109 | {
110 |     const auto vi = bit_cast<int32_t> (x);
111 |     const auto ex = vi & 0x7f800000;
112 |     const auto e = (ex >> 23) - 127;
113 |     const auto vfi = (vi - ex) | 0x3f800000;
114 |     const auto vf = bit_cast<float> (vfi);
115 | 
116 |     constexpr auto log2_base_r = 1.0f / Base::log2_base;
117 |     return log2_base_r * ((float) e + Log2ProviderType::template log2_approx<float, order, C1_continuous> (vf));
118 | }
119 | 
120 | /** approximation for log(x) (64-bit) */
121 | template <typename Base, int order, bool C1_continuous, typename Log2ProviderType = log_detail::Log2Provider>
122 | constexpr double log (double x)
123 | {
124 |     const auto vi = bit_cast<int64_t> (x);
125 |     const auto ex = vi & 0x7ff0000000000000;
126 |     const auto e = (ex >> 52) - 1023;
127 |     const auto vfi = (vi - ex) | 0x3ff0000000000000;
128 |     const auto vf = bit_cast<double> (vfi);
129 | 
130 |     constexpr auto log2_base_r = 1.0 / Base::log2_base;
131 |     return log2_base_r * ((double) e + Log2ProviderType::template log2_approx<double, order, C1_continuous> (vf));
132 | }
133 | 
134 | #if defined(XSIMD_HPP)
135 | /** approximation for pow(Base, x) (32-bit SIMD) */
136 | template <typename Base, int order, bool C1_continuous, typename Log2ProviderType = log_detail::Log2Provider>
137 | xsimd::batch<float> log (xsimd::batch<float> x)
138 | {
139 |     const auto vi = xsimd::bit_cast<xsimd::batch<int32_t>> (x);
140 |     const auto ex = vi & 0x7f800000;
141 |     const auto e = (ex >> 23) - 127;
142 |     const auto vfi = (vi - ex) | 0x3f800000;
143 |     const auto vf = xsimd::bit_cast<xsimd::batch<float>> (vfi);
144 | 
145 |     static constexpr auto log2_base_r = 1.0f / Base::log2_base;
146 |     return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx<xsimd::batch<float>, order, C1_continuous> (vf));
147 | }
148 | 
149 | /** approximation for pow(Base, x) (64-bit SIMD) */
150 | template <typename Base, int order, bool C1_continuous, typename Log2ProviderType = log_detail::Log2Provider>
151 | xsimd::batch<double> log (xsimd::batch<double> x)
152 | {
153 |     const auto vi = xsimd::bit_cast<xsimd::batch<int64_t>> (x);
154 |     const auto ex = vi & 0x7ff0000000000000;
155 |     const auto e = (ex >> 52) - 1023;
156 |     const auto vfi = (vi - ex) | 0x3ff0000000000000;
157 |     const auto vf = xsimd::bit_cast<xsimd::batch<double>> (vfi);
158 | 
159 |     static constexpr auto log2_base_r = 1.0 / Base::log2_base;
160 |     return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx<xsimd::batch<double>, order, C1_continuous> (vf));
161 | }
162 | #endif
163 | 
164 | #if defined(__GNUC__)
165 | #pragma GCC diagnostic pop // end ignore strict-aliasing warnings
166 | #endif
167 | 
168 | /**
169 |  * Approximation of log(x), using
170 |  * log(x) = (1 / log2(e)) * (Exponent(x) + log2(1 + Mantissa(x))
171 |  */
172 | template <int order, bool C1_continuous = false, typename T>
173 | constexpr T log (T x)
174 | {
175 |     return log<pow_detail::BaseE<scalar_of_t<T>>, order, C1_continuous> (x);
176 | }
177 | 
178 | /**
179 |  * Approximation of log2(x), using
180 |  * log2(x) = Exponent(x) + log2(1 + Mantissa(x)
181 |  */
182 | template <int order, bool C1_continuous = false, typename T>
183 | constexpr T log2 (T x)
184 | {
185 |     return log<pow_detail::Base2<scalar_of_t<T>>, order, C1_continuous> (x);
186 | }
187 | 
188 | /**
189 |  * Approximation of log10(x), using
190 |  * log10(x) = (1 / log2(10)) * (Exponent(x) + log2(1 + Mantissa(x))
191 |  */
192 | template <int order, bool C1_continuous = false, typename T>
193 | constexpr T log10 (T x)
194 | {
195 |     return log<pow_detail::Base10<scalar_of_t<T>>, order, C1_continuous> (x);
196 | }
197 | 
198 | /** Approximation of log(1 + x), using math_approx::log(x) */
199 | template <int order, bool C1_continuous = false, typename T>
200 | constexpr T log1p (T x)
201 | {
202 |     return log<pow_detail::BaseE<scalar_of_t<T>>, order, C1_continuous> ((T) 1 + x);
203 | }
204 | }
205 | 


--------------------------------------------------------------------------------
/include/math_approx/src/polylogarithm_approx.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "basic_math.hpp"
  4 | 
  5 | namespace math_approx
  6 | {
/**
 * Approximation of the "dilogarithm" function for inputs
 * in the range [0, 1/2]. This method does not do any
 * bounds-checking.
 *
 * The result is computed as x * N(x) / D(x), where N and D are
 * polynomials whose degrees depend on `order` (1 to 6, enforced by
 * the static_assert). A local named n_a_b or d_a_b holds the terms
 * of degree a..b of the numerator or denominator respectively.
 *
 * Orders higher than 3 are generally not recommended for
 * single-precision floating-point types, since they don't
 * improve the accuracy very much.
 *
 * For derivations, see notebooks/li2_approx.nb
 */
template <int order, typename T>
constexpr T li2_0_half (T x)
{
    static_assert (order >= 1 && order <= 6);
    using S = scalar_of_t<T>;

    if constexpr (order == 1)
    {
        const auto n_0 = (S) 0.996460629617;
        const auto d_0_1 = (S) 1 + (S) -0.288575624121 * x;
        return x * n_0 / d_0_1;
    }
    else if constexpr (order == 2)
    {
        const auto n_0_1 = (S) 0.999994847641 + (S) -0.546961998015 * x;
        const auto d_1_2 = (S) -0.797206910618 + (S) 0.0899936224040 * x;
        const auto d_0_1_2 = (S) 1 + d_1_2 * x;
        return x * n_0_1 / d_0_1_2;
    }
    else if constexpr (order == 3)
    {
        const auto x_sq = x * x;
        const auto n_0_2 = (S) 0.999999991192 + (S) 0.231155739205 * x_sq;
        const auto n_0_1_2 = n_0_2 + (S) -1.07612533343 * x;
        const auto d_2_3 = (S) 0.451592861555 + (S) -0.0281544399023 * x;
        const auto d_0_1 = (S) 1 + (S) -1.32612627824 * x;
        const auto d_0_1_2_3 = d_0_1 + d_2_3 * x_sq;
        return x * n_0_1_2 / d_0_1_2_3;
    }
    else if constexpr (order == 4)
    {
        const auto x_sq = x * x;
        const auto n_2_3 = (S) 0.74425269014090502911555775982556365472 + (S) -0.08749607277005140673532964399704145939 * x;
        const auto n_0_1 = (S) 0.99999999998544094594795118478024862055 + (S) -1.6098648159028159794757437744309391591 * x;
        const auto n_0_1_2_3 = n_0_1 + n_2_3 * x_sq;
        const auto d_3_4 = (S) -0.21787247785577362691148412819704459614 + (S) 0.00870385570778120787932426702624346169 * x;
        const auto d_1_2 = (S) -1.85986481869406218896935179306183665107 + (S) 1.09810787318601772062220747277929300408 * x;
        const auto d_1_2_3_4 = d_1_2 + d_3_4 * x_sq;
        const auto d_0_1_2_3_4 = (S) 1 + d_1_2_3_4 * x;
        return x * n_0_1_2_3 / d_0_1_2_3_4;
    }
    else if constexpr (order == 5)
    {
        const auto x_sq = x * x;

        const auto n_3_4 = (S) -0.41945653857264507277532555842378439927 + (S) 0.03140351694981020435408321943912212079 * x;
        const auto n_1_2 = (S) -2.14843104749890205674150618938194330623 + (S) 1.54956546570292751217524363072830456069 * x;
        const auto n_1_2_3_4 = n_1_2 + n_3_4 * x_sq;
        const auto n_0_1_2_3_4 = (S) 0.99999999999997312289180148636206726177 + n_1_2_3_4 * x;

        const auto d_4_5 = (S) 0.09609912057603552016206051904306797162 + (S) -0.00269129500193871901659324657805482418 * x;
        const auto d_2_3 = (S) 2.03806211686824385201410542913121040892 + (S) -0.72497973694183708484311198715866984035 * x;
        const auto d_0_1 = (S) 1 + (S) -2.398431047506893407956406025441134862 * x;
        const auto d_2_3_4_5 = d_2_3 + d_4_5 * x_sq;
        const auto d_0_1_2_3_4_5 = d_0_1 + d_2_3_4_5 * x_sq;

        return x * n_0_1_2_3_4 / d_0_1_2_3_4_5;
    }
    else if constexpr (order == 6)
    {
        const auto x_sq = x * x;

        const auto n_4_5 = (S) 0.20885966267164674441979654645138181067 + (S) -0.01085968986663512120143497781484214416 * x;
        const auto n_2_3 = (S) 2.64771686149306717256638234054408732899 + (S) -1.15385196641292513334184445301529897694 * x;
        const auto n_0_1 = (S) 0.99999999999999995022522902211061062582 + (S) -2.6883902117841251600624689886592808124 * x;
        const auto n_2_3_4_5 = n_2_3 + n_4_5 * x_sq;
        const auto n_0_1_2_3_4_5 = n_0_1 + n_2_3_4_5 * x_sq;

        const auto d_5_6 = (S) -0.03980108270103465616851961097089502921 + (S) 0.00082742905522813187941384917520432493 * x;
        const auto d_3_4 = (S) -1.70766499097900947314107956633154245176 + (S) 0.41595826557420951684124942212799147948 * x;
        const auto d_1_2 = (S) -2.93839021178414636324893816529360171731 + (S) 3.27120330332951521662427278605230451458 * x;
        const auto d_3_4_5_6 = d_3_4 + d_5_6 * x_sq;
        const auto d_0_1_2 = (S) 1 + d_1_2 * x;
        // d_3_4_5_6 carries the terms of degree 3..6, hence the extra factor x^3
        const auto d_0_1_2_3_4_5_6 = d_0_1_2 + d_3_4_5_6 * x_sq * x;

        return x * n_0_1_2_3_4_5 / d_0_1_2_3_4_5_6;
    }
    else
    {
        // every order accepted by the static_assert is handled above
        return {};
    }
}
100 | 
101 | /**
102 |  * Approximation of the "dilogarithm" function for all inputs.
103 |  *
104 |  * Orders higher than 3 are generally not recommended for
105 |  * single-precision floating-point types, since they don't
106 |  * improve the accuracy very much.
107 |  */
108 | template <int order, int log_order = std::min (order + 2, 6), bool log_C1 = (log_order >= 5), typename T>
109 | constexpr T li2 (T x)
110 | {
111 |     const auto x_r = (T) 1 / x;
112 |     const auto x_r1 = (T) 1 / (x - (T) 1);
113 | 
114 |     constexpr auto pisq_o_6 = (T) M_PI * (T) M_PI / (T) 6;
115 |     constexpr auto pisq_o_3 = (T) M_PI * (T) M_PI / (T) 3;
116 | 
117 |     T y, r;
118 |     bool sign = true;
119 |     if (x < (T) -1)
120 |     {
121 |         y = -x_r1;
122 |         const auto l = log<log_order, log_C1> ((T) 1 - x);
123 |         r = -pisq_o_6 + l * ((T) 0.5 * l - log<log_order, log_C1> (-x));
124 |     }
125 |     else if (x < (T) 0)
126 |     {
127 |         y = x * x_r1;
128 |         const auto l = log<log_order, log_C1> ((T) 1 - x);
129 |         r = (T) -0.5 * l * l;
130 |         sign = false;
131 |     }
132 |     else if (x < (T) 0.5)
133 |     {
134 |         y = x;
135 |         r = {};
136 |     }
137 |     else if (x < (T) 1)
138 |     {
139 |         y = (T) 1 - x;
140 |         r = pisq_o_6 - log<log_order, log_C1> (x) * log<log_order, log_C1> (y);
141 |         sign = false;
142 |     }
143 |     else if (x < (T) 2)
144 |     {
145 |         y = (T) 1 - x_r;
146 |         const auto l = log<log_order, log_C1> (x);
147 |         r = pisq_o_6 - l * (log<log_order, log_C1> (y) + (T) 0.5 * l);
148 |     }
149 |     else
150 |     {
151 |         y = x_r;
152 |         const auto l = log<log_order, log_C1> (x);
153 |         r = pisq_o_3 - (T) 0.5 * l * l;
154 |         sign = false;
155 |     }
156 | 
157 |     const auto li2_reduce = li2_0_half<order> (y);
158 |     return r + select (sign, li2_reduce, -li2_reduce);
159 | }
160 | 
161 | #if defined(XSIMD_HPP)
/**
 * Approximation of the "dilogarithm" function for all inputs.
 *
 * SIMD version: every range-specific candidate (reduced argument y,
 * remainder r and sign) is computed for all lanes and the one matching
 * each lane's range is picked with select(), so no branching is needed.
 * Only two log() evaluations are performed; their arguments are chosen
 * per lane (see the table at the top of the function body).
 *
 * Orders higher than 3 are generally not recommended for
 * single-precision floating-point types, since they don't
 * improve the accuracy very much.
 */
template <int order, int log_order = std::min (order + 2, 6), bool log_C1 = (log_order >= 5), typename T>
xsimd::batch<T> li2 (const xsimd::batch<T>& x)
{
    // For each input range, the first entry describes the argument of log1
    // and the second entry the argument of log2 (NOP: the log is not used
    // for that range).
    // x < -1:
    // - log(-x) -> [1, inf]
    // - log(1-x) -> [2, inf]
    // x < 0:
    // - NOP
    // - log(1-x) -> [1, 2]
    // x < 1/2:
    // - NOP
    // - NOP
    // x < 1:
    // - log(x) -> [1/2, 1]
    // - log(1-x) -> [0, 1/2]
    // x < 2:
    // - log(x) -> [1, 2]
    // - log(1-1/x) -> [0, 1/2]
    // x >= 2:
    // - log(x) -> [2, inf]
    // - NOP

    const auto x_r = (T) 1 / x;
    const auto x_r1 = (T) 1 / (x - (T) 1);
    // lanes in [-1, 1/2) get the placeholder argument 1 for log1
    const auto log_arg1 = select (x < (T) -1, -x, select (x < (T) 0.5, xsimd::broadcast ((T) 1), x));
    const auto log_arg2 = select (x < (T) 1, (T) 1 - x, (T) 1 - x_r);

    const auto log1 = log<log_order, log_C1> (log_arg1);
    const auto log2 = log<log_order, log_C1> (log_arg2);

    // clang-format off
    // reduced argument passed to li2_0_half(), chosen per lane
    const auto y = select (x < (T) -1, (T) -1 * x_r1,
                   select (x < (T) 0, x * x_r1,
                   select (x < (T) 0.5, x,
                   select (x < (T) 1, (T) 1 - x,
                   select (x < (T) 2, (T) 1 - x_r,
                       x_r)))));
    // true for the lanes where the reduced dilogarithm is added rather than subtracted
    const auto sign = x < (T) -1 || (x >= (T) 0 && x < (T) 0.5) || (x >= (T) 1 && x < (T) 2);

    static constexpr auto pisq_o_6 = (T) M_PI * (T) M_PI / (T) 6;
    static constexpr auto pisq_o_3 = (T) M_PI * (T) M_PI / (T) 3;
    const auto log1_log2 = log1 * log2;
    const auto half_log1_sq = (T) 0.5 * log1 * log1;
    const auto half_log2_sq = (T) 0.5 * log2 * log2;
    // remainder term of the identity that applies to each lane
    const auto r = select (x < (T) -1, -pisq_o_6 + half_log2_sq - log1_log2,
                   select (x < (T) 0, -half_log2_sq,
                   select (x < (T) 0.5, xsimd::broadcast ((T) 0),
                   select (x < (T) 1, pisq_o_6 - log1_log2,
                   select (x < (T) 2, pisq_o_6 - log1_log2 - half_log1_sq,
                       pisq_o_3 - half_log1_sq)))));
    // clang-format on

    const auto li2_reduce = li2_0_half<order> (y);
    return r + select (sign, li2_reduce, -li2_reduce);
}
224 | #endif
225 | } // namespace math_approx
226 | 


--------------------------------------------------------------------------------
/include/math_approx/src/pow_approx.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "basic_math.hpp"
  4 | 
  5 | namespace math_approx
  6 | {
  7 | namespace pow_detail
  8 | {
  9 |     // for polynomial derivations, see notebooks/exp_approx.nb
 10 | 
    /**
     * approximation for 2^x, optimized on the range [0, 1]
     *
     * Evaluates a polynomial of degree `order` (3 to 7, enforced by the
     * static_assert). Coefficients are grouped in pairs: a local named
     * x_a_b holds (c_a + c_b * x), and the pairs are combined through
     * powers of x_sq = x * x.
     *
     * C1_continuous selects a second, separately tabulated coefficient set.
     * NOTE(review): presumably that set is fitted so the first derivative is
     * continuous where neighbouring unit intervals meet — confirm against the
     * notebook.
     */
    template <typename T, int order, bool C1_continuous>
    constexpr T pow2_approx (T x)
    {
        static_assert (order >= 3 && order <= 7);
        using S = scalar_of_t<T>;

        const auto x_sq = x * x;
        if constexpr (C1_continuous)
        {
            if constexpr (order == 3)
            {
                const auto x_2_3 = (S) 0.227411277760 + (S) 0.0794415416798 * x;
                const auto x_0_1 = (S) 1 + (S) 0.693147180560 * x;
                return x_0_1 + x_2_3 * x_sq;
            }
            else if constexpr (order == 4)
            {
                const auto x_3_4 = (S) 0.0521277476109 + (S) 0.0136568970345 * x;
                const auto x_1_2 = (S) 0.693147180560 + (S) 0.241068174795 * x;
                const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
                return (S) 1 + x_1_2_3_4 * x;
            }
            else if constexpr (order == 5)
            {
                const auto x_4_5 = (S) 0.00899838527231 + (S) 0.00188723482038 * x;
                const auto x_2_3 = (S) 0.240184132673 + (S) 0.0557830666741 * x;
                const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
                const auto x_0_1 = (S) 1 + (S) 0.693147180560 * x;
                return x_0_1 + x_2_3_4_5 * x_sq;
            }
            else if constexpr (order == 6)
            {
                const auto x_5_6 = (S) 0.00124453797252 + (S) 0.000217714753229 * x;
                const auto x_3_4 = (S) 0.0554875633068 + (S) 0.00967475272129 * x;
                const auto x_1_2 = (S) 0.693147180560 + (S) 0.240228250686 * x;
                const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
                const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
                return (S) 1 + x_1_2_3_4_5_6 * x;
            }
            else if constexpr (order == 7)
            {
                // doesn't seem to help at single-precision
                const auto x_6_7 = (S) 0.000133154170702612 + (S) 0.0000245778949916153 * x;
                const auto x_4_5 = (S) 0.00960612128901630 + (S) 0.00135551454943593 * x;
                const auto x_2_3 = (S) 0.240226202240181 + (S) 0.0555072492957270 * x;
                const auto x_0_1 = (S) 1 + (S) 0.693147180559945 * x;
                const auto x_4_5_6_7 = x_4_5 + x_6_7 * x_sq;
                const auto x_0_1_2_3 = x_0_1 + x_2_3 * x_sq;
                return x_0_1_2_3 + x_4_5_6_7 * x_sq * x_sq;
            }
            else
            {
                // every order accepted by the static_assert is handled above
                return {};
            }
        }
        else
        {
            if constexpr (order == 3)
            {
                const auto x_2_3 = (S) 0.226307586882 + (S) 0.0782680256330 * x;
                const auto x_0_1 = (S) 1 + (S) 0.695424387485 * x;
                return x_0_1 + x_2_3 * x_sq;
            }
            else if constexpr (order == 4)
            {
                const auto x_3_4 = (S) 0.0520324008177 + (S) 0.0135557244044 * x;
                const auto x_1_2 = (S) 0.693032120001 + (S) 0.241379754777 * x;
                const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
                return (S) 1 + x_1_2_3_4 * x;
            }
            else if constexpr (order == 5)
            {
                const auto x_4_5 = (S) 0.00899009909264 + (S) 0.00187839071291 * x;
                const auto x_2_3 = (S) 0.240156326598 + (S) 0.0558229130202 * x;
                const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
                const auto x_0_1 = (S) 1 + (S) 0.693152270576 * x;
                return x_0_1 + x_2_3_4_5 * x_sq;
            }
            else if constexpr (order == 6)
            {
                const auto x_5_6 = (S) 0.00124359387839 + (S) 0.000217187820427 * x;
                const auto x_3_4 = (S) 0.0554833098983 + (S) 0.00967911763840 * x;
                const auto x_1_2 = (S) 0.693147003658 + (S) 0.240229787107 * x;
                const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
                const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
                return (S) 1 + x_1_2_3_4_5_6 * x;
            }
            else if constexpr (order == 7)
            {
                // doesn't seem to help at single-precision
                const auto x_6_7 = (S) 0.000136898688977877 + (S) 0.0000234440812713967 * x;
                const auto x_4_5 = (S) 0.00960825566419915 + (S) 0.00135107295099880 * x;
                const auto x_2_3 = (S) 0.240226092549669 + (S) 0.0555070350342468 * x;
                const auto x_0_1 = (S) 1 + (S) 0.693147201030637 * x;
                const auto x_4_5_6_7 = x_4_5 + x_6_7 * x_sq;
                const auto x_0_1_2_3 = x_0_1 + x_2_3 * x_sq;
                return x_0_1_2_3 + x_4_5_6_7 * x_sq * x_sq;
            }
            else
            {
                // every order accepted by the static_assert is handled above
                return {};
            }
        }
    }
116 | 
117 |     template <typename T>
118 |     struct BaseE
119 |     {
120 |         static constexpr auto log2_base = (T) 1.4426950408889634074;
121 |     };
122 | 
123 |     template <typename T>
124 |     struct Base2
125 |     {
126 |         static constexpr auto log2_base = (T) 1;
127 |     };
128 | 
129 |     template <typename T>
130 |     struct Base10
131 |     {
132 |         static constexpr auto log2_base = (T) 3.3219280948873623479;
133 |     };
134 | }
135 | 
136 | #if defined(__GNUC__)
137 | #pragma GCC diagnostic push
138 | #pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning
139 | #pragma GCC diagnostic ignored "-Wuninitialized"
140 | #endif
141 | 
142 | /** approximation for pow(Base, x) (32-bit) */
143 | template <typename Base, int order, bool C1_continuous>
144 | constexpr float pow (float x)
145 | {
146 |     x = std::max (-126.0f, Base::log2_base * x);
147 | 
148 |     const auto xi = (int32_t) x;
149 |     const auto l = x < 0.0f ? xi - 1 : xi;
150 |     const auto f = x - (float) l;
151 |     const auto vi = (l + 127) << 23;
152 | 
153 |     return bit_cast<float> (vi) * pow_detail::pow2_approx<float, order, C1_continuous> (f);
154 | }
155 | 
156 | /** approximation for pow(Base, x) (64-bit) */
157 | template <typename Base, int order, bool C1_continuous>
158 | constexpr double pow (double x)
159 | {
160 |     x = std::max (-1022.0, Base::log2_base * x);
161 | 
162 |     const auto xi = (int64_t) x;
163 |     const auto l = x < 0.0 ? xi - 1 : xi;
164 |     const auto d = x - (double) l;
165 |     const auto vi = (l + 1023) << 52;
166 | 
167 |     return bit_cast<double> (vi) * pow_detail::pow2_approx<double, order, C1_continuous> (d);
168 | }
169 | 
170 | #if defined(XSIMD_HPP)
171 | /** approximation for pow(Base, x) (32-bit SIMD) */
172 | template <typename Base, int order, bool C1_continuous>
173 | xsimd::batch<float> pow (xsimd::batch<float> x)
174 | {
175 |     x = xsimd::max (xsimd::broadcast (-126.0f), Base::log2_base * x);
176 | 
177 |     const auto xi = xsimd::to_int (x);
178 |     const auto l = xsimd::select (xsimd::batch_bool_cast<int32_t> (x < 0.0f), xi - 1, xi);
179 |     const auto f = x - xsimd::to_float (l);
180 |     const auto vi = (l + 127) << 23;
181 | 
182 |     return xsimd::bit_cast<xsimd::batch<float>> (vi) * pow_detail::pow2_approx<xsimd::batch<float>, order, C1_continuous> (f);
183 | }
184 | 
185 | /** approximation for pow(Base, x) (64-bit SIMD) */
186 | template <typename Base, int order, bool C1_continuous>
187 | xsimd::batch<double> pow (xsimd::batch<double> x)
188 | {
189 |     x = xsimd::max (-1022.0, Base::log2_base * x);
190 | 
191 |     const auto xi = xsimd::to_int (x);
192 |     const auto l = xsimd::select (xsimd::batch_bool_cast<int64_t> (x < 0.0), xi - 1, xi);
193 |     const auto d = x - xsimd::to_float (l);
194 |     const auto vi = (l + 1023) << 52;
195 | 
196 |     return xsimd::bit_cast<xsimd::batch<double>> (vi) * pow_detail::pow2_approx<xsimd::batch<double>, order, C1_continuous> (d);
197 | }
198 | #endif
199 | 
200 | #if defined(__GNUC__)
201 | #pragma GCC diagnostic pop // end ignore strict-aliasing warnings
202 | #endif
203 | 
204 | /** Approximation of exp(x), using exp(x) = 2^floor(x * log2(e)) * 2^frac(x * log2(e)) */
205 | template <int order, bool C1_continuous = false, typename T>
206 | constexpr T exp (T x)
207 | {
208 |     return pow<pow_detail::BaseE<scalar_of_t<T>>, order, C1_continuous> (x);
209 | }
210 | 
211 | /** Approximation of exp2(x), using exp(x) = 2^floor(x) * 2^frac(x) */
212 | template <int order, bool C1_continuous = false, typename T>
213 | constexpr T exp2 (T x)
214 | {
215 |     return pow<pow_detail::Base2<scalar_of_t<T>>, order, C1_continuous> (x);
216 | }
217 | 
218 | /** Approximation of exp(x), using exp10(x) = 2^floor(x * log2(10)) * 2^frac(x * log2(10)) */
219 | template <int order, bool C1_continuous = false, typename T>
220 | constexpr T exp10 (T x)
221 | {
222 |     return pow<pow_detail::Base10<scalar_of_t<T>>, order, C1_continuous> (x);
223 | }
224 | 
225 | /** Approximation of exp(1) - 1, using math_approx::exp(x) */
226 | template <int order, bool C1_continuous = false, typename T>
227 | constexpr T expm1 (T x)
228 | {
229 |     return pow<pow_detail::BaseE<scalar_of_t<T>>, order, C1_continuous> (x) - (T) 1;
230 | }
231 | }
232 | 


--------------------------------------------------------------------------------
/include/math_approx/src/sigmoid_approx.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "basic_math.hpp"
 4 | 
 5 | namespace math_approx
 6 | {
 7 | namespace sigmoid_detail
 8 | {
 9 |     // for polynomial derivations, see notebooks/sigmoid_approx.nb
10 | 
11 |     template <typename T>
12 |     constexpr T sig_poly_9 (T x)
13 |     {
14 |         using S = scalar_of_t<T>;
15 |         const auto x_sq = x * x;
16 |         const auto y_7_9 = (S) 1.50024356624e-6 + (S) 6.92468584642e-9 * x_sq;
17 |         const auto y_5_7_9 = (S) 0.000260923534301 + y_7_9 * x_sq;
18 |         const auto y_3_5_7_9 = (S) 0.0208320229264 + y_5_7_9 * x_sq;
19 |         const auto y_1_3_5_7_9 = (S) 0.5 + y_3_5_7_9 * x_sq;
20 |         return x * y_1_3_5_7_9;
21 |     }
22 | 
23 |     template <typename T>
24 |     constexpr T sig_poly_7 (T x)
25 |     {
26 |         using S = scalar_of_t<T>;
27 |         const auto x_sq = x * x;
28 |         const auto y_5_7 = (S) 0.000255174491559 + (S) 1.90805380557e-6 * x_sq;
29 |         const auto y_3_5_7 = (S) 0.0208503675870 + y_5_7 * x_sq;
30 |         const auto y_1_3_5_7 = (S) 0.5 + y_3_5_7 * x_sq;
31 |         return x * y_1_3_5_7;
32 |     }
33 | 
34 |     template <typename T>
35 |     constexpr T sig_poly_5 (T x)
36 |     {
37 |         using S = scalar_of_t<T>;
38 |         const auto x_sq = x * x;
39 |         const auto y_3_5 = (S) 0.0206108521251 + (S) 0.000307906311109 * x_sq;
40 |         const auto y_1_3_5 = (S) 0.5 + y_3_5 * x_sq;
41 |         return x * y_1_3_5;
42 |     }
43 | 
44 |     template <typename T>
45 |     constexpr T sig_poly_3 (T x)
46 |     {
47 |         using S = scalar_of_t<T>;
48 |         const auto x_sq = x * x;
49 |         const auto y_1_3 = (S) 0.5 + (S) 0.0233402955195 * x_sq;
50 |         return x * y_1_3;
51 |     }
52 | } // namespace sigmoid_detail
53 | 
54 | /**
55 |  * Approximation of sigmoid(x) := 1 / (1 + e^-x),
56 |  * using sigmoid(x) ≈ (1/2) p(x) / (p(x)^2 + 1) + (1/2),
57 |  * where p(x) is an odd polynomial fit to minimize the maxinimum relative error.
58 |  */
59 | template <int order, typename T>
60 | T sigmoid (T x)
61 | {
62 |     static_assert (order % 2 == 1 && order <= 9 && order >= 3, "Order must e an odd number within [3, 9]");
63 | 
64 |     T x_poly {};
65 |     if constexpr (order == 9)
66 |         x_poly = sigmoid_detail::sig_poly_9 (x);
67 |     else if constexpr (order == 7)
68 |         x_poly = sigmoid_detail::sig_poly_7 (x);
69 |     else if constexpr (order == 5)
70 |         x_poly = sigmoid_detail::sig_poly_5 (x);
71 |     else if constexpr (order == 3)
72 |         x_poly = sigmoid_detail::sig_poly_3 (x);
73 | 
74 |     using S = scalar_of_t<T>;
75 |     return (S) 0.5 * x_poly * rsqrt (x_poly * x_poly + (S) 1) + (S) 0.5;
76 | }
77 | 
78 | 
79 | /**
80 |  * Approximation of sigmoid(x) := 1 / (1 + e^-x),
81 |  * using math_approx::exp (x).
82 |  *
83 |  * So far this has tested slower than the above approximation
84 |  * for similar absolute error, but has better relative error
85 |  * characteristics.
86 |  */
87 | template <int order, bool C1_continuous = false, typename T>
88 | T sigmoid_exp (T x)
89 | {
90 |     return (T) 1 / ((T) 1 + math_approx::exp<order, C1_continuous> (-x));
91 | }
92 | } // namespace math_approx
93 | 


--------------------------------------------------------------------------------
/include/math_approx/src/trig_approx.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "basic_math.hpp"
  4 | 
  5 | namespace math_approx
  6 | {
  7 | namespace trig_detail
  8 | {
  9 |     template <typename T>
 10 |     constexpr T truncate (T x)
 11 |     {
 12 |         return static_cast<T> (static_cast<int> (x));
 13 |     }
 14 | 
 15 | #if defined(XSIMD_HPP)
 16 |     template <typename T>
 17 |     xsimd::batch<T> truncate (xsimd::batch<T> x)
 18 |     {
 19 |         return xsimd::to_float (xsimd::to_int (x));
 20 |     }
 21 | #endif
 22 | 
 23 |     /** Fast method to wrap a value into the range [-pi, pi] */
 24 |     template <typename T>
 25 |     constexpr T fast_mod_mpi_pi (T x)
 26 |     {
 27 |         using S = scalar_of_t<T>;
 28 |         constexpr auto pi = static_cast<S> (M_PI);
 29 |         constexpr auto two_pi = static_cast<S> (2.0 * M_PI);
 30 |         constexpr auto recip_two_pi = static_cast<S> (1) / two_pi;
 31 | 
 32 |         x += pi;
 33 |         const auto mod = x - two_pi * truncate (x * recip_two_pi);
 34 |         return select (x >= (T) 0, mod, mod + two_pi) - pi;
 35 |     }
 36 | 
 37 |     /** Fast method to wrap a value into the range [-pi/2, pi/2] */
 38 |     template <typename T>
 39 |     constexpr T fast_mod_mhalfpi_halfpi (T x)
 40 |     {
 41 |         using S = scalar_of_t<T>;
 42 |         constexpr auto half_pi = static_cast<S> (M_PI) * (S) 0.5;
 43 |         constexpr auto pi = static_cast<S> (M_PI);
 44 |         constexpr auto recip_pi = (S) 1 / pi;
 45 | 
 46 |         x += half_pi;
 47 |         const auto mod = x - pi * truncate (x * recip_pi);
 48 |         return select (x >= (T) 0, mod, mod + pi) - half_pi;
 49 |     }
 50 | 
 51 |     // Polynomials were derived using the method presented in
 52 |     // https://mooooo.ooo/chebyshev-sine-approximation/
 53 |     // and then adapted for various (odd) orders.
 54 | 
 55 |     template <typename T>
 56 |     constexpr T sin_poly_9 (T x, T x_sq)
 57 |     {
 58 |         using S = scalar_of_t<T>;
 59 |         const auto x_7_9 = (S) -2.49397084313e-6 + (S) 2.00382818811e-8 * x_sq;
 60 |         const auto x_5_7_9 = (S) 0.000173405228576 + x_7_9 * x_sq;
 61 |         const auto x_3_5_7_9 = (S) -0.00662075636230 + x_5_7_9 * x_sq;
 62 |         const auto x_1_3_5_7_9 = (S) 0.101321159036 + x_3_5_7_9 * x_sq;
 63 |         return x * x_1_3_5_7_9;
 64 |     }
 65 | 
 66 |     template <typename T>
 67 |     constexpr T sin_poly_7 (T x, T x_sq)
 68 |     {
 69 |         using S = scalar_of_t<T>;
 70 |         const auto x_5_7 = (S) 0.000170965340046 + (S) -2.09843101304e-6 * x_sq;
 71 |         const auto x_3_5_7 = (S) -0.00661594021539 + x_5_7 * x_sq;
 72 |         const auto x_1_3_5_7 = (S) 0.101319673615 + x_3_5_7 * x_sq;
 73 |         return x * x_1_3_5_7;
 74 |     }
 75 | 
 76 |     template <typename T>
 77 |     constexpr T sin_poly_5 (T x, T x_sq)
 78 |     {
 79 |         using S = scalar_of_t<T>;
 80 |         const auto x_3_5 = (S) -0.00650096169550 + (S) 0.000139899314103 * x_sq;
 81 |         const auto x_1_3_5 = (S) 0.101256629587 + x_3_5 * x_sq;
 82 |         return x * x_1_3_5;
 83 |     }
 84 | } // namespace trig_detail
 85 | 
 86 | /** Polynomial approximation of sin(x) on the range [-pi, pi] */
 87 | template <int order, typename T>
 88 | constexpr T sin_mpi_pi (T x)
 89 | {
 90 |     static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must be an odd number within [5, 9]");
 91 | 
 92 |     using S = scalar_of_t<T>;
 93 |     constexpr auto pi = static_cast<S> (M_PI);
 94 |     constexpr auto pi_sq = pi * pi;
 95 |     const auto x_sq = x * x;
 96 | 
 97 |     T x_poly {};
 98 |     if constexpr (order == 9)
 99 |         x_poly = trig_detail::sin_poly_9 (x, x_sq);
100 |     else if constexpr (order == 7)
101 |         x_poly = trig_detail::sin_poly_7 (x, x_sq);
102 |     else if constexpr (order == 5)
103 |         x_poly = trig_detail::sin_poly_5 (x, x_sq);
104 | 
105 |     return (pi_sq - x_sq) * x_poly;
106 | }
107 | 
108 | /** Full range approximation of sin(x) */
109 | template <int order, typename T>
110 | constexpr T sin (T x)
111 | {
112 |     return sin_mpi_pi<order, T> (trig_detail::fast_mod_mpi_pi (x));
113 | }
114 | 
115 | /**
116 |  * Polynomial approximation of cos(x) on the range [-pi, pi],
117 |  * using a range-shifted approximation of sin(x).
118 |  */
119 | template <int order, typename T>
120 | constexpr T cos_mpi_pi (T x)
121 | {
122 |     static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must be an odd number within [5, 9]");
123 | 
124 |     using S = scalar_of_t<T>;
125 |     constexpr auto pi = static_cast<S> (M_PI);
126 |     constexpr auto pi_sq = pi * pi;
127 |     constexpr auto pi_o_2 = pi * (S) 0.5;
128 | 
129 |     using std::abs;
130 | #if defined(XSIMD_HPP)
131 |     using xsimd::abs;
132 | #endif
133 |     x = abs (x);
134 | 
135 |     const auto hpmx = pi_o_2 - x;
136 |     const auto hpmx_sq = hpmx * hpmx;
137 | 
138 |     T x_poly {};
139 |     if constexpr (order == 9)
140 |         x_poly = trig_detail::sin_poly_9 (hpmx, hpmx_sq);
141 |     else if constexpr (order == 7)
142 |         x_poly = trig_detail::sin_poly_7 (hpmx, hpmx_sq);
143 |     else if constexpr (order == 5)
144 |         x_poly = trig_detail::sin_poly_5 (hpmx, hpmx_sq);
145 | 
146 |     return (pi_sq - hpmx_sq) * x_poly;
147 | }
148 | 
149 | /** Full range approximation of cos(x) */
150 | template <int order, typename T>
151 | constexpr T cos (T x)
152 | {
153 |     return cos_mpi_pi<order, T> (trig_detail::fast_mod_mpi_pi (x));
154 | }
155 | 
156 | /** Polynomial approximation of tan(x) on the range [-pi/4, pi/4] */
157 | template <int order, typename T>
158 | constexpr T tan_mquarterpi_quarterpi (T x)
159 | {
160 |     static_assert (order % 2 == 1 && order >= 3 && order <= 15, "Order must be an odd number within [3, 15]");
161 | 
162 |     // for polynomial derivation, see notebooks/tan_approx.nb
163 | 
164 |     using S = scalar_of_t<T>;
165 |     const auto x_sq = x * x;
166 |     if constexpr (order == 3)
167 |     {
168 |         const auto x_1_3 = (S) 1 + (S) 0.442959265447 * x_sq;
169 |         return x * x_1_3;
170 |     }
171 |     else if constexpr (order == 5)
172 |     {
173 |         const auto x_3_5 = (S) 0.317574684334 + (S) 0.203265826702 * x_sq;
174 |         const auto x_1_3_5 = (S) 1 + x_3_5 * x_sq;
175 |         return x * x_1_3_5;
176 |     }
177 |     else if constexpr (order == 7)
178 |     {
179 |         const auto x_5_7 = (S) 0.116406244996 + (S) 0.0944480566104 * x_sq;
180 |         const auto x_1_3 = (S) 1 + (S) 0.335216153138 * x_sq;
181 |         const auto x_1_3_5_7 = x_1_3 + x_5_7 * x_sq * x_sq;
182 |         return x * x_1_3_5_7;
183 |     }
184 |     else if constexpr (order == 9)
185 |     {
186 |         const auto x_7_9 = (S) 0.0405232529373 + (S) 0.0439292071029 * x_sq;
187 |         const auto x_3_5 = (S) 0.333131667276 + (S) 0.136333765649 * x_sq;
188 |         const auto x_3_5_7_9 = x_3_5 + x_7_9 * x_sq * x_sq;
189 |         return x * ((S) 1 + x_3_5_7_9 * x_sq);
190 |     }
191 |     else if constexpr (order == 11)
192 |     {
193 |         const auto x_q = x_sq * x_sq;
194 |         const auto x_9_11 = (S) 0.0126603694551 + (S) 0.0203633469693 * x_sq;
195 |         const auto x_5_7 = (S) 0.132897195017 + (S) 0.0570525279731 * x_sq;
196 |         const auto x_1_3 = (S) 1 + (S) 0.333353019629 * x_sq;
197 |         const auto x_5_7_9_11 = x_5_7 + x_9_11 * x_q;
198 |         const auto x_1_3_5_7_9_11 = x_1_3 + x_5_7_9_11 * x_q;
199 |         return x * x_1_3_5_7_9_11;
200 |     }
201 |     else if constexpr (order == 13)
202 |     {
203 |         const auto x_q = x_sq * x_sq;
204 |         const auto x_6 = x_q * x_sq;
205 |         const auto x_11_13 = (S) 0.00343732283737 + (S) 0.00921082294855 * x_sq;
206 |         const auto x_7_9 = (S) 0.0534743904687 + (S) 0.0242183751709 * x_sq;
207 |         const auto x_3_5 = (S) 0.333331890901 + (S) 0.133379954680 * x_sq;
208 |         const auto x_7_9_11_13 = x_7_9 + x_11_13 * x_q;
209 |         const auto x_1_3_5 = (S) 1 + x_3_5 * x_sq;
210 |         return x * (x_1_3_5 + x_7_9_11_13 * x_6);
211 |     }
212 |     else if constexpr (order == 15)
213 |     {
214 |         // doesn't seem to help much at single-precision, but here it is:
215 |         const auto x_q = x_sq * x_sq;
216 |         const auto x_8 = x_q * x_q;
217 |         const auto x_13_15 = (S) 0.000292958045126 + (S) 0.00427933470414 * x_sq;
218 |         const auto x_9_11 = (S) 0.0213477960960 + (S) 0.0106702896251 * x_sq;
219 |         const auto x_5_7 = (S) 0.133327796402 + (S) 0.0540469276103 * x_sq;
220 |         const auto x_1_3 = (S) 1 + (S) 0.333333463757 * x_sq;
221 |         const auto x_9_11_13_15 = x_9_11 + x_13_15 * x_q;
222 |         const auto x_1_3_5_7 = x_1_3 + x_5_7 * x_q;
223 |         const auto x_1_3_5_7_9_11_13_15 = x_1_3_5_7 + x_9_11_13_15 * x_8;
224 |         return x * x_1_3_5_7_9_11_13_15;
225 |     }
226 |     else
227 |     {
228 |         return {};
229 |     }
230 | }
231 | 
232 | /**
233 |  * Approximation of tan(x) on the range [-pi/2, pi/2],
234 |  * using the tangent half-angle formula.
235 |  *
236 |  * Accuracy may suffer as x approaches ±pi/2.
237 |  */
238 | template <int order, typename T>
239 | constexpr T tan_mhalfpi_halfpi (T x)
240 | {
241 |     using S = scalar_of_t<T>;
242 |     const auto h_x = tan_mquarterpi_quarterpi<order> ((S) 0.5 * x);
243 |     return (S) 2 * h_x / ((S) 1 - h_x * h_x);
244 | }
245 | 
246 | /**
247 |  * Full-range approximation of tan(x)
248 |  *
249 |  * Accuracy may suffer as x approaches values for which tan(x) approaches ±Inf.
250 |  */
251 | template <int order, typename T>
252 | constexpr T tan (T x)
253 | {
254 |     return tan_mhalfpi_halfpi<order> (trig_detail::fast_mod_mhalfpi_halfpi (x));
255 | }
256 | 
257 | //===============================================================================
258 | namespace trig_turns_detail
259 | {
260 |     using namespace trig_detail;
261 | 
262 |     /** Fast method to wrap a value into the range [-pi, pi] */
263 |     template <typename T>
264 |     constexpr T fast_mod_mhalf_half (T x)
265 |     {
266 |         if constexpr (std::is_same_v<T, float>)
267 |         {
268 | #if defined(__SSE4_1__) || defined(_MSC_VER)
269 |             auto y = _mm_round_ss (_mm_load_ps1 (&x), _mm_load_ps1 (&x), 12);
270 |             return x - reinterpret_cast<float&> (y);
271 | #else
272 |             using std::nearbyint;
273 | #if defined(XSIMD_HPP)
274 |             using xsimd::nearbyint;
275 | #endif
276 |             return x - nearbyint (x);
277 | #endif
278 |         }
279 |         else
280 |         {
281 |             using std::nearbyint;
282 | #if defined(XSIMD_HPP)
283 |             using xsimd::nearbyint;
284 | #endif
285 |             return x - nearbyint (x);
286 |         }
287 |     }
288 | } // namespace trig_turns_detail
289 | 
290 | /** Polynomial approximation of sin(2*pi*x) on the range [-pi/2, pi/2] */
291 | template <int order, typename T>
292 | constexpr T sin_turns_mhalfpi_halfpi (T x)
293 | {
294 |     static_assert (order % 2 == 1 && order <= 11 && order >= 5, "Order must be an odd number within [5, 11]");
295 | 
296 |     using S = scalar_of_t<T>;
297 |     const auto x_sq = x * x;
298 |     T y;
299 |     if constexpr (order == 11)
300 |     {
301 |         // -25.1327411554 x + 64.8358228565 x^3 - 67.0766273790 x^5 + 38.4958788775 x^7 - 14.0496638478 x^9 + 3.16160207407
302 |         const auto x_q = x_sq * x_sq;
303 |         const auto x_9_11 = (S) -14.0496638478f + (S) 3.16160207407f * x_sq;
304 |         const auto x_5_7 = (S) -67.0766273790f + (S) 38.4958788775f * x_sq;
305 |         const auto x_1_3 = (S) -25.1327411554f + (S) 64.8358228565f * x_sq;
306 |         const auto x_5_7_9_11 = x_5_7 + x_9_11 * x_q;
307 |         const auto x_1_3_5_7_9_11 = x_1_3 + x_5_7_9_11 * x_q;
308 |         y = x * x_1_3_5_7_9_11;
309 |     }
310 |     else if constexpr (order == 9)
311 |     {
312 |         const auto x_q = x_sq * x_sq;
313 |         const auto x_7_9 = (S) 38.0636285939f - (S) 12.0736625515f * x_sq;
314 |         const auto x_3_5 = (S) 64.8346168010f - (S) 67.0380336036f * x_sq;
315 |         const auto x_3_5_7_9 = x_3_5 + x_7_9 * x_q;
316 |         const auto x_1_3_5_7_9 = (S) -25.1327351251f + x_3_5_7_9 * x_sq;
317 |         y = x * x_1_3_5_7_9;
318 |     }
319 |     else if constexpr (order == 7)
320 |     {
321 |         // -25.1323666662 x + 64.7874540567 x^3 - 66.0947787168 x^5 + 32.0267973181 x^7
322 |         const auto x_q = x_sq * x_sq;
323 |         const auto x_5_7 = (S) -66.0947787168f + (S) 32.0267973181f * x_sq;
324 |         const auto x_1_3 = (S) -25.1323666662f + (S) 64.7874540567f * x_sq;
325 |         const auto x_1_3_5_7 = x_1_3 + x_5_7 * x_q;
326 |         y = x * x_1_3_5_7;
327 |     }
328 |     else if constexpr (order == 5)
329 |     {
330 |         // -25.1167285815 x + 63.6615119634 x^3 - 54.0847297225 x^5
331 |         const auto x_3_5 = (S) 63.6615119634f + (S) -54.0847297225f * x_sq;
332 |         const auto x_1_3_5 = (S) -25.1167285815f + x_3_5 * x_sq;
333 |         y = x * x_1_3_5;
334 |     }
335 | 
336 |     return y * (x + 0.5f) * (x - 0.5f);
337 |     // return y * (x_sq - 0.25f); // this costs us a lot of precision :(
338 | }
339 | 
340 | /**
341 |  * Full-range approximation of sin(2*pi*x)
342 |  */
343 | template <int order, typename T>
344 | constexpr T sin_turns (T x)
345 | {
346 |     return sin_turns_mhalfpi_halfpi<order> (trig_turns_detail::fast_mod_mhalf_half (x));
347 | }
348 | 
349 | /** Polynomial approximation of cos(2*pi*x) on the range [-pi/2, pi/2] */
350 | template <int order, typename T>
351 | constexpr T cos_turns_mhalfpi_halfpi (T x)
352 | {
353 |     using S = scalar_of_t<T>;
354 |     using std::abs;
355 | #if defined(XSIMD_HPP)
356 |     using xsimd::abs;
357 | #endif
358 |     return sin_turns_mhalfpi_halfpi<order> ((S) 0.25 - abs (x));
359 | }
360 | 
361 | /**
362 |  * Full-range approximation of cos(2*pi*x)
363 |  */
364 | template <int order, typename T>
365 | constexpr T cos_turns (T x)
366 | {
367 |     return cos_turns_mhalfpi_halfpi<order> (trig_turns_detail::fast_mod_mhalf_half (x));
368 | }
369 | } // namespace math_approx
370 | 


--------------------------------------------------------------------------------
/include/math_approx/src/wright_omega_approx.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "basic_math.hpp"
 4 | 
 5 | namespace math_approx
 6 | {
 7 | /**
 8 |  * Approximation of the Wright-Omega function, using
 9 |  * w(x) ≈ 0 for x < -3
10 |  * w(x) ≈ p(x) for -3 <= x < e
11 |  * w(x) ≈ x - log(x) + alpha * exp(-beta * x) for x >= e,
12 |  * where p(x) is a polynomial, and alpha and beta are coefficients,
13 |  * all fit to minimize the maximum absolute error.
14 |  *
15 |  * The above fit is optionally followed by some number of Newton-Raphson iterations.
16 |  */
17 | template <int num_nr_iters, int poly_order = 3, int log_order = (num_nr_iters <= 1 ? 3 : 4), int exp_order = log_order, typename T>
18 | constexpr T wright_omega (T x)
19 | {
20 |     static_assert (poly_order == 3 || poly_order == 5);
21 | 
22 |     using S = scalar_of_t<T>;
23 |     constexpr auto E = (S) 2.7182818284590452354;
24 | 
25 |     const auto x1 = [] (T _x)
26 |     {
27 |         const auto x_sq = _x * _x;
28 |         if constexpr (poly_order == 3)
29 |         {
30 |             const auto y_2_3 = (S) 0.0534379648805832 + (S) -0.00251076420630778 * _x;
31 |             const auto y_0_1 = (S) 0.616522951065868 + (S) 0.388418422853809 * _x;
32 |             return y_0_1 + y_2_3 * x_sq;
33 |         }
34 |         else if constexpr (poly_order == 5)
35 |         {
36 |             const auto y_4_5 = (S) -0.00156418794118294 + (S) -0.00151562297325209 * _x;
37 |             const auto y_2_3 = (S) 0.0719291313363515 + (S) 0.0216881206167543 * _x;
38 |             const auto y_0_1 = (S) 0.569291529016010 + (S) 0.290890537885083 * _x;
39 |             const auto y_2_3_4_5 = y_2_3 + y_4_5 * x_sq;
40 |             return y_0_1 + y_2_3_4_5 * x_sq;
41 |         }
42 |         else
43 |         {
44 |             return T {};
45 |         }
46 |     }(x);
47 |     const auto x2 = x - log<log_order> (x) + (S) 0.32352057096397160124 * exp<exp_order> ((S) -0.029614177658043381316 * x);
48 | 
49 |     auto y = select (x < (S) -3, T {}, select (x < (S) E, x1, x2));
50 | 
51 |     const auto nr_update = [] (T _x, T _y)
52 |     {
53 |         return _y - (_y - exp<exp_order> (_x - _y)) / (_y + (S) 1);
54 |     };
55 | 
56 |     for (int i = 0; i < num_nr_iters; ++i)
57 |         y = nr_update (x, y);
58 | 
59 |     return y;
60 | }
61 | 
62 | /**
63 |  * Wright-Omega function using Stephano D'Angelo's derivation (https://www.dafx.de/paper-archive/2019/DAFx2019_paper_5.pdf)
64 |  * With `num_nr_iters == 0`, this is the fastest implementation, but the least accurate.
65 |  * With `num_nr_iters == 1`, this is faster than the other implementation with 0 iterations, and little bit more accurate.
66 |  * For more accuracy, use the other implementation with at least 1 NR iteration.
67 |  */
68 | template <int num_nr_iters, int log_order = 3, int exp_order = log_order, typename T>
69 | constexpr T wright_omega_dangelo (T x)
70 | {
71 |     using S = scalar_of_t<T>;
72 | 
73 |     const auto x1 = [] (T _x)
74 |     {
75 |         const auto x_sq = _x * _x;
76 |         const auto y_2_3 = (S) 4.775931364975583e-2 + (S) -1.314293149877800e-3 * _x;
77 |         const auto y_0_1 = (S) 6.313183464296682e-1 + (S) 3.631952663804445e-1 * _x;
78 |         return y_0_1 + y_2_3 * x_sq;
79 |     }(x);
80 |     const auto x2 = x - log<log_order> (x);
81 | 
82 |     auto y = select (x < (S) -3.341459552768620, T {}, select (x < (S) 8, x1, x2));
83 | 
84 |     const auto nr_update = [] (T _x, T _y)
85 |     {
86 |         return _y - (_y - exp<exp_order> (_x - _y)) / (_y + (S) 1);
87 |     };
88 | 
89 |     for (int i = 0; i < num_nr_iters; ++i)
90 |         y = nr_update (x, y);
91 | 
92 |     return y;
93 | }
94 | } // namespace math_approx
95 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | message(STATUS "math_approx -- Configuring tests...")
 2 | 
 3 | CPMAddPackage("gh:catchorg/Catch2@3.8.1")
 4 | include(${Catch2_SOURCE_DIR}/extras/Catch.cmake)
 5 | 
 6 | function(setup_catch_test target)
 7 |     add_executable(${target})
 8 |     target_sources(${target} PRIVATE src/${target}.cpp)
 9 |     target_include_directories(${target} PRIVATE ${CMAKE_SOURCE_DIR}/tests/test_utils)
10 |     target_link_libraries(${target}
11 |         PRIVATE
12 |             Catch2::Catch2WithMain
13 |             math_approx
14 |     )
15 |     target_compile_features(${target} PUBLIC cxx_std_20)
16 |     target_compile_definitions(${target} PUBLIC _USE_MATH_DEFINES=1)
17 |      target_compile_options(${target} PRIVATE
18 |          $<$<CXX_COMPILER_ID:MSVC>:/W4 /WX>
19 |          $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic -Werror>
20 |      )
21 | 
22 |     add_custom_command(TARGET ${target}
23 |         POST_BUILD
24 |         WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
25 |         COMMAND ${CMAKE_COMMAND} -E echo "Copying $<TARGET_FILE:${target}> to test-binary"
26 |         COMMAND ${CMAKE_COMMAND} -E make_directory test-binary
27 |         COMMAND ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:${target}>" test-binary
28 |     )
29 | 
30 |     catch_discover_tests(${target} TEST_PREFIX ${target}_)
31 | endfunction(setup_catch_test)
32 | 
# Register one test executable per source file in src/.
foreach(test_target IN ITEMS
        trig_approx_test
        trig_turns_approx_test
        inverse_trig_approx_test
        pow_approx_test
        log_approx_test
        hyperbolic_trig_approx_test
        inverse_hyperbolic_trig_approx_test
        sigmoid_approx_test
        wright_omega_approx_test
        polylog_approx_test)
    setup_catch_test(${test_target})
endforeach()
43 | 


--------------------------------------------------------------------------------
/test/src/hyperbolic_trig_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test_helpers.hpp"
  2 | #include <catch2/catch_test_macros.hpp>
  3 | #include <iostream>
  4 | 
  5 | #include <math_approx/math_approx.hpp>
  6 | 
  7 | TEST_CASE ("Sinh Approx Test")
  8 | {
  9 | #if ! defined(WIN32)
 10 |     const auto all_floats = test_helpers::all_32_bit_floats (-3.5f, 3.5f, 1.0e-3f);
 11 | #else
 12 |     const auto all_floats = test_helpers::all_32_bit_floats (-3.5f, 3.5f, 1.0e-1f);
 13 | #endif
 14 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
 15 |                                                     { return std::sinh (x); });
 16 | 
 17 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound)
 18 |     {
 19 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
 20 | 
 21 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
 22 |         const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
 23 |         const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);
 24 | 
 25 |         const auto max_error = test_helpers::abs_max<float> (error);
 26 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_error);
 27 |         const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end());
 28 | 
 29 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
 30 |         REQUIRE (std::abs (max_error) < err_bound);
 31 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
 32 |         if (ulp_bound > 0)
 33 |             REQUIRE (max_ulp_error < ulp_bound);
 34 |     };
 35 | 
 36 |     SECTION ("6th-Order simul.")
 37 |     {
 38 |         test_approx ([] (auto x)
 39 |                      { return math_approx::sinh_cosh<6> (x).first; },
 40 |                      6.0e-6f,
 41 |                      0.00012f,
 42 |                      1050);
 43 |     }
 44 |     SECTION ("6th-Order")
 45 |     {
 46 |         test_approx ([] (auto x)
 47 |                      { return math_approx::sinh<6> (x); },
 48 |                      6.0e-6f,
 49 |                      0.00012f,
 50 |                      1050);
 51 |     }
 52 |     SECTION ("5th-Order")
 53 |     {
 54 |         test_approx ([] (auto x)
 55 |                      { return math_approx::sinh<5> (x); },
 56 |                      8.0e-6f,
 57 |                      0.00015f,
 58 |                      0);
 59 |     }
 60 |     SECTION ("4th-Order")
 61 |     {
 62 |         test_approx ([] (auto x)
 63 |                      { return math_approx::sinh<4> (x); },
 64 |                      6.0e-5f,
 65 |                      0.00027f,
 66 |                      0);
 67 |     }
 68 |     SECTION ("3rd-Order")
 69 |     {
 70 |         test_approx ([] (auto x)
 71 |                      { return math_approx::sinh<3> (x); },
 72 |                      0.002f,
 73 |                      0.0035f,
 74 |                      0);
 75 |     }
 76 | }
 77 | // Compares math_approx::cosh<N> (and the .second member of sinh_cosh<6>) with std::cosh on the values from all_32_bit_floats (-5, 5, ...).
 78 | TEST_CASE ("Cosh Approx Test")
 79 | {
 80 | #if ! defined(WIN32)
 81 |     const auto all_floats = test_helpers::all_32_bit_floats (-5.0f, 5.0f, 1.0e-3f);
 82 | #else // WIN32 builds pass a larger third argument (1.0e-1f); NOTE(review): presumably to shrink the input set - confirm in test_helpers
 83 |     const auto all_floats = test_helpers::all_32_bit_floats (-5.0f, 5.0f, 1.0e-1f);
 84 | #endif
 85 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
 86 |                                                     { return std::cosh (x); });
 87 |     // Shared checker: evaluates f_approx on all_floats and compares the result with y_exact.
 88 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound)
 89 |     {
 90 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
 91 |         // Error vectors from the test_helpers routines (absolute, relative and ULP, going by their names).
 92 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
 93 |         const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
 94 |         const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);
 95 |         // Collapse each vector to one figure: abs_max for the float errors, std::max_element for the ULP errors.
 96 |         const auto max_error = test_helpers::abs_max<float> (error);
 97 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_error);
 98 |         const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end());
 99 |         // Print the three maxima, then assert each against its bound; ulp_bound == 0 skips the ULP check.
100 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
101 |         REQUIRE (std::abs (max_error) < err_bound);
102 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
103 |         if (ulp_bound > 0)
104 |             REQUIRE (max_ulp_error < ulp_bound);
105 |     };
106 |     // Each section passes: approximation lambda, absolute-error bound, relative-error bound, ULP bound (0 = not checked).
107 |     SECTION ("6th-Order simul.")
108 |     {
109 |         test_approx ([] (auto x)
110 |                      { return math_approx::sinh_cosh<6> (x).second; },
111 |                      2.5e-5f,
112 |                      4.5e-7f,
113 |                      8);
114 |     }
115 |     SECTION ("6th-Order")
116 |     {
117 |         test_approx ([] (auto x)
118 |                      { return math_approx::cosh<6> (x); },
119 |                      2.5e-5f,
120 |                      4.5e-7f,
121 |                      8);
122 |     }
123 |     SECTION ("5th-Order")
124 |     {
125 |         test_approx ([] (auto x)
126 |                      { return math_approx::cosh<5> (x); },
127 |                      3.5e-5f,
128 |                      5.5e-7f,
129 |                      10);
130 |     }
131 |     SECTION ("4th-Order")
132 |     {
133 |         test_approx ([] (auto x)
134 |                      { return math_approx::cosh<4> (x); },
135 |                      0.0003f,
136 |                      4.0e-6f,
137 |                      60);
138 |     }
139 |     SECTION ("3rd-Order")
140 |     {
141 |         test_approx ([] (auto x)
142 |                      { return math_approx::cosh<3> (x); },
143 |                      0.0075f,
144 |                      0.00015f,
145 |                      0);
146 |     }
147 | }
148 | // Compares math_approx::tanh<N> with std::tanh on the values from all_32_bit_floats (-10, 10, ...).
149 | TEST_CASE ("Tanh Approx Test")
150 | {
151 | #if ! defined(WIN32)
152 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-2f);
153 | #else // WIN32 builds pass a larger third argument (1.0e-1f); NOTE(review): presumably to shrink the input set - confirm in test_helpers
154 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
155 | #endif
156 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
157 |                                                     { return std::tanh (x); });
158 |     // Shared checker: evaluates f_approx on all_floats and compares the result with y_exact.
159 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound)
160 |     {
161 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
162 |         // Error vectors from the test_helpers routines (absolute, relative and ULP, going by their names).
163 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
164 |         const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
165 |         const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);
166 |         // Collapse each vector to one figure: abs_max for the float errors, std::max_element for the ULP errors.
167 |         const auto max_error = test_helpers::abs_max<float> (error);
168 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_error);
169 |         const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end());
170 |         // Print the three maxima, then assert each against its bound; ulp_bound == 0 skips the ULP check.
171 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
172 |         REQUIRE (std::abs (max_error) < err_bound);
173 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
174 |         if (ulp_bound > 0)
175 |             REQUIRE (max_ulp_error < ulp_bound);
176 |     };
177 |     // Each section passes: approximation lambda, absolute-error bound, relative-error bound, ULP bound (0 = not checked).
178 |     SECTION ("11th-Order")
179 |     {
180 |         test_approx ([] (auto x)
181 |                      { return math_approx::tanh<11> (x); },
182 |                      2.5e-7f,
183 |                      4.0e-7f,
184 |                      7);
185 |     }
186 |     SECTION ("9th-Order")
187 |     {
188 |         test_approx ([] (auto x)
189 |                      { return math_approx::tanh<9> (x); },
190 |                      1.5e-6f,
191 |                      1.5e-6f,
192 |                      20);
193 |     }
194 |     SECTION ("7th-Order")
195 |     {
196 |         test_approx ([] (auto x)
197 |                      { return math_approx::tanh<7> (x); },
198 |                      1.5e-5f,
199 |                      1.5e-5f,
200 |                      230);
201 |     }
202 |     SECTION ("5th-Order")
203 |     {
204 |         test_approx ([] (auto x)
205 |                      { return math_approx::tanh<5> (x); },
206 |                      2.5e-4f,
207 |                      2.5e-4f,
208 |                      0);
209 |     }
210 |     SECTION ("3rd-Order")
211 |     {
212 |         test_approx ([] (auto x)
213 |                      { return math_approx::tanh<3> (x); },
214 |                      4.0e-3f,
215 |                      4.0e-3f,
216 |                      0);
217 |     }
218 | }
219 | 


--------------------------------------------------------------------------------
/test/src/inverse_hyperbolic_trig_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test_helpers.hpp"
  2 | #include "catch2/catch_template_test_macros.hpp"
  3 | 
  4 | #include <catch2/catch_test_macros.hpp>
  5 | #include <iostream>
  6 | 
  7 | #include <math_approx/math_approx.hpp>
  8 | // Shared checker: evaluates f_approx on all_floats, measures its error against y_exact via test_helpers, and requires the abs_max of that error to be below err_bound.
  9 | template <typename T = float>
 10 | void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float err_bound)
 11 | {
 12 |     const auto y_approx = test_helpers::compute_all<T> (all_floats, f_approx);
 13 |     const auto error = test_helpers::compute_error<T> (y_exact, y_approx);
 14 |     const auto max_error = test_helpers::abs_max<T> (error);
 15 |     // Print the figure so it shows up in the test output, then assert it against the bound.
 16 |     std::cout << max_error << std::endl;
 17 |     REQUIRE (std::abs (max_error) < err_bound);
 18 | }
 19 | // Runs for TestType = float and double: compares math_approx::asinh<N> with std::asinh on the values from all_32_bit_floats<TestType> (-10, 10, ...).
 20 | TEMPLATE_TEST_CASE ("Asinh Approx Test", "", float, double)
 21 | {
 22 | #if ! defined(WIN32)
 23 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (-10.0f, 10.0f, 1.0e-2f);
 24 | #else // WIN32 builds pass a larger third argument (1.0e-1f); NOTE(review): presumably to shrink the input set - confirm in test_helpers
 25 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (-10.0f, 10.0f, 1.0e-1f);
 26 | #endif
 27 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
 28 |                                                     { return std::asinh (x); });
 29 |     // Each section passes the approximation under test and the bound that test_approx requires the maximum error to stay below.
 30 |     SECTION ("6th-Order")
 31 |     {
 32 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 33 |                      { return math_approx::asinh<6> (x); },
 34 |                      5.0e-7f);
 35 |     }
 36 |     SECTION ("5th-Order")
 37 |     {
 38 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 39 |                      { return math_approx::asinh<5> (x); },
 40 |                      6.0e-5f);
 41 |     }
 42 |     SECTION ("4th-Order")
 43 |     {
 44 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 45 |                      { return math_approx::asinh<4> (x); },
 46 |                      3.5e-4f);
 47 |     }
 48 |     SECTION ("3rd-Order")
 49 |     {
 50 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 51 |                      { return math_approx::asinh<3> (x); },
 52 |                      2.5e-3f);
 53 |     }
 54 | }
 55 | // Runs for TestType = float and double: compares math_approx::acosh<N> with std::acosh on the values from all_32_bit_floats<TestType> (1, 10, ...).
 56 | TEMPLATE_TEST_CASE ("Acosh Approx Test", "", float, double)
 57 | {
 58 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (1.0f, 10.0f, 1.0e-2f);
 59 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
 60 |                                                     { return std::acosh (x); });
 61 |     // Each section passes the approximation under test and the bound that test_approx requires the maximum error to stay below.
 62 |     SECTION ("6th-Order")
 63 |     {
 64 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 65 |                      { return math_approx::acosh<6> (x); },
 66 |                      4.5e-6f);
 67 |     }
 68 |     SECTION ("5th-Order")
 69 |     {
 70 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 71 |                      { return math_approx::acosh<5> (x); },
 72 |                      1.5e-5f);
 73 |     }
 74 |     SECTION ("4th-Order")
 75 |     {
 76 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 77 |                      { return math_approx::acosh<4> (x); },
 78 |                      8.5e-5f);
 79 |     }
 80 |     SECTION ("3rd-Order")
 81 |     {
 82 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 83 |                      { return math_approx::acosh<3> (x); },
 84 |                      6.5e-4f);
 85 |     }
 86 | }
 87 | // Runs for TestType = float and double: compares math_approx::atanh<N> with std::atanh on the values from all_32_bit_floats<TestType> (-0.9999, 0.9999, ...).
 88 | TEMPLATE_TEST_CASE ("Atanh Approx Test", "", float, double)
 89 | {
 90 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (-0.9999f, 0.9999f, 1.0e-2f);
 91 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
 92 |                                                     { return std::atanh (x); });
 93 |     // Each section passes the approximation under test and the bound that test_approx requires the maximum error to stay below.
 94 |     SECTION ("6th-Order")
 95 |     {
 96 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 97 |                      { return math_approx::atanh<6> (x); },
 98 |                      2.5e-6f);
 99 |     }
100 |     SECTION ("5th-Order")
101 |     {
102 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
103 |                      { return math_approx::atanh<5> (x); },
104 |                      6.5e-6f);
105 |     }
106 |     SECTION ("4th-Order")
107 |     {
108 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
109 |                      { return math_approx::atanh<4> (x); },
110 |                      4.5e-5f);
111 |     }
112 |     SECTION ("3rd-Order")
113 |     {
114 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
115 |                      { return math_approx::atanh<3> (x); },
116 |                      3.5e-4f);
117 |     }
118 | }
119 | 


--------------------------------------------------------------------------------
/test/src/inverse_trig_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test_helpers.hpp"
  2 | #include <catch2/catch_test_macros.hpp>
  3 | #include <iostream>
  4 | 
  5 | #include <math_approx/math_approx.hpp>
  6 | // Compares math_approx::asin<N> with std::asin on the values from all_32_bit_floats (-1, 1, ...).
  7 | TEST_CASE ("Asin Approx Test")
  8 | {
  9 | #if ! defined(WIN32)
 10 |     const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-2f);
 11 | #else // WIN32 builds pass a larger third argument (1.0e-1f); NOTE(review): presumably to shrink the input set - confirm in test_helpers
 12 |     const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-1f);
 13 | #endif
 14 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
 15 |                                                     { return std::asin (x); });
 16 |     // Shared checker: evaluates f_approx on all_floats and compares the result with y_exact.
 17 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound)
 18 |     {
 19 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
 20 |         // Error vectors from the test_helpers routines (absolute, relative and ULP, going by their names).
 21 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
 22 |         const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
 23 |         const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);
 24 |         // Collapse each vector to one figure: abs_max for the float errors, std::max_element for the ULP errors.
 25 |         const auto max_error = test_helpers::abs_max<float> (error);
 26 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_error);
 27 |         const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end());
 28 |         // Print the three maxima, then assert each against its bound; ulp_bound == 0 skips the ULP check.
 29 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
 30 |         REQUIRE (std::abs (max_error) < err_bound);
 31 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
 32 |         if (ulp_bound > 0)
 33 |             REQUIRE (max_ulp_error < ulp_bound);
 34 |     };
 35 |     // Each section passes: approximation lambda, absolute-error bound, relative-error bound, ULP bound (0 = not checked).
 36 |     SECTION ("4th-Order")
 37 |     {
 38 |         test_approx ([] (auto x)
 39 |                      { return math_approx::asin<4> (x); },
 40 |                      2.5e-7f,
 41 |                      4.0e-7f,
 42 |                      4);
 43 |     }
 44 |     SECTION ("3rd-Order")
 45 |     {
 46 |         test_approx ([] (auto x)
 47 |                      { return math_approx::asin<3> (x); },
 48 |                      3.0e-7f,
 49 |                      5.0e-7f,
 50 |                      5);
 51 |     }
 52 |     SECTION ("2nd-Order")
 53 |     {
 54 |         test_approx ([] (auto x)
 55 |                      { return math_approx::asin<2> (x); },
 56 |                      2.0e-6f,
 57 |                      4.0e-6f,
 58 |                      50);
 59 |     }
 60 |     SECTION ("1st-Order")
 61 |     {
 62 |         test_approx ([] (auto x)
 63 |                      { return math_approx::asin<1> (x); },
 64 |                      4.0e-5f,
 65 |                      6.5e-5f,
 66 |                      0);
 67 |     }
 68 | }
 69 | // Compares math_approx::acos<N> with std::acos on the values from all_32_bit_floats (-1, 1, ...); only the absolute error is bounded here.
 70 | TEST_CASE ("Acos Approx Test")
 71 | {
 72 | #if ! defined(WIN32)
 73 |     const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-2f);
 74 | #else // WIN32 builds pass a larger third argument (1.0e-1f); NOTE(review): presumably to shrink the input set - confirm in test_helpers
 75 |     const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-1f);
 76 | #endif
 77 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
 78 |                                                     { return std::acos (x); });
 79 |     // Shared checker: evaluates f_approx on all_floats and compares the result with y_exact.
 80 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound)
 81 |     {
 82 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
 83 |         // Error vector from test_helpers::compute_error, reduced below with abs_max.
 84 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
 85 | 
 86 |         const auto max_error = test_helpers::abs_max<float> (error);
 87 |         // Print the maximum, then assert it against the bound.
 88 |         std::cout << max_error << std::endl;
 89 |         REQUIRE (std::abs (max_error) < err_bound);
 90 |     };
 91 |     // Each section passes the approximation lambda and its error bound.
 92 |     SECTION ("5th-Order")
 93 |     {
 94 |         test_approx ([] (auto x)
 95 |                      { return math_approx::acos<5> (x); },
 96 |                      5.0e-7f);
 97 |     }
 98 |     SECTION ("4th-Order")
 99 |     {
100 |         test_approx ([] (auto x)
101 |                      { return math_approx::acos<4> (x); },
102 |                      1.0e-6f);
103 |     }
104 |     SECTION ("3rd-Order")
105 |     {
106 |         test_approx ([] (auto x)
107 |                      { return math_approx::acos<3> (x); },
108 |                      1.5e-5f);
109 |     }
110 |     SECTION ("2nd-Order")
111 |     {
112 |         test_approx ([] (auto x)
113 |                      { return math_approx::acos<2> (x); },
114 |                      2.5e-4f);
115 |     }
116 |     SECTION ("1st-Order")
117 |     {
118 |         test_approx ([] (auto x)
119 |                      { return math_approx::acos<1> (x); },
120 |                      5.0e-3f);
121 |     }
122 | }
123 | // Compares math_approx::atan<N> with std::atan on the values from all_32_bit_floats (-10, 10, ...).
124 | TEST_CASE ("Atan Approx Test")
125 | {
126 | #if ! defined(WIN32)
127 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-2f);
128 | #else // WIN32 builds pass a larger third argument (1.0e-1f); NOTE(review): presumably to shrink the input set - confirm in test_helpers
129 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
130 | #endif
131 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
132 |                                                     { return std::atan (x); });
133 |     // Shared checker: evaluates f_approx on all_floats and compares the result with y_exact.
134 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound)
135 |     {
136 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
137 |         // Error vectors from the test_helpers routines (absolute, relative and ULP, going by their names).
138 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
139 |         const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
140 |         const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);
141 |         // Collapse each vector to one figure: abs_max for the float errors, std::max_element for the ULP errors.
142 |         const auto max_error = test_helpers::abs_max<float> (error);
143 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_error);
144 |         const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end());
145 |         // Print the three maxima, then assert each against its bound; ulp_bound == 0 skips the ULP check.
146 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
147 |         REQUIRE (std::abs (max_error) < err_bound);
148 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
149 |         if (ulp_bound > 0)
150 |             REQUIRE (max_ulp_error < ulp_bound);
151 |     };
152 |     // Each section passes: approximation lambda, absolute-error bound, relative-error bound, ULP bound (0 = not checked).
153 |     SECTION ("7th-Order")
154 |     {
155 |         test_approx ([] (auto x)
156 |                      { return math_approx::atan<7> (x); },
157 |                      4.0e-7f,
158 |                      3.0e-6f,
159 |                      45);
160 |     }
161 |     SECTION ("5th-Order")
162 |     {
163 |         test_approx ([] (auto x)
164 |                      { return math_approx::atan<5> (x); },
165 |                      2.0e-5f,
166 |                      1.5e-4f,
167 |                      0);
168 |     }
169 |     SECTION ("4th-Order")
170 |     {
171 |         test_approx ([] (auto x)
172 |                      { return math_approx::atan<4> (x); },
173 |                      1.5e-4f,
174 |                      8.5e-4f,
175 |                      0);
176 |     }
177 | }
178 | 


--------------------------------------------------------------------------------
/test/src/log_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test_helpers.hpp"
  2 | #include "catch2/catch_template_test_macros.hpp"
  3 | 
  4 | #include <catch2/catch_test_macros.hpp>
  5 | #include <iostream>
  6 | 
  7 | #include <math_approx/math_approx.hpp>
  8 | // Shared checker: evaluates f_approx on all_floats, measures its error against y_exact via test_helpers, and requires the abs_max of that error to be below err_bound.
  9 | template <typename T = float>
 10 | void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float err_bound)
 11 | {
 12 |     const auto y_approx = test_helpers::compute_all<T> (all_floats, f_approx);
 13 |     const auto error = test_helpers::compute_error<T> (y_exact, y_approx);
 14 |     const auto max_error = test_helpers::abs_max<T> (error);
 15 |     // Print the figure so it shows up in the test output, then assert it against the bound.
 16 |     std::cout << max_error << std::endl;
 17 |     REQUIRE (std::abs (max_error) < err_bound);
 18 | }
 19 | 
 20 | // Runs for TestType = float and double: compares math_approx::log<N> and log<N, true> (the sections labelled "C1-cont") with std::log on the values from all_32_bit_floats<TestType> (0.01, 10, ...).
 21 | TEMPLATE_TEST_CASE ("Log Approx Test", "", float, double)
 22 | {
 23 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (0.01f, 10.0f, 1.0e-3f);
 24 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
 25 |                                                     { return std::log (x); });
 26 |     // Each section passes the approximation under test and the bound that test_approx requires the maximum error to stay below.
 27 |     SECTION ("6th-Order")
 28 |     {
 29 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 30 |                      { return math_approx::log<6> (x); },
 31 |                      4.5e-6f);
 32 |     }
 33 |     SECTION ("6th-Order (C1-cont)")
 34 |     {
 35 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 36 |                      { return math_approx::log<6, true> (x); },
 37 |                      6.5e-6f);
 38 |     }
 39 |     SECTION ("5th-Order")
 40 |     {
 41 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 42 |                      { return math_approx::log<5> (x); },
 43 |                      1.5e-5f);
 44 |     }
 45 |     SECTION ("5th-Order (C1-cont)")
 46 |     {
 47 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 48 |                      { return math_approx::log<5, true> (x); },
 49 |                      3.5e-5f);
 50 |     }
 51 |     SECTION ("4th-Order")
 52 |     {
 53 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 54 |                      { return math_approx::log<4> (x); },
 55 |                      8.5e-5f);
 56 |     }
 57 |     SECTION ("4th-Order (C1-cont)")
 58 |     {
 59 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 60 |                      { return math_approx::log<4, true> (x); },
 61 |                      3.0e-4f);
 62 |     }
 63 |     SECTION ("3rd-Order")
 64 |     {
 65 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 66 |                      { return math_approx::log<3> (x); },
 67 |                      6.5e-4f);
 68 |     }
 69 |     SECTION ("3rd-Order (C1-cont)")
 70 |     {
 71 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 72 |                      { return math_approx::log<3, true> (x); },
 73 |                      4.0e-3f);
 74 |     }
 75 | }
 76 | // Runs for TestType = float and double: compares math_approx::log2<N> and log2<N, true> (the sections labelled "C1-cont") with std::log2 on the values from all_32_bit_floats<TestType> (0.01, 10, ...).
 77 | TEMPLATE_TEST_CASE ("Log2 Approx Test", "", float, double)
 78 | {
 79 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (0.01f, 10.0f, 1.0e-3f);
 80 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
 81 |                                                     { return std::log2 (x); });
 82 |     // Each section passes the approximation under test and the bound that test_approx requires the maximum error to stay below.
 83 |     SECTION ("6th-Order")
 84 |     {
 85 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 86 |                      { return math_approx::log2<6> (x); },
 87 |                      6.0e-6f);
 88 |     }
 89 |     SECTION ("6th-Order (C1-cont)")
 90 |     {
 91 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 92 |                      { return math_approx::log2<6, true> (x); },
 93 |                      8.5e-6f);
 94 |     }
 95 |     SECTION ("5th-Order")
 96 |     {
 97 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 98 |                      { return math_approx::log2<5> (x); },
 99 |                      2.0e-5f);
100 |     }
101 |     SECTION ("5th-Order (C1-cont)")
102 |     {
103 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
104 |                      { return math_approx::log2<5, true> (x); },
105 |                      5.0e-5f);
106 |     }
107 |     SECTION ("4th-Order")
108 |     {
109 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
110 |                      { return math_approx::log2<4> (x); },
111 |                      1.5e-4f);
112 |     }
113 |     SECTION ("4th-Order (C1-cont)")
114 |     {
115 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
116 |                      { return math_approx::log2<4, true> (x); },
117 |                      4.5e-4f);
118 |     }
119 |     SECTION ("3rd-Order")
120 |     {
121 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
122 |                      { return math_approx::log2<3> (x); },
123 |                      9.0e-4f);
124 |     }
125 |     SECTION ("3rd-Order (C1-cont)")
126 |     {
127 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
128 |                      { return math_approx::log2<3, true> (x); },
129 |                      5.5e-3f);
130 |     }
131 | }
132 | // Runs for TestType = float and double: compares math_approx::log10<N> and log10<N, true> (the sections labelled "C1-cont") with std::log10 on the values from all_32_bit_floats<TestType> (0.01, 10, ...).
133 | TEMPLATE_TEST_CASE ("Log10 Approx Test", "", float, double)
134 | {
135 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (0.01f, 10.0f, 1.0e-3f);
136 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
137 |                                                     { return std::log10 (x); });
138 |     // Each section passes the approximation under test and the bound that test_approx requires the maximum error to stay below.
139 |     SECTION ("6th-Order")
140 |     {
141 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
142 |                      { return math_approx::log10<6> (x); },
143 |                      2.0e-6f);
144 |     }
145 |     SECTION ("6th-Order (C1-cont)")
146 |     {
147 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
148 |                      { return math_approx::log10<6, true> (x); },
149 |                      3.0e-6f);
150 |     }
151 |     SECTION ("5th-Order")
152 |     {
153 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
154 |                      { return math_approx::log10<5> (x); },
155 |                      6.0e-6f);
156 |     }
157 |     SECTION ("5th-Order (C1-cont)")
158 |     {
159 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
160 |                      { return math_approx::log10<5, true> (x); },
161 |                      1.5e-5f);
162 |     }
163 |     SECTION ("4th-Order")
164 |     {
165 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
166 |                      { return math_approx::log10<4> (x); },
167 |                      4.0e-5f);
168 |     }
169 |     SECTION ("4th-Order (C1-cont)")
170 |     {
171 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
172 |                      { return math_approx::log10<4, true> (x); },
173 |                      1.5e-4f);
174 |     }
175 |     SECTION ("3rd-Order")
176 |     {
177 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
178 |                      { return math_approx::log10<3> (x); },
179 |                      3.0e-4f);
180 |     }
181 |     SECTION ("3rd-Order (C1-cont)")
182 |     {
183 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
184 |                      { return math_approx::log10<3, true> (x); },
185 |                      2.0e-3f);
186 |     }
187 | }
188 | 


--------------------------------------------------------------------------------
/test/src/polylog_approx_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "test_helpers.hpp"
 2 | #include <catch2/catch_test_macros.hpp>
 3 | #include <iostream>
 4 | 
 5 | #include <math_approx/math_approx.hpp>
 6 | 
 7 | #include "reference/polylogarithm.hpp"
 8 | // Compares math_approx::li2<...> with the reference polylogarithm::Li2 on the values from all_32_bit_floats (-10, 10, ...).
 9 | TEST_CASE ("Li2 Approx Test")
10 | {
11 | #if ! defined(WIN32)
12 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-2f);
13 | #else // WIN32 builds pass a larger third argument (1.0e-1f); NOTE(review): presumably to shrink the input set - confirm in test_helpers
14 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
15 | #endif
16 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
17 |                                                            { return polylogarithm::Li2 (x); });
18 |     // Shared checker: evaluates f_approx on all_floats and compares the result with y_exact.
19 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound)
20 |     {
21 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
22 |         // Error vectors from the test_helpers routines (absolute, relative and ULP, going by their names).
23 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
24 |         const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
25 |         const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);
26 |         // Collapse each vector to one figure: abs_max for the float errors, std::max_element for the ULP errors.
27 |         const auto max_error = test_helpers::abs_max<float> (error);
28 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_error);
29 |         const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end());
30 |         // Print the three maxima, then assert each against its bound; ulp_bound == 0 skips the ULP check.
31 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
32 |         REQUIRE (std::abs (max_error) < err_bound);
33 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
34 |         if (ulp_bound > 0)
35 |             REQUIRE (max_ulp_error < ulp_bound);
36 |     };
37 |     // Each section passes: approximation lambda, absolute-error bound, relative-error bound, ULP bound (0 = not checked).
38 |     SECTION ("3rd-Order_Log-6")
39 |     {
40 |         test_approx ([] (auto x)
41 |                      { return math_approx::li2<3, 6> (x); },
42 |                      2.5e-5f,
43 |                      1.5e-5f,
44 |                      200);
45 |     }
46 |     SECTION ("3rd-Order")
47 |     {
48 |         test_approx ([] (auto x)
49 |                      { return math_approx::li2<3> (x); },
50 |                      8.0e-5f,
51 |                      1.5e-4f,
52 |                      0);
53 |     }
54 |     SECTION ("2nd-Order")
55 |     {
56 |         test_approx ([] (auto x)
57 |                      { return math_approx::li2<2> (x); },
58 |                      3.0e-4f,
59 |                      3.0e-4f,
60 |                      0);
61 |     }
62 |     SECTION ("1st-Order")
63 |     {
64 |         test_approx ([] (auto x)
65 |                      { return math_approx::li2<1> (x); },
66 |                      2.5e-3f,
67 |                      4.0e-3f,
68 |                      0);
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/test/src/pow_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "catch2/catch_template_test_macros.hpp"
  2 | #include "test_helpers.hpp"
  3 | 
  4 | #include <catch2/catch_test_macros.hpp>
  5 | #include <iostream>
  6 | 
  7 | #include <math_approx/math_approx.hpp>
  8 | 
  9 | template <typename T = float>
 10 | void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float rel_err_bound, uint32_t ulp_bound)
 11 | {
 12 |     const auto y_approx = test_helpers::compute_all<T> (all_floats, f_approx);
 13 | 
 14 |     const auto error = test_helpers::compute_error<T> (y_exact, y_approx);
 15 |     const auto rel_error = test_helpers::compute_rel_error<T> (y_exact, y_approx);
 16 |     const auto ulp_error = [&]
 17 |     {
 18 |         if constexpr (std::is_same_v<T, float>)
 19 |             return test_helpers::compute_ulp_error (y_exact, y_approx);
 20 |         else
 21 |             return std::vector<uint32_t> {};
 22 |     }();
 23 | 
 24 |     const auto max_rel_error = test_helpers::abs_max<T> (rel_error);
 25 |     const auto max_ulp_error = std::is_same_v<T, float> ? *std::max_element (ulp_error.begin(), ulp_error.end()) : 0;
 26 | 
 27 |     std::cout << max_rel_error << ", " << max_ulp_error << std::endl;
 28 |     REQUIRE (std::abs (max_rel_error) < rel_err_bound);
 29 |     if (ulp_bound > 0)
 30 |         REQUIRE (max_ulp_error < ulp_bound);
 31 | }
 32 | // Runs for TestType = float and double: compares math_approx::exp<N> and exp<N, true> (the sections labelled "C1-cont") with std::exp on the values from all_32_bit_floats<TestType> (-10, 10, ...).
 33 | TEMPLATE_TEST_CASE ("Exp Approx Test", "", float, double)
 34 | {
 35 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (-10.0f, 10.0f, 2.5e-1f);
 36 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats,
 37 |                                                               [] (auto x)
 38 |                                                               {
 39 |                                                                   return std::exp (x);
 40 |                                                               });
 41 |     // Each section passes the approximation under test, the relative-error bound and the ULP bound (0 = ULP check skipped by test_approx).
 42 |     SECTION ("6th-Order")
 43 |     {
 44 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 45 |                                { return math_approx::exp<6> (x); },
 46 |                                6.0e-7f,
 47 |                                10);
 48 |     }
 49 |     SECTION ("6th-Order (C1-cont)")
 50 |     {
 51 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 52 |                                { return math_approx::exp<6, true> (x); },
 53 |                                6.0e-7f,
 54 |                                10);
 55 |     }
 56 |     SECTION ("5th-Order")
 57 |     {
 58 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 59 |                                { return math_approx::exp<5> (x); },
 60 |                                7.5e-7f,
 61 |                                15);
 62 |     }
 63 |     SECTION ("5th-Order (C1-cont)")
 64 |     {
 65 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 66 |                                { return math_approx::exp<5, true> (x); },
 67 |                                9.0e-7f,
 68 |                                15);
 69 |     }
 70 |     SECTION ("4th-Order")
 71 |     {
 72 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 73 |                                { return math_approx::exp<4> (x); },
 74 |                                4.0e-6f,
 75 |                                80);
 76 |     }
 77 |     SECTION ("4th-Order (C1-cont)")
 78 |     {
 79 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 80 |                                { return math_approx::exp<4, true> (x); },
 81 |                                1.5e-5f,
 82 |                                180);
 83 |     }
 84 |     SECTION ("3rd-Order")
 85 |     {
 86 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 87 |                                { return math_approx::exp<3> (x); },
 88 |                                1.5e-4f,
 89 |                                0);
 90 |     }
 91 |     SECTION ("3rd-Order (C1-cont)")
 92 |     {
 93 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
 94 |                                { return math_approx::exp<3, true> (x); },
 95 |                                6.5e-4f,
 96 |                                0);
 97 |     }
 98 | }
 99 | 
100 | TEMPLATE_TEST_CASE ("Exp2 Approx Test", "", float, double)
101 | {
102 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (-10.0f, 10.0f, 2.5e-1f);
103 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats,
104 |                                                               [] (auto x)
105 |                                                               {
106 |                                                                   return std::exp2 (x);
107 |                                                               });
108 | 
109 |     SECTION ("6th-Order")
110 |     {
111 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
112 |                                { return math_approx::exp2<6> (x); },
113 |                                3.0e-7f,
114 |                                4);
115 |     }
116 |     SECTION ("6th-Order (C1-cont)")
117 |     {
118 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
119 |                                { return math_approx::exp2<6, true> (x); },
120 |                                3.0e-7f,
121 |                                4);
122 |     }
123 |     SECTION ("5th-Order")
124 |     {
125 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
126 |                                { return math_approx::exp2<5> (x); },
127 |                                4.0e-7f,
128 |                                5);
129 |     }
130 |     SECTION ("5th-Order (C1-cont)")
131 |     {
132 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
133 |                                { return math_approx::exp2<5, true> (x); },
134 |                                5.0e-7f,
135 |                                8);
136 |     }
137 |     SECTION ("4th-Order")
138 |     {
139 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
140 |                                { return math_approx::exp2<4> (x); },
141 |                                4.0e-6f,
142 |                                70);
143 |     }
144 |     SECTION ("4th-Order (C1-cont)")
145 |     {
146 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
147 |                                { return math_approx::exp2<4, true> (x); },
148 |                                1.5e-5f,
149 |                                175);
150 |     }
151 |     SECTION ("3th-Order")
152 |     {
153 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
154 |                                { return math_approx::exp2<3> (x); },
155 |                                1.5e-4f,
156 |                                0);
157 |     }
158 |     SECTION ("3th-Order (C1-cont)")
159 |     {
160 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
161 |                                { return math_approx::exp2<3, true> (x); },
162 |                                6.5e-4f,
163 |                                0);
164 |     }
165 | }
166 | 
167 | TEMPLATE_TEST_CASE ("Exp10 Approx Test", "", float, double)
168 | {
169 |     const auto all_floats = test_helpers::all_32_bit_floats<TestType> (-10.0f, 10.0f, 2.5e-1f);
170 |     const auto y_exact = test_helpers::compute_all<TestType> (all_floats,
171 |                                                               [] (auto x)
172 |                                                               {
173 |                                                                   return std::pow (10.0f, x);
174 |                                                               });
175 | 
176 |     SECTION ("6th-Order")
177 |     {
178 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
179 |                                { return math_approx::exp10<6> (x); },
180 |                                2.0e-6f,
181 |                                32);
182 |     }
183 |     SECTION ("6th-Order (C1-cont)")
184 |     {
185 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
186 |                                { return math_approx::exp10<6, true> (x); },
187 |                                6.0e-6f,
188 |                                32);
189 |     }
190 |     SECTION ("5th-Order")
191 |     {
192 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
193 |                                { return math_approx::exp10<5> (x); },
194 |                                2.5e-6f,
195 |                                35);
196 |     }
197 |     SECTION ("5th-Order (C1-cont)")
198 |     {
199 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
200 |                                { return math_approx::exp10<5, true> (x); },
201 |                                2.5e-6f,
202 |                                35);
203 |     }
204 |     SECTION ("4th-Order")
205 |     {
206 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
207 |                                { return math_approx::exp10<4> (x); },
208 |                                5.5e-6f,
209 |                                90);
210 |     }
211 |     SECTION ("4th-Order (C1-cont)")
212 |     {
213 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
214 |                                { return math_approx::exp10<4, true> (x); },
215 |                                1.5e-5f,
216 |                                200);
217 |     }
218 |     SECTION ("3th-Order")
219 |     {
220 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
221 |                                { return math_approx::exp10<3> (x); },
222 |                                1.5e-4f,
223 |                                0);
224 |     }
225 |     SECTION ("3th-Order (C1-cont)")
226 |     {
227 |         test_approx<TestType> (all_floats, y_exact, [] (auto x)
228 |                                { return math_approx::exp10<3, true> (x); },
229 |                                6.5e-4f,
230 |                                0);
231 |     }
232 | }
233 | 


--------------------------------------------------------------------------------
/test/src/reference/polylogarithm.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cmath>
 4 | 
 5 | /**
 6 |  * Implementations of polylogarithm functions.
 7 |  *
 8 |  * Based on the implementations found at: https://github.com/Expander/polylogarithm
 9 |  */
namespace polylogarithm
{
    /**
     * Real polylogarithm with n=2 (dilogarithm), Li2(x).
     *
     * The argument is first transformed onto y in [0, 1/2]; each transformation
     * contributes an additive term `r` and a sign `s`. The result is then
     * r + s * y * P(y) / Q(y), with P (degree 5) and Q (degree 6) being the
     * fixed polynomials below. The points x = -1, 0 and 1 return their
     * closed-form values directly.
     *
     * @tparam T  floating-point type used for all arithmetic
     * @param x   real argument
     * @return    the dilogarithm of x (for x > 1 this is the real part)
     */
    template <typename T>
    inline T Li2 (T x) noexcept
    {
        // Spelled out as a literal: M_PI is not part of standard C++ and some
        // toolchains only expose it when _USE_MATH_DEFINES is set.
        constexpr auto PI_ = static_cast<T> (3.14159265358979323846);

        // numerator coefficients, lowest order first
        constexpr T P[] = {
            static_cast<T> (0.9999999999999999502e+0),
            static_cast<T> (-2.6883926818565423430e+0),
            static_cast<T> (2.6477222699473109692e+0),
            static_cast<T> (-1.1538559607887416355e+0),
            static_cast<T> (2.0886077795020607837e-1),
            static_cast<T> (-1.0859777134152463084e-2)
        };
        // denominator coefficients, lowest order first
        constexpr T Q[] = {
            static_cast<T> (1.0000000000000000000e+0),
            static_cast<T> (-2.9383926818565635485e+0),
            static_cast<T> (3.2712093293018635389e+0),
            static_cast<T> (-1.7076702173954289421e+0),
            static_cast<T> (4.1596017228400603836e-1),
            static_cast<T> (-3.9801343754084482956e-2),
            static_cast<T> (8.2743668974466659035e-4)
        };

        constexpr auto zero = static_cast<T> (0);
        constexpr auto one = static_cast<T> (1);
        constexpr auto half = static_cast<T> (0.5);

        T y = 0, r = 0, s = 1;

        // transform to [0, 1/2]
        if (x < -one)
        {
            const auto l = std::log (one - x);
            y = one / (one - x);
            r = -PI_ * PI_ / static_cast<T> (6) + l * (half * l - std::log (-x));
            s = one;
        }
        else if (x == -one)
        {
            return -PI_ * PI_ / static_cast<T> (12);
        }
        else if (x < zero)
        {
            const auto l = std::log1p (-x);
            y = x / (x - one);
            r = -half * l * l;
            s = -one;
        }
        else if (x == zero)
        {
            return zero;
        }
        else if (x < half)
        {
            y = x;
            r = zero;
            s = one;
        }
        else if (x < one)
        {
            y = one - x;
            r = PI_ * PI_ / static_cast<T> (6) - std::log (x) * std::log (y);
            s = -one;
        }
        else if (x == one)
        {
            return PI_ * PI_ / static_cast<T> (6);
        }
        else if (x < static_cast<T> (2))
        {
            const auto l = std::log (x);
            y = one - one / x;
            r = PI_ * PI_ / static_cast<T> (6) - l * (std::log (y) + half * l);
            s = one;
        }
        else
        {
            // also reached for NaN input (every comparison above is false),
            // in which case NaN propagates through std::log
            const auto l = std::log (x);
            y = one / x;
            r = PI_ * PI_ / static_cast<T> (3) - half * l * l;
            s = -one;
        }

        // evaluate both polynomials in y, grouped by powers of y^2
        const auto y2 = y * y;
        const auto y4 = y2 * y2;
        const auto p = P[0] + y * P[1] + y2 * (P[2] + y * P[3]) + y4 * (P[4] + y * P[5]);
        const auto q = Q[0] + y * Q[1] + y2 * (Q[2] + y * Q[3]) + y4 * (Q[4] + y * Q[5] + y2 * Q[6]);

        return r + s * y * p / q;
    }
} // namespace polylogarithm
99 | 


--------------------------------------------------------------------------------
/test/src/reference/sincospi.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cmath>
 4 | 
 5 | namespace sincospi
 6 | {
 7 | /* Writes result sine result sin(πa) to the location pointed to by sp
 8 |    Writes result cosine result cos(πa) to the location pointed to by cp
 9 | 
10 |    In exhaustive testing, the maximum error in sine results was 0.96677 ulp,
11 |    the maximum error in cosine results was 0.96563 ulp, meaning results are
12 |    faithfully rounded.
13 | 
14 |    Copied from: https://stackoverflow.com/questions/42792939/implementation-of-sinpi-and-cospi-using-standard-c-math-library
15 | */
16 | void sincospif (float a, float *sp, float *cp)
17 | {
18 |     float az, t, c, r, s;
19 |     int32_t i;
20 | 
21 |     az = a * 0.0f; // must be evaluated with IEEE-754 semantics
22 |     /* for |a| > 2**24, cospi(a) = 1.0f, but cospi(Inf) = NaN */
23 |     a = (fabsf (a) < 0x1.0p24f) ? a : az;
24 |     r = nearbyintf (a + a); // must use IEEE-754 "to nearest" rounding
25 |     i = (int32_t)r;
26 |     t = fmaf (-0.5f, r, a);
27 |     /* compute core approximations */
28 |     s = t * t;
29 |     /* Approximate cos(pi*x) for x in [-0.25,0.25] */
30 |     r =              0x1.d9e000p-3f;
31 |     r = fmaf (r, s, -0x1.55c400p+0f);
32 |     r = fmaf (r, s,  0x1.03c1cep+2f);
33 |     r = fmaf (r, s, -0x1.3bd3ccp+2f);
34 |     c = fmaf (r, s,  0x1.000000p+0f);
35 |     /* Approximate sin(pi*x) for x in [-0.25,0.25] */
36 |     r =             -0x1.310000p-1f;
37 |     r = fmaf (r, s,  0x1.46737ep+1f);
38 |     r = fmaf (r, s, -0x1.4abbfep+2f);
39 |     r = (t * s) * r;
40 |     s = fmaf (t, 0x1.921fb6p+1f, r);
41 |     if (i & 2) {
42 |         s = 0.0f - s; // must be evaluated with IEEE-754 semantics
43 |         c = 0.0f - c; // must be evaluated with IEEE-754 semantics
44 |     }
45 |     if (i & 1) {
46 |         t = 0.0f - s; // must be evaluated with IEEE-754 semantics
47 |         s = c;
48 |         c = t;
49 |     }
50 |     /* IEEE-754: sinPi(+n) is +0 and sinPi(-n) is -0 for positive integers n */
51 |     if (a == floorf (a)) s = az;
52 |     *sp = s;
53 |     *cp = c;
54 | }
55 | 
56 | float sin2pi (float x)
57 | {
58 |     float s, c;
59 |     sincospif (2.0f * x, &s, &c);
60 |     return s;
61 | }
62 | 
63 | float cos2pi (float x)
64 | {
65 |     float s, c;
66 |     sincospif (2.0f * x, &s, &c);
67 |     return c;
68 | }
69 | }
70 | 


--------------------------------------------------------------------------------
/test/src/reference/toms917.hpp:
--------------------------------------------------------------------------------
  1 | # include <cfenv>
  2 | # include <cfloat>
  3 | # include <cmath>
  4 | # include <complex>
  5 | # include <cstdlib>
  6 | # include <ctime>
  7 | # include <iomanip>
  8 | # include <iostream>
  9 | 
 10 | namespace toms917
 11 | {
 12 | using namespace std;
 13 | //
 14 | //  DBL_EPSILON, provided by <cfloat>, is twice the machine epsilon for
 15 | //  double precision arithmetic.
 16 | //
 17 | # define TWOITERTOL DBL_EPSILON
 18 | 
 19 | //****************************************************************************80
 20 | 
 21 | inline int wrightomega_ext ( complex <double> z, complex <double> &w,
 22 |   complex <double> &e, complex <double> &r, complex <double> &cond )
 23 | 
 24 | //****************************************************************************80
 25 | //
 26 | //  Purpose:
 27 | //
 28 | //    WRIGHTOMEGA_EXT computes the Wright Omega function with extra information.
 29 | //
 30 | //  Discussion:
 31 | //
 32 | //    WRIGHTOMEGA_EXT is the extended routine for evaluating the Wright
 33 | //    Omega function with the option of extracting the last update step,
 34 | //    the penultimate residual and the condition number estimate.
 35 | //
 36 | //  Modified:
 37 | //
 38 | //    14 May 2016
 39 | //
 40 | //  Author:
 41 | //
 42 | //    Piers Lawrence, Robert Corless, David Jeffrey
 43 | //
 44 | //  Reference:
 45 | //
 46 | //    Piers Lawrence, Robert Corless, David Jeffrey,
 47 | //    Algorithm 917: Complex Double-Precision Evaluation of the Wright Omega
 48 | //    Function,
 49 | //    ACM Transactions on Mathematical Software,
 50 | //    Volume 38, Number 3, Article 20, April 2012, 17 pages.
 51 | //
 52 | //  Parameters:
 53 | //
 54 | //    Input, complex <double> Z, value at which to evaluate Wrightomega().
 55 | //
 56 | //    Output, complex <double> &W, the value of Wrightomega(z).
 57 | //
 58 | //    Output, complex <double> &E, the last update step in the iterative scheme.
 59 | //
 60 | //    Output, complex <double> &R, the penultimate residual,
 61 | //    r_k = z - w_k - log(w_k)
 62 | //
 63 | //    Output, complex <double> &COND, the condition number estimate.
 64 | //
 65 | //    Output, int WRIGHTOMEGA_EXT, error flag;
 66 | //    0, successful computation.
 67 | //    nonzero, the computation failed.
 68 | //
 69 | {
 70 |   double near;
 71 |   double pi = M_PI;
 72 |   complex <double> pz;
 73 |   double s = 1.0;
 74 |   complex <double> t;
 75 |   complex <double> wp1;
 76 |   double x;
 77 |   double y;
 78 |   double ympi;
 79 |   double yppi;
 80 | //
 81 | //  Extract real and imaginary parts of Z.
 82 | //
 83 |   x = real ( z );
 84 |   y = imag ( z );
 85 | //
 86 | //  Compute if we are near the branch cuts.
 87 | //
 88 |   ympi = y - pi;
 89 |   yppi = y + pi;
 90 |   near = 0.01;
 91 | //
 92 | //  Test for floating point exceptions:
 93 | //
 94 | 
 95 | //
 96 | //  NaN output for NaN input.
 97 | //
 98 |   if ( isnan ( x ) || isnan ( y ) )
 99 |   {
100 |     // w = complex <double> ( ( 0.0 / 0.0 ), ( 0.0 / 0.0 ) );
101 |     // e = complex <double> ( 0.0, 0.0 );
102 |     // r = complex <double> ( 0.0, 0.0 );
103 |     return 0;
104 |   }
105 | //
106 | //  Signed zeros between branches.
107 | //
108 |   else if ( isinf ( x ) && ( x < 0.0 ) && ( - pi < y ) && ( y <= pi ) )
109 |   {
110 |     if ( fabs ( y ) <= pi / 2.0 )
111 |     {
112 |       w = + 0.0;
113 |     }
114 |     else
115 |     {
116 |       w = - 0.0;
117 |     }
118 | 
119 |     if ( 0.0 <= y )
120 |     {
121 |       w = w + complex <double> ( 0.0, 0.0 );
122 |     }
123 |     else
124 |     {
125 |       w = w + complex <double> ( 0.0, - 1.0 * 0.0 );
126 |     }
127 | 
128 |     e = complex <double> ( 0.0, 0.0 );
129 |     r = complex <double> ( 0.0, 0.0 );
130 |     return 0;
131 |   }
132 | //
133 | //  Asymptotic for large z.
134 | //
135 |   else if ( isinf ( x ) || isinf ( y ) )
136 |   {
137 |     w = complex <double> ( x, y );
138 |     e = complex <double> ( 0.0, 0.0 );
139 |     r = complex <double> ( 0.0, 0.0 );
140 |     return 0;
141 |   }
142 | //
143 | //  Test if exactly on the singular points.
144 | //
145 |   if ( ( x == - 1.0 ) && ( fabs ( y ) == pi ) )
146 |   {
147 |     w = complex <double> ( - 1.0, 0.0 );
148 |     e = complex <double> ( 0.0, 0.0 );
149 |     r = complex <double> ( 0.0, 0.0 );
150 |     return 0;
151 |   }
152 | //
153 | //  Choose approximation based on region.
154 | //
155 | 
156 | //
157 | //  Region 1: upper branch point.
158 | //  Series about z=-1+Pi*I.
159 | //
160 |   if ( ( - 2.0 < x && x <= 1.0 && 1.0 < y && y < 2.0 * pi ) )
161 |   {
162 |     pz = conj ( sqrt ( conj ( 2.0 * ( z + complex <double> ( 1.0, - pi ) ) ) ) );
163 | 
164 |     w = - 1.0
165 |       + ( complex <double> ( 0.0, 1.0 )
166 |       + ( 1.0 / 3.0
167 |       + ( - 1.0 / 36.0 * complex <double> ( 0.0, 1.0 )
168 |       + ( 1.0 / 270.0 + 1.0 / 4320.0 * complex <double> ( 0.0, 1.0 ) * pz )
169 |       * pz ) * pz ) * pz ) * pz;
170 |   }
171 | //
172 | //  Region 2: lower branch point.
173 | //  Series about z=-1-Pi*I.
174 | //
175 |   else if ( ( - 2.0 < x && x <= 1.0 && - 2.0 * pi < y && y <- 1.0 ) )
176 |   {
177 |     pz = conj ( sqrt ( conj ( 2.0 * ( z + 1.0 + complex <double> ( 0.0, pi ) ) ) ) );
178 | 
179 |     w = - 1.0
180 |       + ( - complex <double> ( 0.0, 1.0 ) + ( 1.0 / 3.0
181 |       + ( 1.0 / 36.0 * complex <double> ( 0.0, 1.0 )
182 |       + ( 1.0 / 270.0 - 1.0 / 4320.0 * complex <double> ( 0.0, 1.0 ) * pz )
183 |       * pz ) * pz ) * pz ) * pz;
184 |   }
185 | //
186 | //  Region 3: between branch cuts.
187 | //  Series: About -infinity.
188 | //
189 |   else if ( x <= - 2.0 && - pi < y && y <= pi )
190 |   {
191 |     pz = exp ( z );
192 |     w = ( 1.0
193 |       + ( - 1.0
194 |       + ( 3.0 / 2.0
195 |       + ( - 8.0 / 3.0
196 |       + 125.0 / 24.0 * pz ) * pz ) * pz ) * pz ) * pz;
197 |   }
198 | //
199 | //  Region 4: Mushroom.
200 | //  Series about z=1.
201 | //
202 |   else if ( ( ( - 2.0 < x ) && ( x <= 1.0 ) && ( - 1.0 <= y ) && ( y <= 1.0 ) )
203 |     || ( ( - 2.0 < x ) && ( x - 1.0 ) * ( x - 1.0 ) + y * y <= pi * pi ) )
204 |   {
205 |     pz = z - 1.0;
206 |     w = 1.0 / 2.0 + 1.0 / 2.0 * z
207 |       + ( 1.0 / 16.0
208 |       + ( - 1.0 / 192.0
209 |       + ( - 1.0 / 3072.0 + 13.0 / 61440.0 * pz ) * pz ) * pz ) * pz * pz;
210 |   }
211 | //
212 | //  Region 5: Top wing.
213 | //  Negative log series.
214 | //
215 |   else if ( x <= - 1.05 && pi < y && y - pi <= - 0.75 * ( x + 1.0 ) )
216 |   {
217 |     t = z - complex <double> ( 0.0, pi );
218 |     pz = log ( - t );
219 |     w = ( ( 1.0 + ( - 3.0 / 2.0 + 1.0 / 3.0 * pz ) * pz ) * pz
220 |       + ( ( -1.0 + 1.0 / 2.0 * pz ) * pz + ( pz + ( - pz + t ) * t ) * t ) * t )
221 |       / ( t * t * t );
222 |   }
223 | //
224 | //  Region 6: Bottom wing.
225 | //  Negative log series.
226 | //
227 |   else if ( x <= - 1.05 && 0.75 * ( x + 1.0 ) < y + pi && y + pi <= 0.0 )
228 |   {
229 |     t = z + complex <double> ( 0.0, pi );
230 |     pz = log ( - t );
231 |     w = ( ( 1.0 + ( - 3.0 / 2.0 + 1.0 / 3.0 * pz ) * pz ) * pz
232 |       + ( ( - 1.0 + 1.0 / 2.0 * pz ) * pz + ( pz + ( - pz + t ) * t ) * t ) * t )
233 |       / ( t * t * t );
234 |   }
235 | //
236 | //  Region 7: Everywhere else.
237 | //  Series solution about infinity.
238 | //
239 |   else
240 |   {
241 |     pz = log ( z );
242 |     w = ( ( 1.0 + ( - 3.0 / 2.0 + 1.0 / 3.0 * pz ) * pz ) * pz
243 |       + ( ( - 1.0 + 1.0 / 2.0 * pz ) * pz + ( pz + ( - pz + z ) * z ) * z ) * z )
244 |       / ( z * z * z );
245 |   }
246 | //
247 | //  Regularize if near branch cuts.
248 | ///
249 |   if ( x <= - 1.0 + near && ( fabs ( ympi ) <= near || fabs ( yppi ) <= near ) )
250 |   {
251 |     s = - 1.0;
252 |     if ( fabs ( ympi ) <= near )
253 |     {
254 | //
255 | //  Recompute ympi with directed rounding.
256 | //
257 |       fesetround ( FE_UPWARD );
258 |       ympi = y - pi;
259 | 
260 |       if ( ympi <= 0.0 )
261 |       {
262 |         fesetround ( FE_DOWNWARD );
263 |         ympi = y - pi;
264 |       }
265 | 
266 |       z = complex <double> ( x, ympi );
267 | //
268 | //  Return rounding to default.
269 | //
270 |       fesetround ( FE_TONEAREST );
271 |     }
272 |     else
273 |     {
274 | //
275 | //  Recompute yppi with directed rounding.
276 | //
277 |       fesetround ( FE_UPWARD );
278 |       yppi = y + pi;
279 | 
280 |       if ( yppi <= 0.0 )
281 |       {
282 |         fesetround ( FE_DOWNWARD );
283 |         yppi = y + pi;
284 |       }
285 | 
286 |       z = complex <double> ( x, yppi );
287 | //
288 | //  Return rounding to default.
289 | //
290 |       fesetround ( FE_TONEAREST );
291 |     }
292 |   }
293 | //
294 | //  Iteration one.
295 | //
296 |   w = s * w;
297 |   r = z - s * w - log ( w );
298 |   wp1 = s * w + 1.0;
299 |   e = r / wp1 * ( 2.0 * wp1 * ( wp1 + 2.0 / 3.0 * r ) - r )
300 |     / ( 2.0 * wp1 * ( wp1 + 2.0 / 3.0 * r ) - 2.0 * r );
301 |   w = w * ( 1.0 + e );
302 | //
303 | //  Iteration two.
304 | //
305 |   if ( abs ( ( 2.0 * w * w - 8.0 * w - 1.0 ) * pow ( abs ( r ), 4.0 ) )
306 |     >= TWOITERTOL * 72.0 * pow ( abs ( wp1 ), 6.0 ) )
307 |   {
308 |     r = z - s * w - log ( w );
309 |     wp1 = s * w + 1.0;
310 |     e = r / wp1 * ( 2.0 * wp1 * ( wp1 + 2.0 / 3.0 * r ) - r )
311 |       / ( 2.0 * wp1 * ( wp1 + 2.0 / 3.0 * r ) - 2.0 * r );
312 |     w = w * ( 1.0 + e );
313 |   }
314 | //
315 | //  Undo regularization.
316 | //
317 |   w = s * w;
318 | //
319 | //  Provide condition number estimate.
320 | //
321 |   cond = z / ( 1.0 + w );
322 | 
323 |   return 0;
324 | }
325 | //****************************************************************************80
326 | 
327 | //****************************************************************************80
328 | 
329 | inline complex <double> wrightomega ( complex <double> z )
330 | 
331 | //****************************************************************************80
332 | //
333 | //  Purpose:
334 | //
335 | //    WRIGHTOMEGA is the simple routine for evaluating the Wright Omega function. 
336 | //
337 | //  Discussion:
338 | //
339 | //    This function is called by:
340 | //
341 | //      w = wrightomega ( z )
342 | //
343 | //    This function makes a call to the more powerful wrightomega_ext() function.
344 | //
345 | //  Modified:
346 | //
347 | //    14 May 2016
348 | //
349 | //  Author:
350 | //
351 | //    Piers Lawrence, Robert Corless, David Jeffrey
352 | //
353 | //  Reference:
354 | //
355 | //    Piers Lawrence, Robert Corless, David Jeffrey,
356 | //    Algorithm 917: Complex Double-Precision Evaluation of the Wright Omega 
357 | //    Function,
358 | //    ACM Transactions on Mathematical Software,
359 | //    Volume 38, Number 3, Article 20, April 2012, 17 pages.
360 | //
361 | //  Parameters:
362 | //
363 | //    Input, complex <double> Z, the argument.
364 | //
365 | //    Output, complex <double> WRIGHTOMEGA, the value of the Wright Omega
366 | //    function of Z.
367 | //
368 | {
369 |   complex <double> cond;
370 |   complex <double> e;
371 |   complex <double> r;
372 |   complex <double> w;
373 | 
374 |   wrightomega_ext ( z, w, e, r, cond );
375 | 
376 |   return w;
377 | }
378 | 
379 | inline float wrightomega ( float z )
380 | {
381 |     return (float) std::real (wrightomega (std::complex<double> { double (z), 0.0 }));
382 | }
383 | }
384 | 


--------------------------------------------------------------------------------
/test/src/sigmoid_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test_helpers.hpp"
  2 | #include <catch2/catch_test_macros.hpp>
  3 | #include <iostream>
  4 | 
  5 | #include <math_approx/math_approx.hpp>
  6 | 
  7 | TEST_CASE ("Sigmoid Approx Test")
  8 | {
  9 | #if ! defined(WIN32)
 10 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f);
 11 | #else
 12 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
 13 | #endif
 14 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
 15 |                                                            { return 1.0f / (1.0f + std::exp (-x)); });
 16 | 
 17 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound)
 18 |     {
 19 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
 20 | 
 21 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
 22 |         const auto max_error = test_helpers::abs_max<float> (error);
 23 | 
 24 |         std::cout << max_error << std::endl;
 25 |         REQUIRE (std::abs (max_error) < err_bound);
 26 |     };
 27 | 
 28 |     SECTION ("9th-Order")
 29 |     {
 30 |         test_approx ([] (auto x)
 31 |                      { return math_approx::sigmoid<9> (x); },
 32 |                      6.5e-7f);
 33 |     }
 34 |     SECTION ("7th-Order")
 35 |     {
 36 |         test_approx ([] (auto x)
 37 |                      { return math_approx::sigmoid<7> (x); },
 38 |                      7.0e-6f);
 39 |     }
 40 |     SECTION ("5th-Order")
 41 |     {
 42 |         test_approx ([] (auto x)
 43 |                      { return math_approx::sigmoid<5> (x); },
 44 |                      1.0e-4f);
 45 |     }
 46 |     SECTION ("3th-Order")
 47 |     {
 48 |         test_approx ([] (auto x)
 49 |                      { return math_approx::sigmoid<3> (x); },
 50 |                      2.0e-3f);
 51 |     }
 52 | }
 53 | 
 54 | TEST_CASE ("Sigmoid (Exp) Approx Test")
 55 | {
 56 | #if ! defined(WIN32)
 57 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f);
 58 | #else
 59 |     const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
 60 | #endif
 61 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
 62 |                                                            { return 1.0f / (1.0f + std::exp (-x)); });
 63 | 
 64 |     const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound)
 65 |     {
 66 |         const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
 67 | 
 68 |         const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
 69 |         const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
 70 |         const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);
 71 | 
 72 |         const auto max_error = test_helpers::abs_max<float> (error);
 73 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_error);
 74 |         const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end());
 75 | 
 76 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
 77 |         REQUIRE (std::abs (max_error) < err_bound);
 78 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
 79 |         if (ulp_bound > 0)
 80 |             REQUIRE (max_ulp_error < ulp_bound);
 81 |     };
 82 | 
 83 |     SECTION ("6th-Order (Exp)")
 84 |     {
 85 |         test_approx ([] (auto x)
 86 |                      { return math_approx::sigmoid_exp<6> (x); },
 87 |                      1.5e-7f,
 88 |                      6.5e-7f,
 89 |                      12);
 90 |     }
 91 | 
 92 |     SECTION ("5th-Order (Exp)")
 93 |     {
 94 |         test_approx ([] (auto x)
 95 |                      { return math_approx::sigmoid_exp<5> (x); },
 96 |                      1.5e-7f,
 97 |                      7.5e-7f,
 98 |                      12);
 99 |     }
100 | 
101 |     SECTION ("4th-Order (Exp)")
102 |     {
103 |         test_approx ([] (auto x)
104 |                      { return math_approx::sigmoid_exp<4> (x); },
105 |                      9.5e-7f,
106 |                      4.5e-6f,
107 |                      65);
108 |     }
109 | 
110 |     SECTION ("3rd-Order (Exp)")
111 |     {
112 |         test_approx ([] (auto x)
113 |                      { return math_approx::sigmoid_exp<3> (x); },
114 |                      3.0e-4f,
115 |                      1.5e-4f,
116 |                      0);
117 |     }
118 | }
119 | 


--------------------------------------------------------------------------------
/test/src/test_helpers.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <algorithm>
  4 | #include <cmath>
  5 | #include <cstdint>
  6 | #include <cstring>
  7 | #include <numeric>
  8 | #include <span>
  9 | #include <vector>
 10 | 
 11 | namespace test_helpers
 12 | {
 13 | template <typename T = float>
 14 | inline auto all_32_bit_floats (float begin, float end, float tol = 1.0e-10f)
 15 | {
 16 |     std::vector<T> vec;
 17 |     vec.reserve (1 << 20);
 18 |     begin = (float) vec.emplace_back (static_cast<T> (begin));
 19 |     while (begin < end)
 20 |     {
 21 |         if (std::abs (begin) < tol)
 22 |         {
 23 |             begin = (float) vec.emplace_back (static_cast<T> (0));
 24 |             begin = (float) vec.emplace_back (static_cast<T> (tol));
 25 |         }
 26 |         begin = (float) vec.emplace_back (static_cast<T> (std::nextafter (begin, end)));
 27 |     }
 28 | 
 29 |     return vec;
 30 | }
 31 | 
 32 | template <typename T = float, typename F>
 33 | auto compute_all (std::span<const T> all_floats,
 34 |                   F&& f)
 35 | {
 36 |     std::vector<T> y;
 37 |     y.resize (all_floats.size());
 38 |     for (size_t i = 0; i < all_floats.size(); ++i)
 39 |         y[i] = f (all_floats[i]);
 40 | 
 41 |     return y;
 42 | }
 43 | 
 44 | template <typename T = float>
 45 | inline std::vector<T> compute_error (std::span<const T> actual, std::span<const T> approx)
 46 | {
 47 |     std::vector<T> err;
 48 |     err.resize (actual.size());
 49 |     for (size_t i = 0; i < actual.size(); ++i)
 50 |         err[i] = (actual[i] - approx[i]);
 51 |     return err;
 52 | }
 53 | 
 54 | template <typename T = float>
 55 | inline std::vector<T> compute_rel_error (std::span<const T> actual, std::span<const T> approx)
 56 | {
 57 |     std::vector<T> err;
 58 |     err.resize (actual.size());
 59 |     for (size_t i = 0; i < actual.size(); ++i)
 60 |         err[i] = (actual[i] - approx[i]) / (actual[i] + std::numeric_limits<float>::epsilon());
 61 |     return err;
 62 | }
 63 | 
 64 | // mostly borrowed from Catch2
 65 | inline uint32_t f32_ulp_dist (float lhs, float rhs) // NOLINT
 66 | {
 67 |     // We want X == Y to imply 0 ULP distance even if X and Y aren't
 68 |     // bit-equal (-0 and 0), or X - Y != 0 (same sign infinities).
 69 |     if (lhs == rhs)
 70 |         return 0;
 71 | 
 72 |     // We need a properly typed positive zero for type inference.
 73 |     static constexpr float positive_zero {};
 74 | 
 75 |     // We want to ensure that +/- 0 is always represented as positive zero
 76 |     if (lhs == positive_zero)
 77 |         lhs = positive_zero;
 78 |     if (rhs == positive_zero)
 79 |         rhs = positive_zero;
 80 | 
 81 |     // If arguments have different signs, we can handle them by summing
 82 |     // how far are they from 0 each.
 83 |     if (std::signbit (lhs) != std::signbit (rhs))
 84 |     {
 85 |         return f32_ulp_dist (std::abs (lhs), positive_zero)
 86 |                + f32_ulp_dist (std::abs (rhs), positive_zero);
 87 |     }
 88 | 
 89 |     // get the bit pattern of 'x'
 90 |     const auto f32_to_bits = [] (float x) -> uint32_t
 91 |     {
 92 |         uint32_t u;
 93 |         memcpy (&u, &x, 4);
 94 |         return u;
 95 |     };
 96 | 
 97 |     // When both lhs and rhs are of the same sign, we can just
 98 |     // read the numbers bitwise as integers, and then subtract them
 99 |     // (assuming IEEE).
100 |     uint32_t lc = f32_to_bits (lhs);
101 |     uint32_t rc = f32_to_bits (rhs);
102 | 
103 |     // The ulp distance between two numbers is symmetric, so to avoid
104 |     // dealing with overflows we want the bigger converted number on the lhs
105 |     if (lc < rc)
106 |         std::swap (lc, rc);
107 | 
108 |     return lc - rc;
109 | };
110 | 
111 | inline auto compute_ulp_error (std::span<const float> actual, std::span<const float> approx)
112 | {
113 |     // Element-wise f32_ulp_dist (actual[i], approx[i]); the result has
114 |     // actual.size() entries, so approx is read over that same index range.
115 |     std::vector<uint32_t> ulp_dists (actual.size());
116 |     for (size_t idx = 0; idx < ulp_dists.size(); ++idx)
117 |         ulp_dists[idx] = f32_ulp_dist (actual[idx], approx[idx]);
118 | 
119 |     return ulp_dists;
120 | }
121 | // Returns the element of x with the largest magnitude, sign included (T {} when x is empty).
122 | template <typename T = float>
123 | inline T abs_max (std::span<const T> x)
124 | {
125 |     // minmax_element on an empty range yields end iterators, which must not be dereferenced
126 |     if (x.empty())
127 |         return T {};
128 |     const auto [min, max] = std::minmax_element (x.begin(), x.end());
129 |     return std::abs (*min) > std::abs (*max) ? *min : *max; // ties go to the maximum
130 | }
131 | } // namespace test_helpers
132 | 


--------------------------------------------------------------------------------
/test/src/trig_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test_helpers.hpp"
  2 | #include <catch2/catch_test_macros.hpp>
  3 | #include <iostream>
  4 | 
  5 | #include <math_approx/math_approx.hpp>
  6 | 
  7 | TEST_CASE ("Sine Approx Test")
  8 | {
  9 | #if ! defined(WIN32)
 10 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f);
 11 | #else
 12 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
 13 | #endif
 14 |     // std::sin evaluated at every input serves as the reference
 15 |     const auto expected = test_helpers::compute_all<float> (inputs, [] (auto v) { return std::sin (v); });
 16 | 
 17 |     // REQUIREs that the largest-magnitude error of approx_fn stays below err_bound
 18 |     const auto check_approx = [&inputs, &expected] (auto&& approx_fn, float err_bound)
 19 |     {
 20 |         const auto approximated = test_helpers::compute_all<float> (inputs, approx_fn);
 21 |         const auto errors = test_helpers::compute_error<float> (expected, approximated);
 22 |         const auto max_error = test_helpers::abs_max<float> (errors);
 23 | 
 24 |         std::cout << max_error << std::endl;
 25 |         REQUIRE (std::abs (max_error) < err_bound);
 26 |     };
 27 | 
 28 |     SECTION ("9th-Order")
 29 |     {
 30 |         check_approx (
 31 |             [] (auto v) { return math_approx::sin<9> (v); },
 32 |             8.5e-7f);
 33 |     }
 34 |     SECTION ("7th-Order")
 35 |     {
 36 |         check_approx (
 37 |             [] (auto v) { return math_approx::sin<7> (v); },
 38 |             1.8e-5f);
 39 |     }
 40 |     SECTION ("5th-Order")
 41 |     {
 42 |         check_approx (
 43 |             [] (auto v) { return math_approx::sin<5> (v); },
 44 |             7.5e-4f);
 45 |     }
 46 | }
 47 | 
 48 | TEST_CASE ("Cosine Approx Test")
 49 | {
 50 | #if ! defined(WIN32)
 51 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f);
 52 | #else
 53 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
 54 | #endif
 55 |     // std::cos evaluated at every input serves as the reference
 56 |     const auto expected = test_helpers::compute_all<float> (inputs, [] (auto v) { return std::cos (v); });
 57 | 
 58 |     // REQUIREs that the largest-magnitude error of approx_fn stays below err_bound
 59 |     const auto check_approx = [&inputs, &expected] (auto&& approx_fn, float err_bound)
 60 |     {
 61 |         const auto approximated = test_helpers::compute_all<float> (inputs, approx_fn);
 62 |         const auto errors = test_helpers::compute_error<float> (expected, approximated);
 63 |         const auto max_error = test_helpers::abs_max<float> (errors);
 64 | 
 65 |         std::cout << max_error << std::endl;
 66 |         REQUIRE (std::abs (max_error) < err_bound);
 67 |     };
 68 | 
 69 |     SECTION ("9th-Order")
 70 |     {
 71 |         check_approx (
 72 |             [] (auto v) { return math_approx::cos<9> (v); },
 73 |             7.5e-7f);
 74 |     }
 75 |     SECTION ("7th-Order")
 76 |     {
 77 |         check_approx (
 78 |             [] (auto v) { return math_approx::cos<7> (v); },
 79 |             1.8e-5f);
 80 |     }
 81 |     SECTION ("5th-Order")
 82 |     {
 83 |         check_approx (
 84 |             [] (auto v) { return math_approx::cos<5> (v); },
 85 |             7.5e-4f);
 86 |     }
 87 | }
 88 | 
 89 | TEST_CASE ("Tan Approx Test")
 90 | {
 91 | #if ! defined(WIN32)
 92 |     const auto inputs = test_helpers::all_32_bit_floats (-1.5f, 1.5f, 1.0e-3f);
 93 | #else
 94 |     const auto inputs = test_helpers::all_32_bit_floats (-1.5f, 1.5f, 1.0e-1f);
 95 | #endif
 96 |     // std::tan evaluated at every input serves as the reference
 97 |     const auto expected = test_helpers::compute_all<float> (inputs, [] (auto v) { return std::tan (v); });
 98 | 
 99 |     // REQUIREs the worst error, relative error and ULP distance of approx_fn; ulp_bound == 0 skips the ULP check
100 |     const auto check_approx = [&inputs, &expected] (auto&& approx_fn, float err_bound, float rel_err_bound, uint32_t ulp_bound)
101 |     {
102 |         const auto approximated = test_helpers::compute_all<float> (inputs, approx_fn);
103 |         const auto errors = test_helpers::compute_error<float> (expected, approximated);
104 |         const auto rel_errors = test_helpers::compute_rel_error<float> (expected, approximated);
105 |         const auto ulp_errors = test_helpers::compute_ulp_error (expected, approximated);
106 | 
107 |         const auto max_error = test_helpers::abs_max<float> (errors);
108 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_errors);
109 |         const auto max_ulp_error = *std::max_element (ulp_errors.begin(), ulp_errors.end());
110 | 
111 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
112 |         REQUIRE (std::abs (max_error) < err_bound);
113 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
114 |         if (ulp_bound > 0)
115 |             REQUIRE (max_ulp_error < ulp_bound);
116 |     };
117 | 
118 |     SECTION ("13th-Order")
119 |     {
120 |         check_approx (
121 |             [] (auto v) { return math_approx::tan<13> (v); },
122 |             5.5e-5f,
123 |             6.0e-5f,
124 |             520);
125 |     }
126 |     SECTION ("11th-Order")
127 |     {
128 |         check_approx (
129 |             [] (auto v) { return math_approx::tan<11> (v); },
130 |             9.5e-5f,
131 |             6.0e-5f,
132 |             520);
133 |     }
134 |     SECTION ("9th-Order")
135 |     {
136 |         check_approx (
137 |             [] (auto v) { return math_approx::tan<9> (v); },
138 |             0.0009f,
139 |             6.0e-5f,
140 |             900);
141 |     }
142 |     SECTION ("7th-Order")
143 |     {
144 |         check_approx (
145 |             [] (auto v) { return math_approx::tan<7> (v); },
146 |             0.015f,
147 |             0.0009f,
148 |             0);
149 |     }
150 |     SECTION ("5th-Order")
151 |     {
152 |         check_approx (
153 |             [] (auto v) { return math_approx::tan<5> (v); },
154 |             0.14f,
155 |             0.01f,
156 |             0);
157 |     }
158 |     SECTION ("3rd-Order")
159 |     {
160 |         check_approx (
161 |             [] (auto v) { return math_approx::tan<3> (v); },
162 |             1.5f,
163 |             0.09f,
164 |             0);
165 |     }
166 | }
167 | 


--------------------------------------------------------------------------------
/test/src/trig_turns_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test_helpers.hpp"
  2 | #include <catch2/catch_test_macros.hpp>
  3 | #include <iostream>
  4 | 
  5 | #include <math_approx/math_approx.hpp>
  6 | #include "reference/sincospi.hpp"
  7 | 
  8 | TEST_CASE ("Sine Approx Test")
  9 | {
 10 | #if ! defined(WIN32)
 11 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f);
 12 | #else
 13 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
 14 | #endif
 15 |     // sincospi::sin2pi evaluated at every input serves as the reference
 16 |     const auto expected = test_helpers::compute_all<float> (inputs, [] (auto v) { return sincospi::sin2pi (v); });
 17 | 
 18 |     // REQUIREs the worst relative error and the worst ULP distance of approx_fn against the given bounds
 19 |     const auto check_approx = [&inputs, &expected] (auto&& approx_fn, float rel_err_bound, uint32_t ulp_err_bound)
 20 |     {
 21 |         const auto approximated = test_helpers::compute_all<float> (inputs, approx_fn);
 22 |         const auto rel_errors = test_helpers::compute_rel_error<float> (expected, approximated);
 23 |         const auto ulp_errors = test_helpers::compute_ulp_error (expected, approximated);
 24 | 
 25 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_errors);
 26 |         const auto max_ulp_error = *std::max_element (ulp_errors.begin(), ulp_errors.end());
 27 | 
 28 |         std::cout << max_rel_error << ", " << max_ulp_error << std::endl;
 29 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
 30 |         REQUIRE (max_ulp_error < ulp_err_bound);
 31 |     };
 32 | 
 33 |     SECTION ("11th-Order")
 34 |     {
 35 |         check_approx (
 36 |             [] (auto v) { return math_approx::sin_turns<11> (v); },
 37 |             5.0e-7f,
 38 |             6);
 39 |     }
 40 |     SECTION ("9th-Order")
 41 |     {
 42 |         check_approx (
 43 |             [] (auto v) { return math_approx::sin_turns<9> (v); },
 44 |             2.0e-6f,
 45 |             14);
 46 |     }
 47 |     SECTION ("7th-Order")
 48 |     {
 49 |         check_approx (
 50 |             [] (auto v) { return math_approx::sin_turns<7> (v); },
 51 |             9.0e-5f,
 52 |             490);
 53 |     }
 54 |     SECTION ("5th-Order")
 55 |     {
 56 |         check_approx (
 57 |             [] (auto v) { return math_approx::sin_turns<5> (v); },
 58 |             5.0e-3f,
 59 |             22'000);
 60 |     }
 61 | }
 62 | 
 63 | TEST_CASE ("Cosine Approx Test")
 64 | {
 65 | #if ! defined(WIN32)
 66 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f);
 67 | #else
 68 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
 69 | #endif
 70 |     // sincospi::cos2pi evaluated at every input serves as the reference
 71 |     const auto expected = test_helpers::compute_all<float> (inputs, [] (auto v) { return sincospi::cos2pi (v); });
 72 | 
 73 |     // REQUIREs the worst relative error and the worst ULP distance of approx_fn against the given bounds
 74 |     const auto check_approx = [&inputs, &expected] (auto&& approx_fn, float rel_err_bound, uint32_t ulp_err_bound)
 75 |     {
 76 |         const auto approximated = test_helpers::compute_all<float> (inputs, approx_fn);
 77 |         const auto rel_errors = test_helpers::compute_rel_error<float> (expected, approximated);
 78 |         const auto ulp_errors = test_helpers::compute_ulp_error (expected, approximated);
 79 | 
 80 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_errors);
 81 |         const auto max_ulp_error = *std::max_element (ulp_errors.begin(), ulp_errors.end());
 82 | 
 83 |         std::cout << max_rel_error << ", " << max_ulp_error << std::endl;
 84 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
 85 |         REQUIRE (max_ulp_error < ulp_err_bound);
 86 |     };
 87 | 
 88 |     SECTION ("11th-Order")
 89 |     {
 90 |         check_approx (
 91 |             [] (auto v) { return math_approx::cos_turns<11> (v); },
 92 |             5.0e-7f,
 93 |             6);
 94 |     }
 95 |     SECTION ("9th-Order")
 96 |     {
 97 |         check_approx (
 98 |             [] (auto v) { return math_approx::cos_turns<9> (v); },
 99 |             2.0e-6f,
100 |             10);
101 |     }
102 |     SECTION ("7th-Order")
103 |     {
104 |         check_approx (
105 |             [] (auto v) { return math_approx::cos_turns<7> (v); },
106 |             6.0e-5f,
107 |             270);
108 |     }
109 |     SECTION ("5th-Order")
110 |     {
111 |         check_approx (
112 |             [] (auto v) { return math_approx::cos_turns<5> (v); },
113 |             3.0e-3f,
114 |             14'000);
115 |     }
116 | }
117 | 


--------------------------------------------------------------------------------
/test/src/wright_omega_approx_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test_helpers.hpp"
  2 | #include <catch2/catch_test_macros.hpp>
  3 | #include <iostream>
  4 | 
  5 | #include <math_approx/math_approx.hpp>
  6 | #include "reference/toms917.hpp"
  7 | 
  8 | TEST_CASE ("Wright-Omega Approx Test")
  9 | {
 10 | #if ! defined(WIN32)
 11 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 30.0f, 1.0e-1f);
 12 | #else
 13 |     const auto inputs = test_helpers::all_32_bit_floats (-10.0f, 30.0f, 5.0e-1f);
 14 | #endif
 15 |     // toms917::wrightomega evaluated at every input serves as the reference
 16 |     const auto expected = test_helpers::compute_all<float> (inputs, [] (auto v) { return toms917::wrightomega (v); });
 17 | 
 18 |     // REQUIREs the worst error, relative error and ULP distance of approx_fn; ulp_bound == 0 skips the ULP check
 19 |     const auto check_approx = [&inputs, &expected] (auto&& approx_fn, float err_bound, float rel_err_bound, uint32_t ulp_bound)
 20 |     {
 21 |         const auto approximated = test_helpers::compute_all<float> (inputs, approx_fn);
 22 |         const auto errors = test_helpers::compute_error<float> (expected, approximated);
 23 |         const auto rel_errors = test_helpers::compute_rel_error<float> (expected, approximated);
 24 |         const auto ulp_errors = test_helpers::compute_ulp_error (expected, approximated);
 25 | 
 26 |         const auto max_error = test_helpers::abs_max<float> (errors);
 27 |         const auto max_rel_error = test_helpers::abs_max<float> (rel_errors);
 28 |         const auto max_ulp_error = *std::max_element (ulp_errors.begin(), ulp_errors.end());
 29 | 
 30 |         std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
 31 |         REQUIRE (std::abs (max_error) < err_bound);
 32 |         REQUIRE (std::abs (max_rel_error) < rel_err_bound);
 33 |         if (ulp_bound > 0)
 34 |             REQUIRE (max_ulp_error < ulp_bound);
 35 |     };
 36 | 
 37 |     SECTION ("Iter-3_Poly-3_LogExp-5")
 38 |     {
 39 |         check_approx (
 40 |             [] (auto v) { return math_approx::wright_omega<3, 3, 5> (v); },
 41 |             2.0e-6f,
 42 |             1.5e-6f,
 43 |             20);
 44 |     }
 45 |     SECTION ("Iter-3_Poly-3")
 46 |     {
 47 |         check_approx (
 48 |             [] (auto v) { return math_approx::wright_omega<3, 3> (v); },
 49 |             4.0e-6f,
 50 |             4.5e-6f,
 51 |             70);
 52 |     }
 53 |     SECTION ("Iter-2_Poly-5")
 54 |     {
 55 |         check_approx (
 56 |             [] (auto v) { return math_approx::wright_omega<2, 5> (v); },
 57 |             7.0e-6f,
 58 |             1.5e-4f,
 59 |             0);
 60 |     }
 61 |     SECTION ("Iter-2_Poly-3")
 62 |     {
 63 |         check_approx (
 64 |             [] (auto v) { return math_approx::wright_omega<2, 3> (v); },
 65 |             1.5e-5f,
 66 |             2.0e-4f,
 67 |             0);
 68 |     }
 69 |     SECTION ("Iter-2_Poly-3_LogExp-3")
 70 |     {
 71 |         check_approx (
 72 |             [] (auto v) { return math_approx::wright_omega<2, 3, 3> (v); },
 73 |             1.0e-4f,
 74 |             3.0e-4f,
 75 |             0);
 76 |     }
 77 |     SECTION ("Iter-1_Poly-5")
 78 |     {
 79 |         check_approx (
 80 |             [] (auto v) { return math_approx::wright_omega<1, 5> (v); },
 81 |             3.0e-3f,
 82 |             5.1e-2f,
 83 |             0);
 84 |     }
 85 |     SECTION ("Iter-1_Poly-3")
 86 |     {
 87 |         check_approx (
 88 |             [] (auto v) { return math_approx::wright_omega<1, 3> (v); },
 89 |             3.5e-3f,
 90 |             5.5e-2f,
 91 |             0);
 92 |     }
 93 |     SECTION ("Iter-0_Poly-5")
 94 |     {
 95 |         check_approx (
 96 |             [] (auto v) { return math_approx::wright_omega<0, 5> (v); },
 97 |             5.5e-2f,
 98 |             2.0f,
 99 |             0);
100 |     }
101 |     SECTION ("Iter-0_Poly-3")
102 |     {
103 |         check_approx (
104 |             [] (auto v) { return math_approx::wright_omega<0, 3> (v); },
105 |             6.0e-2f,
106 |             2.0f,
107 |             0);
108 |     }
109 | }
110 | 


--------------------------------------------------------------------------------
/tools/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(plotter) # processes tools/plotter/CMakeLists.txt
2 | add_subdirectory(bench) # processes tools/bench/CMakeLists.txt
3 | 


--------------------------------------------------------------------------------
/tools/bench/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if($ENV{CI}) # when the CI environment variable evaluates as true, nothing below is configured
 2 |     message(STATUS "Configuring in CI, skipping benchmarks...")
 3 |     return() # leave this CMakeLists.txt before any benchmark target is declared
 4 | endif()
 5 | 
 6 | CPMAddPackage( # adds the google/benchmark package that the bench targets link as benchmark::benchmark
 7 |     NAME benchmark
 8 |     GITHUB_REPOSITORY google/benchmark
 9 |     VERSION 1.9.4
10 |     OPTIONS "BENCHMARK_ENABLE_TESTING Off" # presumably turns off the package's own test targets; confirm against its CMake options
11 | )
12 | # setup_bench(<target> <src>): declares executable <target> built from <src> and linked against benchmark::benchmark and math_approx.
13 | function(setup_bench bench_target bench_source)
14 |     add_executable(${bench_target} ${bench_source})
15 |     target_link_libraries(${bench_target} PRIVATE benchmark::benchmark math_approx)
16 |     target_compile_features(${bench_target} PUBLIC cxx_std_20)
17 |     target_compile_definitions(${bench_target} PUBLIC _USE_MATH_DEFINES=1)
18 | endfunction()
19 | # One benchmark executable per *_bench.cpp source listed below.
20 | setup_bench(trig_approx_bench trig_bench.cpp)
21 | setup_bench(inverse_trig_approx_bench inverse_trig_bench.cpp)
22 | setup_bench(pow_approx_bench pow_bench.cpp)
23 | setup_bench(log_approx_bench log_bench.cpp)
24 | setup_bench(hyperbolic_trig_approx_bench hyperbolic_trig_bench.cpp)
25 | setup_bench(inverse_hyperbolic_trig_approx_bench inverse_hyperbolic_trig_bench.cpp)
26 | setup_bench(sigmoid_approx_bench sigmoid_bench.cpp)
27 | setup_bench(wright_omega_approx_bench wright_omega_bench.cpp)
28 | setup_bench(polylog_approx_bench polylog_bench.cpp)
29 | setup_bench(trig_turns_approx_bench trig_turns_bench.cpp)
30 | 


--------------------------------------------------------------------------------
/tools/bench/hyperbolic_trig_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | 
 4 | static constexpr size_t N = 2000;
 5 | const auto data = []
 6 | {
 7 |     // N inputs starting at -10 and advancing by 20 / N (so 10 itself is never reached)
 8 |     std::vector<float> samples (N, 0.0f);
 9 |     for (size_t k = 0; k < N; ++k)
10 |         samples[k] = -10.0f + 20.0f * static_cast<float> (k) / static_cast<float> (N);
11 |     return samples;
12 | }();
13 | // HTRIG_BENCH (name, func): defines and registers benchmark `name`, which calls `func` on every float in `data`.
14 | #define HTRIG_BENCH(name, func) \
15 |     void name (benchmark::State& state) \
16 |     { \
17 |         for (auto _ : state) \
18 |         { \
19 |             for (auto& x : data) \
20 |             { \
21 |                 auto y = func (x); \
22 |                 benchmark::DoNotOptimize (y); \
23 |             } \
24 |         } \
25 |     } \
26 |     BENCHMARK (name);
27 | HTRIG_BENCH (sinh_std, std::sinh)
28 | HTRIG_BENCH (sinh_approx6, math_approx::sinh<6>)
29 | HTRIG_BENCH (sinh_approx5, math_approx::sinh<5>)
30 | HTRIG_BENCH (sinh_approx4, math_approx::sinh<4>)
31 | HTRIG_BENCH (sinh_approx3, math_approx::sinh<3>)
32 | // cosh: the baseline has to time std::cosh to be comparable with the cosh approximations
33 | HTRIG_BENCH (cosh_std, std::cosh)
34 | HTRIG_BENCH (cosh_approx6, math_approx::cosh<6>)
35 | HTRIG_BENCH (cosh_approx5, math_approx::cosh<5>)
36 | HTRIG_BENCH (cosh_approx4, math_approx::cosh<4>)
37 | HTRIG_BENCH (cosh_approx3, math_approx::cosh<3>)
38 | 
39 | HTRIG_BENCH (tanh_std, std::tanh)
40 | HTRIG_BENCH (tanh_approx11, math_approx::tanh<11>)
41 | HTRIG_BENCH (tanh_approx9, math_approx::tanh<9>)
42 | HTRIG_BENCH (tanh_approx7, math_approx::tanh<7>)
43 | HTRIG_BENCH (tanh_approx5, math_approx::tanh<5>)
44 | // HTRIG_SIMD_BENCH (name, func): defines and registers benchmark `name`, which calls `func` on xsimd::broadcast (x) for every float x in `data`.
45 | #define HTRIG_SIMD_BENCH(name, func) \
46 |     void name (benchmark::State& state) \
47 |     { \
48 |         for (auto _ : state) \
49 |         { \
50 |             for (auto& x : data) \
51 |             { \
52 |                 auto y = func (xsimd::broadcast (x)); \
53 |                 static_assert (std::is_same_v<xsimd::batch<float>, decltype(y)>); \
54 |                 benchmark::DoNotOptimize (y); \
55 |             } \
56 |         } \
57 |     } \
58 |     BENCHMARK (name);
59 | HTRIG_SIMD_BENCH (sinh_xsimd, xsimd::sinh)
60 | HTRIG_SIMD_BENCH (sinh_simd_approx6, math_approx::sinh<6>)
61 | HTRIG_SIMD_BENCH (sinh_simd_approx5, math_approx::sinh<5>)
62 | HTRIG_SIMD_BENCH (sinh_simd_approx4, math_approx::sinh<4>)
63 | HTRIG_SIMD_BENCH (sinh_simd_approx3, math_approx::sinh<3>)
64 | // cosh: the xsimd baseline has to time xsimd::cosh to be comparable with the cosh approximations
65 | HTRIG_SIMD_BENCH (cosh_xsimd, xsimd::cosh)
66 | HTRIG_SIMD_BENCH (cosh_simd_approx6, math_approx::cosh<6>)
67 | HTRIG_SIMD_BENCH (cosh_simd_approx5, math_approx::cosh<5>)
68 | HTRIG_SIMD_BENCH (cosh_simd_approx4, math_approx::cosh<4>)
69 | HTRIG_SIMD_BENCH (cosh_simd_approx3, math_approx::cosh<3>)
70 | 
71 | HTRIG_SIMD_BENCH (tanh_xsimd, xsimd::tanh)
72 | HTRIG_SIMD_BENCH (tanh_simd_approx11, math_approx::tanh<11>)
73 | HTRIG_SIMD_BENCH (tanh_simd_approx9, math_approx::tanh<9>)
74 | HTRIG_SIMD_BENCH (tanh_simd_approx7, math_approx::tanh<7>)
75 | HTRIG_SIMD_BENCH (tanh_simd_approx5, math_approx::tanh<5>)
76 | 
77 | BENCHMARK_MAIN();
78 | 


--------------------------------------------------------------------------------
/tools/bench/inverse_hyperbolic_trig_bench.cpp:
--------------------------------------------------------------------------------
  1 | #include <math_approx/math_approx.hpp>
  2 | #include <benchmark/benchmark.h>
  3 | 
  4 | static constexpr size_t N = 2000;
  5 | const auto data_asinh = []
  6 | {
  7 |     // N inputs starting at -10 and advancing by 20 / N (so 10 itself is never reached)
  8 |     std::vector<float> samples (N, 0.0f);
  9 |     for (size_t k = 0; k < N; ++k)
 10 |         samples[k] = -10.0f + 20.0f * static_cast<float> (k) / static_cast<float> (N);
 11 |     return samples;
 12 | }();
 13 | 
 14 | const auto data_acosh = []
 15 | {
 16 |     std::vector<float> samples (N, 0.0f); // N inputs: 1, 1 + 9 / N, ... (10 itself is never reached)
 17 | 
 18 |     for (size_t k = 0; k < N; ++k)
 19 |         samples[k] = 1.0f + 9.0f * static_cast<float> (k) / static_cast<float> (N);
 20 |     return samples;
 21 | }();
 22 | 
 23 | const auto data_atanh = []
 24 | {
 25 |     std::vector<float> samples (N, 0.0f); // N inputs: -1, -1 + 2 / N, ... (1 itself is never reached)
 26 | 
 27 |     for (size_t k = 0; k < N; ++k)
 28 |         samples[k] = -1.0f + 2.0f * static_cast<float> (k) / static_cast<float> (N);
 29 |     return samples;
 30 | }();
 31 | // INV_HTRIG_BENCH (name, func, data): defines and registers benchmark `name`, which calls `func` on every element of `data`.
 32 | #define INV_HTRIG_BENCH(name, func, data) \
 33 |     void name (benchmark::State& state) \
 34 |     { \
 35 |         for (auto _ : state) \
 36 |         { \
 37 |             for (auto& input : data) \
 38 |             { \
 39 |                 auto output = func (input); \
 40 |                 benchmark::DoNotOptimize (output); \
 41 |             } \
 42 |         } \
 43 |     } \
 44 |     BENCHMARK (name);
 45 | 
 46 | INV_HTRIG_BENCH (asinh_std, std::asinh, data_asinh)
 47 | INV_HTRIG_BENCH (asinh_approx7, math_approx::asinh<7>, data_asinh)
 48 | INV_HTRIG_BENCH (asinh_approx6, math_approx::asinh<6>, data_asinh)
 49 | INV_HTRIG_BENCH (asinh_approx5, math_approx::asinh<5>, data_asinh)
 50 | INV_HTRIG_BENCH (asinh_approx4, math_approx::asinh<4>, data_asinh)
 51 | INV_HTRIG_BENCH (asinh_approx3, math_approx::asinh<3>, data_asinh)
 52 | 
 53 | INV_HTRIG_BENCH (acosh_std, std::acosh, data_acosh)
 54 | INV_HTRIG_BENCH (acosh_approx6, math_approx::acosh<6>, data_acosh)
 55 | INV_HTRIG_BENCH (acosh_approx5, math_approx::acosh<5>, data_acosh)
 56 | INV_HTRIG_BENCH (acosh_approx4, math_approx::acosh<4>, data_acosh)
 57 | INV_HTRIG_BENCH (acosh_approx3, math_approx::acosh<3>, data_acosh)
 58 | 
 59 | INV_HTRIG_BENCH (atanh_std, std::atanh, data_atanh)
 60 | INV_HTRIG_BENCH (atanh_approx6, math_approx::atanh<6>, data_atanh)
 61 | INV_HTRIG_BENCH (atanh_approx5, math_approx::atanh<5>, data_atanh)
 62 | INV_HTRIG_BENCH (atanh_approx4, math_approx::atanh<4>, data_atanh)
 63 | INV_HTRIG_BENCH (atanh_approx3, math_approx::atanh<3>, data_atanh)
 64 | // INV_HTRIG_SIMD_BENCH (name, func, data): defines and registers benchmark `name`, which calls `func` on xsimd::broadcast of every element of `data`.
 65 | #define INV_HTRIG_SIMD_BENCH(name, func, data) \
 66 |     void name (benchmark::State& state) \
 67 |     { \
 68 |         for (auto _ : state) \
 69 |         { \
 70 |             for (auto& input : data) \
 71 |             { \
 72 |                 auto output = func (xsimd::broadcast (input)); \
 73 |                 static_assert (std::is_same_v<xsimd::batch<float>, decltype (output)>); \
 74 |                 benchmark::DoNotOptimize (output); \
 75 |             } \
 76 |         } \
 77 |     } \
 78 |     BENCHMARK (name);
 79 | 
 80 | INV_HTRIG_SIMD_BENCH (asinh_xsimd, xsimd::asinh, data_asinh)
 81 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx7, math_approx::asinh<7>, data_asinh)
 82 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx6, math_approx::asinh<6>, data_asinh)
 83 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx5, math_approx::asinh<5>, data_asinh)
 84 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx4, math_approx::asinh<4>, data_asinh)
 85 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx3, math_approx::asinh<3>, data_asinh)
 86 | 
 87 | INV_HTRIG_SIMD_BENCH (acosh_xsimd, xsimd::acosh, data_acosh)
 88 | INV_HTRIG_SIMD_BENCH (acosh_simd_approx6, math_approx::acosh<6>, data_acosh)
 89 | INV_HTRIG_SIMD_BENCH (acosh_simd_approx5, math_approx::acosh<5>, data_acosh)
 90 | INV_HTRIG_SIMD_BENCH (acosh_simd_approx4, math_approx::acosh<4>, data_acosh)
 91 | INV_HTRIG_SIMD_BENCH (acosh_simd_approx3, math_approx::acosh<3>, data_acosh)
 92 | 
 93 | INV_HTRIG_SIMD_BENCH (atanh_xsimd, xsimd::atanh, data_atanh)
 94 | INV_HTRIG_SIMD_BENCH (atanh_simd_approx6, math_approx::atanh<6>, data_atanh)
 95 | INV_HTRIG_SIMD_BENCH (atanh_simd_approx5, math_approx::atanh<5>, data_atanh)
 96 | INV_HTRIG_SIMD_BENCH (atanh_simd_approx4, math_approx::atanh<4>, data_atanh)
 97 | INV_HTRIG_SIMD_BENCH (atanh_simd_approx3, math_approx::atanh<3>, data_atanh)
 98 | 
 99 | BENCHMARK_MAIN();
100 | 


--------------------------------------------------------------------------------
/tools/bench/inverse_trig_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | 
 4 | static constexpr size_t N = 1000;
 5 | const auto data = []
 6 | {
 7 |     // N inputs starting at -1 and advancing by 2 / N (so 1 itself is never reached)
 8 |     std::vector<float> samples (N, 0.0f);
 9 |     for (size_t k = 0; k < N; ++k)
10 |         samples[k] = -1.0f + 2.0f * static_cast<float> (k) / static_cast<float> (N);
11 |     return samples;
12 | }();
13 | // TRIG_BENCH (name, func): defines and registers benchmark `name`, which calls `func` on every float in `data`.
14 | #define TRIG_BENCH(name, func) \
15 |     void name (benchmark::State& state) \
16 |     { \
17 |         for (auto _ : state) \
18 |         { \
19 |             for (auto& input : data) \
20 |             { \
21 |                 auto output = func (input); \
22 |                 benchmark::DoNotOptimize (output); \
23 |             } \
24 |         } \
25 |     } \
26 |     BENCHMARK (name);
27 | 
28 | TRIG_BENCH (asin_std, std::asin)
29 | TRIG_BENCH (asin_approx4, math_approx::asin<4>)
30 | TRIG_BENCH (asin_approx3, math_approx::asin<3>)
31 | TRIG_BENCH (asin_approx2, math_approx::asin<2>)
32 | TRIG_BENCH (asin_approx1, math_approx::asin<1>)
33 | 
34 | TRIG_BENCH (acos_std, std::acos)
35 | TRIG_BENCH (acos_approx5, math_approx::acos<5>)
36 | TRIG_BENCH (acos_approx4, math_approx::acos<4>)
37 | TRIG_BENCH (acos_approx3, math_approx::acos<3>)
38 | TRIG_BENCH (acos_approx2, math_approx::acos<2>)
39 | TRIG_BENCH (acos_approx1, math_approx::acos<1>)
40 | 
41 | TRIG_BENCH (atan_std, std::atan)
42 | TRIG_BENCH (atan_approx7, math_approx::atan<7>)
43 | TRIG_BENCH (atan_approx5, math_approx::atan<5>)
44 | TRIG_BENCH (atan_approx4, math_approx::atan<4>)
45 | // TRIG_SIMD_BENCH (name, func): defines and registers benchmark `name`, which calls `func` on xsimd::broadcast of every float in `data`.
46 | #define TRIG_SIMD_BENCH(name, func) \
47 |     void name (benchmark::State& state) \
48 |     { \
49 |         for (auto _ : state) \
50 |         { \
51 |             for (auto& input : data) \
52 |             { \
53 |                 auto output = func (xsimd::broadcast (input)); \
54 |                 static_assert (std::is_same_v<xsimd::batch<float>, decltype (output)>); \
55 |                 benchmark::DoNotOptimize (output); \
56 |             } \
57 |         } \
58 |     } \
59 |     BENCHMARK (name);
60 | 
61 | TRIG_SIMD_BENCH (asin_xsimd, xsimd::asin)
62 | TRIG_SIMD_BENCH (asin_simd_approx4, math_approx::asin<4>)
63 | TRIG_SIMD_BENCH (asin_simd_approx3, math_approx::asin<3>)
64 | TRIG_SIMD_BENCH (asin_simd_approx2, math_approx::asin<2>)
65 | TRIG_SIMD_BENCH (asin_simd_approx1, math_approx::asin<1>)
66 | 
67 | TRIG_SIMD_BENCH (acos_xsimd, xsimd::acos)
68 | TRIG_SIMD_BENCH (acos_simd_approx5, math_approx::acos<5>)
69 | TRIG_SIMD_BENCH (acos_simd_approx4, math_approx::acos<4>)
70 | TRIG_SIMD_BENCH (acos_simd_approx3, math_approx::acos<3>)
71 | TRIG_SIMD_BENCH (acos_simd_approx2, math_approx::acos<2>)
72 | TRIG_SIMD_BENCH (acos_simd_approx1, math_approx::acos<1>)
73 | 
74 | TRIG_SIMD_BENCH (atan_xsimd, xsimd::atan)
75 | TRIG_SIMD_BENCH (atan_simd_approx7, math_approx::atan<7>)
76 | TRIG_SIMD_BENCH (atan_simd_approx5, math_approx::atan<5>)
77 | TRIG_SIMD_BENCH (atan_simd_approx4, math_approx::atan<4>)
78 | 
79 | BENCHMARK_MAIN();
80 | 


--------------------------------------------------------------------------------
/tools/bench/log_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | 
 4 | static constexpr size_t N = 2000;
 5 | const auto data = []
 6 | {
 7 |     // N strictly positive inputs in [0.01, 10.01): the logarithms benchmarked here are only defined for x > 0
 8 |     std::vector<float> x (N, 0.0f);
 9 |     for (size_t i = 0; i < N; ++i)
10 |         x[i] = 0.01f + 10.0f * (float) i / (float) N;
11 |     return x;
12 | }();
13 | // LOG_BENCH (name, func): defines and registers benchmark `name`, which calls `func` on every float in `data`.
14 | #define LOG_BENCH(name, func) \
15 |     void name (benchmark::State& state) \
16 |     { \
17 |         for (auto _ : state) \
18 |         { \
19 |             for (auto& input : data) \
20 |             { \
21 |                 auto output = func (input); \
22 |                 benchmark::DoNotOptimize (output); \
23 |             } \
24 |         } \
25 |     } \
26 |     BENCHMARK (name);
27 | LOG_BENCH (log_std, std::log)
28 | LOG_BENCH (log_approx6, math_approx::log<6>)
29 | LOG_BENCH (log_approx5, math_approx::log<5>)
30 | LOG_BENCH (log_approx4, math_approx::log<4>)
31 | LOG_BENCH (log_approx3, math_approx::log<3>)
32 | 
33 | LOG_BENCH (log2_std, std::log2)
34 | LOG_BENCH (log2_approx6, math_approx::log2<6>)
35 | LOG_BENCH (log2_approx5, math_approx::log2<5>)
36 | LOG_BENCH (log2_approx4, math_approx::log2<4>)
37 | LOG_BENCH (log2_approx3, math_approx::log2<3>)
38 | 
39 | LOG_BENCH (log10_std, std::log10)
40 | LOG_BENCH (log10_approx6, math_approx::log10<6>)
41 | LOG_BENCH (log10_approx5, math_approx::log10<5>)
42 | LOG_BENCH (log10_approx4, math_approx::log10<4>)
43 | LOG_BENCH (log10_approx3, math_approx::log10<3>)
44 | // LOG_SIMD_BENCH (name, func): defines and registers benchmark `name`, which calls `func` on xsimd::broadcast of every float in `data`.
45 | #define LOG_SIMD_BENCH(name, func) \
46 |     void name (benchmark::State& state) \
47 |     { \
48 |         for (auto _ : state) \
49 |         { \
50 |             for (auto& input : data) \
51 |             { \
52 |                 auto output = func (xsimd::broadcast (input)); \
53 |                 static_assert (std::is_same_v<xsimd::batch<float>, decltype (output)>); \
54 |                 benchmark::DoNotOptimize (output); \
55 |             } \
56 |         } \
57 |     } \
58 |     BENCHMARK (name);
59 | LOG_SIMD_BENCH (log_xsimd, xsimd::log)
60 | LOG_SIMD_BENCH (log_simd_approx6, math_approx::log<6>)
61 | LOG_SIMD_BENCH (log_simd_approx5, math_approx::log<5>)
62 | LOG_SIMD_BENCH (log_simd_approx4, math_approx::log<4>)
63 | LOG_SIMD_BENCH (log_simd_approx3, math_approx::log<3>)
64 | 
65 | LOG_SIMD_BENCH (log2_xsimd, xsimd::log2)
66 | LOG_SIMD_BENCH (log2_simd_approx6, math_approx::log2<6>)
67 | LOG_SIMD_BENCH (log2_simd_approx5, math_approx::log2<5>)
68 | LOG_SIMD_BENCH (log2_simd_approx4, math_approx::log2<4>)
69 | LOG_SIMD_BENCH (log2_simd_approx3, math_approx::log2<3>)
70 | 
71 | LOG_SIMD_BENCH (log10_xsimd, xsimd::log10)
72 | LOG_SIMD_BENCH (log10_simd_approx6, math_approx::log10<6>)
73 | LOG_SIMD_BENCH (log10_simd_approx5, math_approx::log10<5>)
74 | LOG_SIMD_BENCH (log10_simd_approx4, math_approx::log10<4>)
75 | LOG_SIMD_BENCH (log10_simd_approx3, math_approx::log10<3>)
76 | 
77 | BENCHMARK_MAIN();
78 | 


--------------------------------------------------------------------------------
/tools/bench/polylog_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | #include "../test/src/reference/polylogarithm.hpp"
 4 | 
 5 | static constexpr size_t N = 2000;
 6 | const auto data = []
 7 | {
 8 |     // N inputs starting at -10 and advancing by 20 / N (so 10 itself is never reached)
 9 |     std::vector<float> samples (N, 0.0f);
10 |     for (size_t k = 0; k < N; ++k)
11 |         samples[k] = -10.0f + 20.0f * static_cast<float> (k) / static_cast<float> (N);
12 |     return samples;
13 | }();
14 | 
15 | #define POLYLOG_BENCH(name, func) /* defines benchmark function `name` that calls `func` on every element of `data`, then registers it */ \
16 | void name (benchmark::State& bench_state) \
17 | { \
18 |     for (auto _ : bench_state) \
19 |     { \
20 |         for (const auto& sample : data) \
21 |         { \
22 |             auto result = func (sample); \
23 |             benchmark::DoNotOptimize (result); \
24 |         } \
25 |     } \
26 | } \
27 | BENCHMARK (name);
28 | POLYLOG_BENCH (li2_ref, polylogarithm::Li2) // reference implementation pulled in from reference/polylogarithm.hpp
29 | POLYLOG_BENCH (li2_approx3_log6, (math_approx::li2<3,6>)) // extra parentheses keep the comma in <3,6> from splitting the macro argument
30 | POLYLOG_BENCH (li2_approx3, math_approx::li2<3>) // NOTE(review): template argument is presumably the approximation order — confirm in polylogarithm_approx.hpp
31 | POLYLOG_BENCH (li2_approx2, math_approx::li2<2>)
32 | POLYLOG_BENCH (li2_approx1, math_approx::li2<1>)
33 | 
34 | #define POLYLOG_SIMD_BENCH(name, func) /* defines benchmark function `name` that runs `func` on each data sample broadcast to an xsimd batch, then registers it */ \
35 | void name (benchmark::State& bench_state) \
36 | { \
37 |     for (auto _ : bench_state) \
38 |     { \
39 |         for (const auto& sample : data) \
40 |         { \
41 |             auto result = func (xsimd::broadcast (sample)); \
42 |             static_assert (std::is_same_v<xsimd::batch<float>, decltype (result)>); \
43 |             benchmark::DoNotOptimize (result); \
44 |         } \
45 |     } \
46 | } \
47 | BENCHMARK (name);
48 | POLYLOG_SIMD_BENCH (li2_simd_approx3_log6, (math_approx::li2<3,6>)) // extra parentheses keep the comma in <3,6> from splitting the macro argument
49 | POLYLOG_SIMD_BENCH (li2_simd_approx3, math_approx::li2<3>) // same li2 variants as the scalar benchmarks, fed xsimd batches
50 | POLYLOG_SIMD_BENCH (li2_simd_approx2, math_approx::li2<2>)
51 | POLYLOG_SIMD_BENCH (li2_simd_approx1, math_approx::li2<1>)
52 | 
53 | BENCHMARK_MAIN(); // Google Benchmark macro that supplies the program's main()
54 | 


--------------------------------------------------------------------------------
/tools/bench/pow_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | 
 4 | static constexpr size_t N = 2000; // number of benchmark input samples
 5 | const auto data = []
 6 | {
 7 |     // evenly spaced inputs: -10 + 20 * idx / N, for idx in [0, N)
 8 |     std::vector<float> samples (N, 0.0f);
 9 |     for (size_t idx = 0; idx < N; ++idx)
10 |         samples[idx] = -10.0f + 20.0f * static_cast<float> (idx) / static_cast<float> (N);
11 |     return samples;
12 | }();
13 | 
14 | #define POW_BENCH(name, func) /* defines benchmark function `name` that calls `func` on every element of `data`, then registers it */ \
15 | void name (benchmark::State& bench_state) \
16 | { \
17 |     for (auto _ : bench_state) \
18 |     { \
19 |         for (const auto& sample : data) \
20 |         { \
21 |             auto result = func (sample); \
22 |             benchmark::DoNotOptimize (result); \
23 |         } \
24 |     } \
25 | } \
26 | BENCHMARK (name);
27 | POW_BENCH (exp_std, std::exp) // standard-library exp, timed alongside the approximations below
28 | POW_BENCH (exp_approx6, math_approx::exp<6>) // NOTE(review): template argument is presumably the approximation order — confirm in pow_approx.hpp
29 | POW_BENCH (exp_approx5, math_approx::exp<5>)
30 | POW_BENCH (exp_approx4, math_approx::exp<4>)
31 | POW_BENCH (exp_approx3, math_approx::exp<3>)
32 | 
33 | POW_BENCH (exp2_std, std::exp2) // standard-library exp2
34 | POW_BENCH (exp2_approx6, math_approx::exp2<6>)
35 | POW_BENCH (exp2_approx5, math_approx::exp2<5>)
36 | POW_BENCH (exp2_approx4, math_approx::exp2<4>)
37 | POW_BENCH (exp2_approx3, math_approx::exp2<3>)
38 | 
39 | auto stdpow_exp10 (float x) -> float // computes 10^x through std::pow
40 | {
41 |     return std::pow (10.0f, x);
42 | }
43 | POW_BENCH (exp10_std, stdpow_exp10) // standard-library comparison point goes through std::pow (10, x) via stdpow_exp10
44 | POW_BENCH (exp10_approx6, math_approx::exp10<6>)
45 | POW_BENCH (exp10_approx5, math_approx::exp10<5>)
46 | POW_BENCH (exp10_approx4, math_approx::exp10<4>)
47 | POW_BENCH (exp10_approx3, math_approx::exp10<3>)
48 | 
49 | #define POW_SIMD_BENCH(name, func) /* defines benchmark function `name` that runs `func` on each data sample broadcast to an xsimd batch, then registers it */ \
50 | void name (benchmark::State& bench_state) \
51 | { \
52 |     for (auto _ : bench_state) \
53 |     { \
54 |         for (const auto& sample : data) \
55 |         { \
56 |             auto result = func (xsimd::broadcast (sample)); \
57 |             static_assert (std::is_same_v<xsimd::batch<float>, decltype (result)>); \
58 |             benchmark::DoNotOptimize (result); \
59 |         } \
60 |     } \
61 | } \
62 | BENCHMARK (name);
63 | POW_SIMD_BENCH (exp_xsimd, xsimd::exp) // xsimd's own exp, timed alongside the approximations below
64 | POW_SIMD_BENCH (exp_simd_approx6, math_approx::exp<6>)
65 | POW_SIMD_BENCH (exp_simd_approx5, math_approx::exp<5>)
66 | POW_SIMD_BENCH (exp_simd_approx4, math_approx::exp<4>)
67 | POW_SIMD_BENCH (exp_simd_approx3, math_approx::exp<3>)
68 | 
69 | POW_SIMD_BENCH (exp2_xsimd, xsimd::exp2) // xsimd's own exp2
70 | POW_SIMD_BENCH (exp2_simd_approx6, math_approx::exp2<6>)
71 | POW_SIMD_BENCH (exp2_simd_approx5, math_approx::exp2<5>)
72 | POW_SIMD_BENCH (exp2_simd_approx4, math_approx::exp2<4>)
73 | POW_SIMD_BENCH (exp2_simd_approx3, math_approx::exp2<3>)
74 | 
75 | POW_SIMD_BENCH (exp10_xsimd, xsimd::exp10) // xsimd's own exp10
76 | POW_SIMD_BENCH (exp10_simd_approx6, math_approx::exp10<6>)
77 | POW_SIMD_BENCH (exp10_simd_approx5, math_approx::exp10<5>)
78 | POW_SIMD_BENCH (exp10_simd_approx4, math_approx::exp10<4>)
79 | POW_SIMD_BENCH (exp10_simd_approx3, math_approx::exp10<3>)
80 | 
81 | BENCHMARK_MAIN(); // Google Benchmark macro that supplies the program's main()
82 | 


--------------------------------------------------------------------------------
/tools/bench/sigmoid_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | 
 4 | static constexpr size_t N = 2000; // number of benchmark input samples
 5 | const auto data = []
 6 | {
 7 |     // evenly spaced inputs: -10 + 20 * idx / N, for idx in [0, N)
 8 |     std::vector<float> samples (N, 0.0f);
 9 |     for (size_t idx = 0; idx < N; ++idx)
10 |         samples[idx] = -10.0f + 20.0f * static_cast<float> (idx) / static_cast<float> (N);
11 |     return samples;
12 | }();
13 | 
14 | #define SIGMOID_BENCH(name, func) /* defines benchmark function `name` that calls `func` on every element of `data`, then registers it */ \
15 | void name (benchmark::State& bench_state) \
16 | { \
17 |     for (auto _ : bench_state) \
18 |     { \
19 |         for (const auto& sample : data) \
20 |         { \
21 |             auto result = func (sample); \
22 |             benchmark::DoNotOptimize (result); \
23 |         } \
24 |     } \
25 | } \
26 | BENCHMARK (name);
27 | SIGMOID_BENCH (sigmoid_std, [] (auto x) { return 1.0f / (1.0f + std::exp (-x)); }) // comparison point: 1 / (1 + exp(-x)) built on std::exp
28 | SIGMOID_BENCH (sigmoid_approx9, math_approx::sigmoid<9>) // NOTE(review): template argument is presumably the approximation order — confirm in sigmoid_approx.hpp
29 | SIGMOID_BENCH (sigmoid_approx7, math_approx::sigmoid<7>)
30 | SIGMOID_BENCH (sigmoid_approx5, math_approx::sigmoid<5>)
31 | SIGMOID_BENCH (sigmoid_exp_approx6, math_approx::sigmoid_exp<6>) // sigmoid_exp is a separate math_approx entry point from sigmoid
32 | SIGMOID_BENCH (sigmoid_exp_approx5, math_approx::sigmoid_exp<5>)
33 | SIGMOID_BENCH (sigmoid_exp_approx4, math_approx::sigmoid_exp<4>)
34 | 
35 | #define SIGMOID_SIMD_BENCH(name, func) /* defines benchmark function `name` that runs `func` on each data sample broadcast to an xsimd batch, then registers it */ \
36 | void name (benchmark::State& bench_state) \
37 | { \
38 |     for (auto _ : bench_state) \
39 |     { \
40 |         for (const auto& sample : data) \
41 |         { \
42 |             auto result = func (xsimd::broadcast (sample)); \
43 |             static_assert (std::is_same_v<xsimd::batch<float>, decltype (result)>); \
44 |             benchmark::DoNotOptimize (result); \
45 |         } \
46 |     } \
47 | } \
48 | BENCHMARK (name);
49 | SIGMOID_SIMD_BENCH (sigmoid_xsimd, [] (auto x) { return 1.0f / (1.0f + xsimd::exp (-x)); }) // comparison point: 1 / (1 + exp(-x)) built on xsimd::exp
50 | SIGMOID_SIMD_BENCH (sigmoid_simd_approx9, math_approx::sigmoid<9>) // SIMD counterparts of the scalar sigmoid_approx{9,7,5} benchmarks; these must time sigmoid, not tanh
51 | SIGMOID_SIMD_BENCH (sigmoid_simd_approx7, math_approx::sigmoid<7>)
52 | SIGMOID_SIMD_BENCH (sigmoid_simd_approx5, math_approx::sigmoid<5>)
53 | SIGMOID_SIMD_BENCH (sigmoid_exp_simd_approx6, math_approx::sigmoid_exp<6>)
54 | SIGMOID_SIMD_BENCH (sigmoid_exp_simd_approx5, math_approx::sigmoid_exp<5>)
55 | SIGMOID_SIMD_BENCH (sigmoid_exp_simd_approx4, math_approx::sigmoid_exp<4>)
56 | 
57 | BENCHMARK_MAIN(); // Google Benchmark macro that supplies the program's main()
58 | 


--------------------------------------------------------------------------------
/tools/bench/trig_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | 
 4 | static constexpr size_t N = 2000; // number of benchmark input samples
 5 | const auto data = []
 6 | {
 7 |     // evenly spaced inputs: -10 + 20 * idx / N, for idx in [0, N)
 8 |     std::vector<float> samples (N, 0.0f);
 9 |     for (size_t idx = 0; idx < N; ++idx)
10 |         samples[idx] = -10.0f + 20.0f * static_cast<float> (idx) / static_cast<float> (N);
11 |     return samples;
12 | }();
13 | 
14 | #define TRIG_BENCH(name, func) /* defines benchmark function `name` that calls `func` on every element of `data`, then registers it */ \
15 | void name (benchmark::State& bench_state) \
16 | { \
17 |     for (auto _ : bench_state) \
18 |     { \
19 |         for (const auto& sample : data) \
20 |         { \
21 |             auto result = func (sample); \
22 |             benchmark::DoNotOptimize (result); \
23 |         } \
24 |     } \
25 | } \
26 | BENCHMARK (name);
27 | 
28 | TRIG_BENCH (cos_std, std::cos) // standard-library cos, timed alongside the approximations below
29 | TRIG_BENCH (cos_approx9, math_approx::cos<9>) // NOTE(review): template argument is presumably the approximation order — confirm in trig_approx.hpp
30 | TRIG_BENCH (cos_approx7, math_approx::cos<7>)
31 | TRIG_BENCH (cos_approx5, math_approx::cos<5>)
32 | 
33 | TRIG_BENCH (sin_std, std::sin) // standard-library sin
34 | TRIG_BENCH (sin_approx9, math_approx::sin<9>)
35 | TRIG_BENCH (sin_approx7, math_approx::sin<7>)
36 | TRIG_BENCH (sin_approx5, math_approx::sin<5>)
37 | 
38 | TRIG_BENCH (tan_std, std::tan) // standard-library tan
39 | TRIG_BENCH (tan_approx13, math_approx::tan<13>)
40 | TRIG_BENCH (tan_approx11, math_approx::tan<11>)
41 | TRIG_BENCH (tan_approx9, math_approx::tan<9>)
42 | TRIG_BENCH (tan_approx7, math_approx::tan<7>)
43 | TRIG_BENCH (tan_approx5, math_approx::tan<5>)
44 | TRIG_BENCH (tan_approx3, math_approx::tan<3>)
45 | 
46 | #define TRIG_SIMD_BENCH(name, func) /* defines benchmark function `name` that runs `func` on each data sample broadcast to an xsimd batch, then registers it */ \
47 | void name (benchmark::State& bench_state) \
48 | { \
49 |     for (auto _ : bench_state) \
50 |     { \
51 |         for (const auto& sample : data) \
52 |         { \
53 |             auto result = func (xsimd::broadcast (sample)); \
54 |             static_assert (std::is_same_v<xsimd::batch<float>, decltype (result)>); \
55 |             benchmark::DoNotOptimize (result); \
56 |         } \
57 |     } \
58 | } \
59 | BENCHMARK (name);
60 | 
61 | TRIG_SIMD_BENCH (sin_xsimd, xsimd::sin) // xsimd's own sin, timed alongside the approximations below
62 | TRIG_SIMD_BENCH (sin_simd_approx9, math_approx::sin<9>)
63 | TRIG_SIMD_BENCH (sin_simd_approx7, math_approx::sin<7>)
64 | TRIG_SIMD_BENCH (sin_simd_approx5, math_approx::sin<5>)
65 | 
66 | TRIG_SIMD_BENCH (cos_xsimd, xsimd::cos) // xsimd's own cos
67 | TRIG_SIMD_BENCH (cos_simd_approx9, math_approx::cos<9>)
68 | TRIG_SIMD_BENCH (cos_simd_approx7, math_approx::cos<7>)
69 | TRIG_SIMD_BENCH (cos_simd_approx5, math_approx::cos<5>)
70 | 
71 | TRIG_SIMD_BENCH (tan_xsimd, xsimd::tan) // xsimd's own tan
72 | TRIG_SIMD_BENCH (tan_simd_approx13, math_approx::tan<13>)
73 | TRIG_SIMD_BENCH (tan_simd_approx11, math_approx::tan<11>)
74 | TRIG_SIMD_BENCH (tan_simd_approx9, math_approx::tan<9>)
75 | TRIG_SIMD_BENCH (tan_simd_approx7, math_approx::tan<7>)
76 | TRIG_SIMD_BENCH (tan_simd_approx5, math_approx::tan<5>)
77 | TRIG_SIMD_BENCH (tan_simd_approx3, math_approx::tan<3>)
78 | 
79 | BENCHMARK_MAIN(); // Google Benchmark macro that supplies the program's main()
80 | 


--------------------------------------------------------------------------------
/tools/bench/trig_turns_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | #include "../test/src/reference/sincospi.hpp"
 4 | 
 5 | static constexpr size_t N = 2000; // number of benchmark input samples
 6 | const auto data = []
 7 | {
 8 |     // evenly spaced inputs: -10 + 20 * idx / N, for idx in [0, N)
 9 |     std::vector<float> samples (N, 0.0f);
10 |     for (size_t idx = 0; idx < N; ++idx)
11 |         samples[idx] = -10.0f + 20.0f * static_cast<float> (idx) / static_cast<float> (N);
12 |     return samples;
13 | }();
14 | 
15 | #define TRIG_BENCH(name, func) /* defines benchmark function `name` that calls `func` on every element of `data`, then registers it */ \
16 | void name (benchmark::State& bench_state) \
17 | { \
18 |     for (auto _ : bench_state) \
19 |     { \
20 |         for (const auto& sample : data) \
21 |         { \
22 |             auto result = func (sample); \
23 |             benchmark::DoNotOptimize (result); \
24 |         } \
25 |     } \
26 | } \
27 | BENCHMARK (name);
28 | 
29 | TRIG_BENCH (cos_std, std::cos) // standard-library cos applied to the same raw inputs as the turns variants below
30 | TRIG_BENCH (cos_ref, sincospi::cos2pi) // reference from reference/sincospi.hpp; NOTE(review): named cos_ref while the sine counterpart is sin_turns_ref
31 | TRIG_BENCH (cos_turns_approx11, math_approx::cos_turns<11>) // NOTE(review): template argument is presumably the approximation order — confirm in trig_approx.hpp
32 | TRIG_BENCH (cos_turns_approx9, math_approx::cos_turns<9>)
33 | TRIG_BENCH (cos_turns_approx7, math_approx::cos_turns<7>)
34 | TRIG_BENCH (cos_turns_approx5, math_approx::cos_turns<5>)
35 | 
36 | TRIG_BENCH (sin_std, std::sin) // standard-library sin applied to the same raw inputs as the turns variants below
37 | TRIG_BENCH (sin_turns_ref, sincospi::sin2pi) // reference from reference/sincospi.hpp
38 | TRIG_BENCH (sin_turns_approx11, math_approx::sin_turns<11>)
39 | TRIG_BENCH (sin_turns_approx9, math_approx::sin_turns<9>)
40 | TRIG_BENCH (sin_turns_approx7, math_approx::sin_turns<7>)
41 | TRIG_BENCH (sin_turns_approx5, math_approx::sin_turns<5>)
42 | 
43 | #define TRIG_SIMD_BENCH(name, func) /* defines benchmark function `name` that runs `func` on each data sample broadcast to an xsimd batch, then registers it */ \
44 | void name (benchmark::State& bench_state) \
45 | { \
46 |     for (auto _ : bench_state) \
47 |     { \
48 |         for (const auto& sample : data) \
49 |         { \
50 |             auto result = func (xsimd::broadcast (sample)); \
51 |             static_assert (std::is_same_v<xsimd::batch<float>, decltype (result)>); \
52 |             benchmark::DoNotOptimize (result); \
53 |         } \
54 |     } \
55 | } \
56 | BENCHMARK (name);
57 | 
58 | TRIG_SIMD_BENCH (sin_xsimd, xsimd::sin) // xsimd's own sin applied to the same raw inputs as the turns variants below
59 | TRIG_SIMD_BENCH (sin_turns_simd_approx11, math_approx::sin_turns<11>)
60 | TRIG_SIMD_BENCH (sin_turns_simd_approx9, math_approx::sin_turns<9>)
61 | TRIG_SIMD_BENCH (sin_turns_simd_approx7, math_approx::sin_turns<7>)
62 | TRIG_SIMD_BENCH (sin_turns_simd_approx5, math_approx::sin_turns<5>)
63 | 
64 | TRIG_SIMD_BENCH (cos_xsimd, xsimd::cos) // xsimd's own cos applied to the same raw inputs as the turns variants below
65 | TRIG_SIMD_BENCH (cos_turns_simd_approx11, math_approx::cos_turns<11>)
66 | TRIG_SIMD_BENCH (cos_turns_simd_approx9, math_approx::cos_turns<9>)
67 | TRIG_SIMD_BENCH (cos_turns_simd_approx7, math_approx::cos_turns<7>)
68 | TRIG_SIMD_BENCH (cos_turns_simd_approx5, math_approx::cos_turns<5>)
69 | 
70 | BENCHMARK_MAIN(); // Google Benchmark macro that supplies the program's main()
71 | 


--------------------------------------------------------------------------------
/tools/bench/wright_omega_bench.cpp:
--------------------------------------------------------------------------------
 1 | #include <math_approx/math_approx.hpp>
 2 | #include <benchmark/benchmark.h>
 3 | 
 4 | #include "../../test/src/reference/toms917.hpp"
 5 | 
 6 | static constexpr size_t N = 2000; // number of benchmark input samples
 7 | const auto data = []
 8 | {
 9 |     // evenly spaced inputs: -10 + 40 * idx / N, for idx in [0, N)
10 |     std::vector<float> samples (N, 0.0f);
11 |     for (size_t idx = 0; idx < N; ++idx)
12 |         samples[idx] = -10.0f + 40.0f * static_cast<float> (idx) / static_cast<float> (N);
13 |     return samples;
14 | }();
15 | 
16 | #define WO_BENCH(name, func) /* defines benchmark function `name` that calls `func` on every element of `data`, then registers it */ \
17 | void name (benchmark::State& bench_state) \
18 | { \
19 |     for (auto _ : bench_state) \
20 |     { \
21 |         for (const auto& sample : data) \
22 |         { \
23 |             auto result = func (sample); \
24 |             benchmark::DoNotOptimize (result); \
25 |         } \
26 |     } \
27 | } \
28 | BENCHMARK (name);
29 | WO_BENCH (wright_omega_toms917, toms917::wrightomega) // reference implementation pulled in from reference/toms917.hpp
30 | WO_BENCH (wright_omega_iter3_poly3_logexp5, (math_approx::wright_omega<3, 3, 5>)) // parentheses keep the template commas from splitting the macro argument
31 | WO_BENCH (wright_omega_iter3_poly3, (math_approx::wright_omega<3, 3>)) // NOTE(review): names suggest the template arguments are <iterations, polynomial order, log/exp order> — confirm in wright_omega_approx.hpp
32 | WO_BENCH (wright_omega_iter2_poly5, (math_approx::wright_omega<2, 5>))
33 | WO_BENCH (wright_omega_iter2_poly3, (math_approx::wright_omega<2, 3>))
34 | WO_BENCH (wright_omega_iter2_poly3_logexp3, (math_approx::wright_omega<2, 3, 3>))
35 | WO_BENCH (wright_omega_iter1_poly5, (math_approx::wright_omega<1, 5>))
36 | WO_BENCH (wright_omega_iter1_poly3, (math_approx::wright_omega<1, 3>))
37 | WO_BENCH (wright_omega_iter0_poly5, (math_approx::wright_omega<0, 5>))
38 | WO_BENCH (wright_omega_iter0_poly3, (math_approx::wright_omega<0, 3>))
39 | WO_BENCH (wright_omega_dangelo2, (math_approx::wright_omega_dangelo<2>)) // wright_omega_dangelo is a separate math_approx entry point with a single template argument
40 | WO_BENCH (wright_omega_dangelo1, (math_approx::wright_omega_dangelo<1>))
41 | WO_BENCH (wright_omega_dangelo0, (math_approx::wright_omega_dangelo<0>))
42 | 
43 | #define WO_SIMD_BENCH(name, func) /* defines benchmark function `name` that runs `func` on each data sample broadcast to an xsimd batch, then registers it */ \
44 | void name (benchmark::State& bench_state) \
45 | { \
46 |     for (auto _ : bench_state) \
47 |     { \
48 |         for (const auto& sample : data) \
49 |         { \
50 |             auto result = func (xsimd::broadcast (sample)); \
51 |             static_assert (std::is_same_v<xsimd::batch<float>, decltype (result)>); \
52 |             benchmark::DoNotOptimize (result); \
53 |         } \
54 |     } \
55 | } \
56 | BENCHMARK (name);
57 | WO_SIMD_BENCH (wright_omega_simd_iter3_poly3_logexp5, (math_approx::wright_omega<3, 3, 5>)) // same wright_omega variants as the scalar list; no SIMD entries for toms917 or wright_omega_dangelo
58 | WO_SIMD_BENCH (wright_omega_simd_iter3_poly3, (math_approx::wright_omega<3, 3>))
59 | WO_SIMD_BENCH (wright_omega_simd_iter2_poly5, (math_approx::wright_omega<2, 5>))
60 | WO_SIMD_BENCH (wright_omega_simd_iter2_poly3, (math_approx::wright_omega<2, 3>))
61 | WO_SIMD_BENCH (wright_omega_simd_iter2_poly3_logexp3, (math_approx::wright_omega<2, 3, 3>))
62 | WO_SIMD_BENCH (wright_omega_simd_iter1_poly5, (math_approx::wright_omega<1, 5>))
63 | WO_SIMD_BENCH (wright_omega_simd_iter1_poly3, (math_approx::wright_omega<1, 3>))
64 | WO_SIMD_BENCH (wright_omega_simd_iter0_poly5, (math_approx::wright_omega<0, 5>))
65 | WO_SIMD_BENCH (wright_omega_simd_iter0_poly3, (math_approx::wright_omega<0, 3>))
66 | 
67 | BENCHMARK_MAIN(); // Google Benchmark macro that supplies the program's main()
68 | 


--------------------------------------------------------------------------------
/tools/plotter/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if($ENV{CI}) # skip this directory when the CI environment variable evaluates true
 2 |     message(STATUS "Configuring in CI, skipping plotter tool...")
 3 |     return()
 4 | endif()
 5 | 
 6 | CPMAddPackage( # fetch matplotlib-cpp through CPM
 7 |     NAME matplotlib-cpp
 8 |     GIT_REPOSITORY https://github.com/jatinchowdhury18/matplotlib-cpp
 9 |     GIT_TAG main # NOTE(review): tracks a moving branch; pin a commit or tag for reproducible builds
10 | )
11 | 
12 | add_executable(math_approx_plotter plotter.cpp) # stand-alone plotting tool built from plotter.cpp
13 | target_link_libraries(math_approx_plotter PRIVATE matplotlib-cpp math_approx)
14 | target_compile_features(math_approx_plotter PUBLIC cxx_std_20) # plotter.cpp includes <span>, a C++20 header
15 | target_compile_definitions(math_approx_plotter PUBLIC _USE_MATH_DEFINES=1) # plotter.cpp uses M_PI, which MSVC only exposes with this define
16 | 


--------------------------------------------------------------------------------
/tools/plotter/plotter.cpp:
--------------------------------------------------------------------------------
 1 | #include <cmath>
 2 | #include <iostream>
 3 | #include <span>
 4 | #include <vector>
 5 | 
 6 | #include <plt/matplotlibcpp.h>
 7 | namespace plt = matplotlibcpp;
 8 | 
 9 | #include "../../test/src/reference/polylogarithm.hpp"
10 | #include "../../test/src/reference/toms917.hpp"
11 | #include "../../test/src/reference/sincospi.hpp"
12 | #include "../../test/src/test_helpers.hpp"
13 | #include <math_approx/math_approx.hpp>
14 | 
15 | template <typename F_Approx>
16 | void plot_error (std::span<const float> all_floats,
17 |                  std::span<const float> y_exact,
18 |                  F_Approx&& f_approx,
19 |                  const std::string& name)
20 | {
21 |     const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
22 |     const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
23 |     std::cout << "Max Error: " << test_helpers::abs_max<float> (error) << std::endl;
24 |     plt::named_plot<float, float> (name, all_floats, error);
25 | }
26 | 
27 | template <typename F_Approx>
28 | void plot_rel_error (std::span<const float> all_floats,
29 |                      std::span<const float> y_exact,
30 |                      F_Approx&& f_approx,
31 |                      const std::string& name)
32 | {
33 |     const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
34 |     const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
35 |     std::cout << "Max Relative Error: " << test_helpers::abs_max<float> (rel_error) << std::endl;
36 |     plt::named_plot<float, float> (name, all_floats, rel_error);
37 | }
38 | 
39 | template <typename F_Approx>
40 | void plot_ulp_error (std::span<const float> all_floats,
41 |                      std::span<const float> y_exact,
42 |                      F_Approx&& f_approx,
43 |                      const std::string& name)
44 | {
45 |     const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);
46 |     const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);
47 |     std::cout << "Max Relative Error: " << *std::max_element (ulp_error.begin(), ulp_error.end()) << std::endl;
48 |     plt::named_plot<float, float> (name, all_floats, std::vector<float> { ulp_error.begin(), ulp_error.end() });
49 | }
50 | 
51 | template <typename F> // evaluates f over all_floats and adds the resulting curve to the plot as `name`
52 | void plot_function (std::span<const float> all_floats,
53 |                     F&& f,
54 |                     const std::string& name)
55 | {
56 |     const auto y_values = test_helpers::compute_all (all_floats, f);
57 |     plt::named_plot<float, float> (name, all_floats, y_values);
58 | }
59 | 
60 | template <typename T> // reference logistic sigmoid: 1 / (1 + exp(-x)), built on std::exp
61 | T sigmoid_ref (T x)
62 | {
63 |     return static_cast<T> (1) / (static_cast<T> (1) + std::exp (-x));
64 | }
65 | 
66 | template <typename T> // sine of an angle given in turns: std::sin (2 * pi * x)
67 | T std_sin_turns (T x)
68 | {
69 |     return std::sin (static_cast<T> (2) * static_cast<T> (M_PI) * x);
70 | }
71 | 
72 | #define FLOAT_FUNC(func) [] (float x) { return func (x); } // wraps `func` in a capture-less lambda taking a float, so it can be passed as a single callable object
73 | 
74 | int main() // plots the ULP error of one approximation against a reference and shows the figure
75 | {
76 |     plt::figure();
77 |     const auto range = std::make_pair (-0.5f, 0.5f); // input interval, also used for the x-axis limits
78 |     static constexpr auto tol = 1.0e-3f; // forwarded to all_32_bit_floats; NOTE(review): exact meaning is defined in test_helpers.hpp — confirm
79 | 
80 |     const auto all_floats = test_helpers::all_32_bit_floats (range.first, range.second, tol);
81 |     const auto y_exact = test_helpers::compute_all<float> (all_floats, FLOAT_FUNC (sincospi::cos2pi)); // reference is cos2pi; switch to sincospi::sin2pi before re-enabling the sin_turns lines
82 |     // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::sin_turns<5>) ), "sint-5");
83 |     // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::sin_turns<7>) ), "sint-7");
84 |     // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::sin_turns<9>) ), "sint-9");
85 |     plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::cos_turns<11>) ), "cost-11"); // the only curve currently plotted
86 | 
87 |     plt::legend ({ { "loc", "upper right" } });
88 |     plt::xlim (range.first, range.second);
89 |     plt::grid (true);
90 |     plt::show();
91 | 
92 |     return 0;
93 | }
94 | 


--------------------------------------------------------------------------------