├── .clang-format ├── .github └── workflows │ └── run_tests.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake └── CPM.cmake ├── include └── math_approx │ ├── math_approx.hpp │ └── src │ ├── basic_math.hpp │ ├── hyperbolic_trig_approx.hpp │ ├── inverse_hyperbolic_trig_approx.hpp │ ├── inverse_trig_approx.hpp │ ├── log_approx.hpp │ ├── polylogarithm_approx.hpp │ ├── pow_approx.hpp │ ├── sigmoid_approx.hpp │ ├── trig_approx.hpp │ └── wright_omega_approx.hpp ├── test ├── CMakeLists.txt └── src │ ├── hyperbolic_trig_approx_test.cpp │ ├── inverse_hyperbolic_trig_approx_test.cpp │ ├── inverse_trig_approx_test.cpp │ ├── log_approx_test.cpp │ ├── polylog_approx_test.cpp │ ├── pow_approx_test.cpp │ ├── reference │ ├── polylogarithm.hpp │ ├── sincospi.hpp │ └── toms917.hpp │ ├── sigmoid_approx_test.cpp │ ├── test_helpers.hpp │ ├── trig_approx_test.cpp │ ├── trig_turns_approx_test.cpp │ └── wright_omega_approx_test.cpp └── tools ├── CMakeLists.txt ├── bench ├── CMakeLists.txt ├── hyperbolic_trig_bench.cpp ├── inverse_hyperbolic_trig_bench.cpp ├── inverse_trig_bench.cpp ├── log_bench.cpp ├── polylog_bench.cpp ├── pow_bench.cpp ├── sigmoid_bench.cpp ├── trig_bench.cpp ├── trig_turns_bench.cpp └── wright_omega_bench.cpp └── plotter ├── CMakeLists.txt └── plotter.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -4 3 | AlignAfterOpenBracket: Align 4 | AlignConsecutiveAssignments: false 5 | AlignConsecutiveDeclarations: false 6 | AlignEscapedNewlines: Left 7 | AlignOperands: Align 8 | AlignTrailingComments: false 9 | AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortBlocksOnASingleLine: Never 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: All 13 | AllowShortIfStatementsOnASingleLine: Never 14 | AllowShortLoopsOnASingleLine: false 15 | AlwaysBreakAfterDefinitionReturnType: None 16 | AlwaysBreakAfterReturnType: None 17 | 
AlwaysBreakBeforeMultilineStrings: false 18 | AlwaysBreakTemplateDeclarations: Yes 19 | BinPackArguments: false 20 | BinPackParameters: false 21 | BreakAfterJavaFieldAnnotations: false 22 | BreakBeforeBinaryOperators: NonAssignment 23 | BreakBeforeBraces: Allman 24 | BreakBeforeTernaryOperators: true 25 | BreakConstructorInitializersBeforeComma: false 26 | BreakStringLiterals: false 27 | ColumnLimit: 0 28 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 29 | ConstructorInitializerIndentWidth: 4 30 | ContinuationIndentWidth: 4 31 | Cpp11BracedListStyle: false 32 | DerivePointerAlignment: false 33 | DisableFormat: false 34 | ExperimentalAutoDetectBinPacking: false 35 | ForEachMacros: ['forEachXmlChildElement'] 36 | IndentCaseLabels: true 37 | IndentWidth: 4 38 | IndentWrappedFunctionNames: true 39 | KeepEmptyLinesAtTheStartOfBlocks: false 40 | Language: Cpp 41 | MaxEmptyLinesToKeep: 1 42 | NamespaceIndentation: Inner 43 | PointerAlignment: Left 44 | ReflowComments: false 45 | SortIncludes: true 46 | SpaceAfterCStyleCast: true 47 | SpaceAfterLogicalNot: true 48 | SpaceBeforeAssignmentOperators: true 49 | SpaceBeforeCpp11BracedList: true 50 | SpaceBeforeParens: NonEmptyParentheses 51 | SpaceInEmptyParentheses: false 52 | SpaceBeforeInheritanceColon: true 53 | SpacesInAngles: false 54 | SpacesInCStyleCastParentheses: false 55 | SpacesInContainerLiterals: true 56 | SpacesInParentheses: false 57 | SpacesInSquareBrackets: false 58 | Standard: "c++17" 59 | TabWidth: 4 60 | UseTab: Never 61 | --- 62 | Language: ObjC 63 | BasedOnStyle: Chromium 64 | AlignTrailingComments: true 65 | BreakBeforeBraces: Allman 66 | ColumnLimit: 0 67 | IndentWidth: 4 68 | KeepEmptyLinesAtTheStartOfBlocks: false 69 | ObjCSpaceAfterProperty: true 70 | ObjCSpaceBeforeProtocolList: true 71 | PointerAlignment: Left 72 | SpacesBeforeTrailingComments: 1 73 | TabWidth: 4 74 | UseTab: Never 75 | ... 
76 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | workflow_dispatch: 12 | 13 | 14 | jobs: 15 | build_and_test: 16 | name: "Run tests on ${{ matrix.os }}" 17 | runs-on: ${{ matrix.os }} 18 | strategy: 19 | fail-fast: false # show all errors for each platform (vs. cancel jobs on error) 20 | matrix: 21 | os: [ubuntu-latest, macos-13, windows-latest] 22 | 23 | steps: 24 | # - name: Install Linux Deps 25 | # if: runner.os == 'Linux' 26 | # run: | 27 | # sudo apt-get update 28 | # sudo apt install libasound2-dev libcurl4-openssl-dev libx11-dev libxinerama-dev libxext-dev libfreetype6-dev libwebkit2gtk-4.0-dev libglu1-mesa-dev libsamplerate-dev 29 | 30 | - name: Get latest CMake 31 | uses: lukka/get-cmake@latest 32 | 33 | - name: Checkout code 34 | uses: actions/checkout@v2 35 | 36 | - name: CMake Configure 37 | run: cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo 38 | 39 | - name: CMake Build 40 | run: cmake --build build --config RelWithDebInfo --parallel 41 | 42 | - name: CMake Test 43 | run: | 44 | ctest --test-dir build -C RelWithDebInfo --show-only 45 | ctest --test-dir build -C RelWithDebInfo -j2 --output-on-failure 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .vscode/ 3 | .focus-config 4 | *.raddbg_project 5 | 6 | build*/ 7 | *.zip 8 | 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(math_approx VERSION 1.0.0 LANGUAGES CXX) 3 | 4 | set(CMAKE_MODULE_PATH 
${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake/) 5 | 6 | if(PROJECT_IS_TOP_LEVEL) 7 | include(CPM) 8 | CPMAddPackage( 9 | NAME xsimd 10 | GIT_REPOSITORY https://github.com/xtensor-stack/xsimd 11 | GIT_TAG master 12 | ) 13 | endif() 14 | 15 | add_library(math_approx INTERFACE) 16 | target_include_directories(math_approx INTERFACE include) 17 | if(MSVC) 18 | target_compile_definitions(math_approx INTERFACE _USE_MATH_DEFINES=1) 19 | if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) 20 | target_compile_options(math_approx INTERFACE -msse4.2) 21 | else() 22 | target_compile_options(math_approx INTERFACE /arch:SSE4.2) 23 | endif() 24 | else() 25 | target_compile_options(math_approx INTERFACE -msse4.2 -Wno-unused-command-line-argument) 26 | endif() 27 | 28 | if (TARGET xsimd) 29 | message(STATUS "math_approx -- Linking with XSIMD...") 30 | target_link_libraries(math_approx INTERFACE xsimd) 31 | target_compile_definitions(math_approx INTERFACE MATH_APPROX_XSIMD_TARGET=1) 32 | endif() 33 | 34 | if(PROJECT_IS_TOP_LEVEL) 35 | include(CTest) 36 | add_subdirectory(test) 37 | add_subdirectory(tools) 38 | endif() 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, jatinchowdhury18 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. 
Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # math_approx 2 | 3 | [![Test](https://github.com/Chowdhury-DSP/math_approx/actions/workflows/run_tests.yml/badge.svg)](https://github.com/Chowdhury-DSP/math_approx/actions/workflows/run_tests.yml) 4 | 5 | 6 | `math_approx` is a C++ library for math approximations. 
7 | 8 | Currently supported: 9 | 10 | - sin/cos/tan 11 | - sin(2*pi*x)/cos(2*pi*x) 12 | - arcsin/arccos/arctan 13 | - exp/exp2/exp10/expm1 14 | - log/log2/log10/log1p 15 | - sinh/cosh/tanh 16 | - arcsinh/arccosh/arctanh 17 | - [Sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function) 18 | - [Wright-Omega function](https://en.wikipedia.org/wiki/Wright_omega_function) 19 | - [Dilogarithm function](https://en.wikipedia.org/wiki/Dilogarithm) 20 | 21 | At the moment, most of these implementations have been "good enough" 22 | for my own use cases (both in terms of performance and accuracy). That 23 | said, I definitely believe that it's possible to achieve better results 24 | for many of these functions. If you have ideas for improving these 25 | approximations, either by: 26 | - Modifying an approximation to achieve better accuracy with the same (or similar) performance 27 | - Modifying an approximation to achieve better performance with the same (or similar) accuracy 28 | 29 | then please get in touch with a GitHub issue or pull request! 30 | 31 | ## Usage 32 | 33 | ### With CMake 34 | 35 | `math_approx` is set up as a CMake `INTERFACE` library. To use it as 36 | such, you'll need to add the following to your `CMakeLists.txt` file: 37 | 38 | ```cmake 39 | add_subdirectory(math_approx) 40 | target_link_libraries(<target> math_approx) 41 | ``` 42 | 43 | And then in your C++ code, you can use the approximations like so: 44 | 45 | ```cpp 46 | #include <math_approx/math_approx.hpp> 47 | 48 | constexpr auto sin_half = math_approx::sin<5> (0.5f); 49 | ``` 50 | 51 | ### Without CMake 52 | 53 | To use `math_approx` without CMake, you'll need to add 54 | `/path/to/repo/include` to your include path. If you're 55 | compiling your program with MSVC, you may also need to 56 | add the pre-processor definition `_USE_MATH_DEFINES`. 57 | 58 | ## A few other thoughts 59 | 60 | ### Accuracy vs. 
Performance 61 | 62 | Most of the methods in this library are provided with template 63 | arguments which control the "order" of the approximation. The 64 | "order" typically refers to the order of a polynomial used in 65 | the approximation. In general, higher-order approximations will 66 | achieve greater accuracy, while taking longer to compute. 67 | 68 | ### Showing My Work 69 | 70 | Since the approximations in this library are primarily based on 71 | polynomial approximations, I've tried to provide the details 72 | for how those polynomials were derived, by providing a zipped 73 | folder containing the Mathematica notebooks that were used to 74 | derive the polynomials. Since not everyone has access to 75 | Mathematica, the folder also contains a PDF version of each 76 | notebook. At the moment, I'm planning to upload an updated 77 | copy of the zipped folder with each release of the library, 78 | but if I can think of a better method of distribution, that 79 | doesn't involve adding the notebook files to the repository 80 | directly, I'll do that instead. 81 | 82 | ### Measuring Accuracy 83 | 84 | This library uses three approaches for measuring accuracy: 85 | - Absolute error (`Error = |actual - approx|`) 86 | - Relative error (`Error = |(actual - approx) / actual|`) 87 | - [ULP Distance](https://en.wikipedia.org/wiki/Unit_in_the_last_place#:~:text=In%20computer%20science%20and%20numerical,of%20accuracy%20in%20numeric%20calculations.) 88 | 89 | ### Single vs. Double Precision 90 | 91 | At the moment, the approximations in this library have been 92 | primarily tested with single-precision floating-point numbers. 93 | It is possible (maybe even likely) that most of the approximations 94 | do not achieve sufficient accuracy for double-precision computations. 95 | 96 | ### C++ Standard 97 | 98 | The library has been mostly developed and tested with C++20, with 99 | a little bit of effort to provide compatibility with C++17. 
100 | Personally, I would rather not extend support to C++14 or earlier. 101 | 102 | ### SIMD 103 | 104 | These approximations are intended to work for both scalar floating-point 105 | data types, as well as SIMD floating-point data types. At the moment, 106 | the library is set up to be compatible with the [XSIMD library](https://github.com/xtensor-stack/xsimd). 107 | That said, I would like to make it as easy as possible to use this 108 | library with other SIMD libraries (or matrix math libraries), so if 109 | anyone has some suggestions, please let me know! 110 | 111 | ### Constexpr 112 | 113 | The majority of the approximations in this library are implemented 114 | so as to be constexpr-compatible. That said, there are some 115 | approximations that are only constexpr if the compiler supports 116 | `std::bit_cast` (typically C++20 and later), and some that cannot 117 | be made constexpr because they depend on `std::sqrt`. If someone 118 | knows of any portable constexpr-compatible implementations of these 119 | methods, I would be happy to add them to the library! 120 | 121 | ## License 122 | 123 | `math_approx` is open source, and is licensed under the 124 | BSD 3-clause license. 125 | 126 | Enjoy! 
127 | -------------------------------------------------------------------------------- /cmake/CPM.cmake: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | # 3 | # SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors 4 | 5 | set(CPM_DOWNLOAD_VERSION 0.40.2) 6 | set(CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d") 7 | 8 | if(CPM_SOURCE_CACHE) 9 | set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") 10 | elseif(DEFINED ENV{CPM_SOURCE_CACHE}) 11 | set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") 12 | else() 13 | set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") 14 | endif() 15 | 16 | # Expand relative path. This is important if the provided path contains a tilde (~) 17 | get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE) 18 | 19 | file(DOWNLOAD 20 | https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake 21 | ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM} 22 | ) 23 | 24 | include(${CPM_DOWNLOAD_LOCATION}) 25 | -------------------------------------------------------------------------------- /include/math_approx/math_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace math_approx 4 | { 5 | } 6 | 7 | #include "src/basic_math.hpp" 8 | 9 | #include "src/trig_approx.hpp" 10 | #include "src/inverse_trig_approx.hpp" 11 | #include "src/pow_approx.hpp" 12 | #include "src/log_approx.hpp" 13 | #include "src/hyperbolic_trig_approx.hpp" 14 | #include "src/inverse_hyperbolic_trig_approx.hpp" 15 | #include "src/sigmoid_approx.hpp" 16 | #include "src/wright_omega_approx.hpp" 17 | #include "src/polylogarithm_approx.hpp" 18 | 
-------------------------------------------------------------------------------- /include/math_approx/src/basic_math.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // If MATH_APPROX_XSIMD_TARGET is not defined 4 | // the user can still use XSIMD by manually including 5 | // it before including the math_approx header. 6 | #if MATH_APPROX_XSIMD_TARGET 7 | #include 8 | #endif 9 | 10 | #if ! defined(XSIMD_HPP) 11 | #include 12 | #endif 13 | 14 | #include 15 | #include 16 | 17 | namespace math_approx 18 | { 19 | template 20 | struct scalar_of 21 | { 22 | using type = T; 23 | }; 24 | 25 | /** 26 | * When T is a scalar floating-point type, scalar_of_t is T. 27 | * When T is a SIMD floating-point type, scalar_of_t is the corresponding scalar type. 28 | */ 29 | template 30 | using scalar_of_t = typename scalar_of::type; 31 | 32 | /** Inverse square root */ 33 | template 34 | T rsqrt (T x) 35 | { 36 | // @TODO: figure out a way that we can make this method constexpr 37 | 38 | // sqrtss followed by divss... this seems to measure a bit faster than the rsqrtss plus NR iteration below 39 | return (T) 1 / std::sqrt (x); 40 | 41 | // fast inverse square root (using rsqrtss hardware instruction), plus one Newton-Raphson iteration 42 | // auto r = xsimd::rsqrt (xsimd::broadcast (x)).get (0); 43 | // x *= r; 44 | // x *= r; 45 | // x += -3.0f; 46 | // r *= -0.5f; 47 | // return x * r; 48 | } 49 | 50 | /** Function interface for the ternary operator. */ 51 | template 52 | T select (bool q, T t, T f) 53 | { 54 | return q ? 
t : f; 55 | } 56 | 57 | #if defined(XSIMD_HPP) 58 | template 59 | struct scalar_of> 60 | { 61 | using type = T; 62 | }; 63 | 64 | /** Inverse square root */ 65 | template 66 | xsimd::batch rsqrt (xsimd::batch x) 67 | { 68 | using S = scalar_of_t; 69 | auto r = xsimd::rsqrt (x); 70 | x *= r; 71 | x *= r; 72 | x += (S) -3; 73 | r *= (S) -0.5; 74 | return x * r; 75 | } 76 | 77 | /** Function interface for the ternary operator. */ 78 | template 79 | xsimd::batch select (xsimd::batch_bool q, xsimd::batch t, xsimd::batch f) 80 | { 81 | return xsimd::select (q, t, f); 82 | } 83 | #endif 84 | 85 | #if ! __cpp_lib_bit_cast 86 | // bit_cast requirement. 87 | template 88 | using is_bitwise_castable = std::integral_constant::value && std::is_trivially_copyable::value>; 90 | 91 | // compiler support is needed for bitwise copy with constexpr. 92 | template 93 | inline typename std::enable_if::value, To>::type bit_cast (const From& from) noexcept 94 | { 95 | union U 96 | { 97 | U() {}; 98 | char storage[sizeof (To)] {}; 99 | typename std::remove_const::type dest; 100 | } u; // instead of To dest; because To doesn't require DefaultConstructible. 
101 | std::memcpy (&u.dest, &from, sizeof from); 102 | return u.dest; 103 | } 104 | #else 105 | using std::bit_cast; 106 | #endif 107 | } // namespace math_approx 108 | -------------------------------------------------------------------------------- /include/math_approx/src/hyperbolic_trig_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pow_approx.hpp" 4 | 5 | namespace math_approx 6 | { 7 | // ref: https://en.wikipedia.org/wiki/Hyperbolic_functions#Definitions 8 | // sinh = (e^(2x) - 1) / (2e^x), cosh = (e^(2x) + 1) / (2e^x) 9 | // let B = e^x, then sinh = (B^2 - 1) / (2B), cosh = (B^2 + 1) / (2B) 10 | // simplifying, we get: sinh = 0.5 (B - 1/B), cosh = 0.5 (B + 1/B) 11 | 12 | /** Approximation of sinh(x), using exp(x) internally */ 13 | template 14 | constexpr T sinh (T x) 15 | { 16 | using S = scalar_of_t; 17 | auto B = exp (x); 18 | auto Br = (S) 0.5 / B; 19 | B *= (S) 0.5; 20 | return B - Br; 21 | } 22 | 23 | /** Approximation of cosh(x), using exp(x) internally */ 24 | template 25 | constexpr T cosh (T x) 26 | { 27 | using S = scalar_of_t; 28 | auto B = exp (x); 29 | auto Br = (S) 0.5 / B; 30 | B *= (S) 0.5; 31 | return B + Br; 32 | } 33 | 34 | /** 35 | * Simultaneous approximation of sinh(x) and cosh(x), 36 | * using exp(x) internally. 37 | * 38 | * For more information see the comments above. 
39 | */ 40 | template 41 | constexpr auto sinh_cosh (T x) 42 | { 43 | using S = scalar_of_t; 44 | auto B = exp (x); 45 | auto Br = (S) 0.5 / B; 46 | B *= (S) 0.5; 47 | 48 | auto sinh = B - Br; 49 | auto cosh = B + Br; 50 | 51 | return std::make_pair (sinh, cosh); 52 | } 53 | 54 | namespace tanh_detail 55 | { 56 | // See notebooks/tanh_approx.nb for the derivation of these polynomials 57 | 58 | template 59 | constexpr T tanh_poly_11 (T x) 60 | { 61 | using S = scalar_of_t; 62 | const auto x_sq = x * x; 63 | const auto y_9_11 = (S) 2.63661358122e-6 + (S) 3.33765558362e-8 * x_sq; 64 | const auto y_7_9_11 = (S) 0.000199027336899 + y_9_11 * x_sq; 65 | const auto y_5_7_9_11 = (S) 0.00833223857843 + y_7_9_11 * x_sq; 66 | const auto y_3_5_7_9_11 = (S) 0.166667159320 + y_5_7_9_11 * x_sq; 67 | const auto y_1_3_5_7_9_11 = (S) 1 + y_3_5_7_9_11 * x_sq; 68 | return x * y_1_3_5_7_9_11; 69 | } 70 | 71 | template 72 | constexpr T tanh_poly_9 (T x) 73 | { 74 | using S = scalar_of_t; 75 | const auto x_sq = x * x; 76 | const auto y_7_9 = (S) 0.000192218110330 + (S) 3.54808622170e-6 * x_sq; 77 | const auto y_5_7_9 = (S) 0.00834777254865 + y_7_9 * x_sq; 78 | const auto y_3_5_7_9 = (S) 0.166658873283 + y_5_7_9 * x_sq; 79 | const auto y_1_3_5_7_9 = (S) 1 + y_3_5_7_9 * x_sq; 80 | return x * y_1_3_5_7_9; 81 | } 82 | 83 | template 84 | constexpr T tanh_poly_7 (T x) 85 | { 86 | using S = scalar_of_t; 87 | const auto x_sq = x * x; 88 | const auto y_5_7 = (S) 0.00818199927912 + (S) 0.000243153287690 * x_sq; 89 | const auto y_3_5_7 = (S) 0.166769941467 + y_5_7 * x_sq; 90 | const auto y_1_3_5_7 = (S) 1 + y_3_5_7 * x_sq; 91 | return x * y_1_3_5_7; 92 | } 93 | 94 | template 95 | constexpr T tanh_poly_5 (T x) 96 | { 97 | using S = scalar_of_t; 98 | const auto x_sq = x * x; 99 | const auto y_3_5 = (S) 0.165326984031 + (S) 0.00970240200826 * x_sq; 100 | const auto y_1_3_5 = (S) 1 + y_3_5 * x_sq; 101 | return x * y_1_3_5; 102 | } 103 | 104 | template 105 | constexpr T tanh_poly_3 (T x) 106 | { 107 | 
using S = scalar_of_t; 108 | const auto x_sq = x * x; 109 | const auto y_1_3 = (S) 1 + (S) 0.183428244899 * x_sq; 110 | return x * y_1_3; 111 | } 112 | } // namespace tanh_detail 113 | 114 | /** 115 | * Approximation of tanh(x), using tanh(x) ≈ p(x) / sqrt(p(x)^2 + 1), 116 | * where p(x) is an odd polynomial fit to minimize the maximum relative error. 117 | */ 118 | template 119 | T tanh (T x) 120 | { 121 | static_assert (order % 2 == 1 && order <= 11 && order >= 3, "Order must e an odd number within [3, 11]"); 122 | 123 | T x_poly {}; 124 | if constexpr (order == 11) 125 | x_poly = tanh_detail::tanh_poly_11 (x); 126 | else if constexpr (order == 9) 127 | x_poly = tanh_detail::tanh_poly_9 (x); 128 | else if constexpr (order == 7) 129 | x_poly = tanh_detail::tanh_poly_7 (x); 130 | else if constexpr (order == 5) 131 | x_poly = tanh_detail::tanh_poly_5 (x); 132 | else if constexpr (order == 3) 133 | x_poly = tanh_detail::tanh_poly_3 (x); 134 | 135 | using S = scalar_of_t; 136 | return x_poly * rsqrt (x_poly * x_poly + (S) 1); 137 | } 138 | } // namespace math_approx 139 | -------------------------------------------------------------------------------- /include/math_approx/src/inverse_hyperbolic_trig_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "basic_math.hpp" 4 | #include "log_approx.hpp" 5 | 6 | namespace math_approx 7 | { 8 | struct AsinhLog2Provider 9 | { 10 | // for polynomial derivations, see notebooks/asinh_approx.nb 11 | 12 | /** approximation for log2(x), optimized on the range [1, 2], to be used within an asinh(x) computation */ 13 | template 14 | static constexpr T log2_approx (T x) 15 | { 16 | static_assert (order >= 3 && order <= 5); 17 | using S = scalar_of_t; 18 | 19 | const auto x_sq = x * x; 20 | if constexpr (order == 3) 21 | { 22 | const auto x_2_3 = (S) -1.21535595794871 + (S) 0.194363894384581 * x; 23 | const auto x_0_1 = (S) -2.26452854958994 + (S) 3.28552061315407 * 
x; 24 | return x_0_1 + x_2_3 * x_sq; 25 | } 26 | else if constexpr (order == 4) 27 | { 28 | const auto x_3_4 = (S) 0.770443387059628 + (S) -0.102652345633016 * x; 29 | const auto x_1_2 = (S) 4.33013912645867 + (S) -2.39448588379361 * x; 30 | const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; 31 | return (S) -2.60344428409168 + x_1_2_3_4 * x; 32 | } 33 | else if constexpr (order == 5) 34 | { 35 | const auto x_4_5 = (S) -0.511946284688366 + (S) 0.0578217518982235 * x; 36 | const auto x_2_3 = (S) -3.94632584968643 + (S) 1.90796087279737 * x; 37 | const auto x_0_1 = (S) -2.87748189127908 + (S) 5.36997140095829 * x; 38 | const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; 39 | return x_0_1 + x_2_3_4_5 * x_sq; 40 | } 41 | else 42 | { 43 | return {}; 44 | } 45 | } 46 | }; 47 | 48 | /** 49 | * Approximation of asinh(x) in the full range, using identity 50 | * asinh(x) = log(x + sqrt(x^2 + 1)). 51 | * 52 | * Orders 6 and 7 use an additional Newton-Raphson iteration, 53 | * but for most cases the accuracy improvement is not worth 54 | * the additional cost (when compared to the performance and 55 | * accuracy achieved by the STL implementation). 56 | */ 57 | template 58 | constexpr T asinh (T x) 59 | { 60 | using S = scalar_of_t; 61 | using std::abs, std::sqrt; 62 | #if defined(XSIMD_HPP) 63 | using xsimd::abs, xsimd::sqrt; 64 | #endif 65 | 66 | const auto sign = select (x > (S) 0, (T) (S) 1, select (x < (S) 0, (T) (S) -1, (T) (S) 0)); 67 | x = abs (x); 68 | 69 | const auto log_arg = x + sqrt (x * x + (S) 1); 70 | auto y = log>, std::min (order, 5), false, AsinhLog2Provider> (log_arg); 71 | 72 | if constexpr (order > 5) 73 | { 74 | const auto exp_y = math_approx::exp (y); 75 | y -= (exp_y - log_arg) / exp_y; 76 | } 77 | 78 | return sign * y; 79 | } 80 | 81 | /** 82 | * Approximation of acosh(x) in the full range, using identity 83 | * acosh(x) = log(x + sqrt(x^2 - 1)). 
84 | */ 85 | template 86 | constexpr T acosh (T x) 87 | { 88 | using S = scalar_of_t; 89 | using std::sqrt; 90 | #if defined(XSIMD_HPP) 91 | using xsimd::sqrt; 92 | #endif 93 | 94 | const auto z1 = x + sqrt (x * x - (S) 1); 95 | return log (z1); 96 | } 97 | 98 | /** 99 | * Approximation of atanh(x), using identity 100 | * atanh(x) = (1/2) log((1 + x) / (1 - x)). 101 | */ 102 | template 103 | constexpr T atanh (T x) 104 | { 105 | using S = scalar_of_t; 106 | return (S) 0.5 * log (((S) 1 + x) / ((S) 1 - x)); 107 | } 108 | } // namespace math_approx 109 | -------------------------------------------------------------------------------- /include/math_approx/src/inverse_trig_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "basic_math.hpp" 4 | 5 | namespace math_approx 6 | { 7 | namespace inv_trig_detail 8 | { 9 | // for polynomial derivations, see notebooks/asin_acos_approx.nb 10 | 11 | template 12 | constexpr T asin_kernel (T x) 13 | { 14 | using S = scalar_of_t; 15 | static_assert (order >= 1 && order <= 4); 16 | 17 | if constexpr (order == 1) 18 | { 19 | return (S) 0.16443531037029196495 + x * (S) 0.097419577664394046979; 20 | } 21 | else if constexpr (order == 2) 22 | { 23 | return (S) 0.16687742065041710759 + x * ((S) 0.070980446338571381859 + x * (S) 0.066682760821292624831); 24 | } 25 | else if constexpr (order == 3) 26 | { 27 | return (S) 0.16665080061757006624 + x * ((S) 0.075508850204912977833 + x * ((S) 0.039376231206556484843 + x * (S) 0.051275338699694958389)); 28 | } 29 | else if constexpr (order == 4) 30 | { 31 | return (S) 0.16666803275183153521 + x * ((S) 0.074936964020844071266 + x * ((S) 0.045640288439217274741 + x * ((S) 0.023435504410713306478 + x * (S) 0.043323710842752508055))); 32 | } 33 | else 34 | { 35 | return {}; 36 | } 37 | } 38 | 39 | template 40 | constexpr T acos_kernel (T x) 41 | { 42 | using S = scalar_of_t; 43 | static_assert (order >= 1 && order <= 5); 44 | 
45 | if constexpr (order == 1) 46 | { 47 | return (S) 0.061454830783555181029 + x * (S) 0.50934149601134137697; 48 | } 49 | else if constexpr (order == 2) 50 | { 51 | return (S) 0.18188825560430002537 + x * ((S) -0.092825628092384385170 + x * (S) 0.48173369928298098719); 52 | } 53 | else if constexpr (order == 3) 54 | { 55 | return (S) 0.16480511788348814473 + x * ((S) 0.11286070199090997290 + x * ((S) -0.18795205899643871450 + x * (S) 0.48108256591693704385)); 56 | } 57 | else if constexpr (order == 4) 58 | { 59 | return (S) 0.16687235373875186628 + x * ((S) 0.068412956842158992310 + x * ((S) 0.11466969910945928879 + x * ((S) -0.27433862418620241774 + x * (S) 0.49517994129072917531))); 60 | } 61 | else if constexpr (order == 5) 62 | { 63 | return (S) 0.16664924406383360700 + x * ((S) 0.075837825275592588015 + x * ((S) 0.030665158374004904823 + x * ((S) 0.13572846625592635550 + x * ((S) -0.34609357317006372856 + x * (S) 0.50800920599560273061)))); 64 | } 65 | else 66 | { 67 | return {}; 68 | } 69 | } 70 | 71 | // for polynomial derivations, see notebooks/arctan_approx.nb 72 | 73 | template 74 | constexpr T atan_kernel (T x) 75 | { 76 | using S = scalar_of_t; 77 | static_assert (order >= 4 && order <= 7); 78 | 79 | if constexpr (order == 4) 80 | { 81 | const auto x_sq = x * x; 82 | const auto num = x + x_sq * (S) 0.498001992540; 83 | const auto den = (S) 1 + x * (S) 0.481844539675 + x_sq * (S) 0.425470835319; 84 | return num / den; 85 | } 86 | else if constexpr (order == 5 || order == 6) 87 | { 88 | const auto x_sq = x * x; 89 | const auto num = (S) 0.177801521472 + x * (S) 0.116983970701; 90 | const auto den = (S) 1 + x * (S) 0.174763903018 + x_sq * (S) 0.473808187566; 91 | return (x + x_sq * num) / den; 92 | } 93 | else if constexpr (order == 7) 94 | { 95 | const auto x_sq = x * x; 96 | const auto num = (S) 0.274959104817 + (S) 0.351814748865 * x + (S) -0.0395798531406 * x_sq; 97 | const auto den = (S) 1 + x * ((S) 0.275079063405 + x * ((S) 0.683311392128 + x * 
(S) 0.0624877111229)); 98 | return (x + x_sq * num) / den; 99 | } 100 | else 101 | { 102 | return {}; 103 | } 104 | } 105 | } // namespace inv_trig_detail 106 | 107 | /** 108 | * Approximation of asin(x) using asin(x) ≈ p(x^2) * x^3 + x for x in [0, 0.5], 109 | * and asin(x) ≈ pi/2 - 2 * (p(z) * z^(3/2) + z^(1/2)), with z = (1-x)/2, for x in [0.5, 1], 110 | * where p(x) is a polynomial fit to achieve the minimum absolute error. 111 | */ 112 | template 113 | T asin (T x) 114 | { 115 | using S = scalar_of_t; 116 | 117 | using std::abs, std::sqrt; 118 | #if defined(XSIMD_HPP) 119 | using xsimd::abs, xsimd::sqrt; 120 | #endif 121 | 122 | const auto abs_x = abs (x); 123 | 124 | const auto reflect = abs_x > (S) 0.5; 125 | auto z0 = select (reflect, (S) 0.5 * ((S) 1 - abs_x), abs_x * abs_x); 126 | 127 | auto x2 = select (reflect, sqrt (z0), abs_x); 128 | auto z1 = inv_trig_detail::asin_kernel (z0); 129 | 130 | auto z2 = z1 * (z0 * x2) + x2; 131 | auto res = select (reflect, (S) M_PI_2 - (z2 + z2), z2); 132 | return select (x > (S) 0, res, -res); 133 | } 134 | 135 | /** 136 | * Approximation of acos(x) using the same approach as asin(x), 137 | * but with a different polynomial fit. 138 | */ 139 | template 140 | T acos (T x) 141 | { 142 | using S = scalar_of_t; 143 | 144 | using std::abs, std::sqrt; 145 | #if defined(XSIMD_HPP) 146 | using xsimd::abs, xsimd::sqrt; 147 | #endif 148 | 149 | const auto abs_x = abs (x); 150 | 151 | const auto reflect = abs_x > (S) 0.5; 152 | auto z0 = select (reflect, (S) 0.5 * ((S) 1 - abs_x), abs_x * abs_x); 153 | 154 | auto x2 = select (reflect, sqrt (z0), abs_x); 155 | auto z1 = inv_trig_detail::acos_kernel (z0); 156 | 157 | auto z2 = z1 * (z0 * x2) + x2; 158 | auto res = select (reflect, (S) M_PI_2 - (z2 + z2), z2); 159 | return (S) M_PI_2 - select (x > (S) 0, res, -res); 160 | } 161 | 162 | /** 163 | * Approximation of atan(x) using a rational-function approximation of arctan(x) on [0, 1], 164 | * and arctan(x) = pi/2 - arctan(1/x) for x > 1. 
165 | */ 166 | template 167 | T atan (T x) 168 | { 169 | using S = scalar_of_t; 170 | 171 | using std::abs, std::sqrt; 172 | #if defined(XSIMD_HPP) 173 | using xsimd::abs, xsimd::sqrt; 174 | #endif 175 | 176 | const auto abs_x = abs (x); 177 | const auto reflect = abs_x > (S) 1; 178 | 179 | const auto z = select (reflect, (S) 1 / abs_x, abs_x); 180 | const auto atan_01 = inv_trig_detail::atan_kernel (z); 181 | 182 | const auto res = select (reflect, (S) M_PI_2 - atan_01, atan_01); 183 | return select (x > (S) 0, res, -res); 184 | } 185 | } // namespace math_approx 186 | -------------------------------------------------------------------------------- /include/math_approx/src/log_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "basic_math.hpp" 4 | #include "pow_approx.hpp" 5 | 6 | namespace math_approx 7 | { 8 | namespace log_detail 9 | { 10 | struct Log2Provider 11 | { 12 | // for polynomial derivations, see notebooks/log_approx.nb 13 | 14 | /** approximation for log2(x), optimized on the range [1, 2] */ 15 | template 16 | static constexpr T log2_approx (T x) 17 | { 18 | static_assert (order >= 3 && order <= 6); 19 | using S = scalar_of_t; 20 | 21 | const auto x_sq = x * x; 22 | if constexpr (C1_continuous) 23 | { 24 | if constexpr (order == 3) 25 | { 26 | const auto x_2_3 = (S) -1.09886528622 + (S) 0.164042561333 * x; 27 | const auto x_0_1 = (S) -2.21347520444 + (S) 3.14829792933 * x; 28 | return x_0_1 + x_2_3 * x_sq; 29 | } 30 | else if constexpr (order == 4) 31 | { 32 | const auto x_3_4 = (S) 0.671618567027 + (S) -0.0845960009489 * x; 33 | const auto x_1_2 = (S) 4.16344994072 + (S) -2.19861329856 * x; 34 | const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; 35 | return (S) -2.55185920824 + x_1_2_3_4 * x; 36 | } 37 | else if constexpr (order == 5) 38 | { 39 | const auto x_4_5 = (S) -0.432338320780 + (S) 0.0464481811023 * x; 40 | const auto x_2_3 = (S) -3.65368350361 + (S) 1.68976432066 * x; 41 
| const auto x_0_1 = (S) -2.82807214111 + (S) 5.17788146374 * x; 42 | const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; 43 | return x_0_1 + x_2_3_4_5 * x_sq; 44 | } 45 | else if constexpr (order == 6) 46 | { 47 | const auto x_5_6 = (S) 0.284794437502 + (S) -0.0265448504094 * x; 48 | const auto x_3_4 = (S) 3.38542517475 + (S) -1.31007090775 * x; 49 | const auto x_1_2 = (S) 6.19242937536 + (S) -5.46521465640 * x; 50 | const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; 51 | const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; 52 | return (S) -3.06081857306 + x_1_2_3_4_5_6 * x; 53 | } 54 | else 55 | { 56 | return {}; 57 | } 58 | } 59 | else 60 | { 61 | if constexpr (order == 3) 62 | { 63 | const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x; 64 | const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x; 65 | return x_0_1 + x_2_3 * x_sq; 66 | } 67 | else if constexpr (order == 4) 68 | { 69 | const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x; 70 | const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x; 71 | const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; 72 | return (S) -2.51982743265 + x_1_2_3_4 * x; 73 | } 74 | else if constexpr (order == 5) 75 | { 76 | const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x; 77 | const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x; 78 | const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x; 79 | const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; 80 | return x_0_1 + x_2_3_4_5 * x_sq; 81 | } 82 | else if constexpr (order == 6) 83 | { 84 | const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x; 85 | const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x; 86 | const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x; 87 | const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; 88 | const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; 89 | return (S) -3.04376925958 + x_1_2_3_4_5_6 * x; 90 | } 91 | else 92 | { 93 | return {}; 94 | } 95 | } 96 | } 97 | }; 98 | } 99 | 100 | #if 
defined(__GNUC__) 101 | #pragma GCC diagnostic push 102 | #pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning 103 | #pragma GCC diagnostic ignored "-Wuninitialized" 104 | #endif 105 | 106 | /** approximation for log(Base, x) (32-bit) */ 107 | template 108 | constexpr float log (float x) 109 | { 110 | const auto vi = bit_cast (x); 111 | const auto ex = vi & 0x7f800000; 112 | const auto e = (ex >> 23) - 127; 113 | const auto vfi = (vi - ex) | 0x3f800000; 114 | const auto vf = bit_cast (vfi); 115 | 116 | constexpr auto log2_base_r = 1.0f / Base::log2_base; 117 | return log2_base_r * ((float) e + Log2ProviderType::template log2_approx (vf)); 118 | } 119 | 120 | /** approximation for log(Base, x) (64-bit) */ 121 | template 122 | constexpr double log (double x) 123 | { 124 | const auto vi = bit_cast (x); 125 | const auto ex = vi & 0x7ff0000000000000; 126 | const auto e = (ex >> 52) - 1023; 127 | const auto vfi = (vi - ex) | 0x3ff0000000000000; 128 | const auto vf = bit_cast (vfi); 129 | 130 | constexpr auto log2_base_r = 1.0 / Base::log2_base; 131 | return log2_base_r * ((double) e + Log2ProviderType::template log2_approx (vf)); 132 | } 133 | 134 | #if defined(XSIMD_HPP) 135 | /** approximation for log(Base, x) (32-bit SIMD) */ 136 | template 137 | xsimd::batch log (xsimd::batch x) 138 | { 139 | const auto vi = xsimd::bit_cast> (x); 140 | const auto ex = vi & 0x7f800000; 141 | const auto e = (ex >> 23) - 127; 142 | const auto vfi = (vi - ex) | 0x3f800000; 143 | const auto vf = xsimd::bit_cast> (vfi); 144 | 145 | static constexpr auto log2_base_r = 1.0f / Base::log2_base; 146 | return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx, order, C1_continuous> (vf)); 147 | } 148 | 149 | /** approximation for log(Base, x) (64-bit SIMD) */ 150 | template 151 | xsimd::batch log (xsimd::batch x) 152 | { 153 | const auto vi = xsimd::bit_cast> (x); 154 | const auto ex = vi & 0x7ff0000000000000; 155 | const auto e = (ex 
>> 52) - 1023; 156 | const auto vfi = (vi - ex) | 0x3ff0000000000000; 157 | const auto vf = xsimd::bit_cast> (vfi); 158 | 159 | static constexpr auto log2_base_r = 1.0 / Base::log2_base; 160 | return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx, order, C1_continuous> (vf)); 161 | } 162 | #endif 163 | 164 | #if defined(__GNUC__) 165 | #pragma GCC diagnostic pop // end ignore strict-aliasing warnings 166 | #endif 167 | 168 | /** 169 | * Approximation of log(x), using 170 | * log(x) = (1 / log2(e)) * (Exponent(x) + log2(1 + Mantissa(x))) 171 | */ 172 | template 173 | constexpr T log (T x) 174 | { 175 | return log>, order, C1_continuous> (x); 176 | } 177 | 178 | /** 179 | * Approximation of log2(x), using 180 | * log2(x) = Exponent(x) + log2(1 + Mantissa(x)) 181 | */ 182 | template 183 | constexpr T log2 (T x) 184 | { 185 | return log>, order, C1_continuous> (x); 186 | } 187 | 188 | /** 189 | * Approximation of log10(x), using 190 | * log10(x) = (1 / log2(10)) * (Exponent(x) + log2(1 + Mantissa(x))) 191 | */ 192 | template 193 | constexpr T log10 (T x) 194 | { 195 | return log>, order, C1_continuous> (x); 196 | } 197 | 198 | /** Approximation of log(1 + x), using math_approx::log(x) */ 199 | template 200 | constexpr T log1p (T x) 201 | { 202 | return log>, order, C1_continuous> ((T) 1 + x); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /include/math_approx/src/polylogarithm_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "basic_math.hpp" 4 | 5 | namespace math_approx 6 | { 7 | /** 8 | * Approximation of the "dilogarithm" function for inputs 9 | * in the range [0, 1/2]. This method does not do any 10 | * bounds-checking. 11 | * 12 | * Orders higher than 3 are generally not recommended for 13 | * single-precision floating-point types, since they don't 14 | * improve the accuracy very much. 
15 | * 16 | * For derivations, see notebooks/li2_approx.nb 17 | */ 18 | template 19 | constexpr T li2_0_half (T x) 20 | { 21 | static_assert (order >= 1 && order <= 6); 22 | using S = scalar_of_t; 23 | 24 | if constexpr (order == 1) 25 | { 26 | const auto n_0 = (S) 0.996460629617; 27 | const auto d_0_1 = (S) 1 + (S) -0.288575624121 * x; 28 | return x * n_0 / d_0_1; 29 | } 30 | else if constexpr (order == 2) 31 | { 32 | const auto n_0_1 = (S) 0.999994847641 + (S) -0.546961998015 * x; 33 | const auto d_1_2 = (S) -0.797206910618 + (S) 0.0899936224040 * x; 34 | const auto d_0_1_2 = (S) 1 + d_1_2 * x; 35 | return x * n_0_1 / d_0_1_2; 36 | } 37 | else if constexpr (order == 3) 38 | { 39 | const auto x_sq = x * x; 40 | const auto n_0_2 = (S) 0.999999991192 + (S) 0.231155739205 * x_sq; 41 | const auto n_0_1_2 = n_0_2 + (S) -1.07612533343 * x; 42 | const auto d_2_3 = (S) 0.451592861555 + (S) -0.0281544399023 * x; 43 | const auto d_0_1 = (S) 1 + (S) -1.32612627824 * x; 44 | const auto d_0_1_2_3 = d_0_1 + d_2_3 * x_sq; 45 | return x * n_0_1_2 / d_0_1_2_3; 46 | } 47 | else if constexpr (order == 4) 48 | { 49 | const auto x_sq = x * x; 50 | const auto n_2_3 = (S) 0.74425269014090502911555775982556365472 + (S) -0.08749607277005140673532964399704145939 * x; 51 | const auto n_0_1 = (S) 0.99999999998544094594795118478024862055 + (S) -1.6098648159028159794757437744309391591 * x; 52 | const auto n_0_1_2_3 = n_0_1 + n_2_3 * x_sq; 53 | const auto d_3_4 = (S) -0.21787247785577362691148412819704459614 + (S) 0.00870385570778120787932426702624346169 * x; 54 | const auto d_1_2 = (S) -1.85986481869406218896935179306183665107 + (S) 1.09810787318601772062220747277929300408 * x; 55 | const auto d_1_2_3_4 = d_1_2 + d_3_4 * x_sq; 56 | const auto d_0_1_2_3_4 = (S) 1 + d_1_2_3_4 * x; 57 | return x * n_0_1_2_3 / d_0_1_2_3_4; 58 | } 59 | else if constexpr (order == 5) 60 | { 61 | const auto x_sq = x * x; 62 | 63 | const auto n_3_4 = (S) -0.41945653857264507277532555842378439927 + (S) 
0.03140351694981020435408321943912212079 * x; 64 | const auto n_1_2 = (S) -2.14843104749890205674150618938194330623 + (S) 1.54956546570292751217524363072830456069 * x; 65 | const auto n_1_2_3_4 = n_1_2 + n_3_4 * x_sq; 66 | const auto n_0_1_2_3_4 = (S) 0.99999999999997312289180148636206726177 + n_1_2_3_4 * x; 67 | 68 | const auto d_4_5 = (S) 0.09609912057603552016206051904306797162 + (S) -0.00269129500193871901659324657805482418 * x; 69 | const auto d_2_3 = (S) 2.03806211686824385201410542913121040892 + (S) -0.72497973694183708484311198715866984035 * x; 70 | const auto d_0_1 = (S) 1 + (S) -2.398431047506893407956406025441134862 * x; 71 | const auto d_2_3_4_5 = d_2_3 + d_4_5 * x_sq; 72 | const auto d_0_1_2_3_4_5 = d_0_1 + d_2_3_4_5 * x_sq; 73 | 74 | return x * n_0_1_2_3_4 / d_0_1_2_3_4_5; 75 | } 76 | else if constexpr (order == 6) 77 | { 78 | const auto x_sq = x * x; 79 | 80 | const auto n_4_5 = (S) 0.20885966267164674441979654645138181067 + (S) -0.01085968986663512120143497781484214416 * x; 81 | const auto n_2_3 = (S) 2.64771686149306717256638234054408732899 + (S) -1.15385196641292513334184445301529897694 * x; 82 | const auto n_0_1 = (S) 0.99999999999999995022522902211061062582 + (S) -2.6883902117841251600624689886592808124 * x; 83 | const auto n_2_3_4_5 = n_2_3 + n_4_5 * x_sq; 84 | const auto n_0_1_2_3_4_5 = n_0_1 + n_2_3_4_5 * x_sq; 85 | 86 | const auto d_5_6 = (S) -0.03980108270103465616851961097089502921 + (S) 0.00082742905522813187941384917520432493 * x; 87 | const auto d_3_4 = (S) -1.70766499097900947314107956633154245176 + (S) 0.41595826557420951684124942212799147948 * x; 88 | const auto d_1_2 = (S) -2.93839021178414636324893816529360171731 + (S) 3.27120330332951521662427278605230451458 * x; 89 | const auto d_3_4_5_6 = d_3_4 + d_5_6 * x_sq; 90 | const auto d_0_1_2 = (S) 1 + d_1_2 * x; 91 | const auto d_0_1_2_3_4_5_6 = d_0_1_2 + d_3_4_5_6 * x_sq * x; 92 | 93 | return x * n_0_1_2_3_4_5 / d_0_1_2_3_4_5_6; 94 | } 95 | else 96 | { 97 | return {}; 98 | } 99 | } 100 
| 101 | /** 102 | * Approximation of the "dilogarithm" function for all inputs. 103 | * 104 | * Orders higher than 3 are generally not recommended for 105 | * single-precision floating-point types, since they don't 106 | * improve the accuracy very much. 107 | */ 108 | template = 5), typename T> 109 | constexpr T li2 (T x) 110 | { 111 | const auto x_r = (T) 1 / x; 112 | const auto x_r1 = (T) 1 / (x - (T) 1); 113 | 114 | constexpr auto pisq_o_6 = (T) M_PI * (T) M_PI / (T) 6; 115 | constexpr auto pisq_o_3 = (T) M_PI * (T) M_PI / (T) 3; 116 | 117 | T y, r; 118 | bool sign = true; 119 | if (x < (T) -1) 120 | { 121 | y = -x_r1; 122 | const auto l = log ((T) 1 - x); 123 | r = -pisq_o_6 + l * ((T) 0.5 * l - log (-x)); 124 | } 125 | else if (x < (T) 0) 126 | { 127 | y = x * x_r1; 128 | const auto l = log ((T) 1 - x); 129 | r = (T) -0.5 * l * l; 130 | sign = false; 131 | } 132 | else if (x < (T) 0.5) 133 | { 134 | y = x; 135 | r = {}; 136 | } 137 | else if (x < (T) 1) 138 | { 139 | y = (T) 1 - x; 140 | r = pisq_o_6 - log (x) * log (y); 141 | sign = false; 142 | } 143 | else if (x < (T) 2) 144 | { 145 | y = (T) 1 - x_r; 146 | const auto l = log (x); 147 | r = pisq_o_6 - l * (log (y) + (T) 0.5 * l); 148 | } 149 | else 150 | { 151 | y = x_r; 152 | const auto l = log (x); 153 | r = pisq_o_3 - (T) 0.5 * l * l; 154 | sign = false; 155 | } 156 | 157 | const auto li2_reduce = li2_0_half (y); 158 | return r + select (sign, li2_reduce, -li2_reduce); 159 | } 160 | 161 | #if defined(XSIMD_HPP) 162 | /** 163 | * Approximation of the "dilogarithm" function for all inputs. 164 | * 165 | * Orders higher than 3 are generally not recommended for 166 | * single-precision floating-point types, since they don't 167 | * improve the accuracy very much. 
168 | */ 169 | template = 5), typename T> 170 | xsimd::batch li2 (const xsimd::batch& x) 171 | { 172 | // x < -1: 173 | // - log(-x) -> [1, inf] 174 | // - log(1-x) -> [2, inf] 175 | // x < 0: 176 | // - NOP 177 | // - log(1-x) -> [1, 2] 178 | // x < 1/2: 179 | // - NOP 180 | // - NOP 181 | // x < 1: 182 | // - log(x) -> [1/2, 1] 183 | // - log(1-x) -> [0, 1/2] 184 | // x < 2: 185 | // - log(x) -> [1, 2] 186 | // - log(1-1/x) -> [0, 1/2] 187 | // x >= 2: 188 | // - log(x) -> [2, inf] 189 | // - NOP 190 | 191 | const auto x_r = (T) 1 / x; 192 | const auto x_r1 = (T) 1 / (x - (T) 1); 193 | const auto log_arg1 = select (x < (T) -1, -x, select (x < (T) 0.5, xsimd::broadcast ((T) 1), x)); 194 | const auto log_arg2 = select (x < (T) 1, (T) 1 - x, (T) 1 - x_r); 195 | 196 | const auto log1 = log (log_arg1); 197 | const auto log2 = log (log_arg2); 198 | 199 | // clang-format off 200 | const auto y = select (x < (T) -1, (T) -1 * x_r1, 201 | select (x < (T) 0, x * x_r1, 202 | select (x < (T) 0.5, x, 203 | select (x < (T) 1, (T) 1 - x, 204 | select (x < (T) 2, (T) 1 - x_r, 205 | x_r))))); 206 | const auto sign = x < (T) -1 || (x >= (T) 0 && x < (T) 0.5) || (x >= (T) 1 && x < (T) 2); 207 | 208 | static constexpr auto pisq_o_6 = (T) M_PI * (T) M_PI / (T) 6; 209 | static constexpr auto pisq_o_3 = (T) M_PI * (T) M_PI / (T) 3; 210 | const auto log1_log2 = log1 * log2; 211 | const auto half_log1_sq = (T) 0.5 * log1 * log1; 212 | const auto half_log2_sq = (T) 0.5 * log2 * log2; 213 | const auto r = select (x < (T) -1, -pisq_o_6 + half_log2_sq - log1_log2, 214 | select (x < (T) 0, -half_log2_sq, 215 | select (x < (T) 0.5, xsimd::broadcast ((T) 0), 216 | select (x < (T) 1, pisq_o_6 - log1_log2, 217 | select (x < (T) 2, pisq_o_6 - log1_log2 - half_log1_sq, 218 | pisq_o_3 - half_log1_sq))))); 219 | // clang-format on 220 | 221 | const auto li2_reduce = li2_0_half (y); 222 | return r + select (sign, li2_reduce, -li2_reduce); 223 | } 224 | #endif 225 | } // namespace math_approx 226 | 
-------------------------------------------------------------------------------- /include/math_approx/src/pow_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "basic_math.hpp" 4 | 5 | namespace math_approx 6 | { 7 | namespace pow_detail 8 | { 9 | // for polynomial derivations, see notebooks/exp_approx.nb 10 | 11 | /** approximation for 2^x, optimized on the range [0, 1] */ 12 | template 13 | constexpr T pow2_approx (T x) 14 | { 15 | static_assert (order >= 3 && order <= 7); 16 | using S = scalar_of_t; 17 | 18 | const auto x_sq = x * x; 19 | if constexpr (C1_continuous) 20 | { 21 | if constexpr (order == 3) 22 | { 23 | const auto x_2_3 = (S) 0.227411277760 + (S) 0.0794415416798 * x; 24 | const auto x_0_1 = (S) 1 + (S) 0.693147180560 * x; 25 | return x_0_1 + x_2_3 * x_sq; 26 | } 27 | else if constexpr (order == 4) 28 | { 29 | const auto x_3_4 = (S) 0.0521277476109 + (S) 0.0136568970345 * x; 30 | const auto x_1_2 = (S) 0.693147180560 + (S) 0.241068174795 * x; 31 | const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; 32 | return (S) 1 + x_1_2_3_4 * x; 33 | } 34 | else if constexpr (order == 5) 35 | { 36 | const auto x_4_5 = (S) 0.00899838527231 + (S) 0.00188723482038 * x; 37 | const auto x_2_3 = (S) 0.240184132673 + (S) 0.0557830666741 * x; 38 | const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; 39 | const auto x_0_1 = (S) 1 + (S) 0.693147180560 * x; 40 | return x_0_1 + x_2_3_4_5 * x_sq; 41 | } 42 | else if constexpr (order == 6) 43 | { 44 | const auto x_5_6 = (S) 0.00124453797252 + (S) 0.000217714753229 * x; 45 | const auto x_3_4 = (S) 0.0554875633068 + (S) 0.00967475272129 * x; 46 | const auto x_1_2 = (S) 0.693147180560 + (S) 0.240228250686 * x; 47 | const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; 48 | const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; 49 | return (S) 1 + x_1_2_3_4_5_6 * x; 50 | } 51 | else if constexpr (order == 7) 52 | { 53 | // doesn't seem to help at single-precision 54 | const auto 
x_6_7 = (S) 0.000133154170702612 + (S) 0.0000245778949916153 * x; 55 | const auto x_4_5 = (S) 0.00960612128901630 + (S) 0.00135551454943593 * x; 56 | const auto x_2_3 = (S) 0.240226202240181 + (S) 0.0555072492957270 * x; 57 | const auto x_0_1 = (S) 1 + (S) 0.693147180559945 * x; 58 | const auto x_4_5_6_7 = x_4_5 + x_6_7 * x_sq; 59 | const auto x_0_1_2_3 = x_0_1 + x_2_3 * x_sq; 60 | return x_0_1_2_3 + x_4_5_6_7 * x_sq * x_sq; 61 | } 62 | else 63 | { 64 | return {}; 65 | } 66 | } 67 | else 68 | { 69 | if constexpr (order == 3) 70 | { 71 | const auto x_2_3 = (S) 0.226307586882 + (S) 0.0782680256330 * x; 72 | const auto x_0_1 = (S) 1 + (S) 0.695424387485 * x; 73 | return x_0_1 + x_2_3 * x_sq; 74 | } 75 | else if constexpr (order == 4) 76 | { 77 | const auto x_3_4 = (S) 0.0520324008177 + (S) 0.0135557244044 * x; 78 | const auto x_1_2 = (S) 0.693032120001 + (S) 0.241379754777 * x; 79 | const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; 80 | return (S) 1 + x_1_2_3_4 * x; 81 | } 82 | else if constexpr (order == 5) 83 | { 84 | const auto x_4_5 = (S) 0.00899009909264 + (S) 0.00187839071291 * x; 85 | const auto x_2_3 = (S) 0.240156326598 + (S) 0.0558229130202 * x; 86 | const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; 87 | const auto x_0_1 = (S) 1 + (S) 0.693152270576 * x; 88 | return x_0_1 + x_2_3_4_5 * x_sq; 89 | } 90 | else if constexpr (order == 6) 91 | { 92 | const auto x_5_6 = (S) 0.00124359387839 + (S) 0.000217187820427 * x; 93 | const auto x_3_4 = (S) 0.0554833098983 + (S) 0.00967911763840 * x; 94 | const auto x_1_2 = (S) 0.693147003658 + (S) 0.240229787107 * x; 95 | const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; 96 | const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; 97 | return (S) 1 + x_1_2_3_4_5_6 * x; 98 | } 99 | else if constexpr (order == 7) 100 | { 101 | // doesn't seem to help at single-precision 102 | const auto x_6_7 = (S) 0.000136898688977877 + (S) 0.0000234440812713967 * x; 103 | const auto x_4_5 = (S) 0.00960825566419915 + (S) 0.00135107295099880 * x; 104 | const 
auto x_2_3 = (S) 0.240226092549669 + (S) 0.0555070350342468 * x; 105 | const auto x_0_1 = (S) 1 + (S) 0.693147201030637 * x; 106 | const auto x_4_5_6_7 = x_4_5 + x_6_7 * x_sq; 107 | const auto x_0_1_2_3 = x_0_1 + x_2_3 * x_sq; 108 | return x_0_1_2_3 + x_4_5_6_7 * x_sq * x_sq; 109 | } 110 | else 111 | { 112 | return {}; 113 | } 114 | } 115 | } 116 | 117 | template 118 | struct BaseE 119 | { 120 | static constexpr auto log2_base = (T) 1.4426950408889634074; 121 | }; 122 | 123 | template 124 | struct Base2 125 | { 126 | static constexpr auto log2_base = (T) 1; 127 | }; 128 | 129 | template 130 | struct Base10 131 | { 132 | static constexpr auto log2_base = (T) 3.3219280948873623479; 133 | }; 134 | } 135 | 136 | #if defined(__GNUC__) 137 | #pragma GCC diagnostic push 138 | #pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning 139 | #pragma GCC diagnostic ignored "-Wuninitialized" 140 | #endif 141 | 142 | /** approximation for pow(Base, x) (32-bit) */ 143 | template 144 | constexpr float pow (float x) 145 | { 146 | x = std::max (-126.0f, Base::log2_base * x); 147 | 148 | const auto xi = (int32_t) x; 149 | const auto l = x < 0.0f ? xi - 1 : xi; 150 | const auto f = x - (float) l; 151 | const auto vi = (l + 127) << 23; 152 | 153 | return bit_cast (vi) * pow_detail::pow2_approx (f); 154 | } 155 | 156 | /** approximation for pow(Base, x) (64-bit) */ 157 | template 158 | constexpr double pow (double x) 159 | { 160 | x = std::max (-1022.0, Base::log2_base * x); 161 | 162 | const auto xi = (int64_t) x; 163 | const auto l = x < 0.0 ? 
xi - 1 : xi; 164 | const auto d = x - (double) l; 165 | const auto vi = (l + 1023) << 52; 166 | 167 | return bit_cast (vi) * pow_detail::pow2_approx (d); 168 | } 169 | 170 | #if defined(XSIMD_HPP) 171 | /** approximation for pow(Base, x) (32-bit SIMD) */ 172 | template 173 | xsimd::batch pow (xsimd::batch x) 174 | { 175 | x = xsimd::max (xsimd::broadcast (-126.0f), Base::log2_base * x); 176 | 177 | const auto xi = xsimd::to_int (x); 178 | const auto l = xsimd::select (xsimd::batch_bool_cast (x < 0.0f), xi - 1, xi); 179 | const auto f = x - xsimd::to_float (l); 180 | const auto vi = (l + 127) << 23; 181 | 182 | return xsimd::bit_cast> (vi) * pow_detail::pow2_approx, order, C1_continuous> (f); 183 | } 184 | 185 | /** approximation for pow(Base, x) (64-bit SIMD) */ 186 | template 187 | xsimd::batch pow (xsimd::batch x) 188 | { 189 | x = xsimd::max (xsimd::broadcast (-1022.0), Base::log2_base * x); /* lower bound is broadcast to a batch, matching the 32-bit overload */ 190 | 191 | const auto xi = xsimd::to_int (x); 192 | const auto l = xsimd::select (xsimd::batch_bool_cast (x < 0.0), xi - 1, xi); 193 | const auto d = x - xsimd::to_float (l); 194 | const auto vi = (l + 1023) << 52; 195 | 196 | return xsimd::bit_cast> (vi) * pow_detail::pow2_approx, order, C1_continuous> (d); 197 | } 198 | #endif 199 | 200 | #if defined(__GNUC__) 201 | #pragma GCC diagnostic pop // end ignore strict-aliasing warnings 202 | #endif 203 | 204 | /** Approximation of exp(x), using exp(x) = 2^floor(x * log2(e)) * 2^frac(x * log2(e)) */ 205 | template 206 | constexpr T exp (T x) 207 | { 208 | return pow>, order, C1_continuous> (x); 209 | } 210 | 211 | /** Approximation of exp2(x), using exp2(x) = 2^floor(x) * 2^frac(x) */ 212 | template 213 | constexpr T exp2 (T x) 214 | { 215 | return pow>, order, C1_continuous> (x); 216 | } 217 | 218 | /** Approximation of exp10(x), using exp10(x) = 2^floor(x * log2(10)) * 2^frac(x * log2(10)) */ 219 | template 220 | constexpr T exp10 (T x) 221 | { 222 | return pow>, order, C1_continuous> (x); 223 | } 224 | 225 | /** Approximation of exp(x) - 1, using 
math_approx::exp(x) */ 226 | template 227 | constexpr T expm1 (T x) 228 | { 229 | return pow>, order, C1_continuous> (x) - (T) 1; 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /include/math_approx/src/sigmoid_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "basic_math.hpp" 4 | 5 | namespace math_approx 6 | { 7 | namespace sigmoid_detail 8 | { 9 | // for polynomial derivations, see notebooks/sigmoid_approx.nb 10 | 11 | template 12 | constexpr T sig_poly_9 (T x) 13 | { 14 | using S = scalar_of_t; 15 | const auto x_sq = x * x; 16 | const auto y_7_9 = (S) 1.50024356624e-6 + (S) 6.92468584642e-9 * x_sq; 17 | const auto y_5_7_9 = (S) 0.000260923534301 + y_7_9 * x_sq; 18 | const auto y_3_5_7_9 = (S) 0.0208320229264 + y_5_7_9 * x_sq; 19 | const auto y_1_3_5_7_9 = (S) 0.5 + y_3_5_7_9 * x_sq; 20 | return x * y_1_3_5_7_9; 21 | } 22 | 23 | template 24 | constexpr T sig_poly_7 (T x) 25 | { 26 | using S = scalar_of_t; 27 | const auto x_sq = x * x; 28 | const auto y_5_7 = (S) 0.000255174491559 + (S) 1.90805380557e-6 * x_sq; 29 | const auto y_3_5_7 = (S) 0.0208503675870 + y_5_7 * x_sq; 30 | const auto y_1_3_5_7 = (S) 0.5 + y_3_5_7 * x_sq; 31 | return x * y_1_3_5_7; 32 | } 33 | 34 | template 35 | constexpr T sig_poly_5 (T x) 36 | { 37 | using S = scalar_of_t; 38 | const auto x_sq = x * x; 39 | const auto y_3_5 = (S) 0.0206108521251 + (S) 0.000307906311109 * x_sq; 40 | const auto y_1_3_5 = (S) 0.5 + y_3_5 * x_sq; 41 | return x * y_1_3_5; 42 | } 43 | 44 | template 45 | constexpr T sig_poly_3 (T x) 46 | { 47 | using S = scalar_of_t; 48 | const auto x_sq = x * x; 49 | const auto y_1_3 = (S) 0.5 + (S) 0.0233402955195 * x_sq; 50 | return x * y_1_3; 51 | } 52 | } // namespace sigmoid_detail 53 | 54 | /** 55 | * Approximation of sigmoid(x) := 1 / (1 + e^-x), 56 | * using sigmoid(x) ≈ (1/2) p(x) / sqrt(p(x)^2 + 1) + (1/2), 57 | * where p(x) is an odd polynomial 
fit to minimize the maxinimum relative error. 58 | */ 59 | template 60 | T sigmoid (T x) 61 | { 62 | static_assert (order % 2 == 1 && order <= 9 && order >= 3, "Order must e an odd number within [3, 9]"); 63 | 64 | T x_poly {}; 65 | if constexpr (order == 9) 66 | x_poly = sigmoid_detail::sig_poly_9 (x); 67 | else if constexpr (order == 7) 68 | x_poly = sigmoid_detail::sig_poly_7 (x); 69 | else if constexpr (order == 5) 70 | x_poly = sigmoid_detail::sig_poly_5 (x); 71 | else if constexpr (order == 3) 72 | x_poly = sigmoid_detail::sig_poly_3 (x); 73 | 74 | using S = scalar_of_t; 75 | return (S) 0.5 * x_poly * rsqrt (x_poly * x_poly + (S) 1) + (S) 0.5; 76 | } 77 | 78 | 79 | /** 80 | * Approximation of sigmoid(x) := 1 / (1 + e^-x), 81 | * using math_approx::exp (x). 82 | * 83 | * So far this has tested slower than the above approximation 84 | * for similar absolute error, but has better relative error 85 | * characteristics. 86 | */ 87 | template 88 | T sigmoid_exp (T x) 89 | { 90 | return (T) 1 / ((T) 1 + math_approx::exp (-x)); 91 | } 92 | } // namespace math_approx 93 | -------------------------------------------------------------------------------- /include/math_approx/src/trig_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "basic_math.hpp" 4 | 5 | namespace math_approx 6 | { 7 | namespace trig_detail 8 | { 9 | template 10 | constexpr T truncate (T x) 11 | { 12 | return static_cast (static_cast (x)); 13 | } 14 | 15 | #if defined(XSIMD_HPP) 16 | template 17 | xsimd::batch truncate (xsimd::batch x) 18 | { 19 | return xsimd::to_float (xsimd::to_int (x)); 20 | } 21 | #endif 22 | 23 | /** Fast method to wrap a value into the range [-pi, pi] */ 24 | template 25 | constexpr T fast_mod_mpi_pi (T x) 26 | { 27 | using S = scalar_of_t; 28 | constexpr auto pi = static_cast (M_PI); 29 | constexpr auto two_pi = static_cast (2.0 * M_PI); 30 | constexpr auto recip_two_pi = static_cast (1) / two_pi; 31 
| 32 | x += pi; 33 | const auto mod = x - two_pi * truncate (x * recip_two_pi); 34 | return select (x >= (T) 0, mod, mod + two_pi) - pi; 35 | } 36 | 37 | /** Fast method to wrap a value into the range [-pi/2, pi/2] */ 38 | template 39 | constexpr T fast_mod_mhalfpi_halfpi (T x) 40 | { 41 | using S = scalar_of_t; 42 | constexpr auto half_pi = static_cast (M_PI) * (S) 0.5; 43 | constexpr auto pi = static_cast (M_PI); 44 | constexpr auto recip_pi = (S) 1 / pi; 45 | 46 | x += half_pi; 47 | const auto mod = x - pi * truncate (x * recip_pi); 48 | return select (x >= (T) 0, mod, mod + pi) - half_pi; 49 | } 50 | 51 | // Polynomials were derived using the method presented in 52 | // https://mooooo.ooo/chebyshev-sine-approximation/ 53 | // and then adapted for various (odd) orders. 54 | 55 | template 56 | constexpr T sin_poly_9 (T x, T x_sq) 57 | { 58 | using S = scalar_of_t; 59 | const auto x_7_9 = (S) -2.49397084313e-6 + (S) 2.00382818811e-8 * x_sq; 60 | const auto x_5_7_9 = (S) 0.000173405228576 + x_7_9 * x_sq; 61 | const auto x_3_5_7_9 = (S) -0.00662075636230 + x_5_7_9 * x_sq; 62 | const auto x_1_3_5_7_9 = (S) 0.101321159036 + x_3_5_7_9 * x_sq; 63 | return x * x_1_3_5_7_9; 64 | } 65 | 66 | template 67 | constexpr T sin_poly_7 (T x, T x_sq) 68 | { 69 | using S = scalar_of_t; 70 | const auto x_5_7 = (S) 0.000170965340046 + (S) -2.09843101304e-6 * x_sq; 71 | const auto x_3_5_7 = (S) -0.00661594021539 + x_5_7 * x_sq; 72 | const auto x_1_3_5_7 = (S) 0.101319673615 + x_3_5_7 * x_sq; 73 | return x * x_1_3_5_7; 74 | } 75 | 76 | template 77 | constexpr T sin_poly_5 (T x, T x_sq) 78 | { 79 | using S = scalar_of_t; 80 | const auto x_3_5 = (S) -0.00650096169550 + (S) 0.000139899314103 * x_sq; 81 | const auto x_1_3_5 = (S) 0.101256629587 + x_3_5 * x_sq; 82 | return x * x_1_3_5; 83 | } 84 | } // namespace trig_detail 85 | 86 | /** Polynomial approximation of sin(x) on the range [-pi, pi] */ 87 | template 88 | constexpr T sin_mpi_pi (T x) 89 | { 90 | static_assert (order % 2 == 1 && 
order <= 9 && order >= 5, "Order must be an odd number within [5, 9]"); 91 | 92 | using S = scalar_of_t; 93 | constexpr auto pi = static_cast (M_PI); 94 | constexpr auto pi_sq = pi * pi; 95 | const auto x_sq = x * x; 96 | 97 | T x_poly {}; 98 | if constexpr (order == 9) 99 | x_poly = trig_detail::sin_poly_9 (x, x_sq); 100 | else if constexpr (order == 7) 101 | x_poly = trig_detail::sin_poly_7 (x, x_sq); 102 | else if constexpr (order == 5) 103 | x_poly = trig_detail::sin_poly_5 (x, x_sq); 104 | 105 | return (pi_sq - x_sq) * x_poly; 106 | } 107 | 108 | /** Full range approximation of sin(x) */ 109 | template 110 | constexpr T sin (T x) 111 | { 112 | return sin_mpi_pi (trig_detail::fast_mod_mpi_pi (x)); 113 | } 114 | 115 | /** 116 | * Polynomial approximation of cos(x) on the range [-pi, pi], 117 | * using a range-shifted approximation of sin(x). 118 | */ 119 | template 120 | constexpr T cos_mpi_pi (T x) 121 | { 122 | static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must be an odd number within [5, 9]"); 123 | 124 | using S = scalar_of_t; 125 | constexpr auto pi = static_cast (M_PI); 126 | constexpr auto pi_sq = pi * pi; 127 | constexpr auto pi_o_2 = pi * (S) 0.5; 128 | 129 | using std::abs; 130 | #if defined(XSIMD_HPP) 131 | using xsimd::abs; 132 | #endif 133 | x = abs (x); 134 | 135 | const auto hpmx = pi_o_2 - x; 136 | const auto hpmx_sq = hpmx * hpmx; 137 | 138 | T x_poly {}; 139 | if constexpr (order == 9) 140 | x_poly = trig_detail::sin_poly_9 (hpmx, hpmx_sq); 141 | else if constexpr (order == 7) 142 | x_poly = trig_detail::sin_poly_7 (hpmx, hpmx_sq); 143 | else if constexpr (order == 5) 144 | x_poly = trig_detail::sin_poly_5 (hpmx, hpmx_sq); 145 | 146 | return (pi_sq - hpmx_sq) * x_poly; 147 | } 148 | 149 | /** Full range approximation of cos(x) */ 150 | template 151 | constexpr T cos (T x) 152 | { 153 | return cos_mpi_pi (trig_detail::fast_mod_mpi_pi (x)); 154 | } 155 | 156 | /** Polynomial approximation of tan(x) on the range [-pi/4, pi/4] 
*/ 157 | template 158 | constexpr T tan_mquarterpi_quarterpi (T x) 159 | { 160 | static_assert (order % 2 == 1 && order >= 3 && order <= 15, "Order must be an odd number within [3, 15]"); 161 | 162 | // for polynomial derivation, see notebooks/tan_approx.nb 163 | 164 | using S = scalar_of_t; 165 | const auto x_sq = x * x; 166 | if constexpr (order == 3) 167 | { 168 | const auto x_1_3 = (S) 1 + (S) 0.442959265447 * x_sq; 169 | return x * x_1_3; 170 | } 171 | else if constexpr (order == 5) 172 | { 173 | const auto x_3_5 = (S) 0.317574684334 + (S) 0.203265826702 * x_sq; 174 | const auto x_1_3_5 = (S) 1 + x_3_5 * x_sq; 175 | return x * x_1_3_5; 176 | } 177 | else if constexpr (order == 7) 178 | { 179 | const auto x_5_7 = (S) 0.116406244996 + (S) 0.0944480566104 * x_sq; 180 | const auto x_1_3 = (S) 1 + (S) 0.335216153138 * x_sq; 181 | const auto x_1_3_5_7 = x_1_3 + x_5_7 * x_sq * x_sq; 182 | return x * x_1_3_5_7; 183 | } 184 | else if constexpr (order == 9) 185 | { 186 | const auto x_7_9 = (S) 0.0405232529373 + (S) 0.0439292071029 * x_sq; 187 | const auto x_3_5 = (S) 0.333131667276 + (S) 0.136333765649 * x_sq; 188 | const auto x_3_5_7_9 = x_3_5 + x_7_9 * x_sq * x_sq; 189 | return x * ((S) 1 + x_3_5_7_9 * x_sq); 190 | } 191 | else if constexpr (order == 11) 192 | { 193 | const auto x_q = x_sq * x_sq; 194 | const auto x_9_11 = (S) 0.0126603694551 + (S) 0.0203633469693 * x_sq; 195 | const auto x_5_7 = (S) 0.132897195017 + (S) 0.0570525279731 * x_sq; 196 | const auto x_1_3 = (S) 1 + (S) 0.333353019629 * x_sq; 197 | const auto x_5_7_9_11 = x_5_7 + x_9_11 * x_q; 198 | const auto x_1_3_5_7_9_11 = x_1_3 + x_5_7_9_11 * x_q; 199 | return x * x_1_3_5_7_9_11; 200 | } 201 | else if constexpr (order == 13) 202 | { 203 | const auto x_q = x_sq * x_sq; 204 | const auto x_6 = x_q * x_sq; 205 | const auto x_11_13 = (S) 0.00343732283737 + (S) 0.00921082294855 * x_sq; 206 | const auto x_7_9 = (S) 0.0534743904687 + (S) 0.0242183751709 * x_sq; 207 | const auto x_3_5 = (S) 0.333331890901 + (S) 
0.133379954680 * x_sq; 208 | const auto x_7_9_11_13 = x_7_9 + x_11_13 * x_q; 209 | const auto x_1_3_5 = (S) 1 + x_3_5 * x_sq; 210 | return x * (x_1_3_5 + x_7_9_11_13 * x_6); 211 | } 212 | else if constexpr (order == 15) 213 | { 214 | // doesn't seem to help much at single-precision, but here it is: 215 | const auto x_q = x_sq * x_sq; 216 | const auto x_8 = x_q * x_q; 217 | const auto x_13_15 = (S) 0.000292958045126 + (S) 0.00427933470414 * x_sq; 218 | const auto x_9_11 = (S) 0.0213477960960 + (S) 0.0106702896251 * x_sq; 219 | const auto x_5_7 = (S) 0.133327796402 + (S) 0.0540469276103 * x_sq; 220 | const auto x_1_3 = (S) 1 + (S) 0.333333463757 * x_sq; 221 | const auto x_9_11_13_15 = x_9_11 + x_13_15 * x_q; 222 | const auto x_1_3_5_7 = x_1_3 + x_5_7 * x_q; 223 | const auto x_1_3_5_7_9_11_13_15 = x_1_3_5_7 + x_9_11_13_15 * x_8; 224 | return x * x_1_3_5_7_9_11_13_15; 225 | } 226 | else 227 | { 228 | return {}; 229 | } 230 | } 231 | 232 | /** 233 | * Approximation of tan(x) on the range [-pi/2, pi/2], 234 | * using the tangent half-angle formula. 235 | * 236 | * Accuracy may suffer as x approaches ±pi/2. 237 | */ 238 | template 239 | constexpr T tan_mhalfpi_halfpi (T x) 240 | { 241 | using S = scalar_of_t; 242 | const auto h_x = tan_mquarterpi_quarterpi ((S) 0.5 * x); 243 | return (S) 2 * h_x / ((S) 1 - h_x * h_x); 244 | } 245 | 246 | /** 247 | * Full-range approximation of tan(x) 248 | * 249 | * Accuracy may suffer as x approaches values for which tan(x) approaches ±Inf. 
250 | */ 251 | template 252 | constexpr T tan (T x) 253 | { 254 | return tan_mhalfpi_halfpi (trig_detail::fast_mod_mhalfpi_halfpi (x)); 255 | } 256 | 257 | //=============================================================================== 258 | namespace trig_turns_detail 259 | { 260 | using namespace trig_detail; 261 | 262 | /** Fast method to wrap a value into the range [-0.5, 0.5] */ 263 | template 264 | constexpr T fast_mod_mhalf_half (T x) 265 | { 266 | if constexpr (std::is_same_v) 267 | { 268 | #if defined(__SSE4_1__) || defined(_MSC_VER) 269 | auto y = _mm_round_ss (_mm_load_ps1 (&x), _mm_load_ps1 (&x), 12); 270 | return x - reinterpret_cast (y); 271 | #else 272 | using std::nearbyint; 273 | #if defined(XSIMD_HPP) 274 | using xsimd::nearbyint; 275 | #endif 276 | return x - nearbyint (x); 277 | #endif 278 | } 279 | else 280 | { 281 | using std::nearbyint; 282 | #if defined(XSIMD_HPP) 283 | using xsimd::nearbyint; 284 | #endif 285 | return x - nearbyint (x); 286 | } 287 | } 288 | } // namespace trig_turns_detail 289 | 290 | /** Polynomial approximation of sin(2*pi*x) on the range [-0.5, 0.5] */ 291 | template 292 | constexpr T sin_turns_mhalfpi_halfpi (T x) 293 | { 294 | static_assert (order % 2 == 1 && order <= 11 && order >= 5, "Order must be an odd number within [5, 11]"); 295 | 296 | using S = scalar_of_t; 297 | const auto x_sq = x * x; 298 | T y; 299 | if constexpr (order == 11) 300 | { 301 | // -25.1327411554 x + 64.8358228565 x^3 - 67.0766273790 x^5 + 38.4958788775 x^7 - 14.0496638478 x^9 + 3.16160207407 x^11 302 | const auto x_q = x_sq * x_sq; 303 | const auto x_9_11 = (S) -14.0496638478f + (S) 3.16160207407f * x_sq; 304 | const auto x_5_7 = (S) -67.0766273790f + (S) 38.4958788775f * x_sq; 305 | const auto x_1_3 = (S) -25.1327411554f + (S) 64.8358228565f * x_sq; 306 | const auto x_5_7_9_11 = x_5_7 + x_9_11 * x_q; 307 | const auto x_1_3_5_7_9_11 = x_1_3 + x_5_7_9_11 * x_q; 308 | y = x * x_1_3_5_7_9_11; 309 | } 310 | else if constexpr (order == 9) 311 | { 312
| const auto x_q = x_sq * x_sq; 313 | const auto x_7_9 = (S) 38.0636285939f - (S) 12.0736625515f * x_sq; 314 | const auto x_3_5 = (S) 64.8346168010f - (S) 67.0380336036f * x_sq; 315 | const auto x_3_5_7_9 = x_3_5 + x_7_9 * x_q; 316 | const auto x_1_3_5_7_9 = (S) -25.1327351251f + x_3_5_7_9 * x_sq; 317 | y = x * x_1_3_5_7_9; 318 | } 319 | else if constexpr (order == 7) 320 | { 321 | // -25.1323666662 x + 64.7874540567 x^3 - 66.0947787168 x^5 + 32.0267973181 x^7 322 | const auto x_q = x_sq * x_sq; 323 | const auto x_5_7 = (S) -66.0947787168f + (S) 32.0267973181f * x_sq; 324 | const auto x_1_3 = (S) -25.1323666662f + (S) 64.7874540567f * x_sq; 325 | const auto x_1_3_5_7 = x_1_3 + x_5_7 * x_q; 326 | y = x * x_1_3_5_7; 327 | } 328 | else if constexpr (order == 5) 329 | { 330 | // -25.1167285815 x + 63.6615119634 x^3 - 54.0847297225 x^5 331 | const auto x_3_5 = (S) 63.6615119634f + (S) -54.0847297225f * x_sq; 332 | const auto x_1_3_5 = (S) -25.1167285815f + x_3_5 * x_sq; 333 | y = x * x_1_3_5; 334 | } 335 | 336 | return y * (x + 0.5f) * (x - 0.5f); 337 | // return y * (x_sq - 0.25f); // this costs us a lot of precision :( 338 | } 339 | 340 | /** 341 | * Full-range approximation of sin(2*pi*x) 342 | */ 343 | template 344 | constexpr T sin_turns (T x) 345 | { 346 | return sin_turns_mhalfpi_halfpi (trig_turns_detail::fast_mod_mhalf_half (x)); 347 | } 348 | 349 | /** Polynomial approximation of cos(2*pi*x) on the range [-0.5, 0.5] */ 350 | template 351 | constexpr T cos_turns_mhalfpi_halfpi (T x) 352 | { 353 | using S = scalar_of_t; 354 | using std::abs; 355 | #if defined(XSIMD_HPP) 356 | using xsimd::abs; 357 | #endif 358 | return sin_turns_mhalfpi_halfpi ((S) 0.25 - abs (x)); 359 | } 360 | 361 | /** 362 | * Full-range approximation of cos(2*pi*x) 363 | */ 364 | template 365 | constexpr T cos_turns (T x) 366 | { 367 | return cos_turns_mhalfpi_halfpi (trig_turns_detail::fast_mod_mhalf_half (x)); 368 | } 369 | } // namespace math_approx 370 |
-------------------------------------------------------------------------------- /include/math_approx/src/wright_omega_approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "basic_math.hpp" 4 | 5 | namespace math_approx 6 | { 7 | /** 8 | * Approximation of the Wright-Omega function, using 9 | * w(x) ≈ 0 for x < -3 10 | * w(x) ≈ p(x) for -3 <= x < e 11 | * w(x) ≈ x - log(x) + alpha * exp(-beta * x) for x >= e, 12 | * where p(x) is a polynomial, and alpha and beta are coefficients, 13 | * all fit to minimize the maximum absolute error. 14 | * 15 | * The above fit is optionally followed by some number of Newton-Raphson iterations. 16 | */ 17 | template 18 | constexpr T wright_omega (T x) 19 | { 20 | static_assert (poly_order == 3 || poly_order == 5); 21 | 22 | using S = scalar_of_t; 23 | constexpr auto E = (S) 2.7182818284590452354; 24 | 25 | const auto x1 = [] (T _x) 26 | { 27 | const auto x_sq = _x * _x; 28 | if constexpr (poly_order == 3) 29 | { 30 | const auto y_2_3 = (S) 0.0534379648805832 + (S) -0.00251076420630778 * _x; 31 | const auto y_0_1 = (S) 0.616522951065868 + (S) 0.388418422853809 * _x; 32 | return y_0_1 + y_2_3 * x_sq; 33 | } 34 | else if constexpr (poly_order == 5) 35 | { 36 | const auto y_4_5 = (S) -0.00156418794118294 + (S) -0.00151562297325209 * _x; 37 | const auto y_2_3 = (S) 0.0719291313363515 + (S) 0.0216881206167543 * _x; 38 | const auto y_0_1 = (S) 0.569291529016010 + (S) 0.290890537885083 * _x; 39 | const auto y_2_3_4_5 = y_2_3 + y_4_5 * x_sq; 40 | return y_0_1 + y_2_3_4_5 * x_sq; 41 | } 42 | else 43 | { 44 | return T {}; 45 | } 46 | }(x); 47 | const auto x2 = x - log (x) + (S) 0.32352057096397160124 * exp ((S) -0.029614177658043381316 * x); 48 | 49 | auto y = select (x < (S) -3, T {}, select (x < (S) E, x1, x2)); 50 | 51 | const auto nr_update = [] (T _x, T _y) 52 | { 53 | return _y - (_y - exp (_x - _y)) / (_y + (S) 1); 54 | }; 55 | 56 | for (int i = 0; i < 
num_nr_iters; ++i) 57 | y = nr_update (x, y); 58 | 59 | return y; 60 | } 61 | 62 | /** 63 | * Wright-Omega function using Stefano D'Angelo's derivation (https://www.dafx.de/paper-archive/2019/DAFx2019_paper_5.pdf) 64 | * With `num_nr_iters == 0`, this is the fastest implementation, but the least accurate. 65 | * With `num_nr_iters == 1`, this is faster than the other implementation with 0 iterations, and a little bit more accurate. 66 | * For more accuracy, use the other implementation with at least 1 NR iteration. 67 | */ 68 | template 69 | constexpr T wright_omega_dangelo (T x) 70 | { 71 | using S = scalar_of_t; 72 | 73 | const auto x1 = [] (T _x) 74 | { 75 | const auto x_sq = _x * _x; 76 | const auto y_2_3 = (S) 4.775931364975583e-2 + (S) -1.314293149877800e-3 * _x; 77 | const auto y_0_1 = (S) 6.313183464296682e-1 + (S) 3.631952663804445e-1 * _x; 78 | return y_0_1 + y_2_3 * x_sq; 79 | }(x); 80 | const auto x2 = x - log (x); 81 | 82 | auto y = select (x < (S) -3.341459552768620, T {}, select (x < (S) 8, x1, x2)); 83 | 84 | const auto nr_update = [] (T _x, T _y) 85 | { 86 | return _y - (_y - exp (_x - _y)) / (_y + (S) 1); 87 | }; 88 | 89 | for (int i = 0; i < num_nr_iters; ++i) 90 | y = nr_update (x, y); 91 | 92 | return y; 93 | } 94 | } // namespace math_approx 95 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | message(STATUS "math_approx -- Configuring tests...") 2 | 3 | CPMAddPackage("gh:catchorg/Catch2@3.8.1") 4 | include(${Catch2_SOURCE_DIR}/extras/Catch.cmake) 5 | 6 | function(setup_catch_test target) 7 | add_executable(${target}) 8 | target_sources(${target} PRIVATE src/${target}.cpp) 9 | target_include_directories(${target} PRIVATE ${CMAKE_SOURCE_DIR}/tests/test_utils) 10 | target_link_libraries(${target} 11 | PRIVATE 12 | Catch2::Catch2WithMain 13 | math_approx 14 | ) 15 | target_compile_features(${target} PUBLIC
cxx_std_20) 16 | target_compile_definitions(${target} PUBLIC _USE_MATH_DEFINES=1) 17 | target_compile_options(${target} PRIVATE 18 | $<$:/W4 /WX> 19 | $<$>:-Wall -Wextra -Wpedantic -Werror> 20 | ) 21 | 22 | add_custom_command(TARGET ${target} 23 | POST_BUILD 24 | WORKING_DIRECTORY ${CMAKE_BINARY_DIR} 25 | COMMAND ${CMAKE_COMMAND} -E echo "Copying $ to test-binary" 26 | COMMAND ${CMAKE_COMMAND} -E make_directory test-binary 27 | COMMAND ${CMAKE_COMMAND} -E copy "$" test-binary 28 | ) 29 | 30 | catch_discover_tests(${target} TEST_PREFIX ${target}_) 31 | endfunction(setup_catch_test) 32 | 33 | setup_catch_test(trig_approx_test) 34 | setup_catch_test(trig_turns_approx_test) 35 | setup_catch_test(inverse_trig_approx_test) 36 | setup_catch_test(pow_approx_test) 37 | setup_catch_test(log_approx_test) 38 | setup_catch_test(hyperbolic_trig_approx_test) 39 | setup_catch_test(inverse_hyperbolic_trig_approx_test) 40 | setup_catch_test(sigmoid_approx_test) 41 | setup_catch_test(wright_omega_approx_test) 42 | setup_catch_test(polylog_approx_test) 43 | -------------------------------------------------------------------------------- /test/src/hyperbolic_trig_approx_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test_helpers.hpp" 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | TEST_CASE ("Sinh Approx Test") 8 | { 9 | #if ! 
defined(WIN32) 10 | const auto all_floats = test_helpers::all_32_bit_floats (-3.5f, 3.5f, 1.0e-3f); 11 | #else 12 | const auto all_floats = test_helpers::all_32_bit_floats (-3.5f, 3.5f, 1.0e-1f); 13 | #endif 14 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 15 | { return std::sinh (x); }); 16 | 17 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) 18 | { 19 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 20 | 21 | const auto error = test_helpers::compute_error (y_exact, y_approx); 22 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 23 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 24 | 25 | const auto max_error = test_helpers::abs_max (error); 26 | const auto max_rel_error = test_helpers::abs_max (rel_error); 27 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 28 | 29 | std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; 30 | REQUIRE (std::abs (max_error) < err_bound); 31 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 32 | if (ulp_bound > 0) 33 | REQUIRE (max_ulp_error < ulp_bound); 34 | }; 35 | 36 | SECTION ("6th-Order simul.") 37 | { 38 | test_approx ([] (auto x) 39 | { return math_approx::sinh_cosh<6> (x).first; }, 40 | 6.0e-6f, 41 | 0.00012f, 42 | 1050); 43 | } 44 | SECTION ("6th-Order") 45 | { 46 | test_approx ([] (auto x) 47 | { return math_approx::sinh<6> (x); }, 48 | 6.0e-6f, 49 | 0.00012f, 50 | 1050); 51 | } 52 | SECTION ("5th-Order") 53 | { 54 | test_approx ([] (auto x) 55 | { return math_approx::sinh<5> (x); }, 56 | 8.0e-6f, 57 | 0.00015f, 58 | 0); 59 | } 60 | SECTION ("4th-Order") 61 | { 62 | test_approx ([] (auto x) 63 | { return math_approx::sinh<4> (x); }, 64 | 6.0e-5f, 65 | 0.00027f, 66 | 0); 67 | } 68 | SECTION ("3rd-Order") 69 | { 70 | test_approx ([] (auto x) 71 | { return 
math_approx::sinh<3> (x); }, 72 | 0.002f, 73 | 0.0035f, 74 | 0); 75 | } 76 | } 77 | 78 | TEST_CASE ("Cosh Approx Test") 79 | { 80 | #if ! defined(WIN32) 81 | const auto all_floats = test_helpers::all_32_bit_floats (-5.0f, 5.0f, 1.0e-3f); 82 | #else 83 | const auto all_floats = test_helpers::all_32_bit_floats (-5.0f, 5.0f, 1.0e-1f); 84 | #endif 85 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 86 | { return std::cosh (x); }); 87 | 88 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) 89 | { 90 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 91 | 92 | const auto error = test_helpers::compute_error (y_exact, y_approx); 93 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 94 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 95 | 96 | const auto max_error = test_helpers::abs_max (error); 97 | const auto max_rel_error = test_helpers::abs_max (rel_error); 98 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 99 | 100 | std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; 101 | REQUIRE (std::abs (max_error) < err_bound); 102 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 103 | if (ulp_bound > 0) 104 | REQUIRE (max_ulp_error < ulp_bound); 105 | }; 106 | 107 | SECTION ("6th-Order simul.") 108 | { 109 | test_approx ([] (auto x) 110 | { return math_approx::sinh_cosh<6> (x).second; }, 111 | 2.5e-5f, 112 | 4.5e-7f, 113 | 8); 114 | } 115 | SECTION ("6th-Order") 116 | { 117 | test_approx ([] (auto x) 118 | { return math_approx::cosh<6> (x); }, 119 | 2.5e-5f, 120 | 4.5e-7f, 121 | 8); 122 | } 123 | SECTION ("5th-Order") 124 | { 125 | test_approx ([] (auto x) 126 | { return math_approx::cosh<5> (x); }, 127 | 3.5e-5f, 128 | 5.5e-7f, 129 | 10); 130 | } 131 | SECTION ("4th-Order") 132 | { 133 | test_approx ([] (auto x) 134 | { 
return math_approx::cosh<4> (x); }, 135 | 0.0003f, 136 | 4.0e-6f, 137 | 60); 138 | } 139 | SECTION ("3rd-Order") 140 | { 141 | test_approx ([] (auto x) 142 | { return math_approx::cosh<3> (x); }, 143 | 0.0075f, 144 | 0.00015f, 145 | 0); 146 | } 147 | } 148 | 149 | TEST_CASE ("Tanh Approx Test") 150 | { 151 | #if ! defined(WIN32) 152 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-2f); 153 | #else 154 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 155 | #endif 156 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 157 | { return std::tanh (x); }); 158 | 159 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) 160 | { 161 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 162 | 163 | const auto error = test_helpers::compute_error (y_exact, y_approx); 164 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 165 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 166 | 167 | const auto max_error = test_helpers::abs_max (error); 168 | const auto max_rel_error = test_helpers::abs_max (rel_error); 169 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 170 | 171 | std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; 172 | REQUIRE (std::abs (max_error) < err_bound); 173 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 174 | if (ulp_bound > 0) 175 | REQUIRE (max_ulp_error < ulp_bound); 176 | }; 177 | 178 | SECTION ("11th-Order") 179 | { 180 | test_approx ([] (auto x) 181 | { return math_approx::tanh<11> (x); }, 182 | 2.5e-7f, 183 | 4.0e-7f, 184 | 7); 185 | } 186 | SECTION ("9th-Order") 187 | { 188 | test_approx ([] (auto x) 189 | { return math_approx::tanh<9> (x); }, 190 | 1.5e-6f, 191 | 1.5e-6f, 192 | 20); 193 | } 194 | SECTION ("7th-Order") 195 | { 196 | test_approx 
([] (auto x) 197 | { return math_approx::tanh<7> (x); }, 198 | 1.5e-5f, 199 | 1.5e-5f, 200 | 230); 201 | } 202 | SECTION ("5th-Order") 203 | { 204 | test_approx ([] (auto x) 205 | { return math_approx::tanh<5> (x); }, 206 | 2.5e-4f, 207 | 2.5e-4f, 208 | 0); 209 | } 210 | SECTION ("3th-Order") 211 | { 212 | test_approx ([] (auto x) 213 | { return math_approx::tanh<3> (x); }, 214 | 4.0e-3f, 215 | 4.0e-3f, 216 | 0); 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /test/src/inverse_hyperbolic_trig_approx_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test_helpers.hpp" 2 | #include "catch2/catch_template_test_macros.hpp" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | template 10 | void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float err_bound) 11 | { 12 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 13 | const auto error = test_helpers::compute_error (y_exact, y_approx); 14 | const auto max_error = test_helpers::abs_max (error); 15 | 16 | std::cout << max_error << std::endl; 17 | REQUIRE (std::abs (max_error) < err_bound); 18 | } 19 | 20 | TEMPLATE_TEST_CASE ("Asinh Approx Test", "", float, double) 21 | { 22 | #if ! 
defined(WIN32) 23 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-2f); 24 | #else 25 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 26 | #endif 27 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 28 | { return std::asinh (x); }); 29 | 30 | SECTION ("6th-Order") 31 | { 32 | test_approx (all_floats, y_exact, [] (auto x) 33 | { return math_approx::asinh<6> (x); }, 34 | 5.0e-7f); 35 | } 36 | SECTION ("5th-Order") 37 | { 38 | test_approx (all_floats, y_exact, [] (auto x) 39 | { return math_approx::asinh<5> (x); }, 40 | 6.0e-5f); 41 | } 42 | SECTION ("4th-Order") 43 | { 44 | test_approx (all_floats, y_exact, [] (auto x) 45 | { return math_approx::asinh<4> (x); }, 46 | 3.5e-4f); 47 | } 48 | SECTION ("3th-Order") 49 | { 50 | test_approx (all_floats, y_exact, [] (auto x) 51 | { return math_approx::asinh<3> (x); }, 52 | 2.5e-3f); 53 | } 54 | } 55 | 56 | TEMPLATE_TEST_CASE ("Acosh Approx Test", "", float, double) 57 | { 58 | const auto all_floats = test_helpers::all_32_bit_floats (1.0f, 10.0f, 1.0e-2f); 59 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 60 | { return std::acosh (x); }); 61 | 62 | SECTION ("6th-Order") 63 | { 64 | test_approx (all_floats, y_exact, [] (auto x) 65 | { return math_approx::acosh<6> (x); }, 66 | 4.5e-6f); 67 | } 68 | SECTION ("5th-Order") 69 | { 70 | test_approx (all_floats, y_exact, [] (auto x) 71 | { return math_approx::acosh<5> (x); }, 72 | 1.5e-5f); 73 | } 74 | SECTION ("4th-Order") 75 | { 76 | test_approx (all_floats, y_exact, [] (auto x) 77 | { return math_approx::acosh<4> (x); }, 78 | 8.5e-5f); 79 | } 80 | SECTION ("3th-Order") 81 | { 82 | test_approx (all_floats, y_exact, [] (auto x) 83 | { return math_approx::acosh<3> (x); }, 84 | 6.5e-4f); 85 | } 86 | } 87 | 88 | TEMPLATE_TEST_CASE ("Atanh Approx Test", "", float, double) 89 | { 90 | const auto all_floats = test_helpers::all_32_bit_floats (-0.9999f, 0.9999f, 1.0e-2f); 91 
| const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 92 | { return std::atanh (x); }); 93 | 94 | SECTION ("6th-Order") 95 | { 96 | test_approx (all_floats, y_exact, [] (auto x) 97 | { return math_approx::atanh<6> (x); }, 98 | 2.5e-6f); 99 | } 100 | SECTION ("5th-Order") 101 | { 102 | test_approx (all_floats, y_exact, [] (auto x) 103 | { return math_approx::atanh<5> (x); }, 104 | 6.5e-6f); 105 | } 106 | SECTION ("4th-Order") 107 | { 108 | test_approx (all_floats, y_exact, [] (auto x) 109 | { return math_approx::atanh<4> (x); }, 110 | 4.5e-5f); 111 | } 112 | SECTION ("3th-Order") 113 | { 114 | test_approx (all_floats, y_exact, [] (auto x) 115 | { return math_approx::atanh<3> (x); }, 116 | 3.5e-4f); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /test/src/inverse_trig_approx_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test_helpers.hpp" 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | TEST_CASE ("Asin Approx Test") 8 | { 9 | #if ! 
defined(WIN32) 10 | const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-2f); 11 | #else 12 | const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-1f); 13 | #endif 14 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 15 | { return std::asin (x); }); 16 | 17 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) 18 | { 19 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 20 | 21 | const auto error = test_helpers::compute_error (y_exact, y_approx); 22 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 23 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 24 | 25 | const auto max_error = test_helpers::abs_max (error); 26 | const auto max_rel_error = test_helpers::abs_max (rel_error); 27 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 28 | 29 | std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; 30 | REQUIRE (std::abs (max_error) < err_bound); 31 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 32 | if (ulp_bound > 0) 33 | REQUIRE (max_ulp_error < ulp_bound); 34 | }; 35 | 36 | SECTION ("4th-Order") 37 | { 38 | test_approx ([] (auto x) 39 | { return math_approx::asin<4> (x); }, 40 | 2.5e-7f, 41 | 4.0e-7f, 42 | 4); 43 | } 44 | SECTION ("3rd-Order") 45 | { 46 | test_approx ([] (auto x) 47 | { return math_approx::asin<3> (x); }, 48 | 3.0e-7f, 49 | 5.0e-7f, 50 | 5); 51 | } 52 | SECTION ("2nd-Order") 53 | { 54 | test_approx ([] (auto x) 55 | { return math_approx::asin<2> (x); }, 56 | 2.0e-6f, 57 | 4.0e-6f, 58 | 50); 59 | } 60 | SECTION ("1st-Order") 61 | { 62 | test_approx ([] (auto x) 63 | { return math_approx::asin<1> (x); }, 64 | 4.0e-5f, 65 | 6.5e-5f, 66 | 0); 67 | } 68 | } 69 | 70 | TEST_CASE ("Acos Approx Test") 71 | { 72 | #if ! 
defined(WIN32) 73 | const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-2f); 74 | #else 75 | const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-1f); 76 | #endif 77 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 78 | { return std::acos (x); }); 79 | 80 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound) 81 | { 82 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 83 | 84 | const auto error = test_helpers::compute_error (y_exact, y_approx); 85 | 86 | const auto max_error = test_helpers::abs_max (error); 87 | 88 | std::cout << max_error << std::endl; 89 | REQUIRE (std::abs (max_error) < err_bound); 90 | }; 91 | 92 | SECTION ("5th-Order") 93 | { 94 | test_approx ([] (auto x) 95 | { return math_approx::acos<5> (x); }, 96 | 5.0e-7f); 97 | } 98 | SECTION ("4th-Order") 99 | { 100 | test_approx ([] (auto x) 101 | { return math_approx::acos<4> (x); }, 102 | 1.0e-6f); 103 | } 104 | SECTION ("3rd-Order") 105 | { 106 | test_approx ([] (auto x) 107 | { return math_approx::acos<3> (x); }, 108 | 1.5e-5f); 109 | } 110 | SECTION ("2nd-Order") 111 | { 112 | test_approx ([] (auto x) 113 | { return math_approx::acos<2> (x); }, 114 | 2.5e-4f); 115 | } 116 | SECTION ("1st-Order") 117 | { 118 | test_approx ([] (auto x) 119 | { return math_approx::acos<1> (x); }, 120 | 5.0e-3f); 121 | } 122 | } 123 | 124 | TEST_CASE ("Atan Approx Test") 125 | { 126 | #if ! 
defined(WIN32) 127 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-2f); 128 | #else 129 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 130 | #endif 131 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 132 | { return std::atan (x); }); 133 | 134 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) 135 | { 136 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 137 | 138 | const auto error = test_helpers::compute_error (y_exact, y_approx); 139 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 140 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 141 | 142 | const auto max_error = test_helpers::abs_max (error); 143 | const auto max_rel_error = test_helpers::abs_max (rel_error); 144 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 145 | 146 | std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; 147 | REQUIRE (std::abs (max_error) < err_bound); 148 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 149 | if (ulp_bound > 0) 150 | REQUIRE (max_ulp_error < ulp_bound); 151 | }; 152 | 153 | SECTION ("7th-Order") 154 | { 155 | test_approx ([] (auto x) 156 | { return math_approx::atan<7> (x); }, 157 | 4.0e-7f, 158 | 3.0e-6f, 159 | 45); 160 | } 161 | SECTION ("5th-Order") 162 | { 163 | test_approx ([] (auto x) 164 | { return math_approx::atan<5> (x); }, 165 | 2.0e-5f, 166 | 1.5e-4f, 167 | 0); 168 | } 169 | SECTION ("4th-Order") 170 | { 171 | test_approx ([] (auto x) 172 | { return math_approx::atan<4> (x); }, 173 | 1.5e-4f, 174 | 8.5e-4f, 175 | 0); 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /test/src/log_approx_test.cpp: -------------------------------------------------------------------------------- 1 | 
#include "test_helpers.hpp"
#include "catch2/catch_template_test_macros.hpp"

// NOTE(review): the three angle-bracket include targets below were lost when this
// listing was extracted; they are reconstructed from the names used in this file.
#include <catch2/catch_test_macros.hpp>
#include <iostream>

#include <math_approx/math_approx.hpp>

/**
 * Evaluates `f_approx` on every input, prints the largest-magnitude absolute
 * error against `y_exact`, and requires it to be below `err_bound`.
 *
 * @tparam T          Floating-point type under test (supplied explicitly by callers).
 * @param all_floats  Inputs to evaluate.
 * @param y_exact     Reference results, one per input.
 * @param f_approx    Approximation under test.
 * @param err_bound   Exclusive upper bound on |max absolute error|.
 */
template <typename T>
void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float err_bound)
{
    const auto y_approx = test_helpers::compute_all<T> (all_floats, f_approx);
    const auto error = test_helpers::compute_error<T> (y_exact, y_approx);
    const auto max_error = test_helpers::abs_max<T> (error);

    std::cout << max_error << std::endl;
    REQUIRE (std::abs (max_error) < err_bound);
}

TEMPLATE_TEST_CASE ("Log Approx Test", "", float, double)
{
    const auto all_floats = test_helpers::all_32_bit_floats<TestType> (0.01f, 10.0f, 1.0e-3f);
    const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
                                                              { return std::log (x); });

    SECTION ("6th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log<6> (x); },
                               4.5e-6f);
    }
    SECTION ("6th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log<6, true> (x); },
                               6.5e-6f);
    }
    SECTION ("5th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log<5> (x); },
                               1.5e-5f);
    }
    SECTION ("5th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log<5, true> (x); },
                               3.5e-5f);
    }
    SECTION ("4th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log<4> (x); },
                               8.5e-5f);
    }
    SECTION ("4th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log<4, true> (x); },
                               3.0e-4f);
    }
    SECTION ("3th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log<3> (x); },
                               6.5e-4f);
    }
    SECTION ("3th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log<3, true> (x); },
                               4.0e-3f);
    }
}

TEMPLATE_TEST_CASE ("Log2 Approx Test", "", float, double)
{
    const auto all_floats = test_helpers::all_32_bit_floats<TestType> (0.01f, 10.0f, 1.0e-3f);
    const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
                                                              { return std::log2 (x); });

    SECTION ("6th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log2<6> (x); },
                               6.0e-6f);
    }
    SECTION ("6th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log2<6, true> (x); },
                               8.5e-6f);
    }
    SECTION ("5th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log2<5> (x); },
                               2.0e-5f);
    }
    SECTION ("5th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log2<5, true> (x); },
                               5.0e-5f);
    }
    SECTION ("4th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log2<4> (x); },
                               1.5e-4f);
    }
    SECTION ("4th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log2<4, true> (x); },
                               4.5e-4f);
    }
    SECTION ("3th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log2<3> (x); },
                               9.0e-4f);
    }
    SECTION ("3th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log2<3, true> (x); },
                               5.5e-3f);
    }
}

TEMPLATE_TEST_CASE ("Log10 Approx Test", "", float, double)
{
    const auto all_floats = test_helpers::all_32_bit_floats<TestType> (0.01f, 10.0f, 1.0e-3f);
    const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
                                                              { return std::log10 (x); });

    SECTION ("6th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log10<6> (x); },
                               2.0e-6f);
    }
    SECTION ("6th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log10<6, true> (x); },
                               3.0e-6f);
    }
    SECTION ("5th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log10<5> (x); },
                               6.0e-6f);
    }
    SECTION ("5th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log10<5, true> (x); },
                               1.5e-5f);
    }
    SECTION ("4th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log10<4> (x); },
                               4.0e-5f);
    }
    SECTION ("4th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log10<4, true> (x); },
                               1.5e-4f);
    }
    SECTION ("3th-Order")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log10<3> (x); },
                               3.0e-4f);
    }
    SECTION ("3th-Order (C1-cont)")
    {
        test_approx<TestType> (all_floats, y_exact, [] (auto x)
                               { return math_approx::log10<3, true> (x); },
                               2.0e-3f);
    }
}

// --------------------------------------------------------------------------------
// /test/src/polylog_approx_test.cpp:
// --------------------------------------------------------------------------------
#include "test_helpers.hpp"
// NOTE(review): angle-bracket include targets reconstructed from usage (see note at top).
#include <algorithm>
#include <catch2/catch_test_macros.hpp>
#include <cstdint>
#include <iostream>

#include <math_approx/math_approx.hpp>

#include "reference/polylogarithm.hpp"

TEST_CASE ("Li2 Approx Test")
{
    // A coarser tolerance around zero is used when WIN32 is defined.
#if ! defined(WIN32)
    const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-2f);
#else
    const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
#endif
    const auto y_exact = test_helpers::compute_all<float> (all_floats, [] (auto x)
                                                           { return polylogarithm::Li2 (x); });

    // Checks absolute, relative and (when ulp_bound > 0) ULP error of f_approx against y_exact.
    const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound)
    {
        const auto y_approx = test_helpers::compute_all<float> (all_floats, f_approx);

        const auto error = test_helpers::compute_error<float> (y_exact, y_approx);
        const auto rel_error = test_helpers::compute_rel_error<float> (y_exact, y_approx);
        const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx);

        const auto max_error = test_helpers::abs_max<float> (error);
        const auto max_rel_error = test_helpers::abs_max<float> (rel_error);
        const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end());

        std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl;
        REQUIRE (std::abs (max_error) < err_bound);
        REQUIRE (std::abs (max_rel_error) < rel_err_bound);
        if (ulp_bound > 0)
            REQUIRE (max_ulp_error < ulp_bound);
    };

    SECTION ("3rd-Order_Log-6")
    {
        test_approx ([] (auto x)
                     { return math_approx::li2<3, 6> (x); },
                     2.5e-5f,
                     1.5e-5f,
                     200);
    }
    SECTION ("3rd-Order")
    {
        test_approx ([] (auto x)
                     { return math_approx::li2<3> (x); },
                     8.0e-5f,
                     1.5e-4f,
                     0);
    }
    SECTION ("2nd-Order")
    {
        test_approx ([] (auto x)
                     { return math_approx::li2<2> (x); },
                     3.0e-4f,
                     3.0e-4f,
                     0);
    }
    SECTION ("1st-Order")
    {
        test_approx ([] (auto x)
                     { return math_approx::li2<1> (x); },
                     2.5e-3f,
                     4.0e-3f,
                     0);
    }
}

// --------------------------------------------------------------------------------
/test/src/pow_approx_test.cpp: -------------------------------------------------------------------------------- 1 | #include "catch2/catch_template_test_macros.hpp" 2 | #include "test_helpers.hpp" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | template 10 | void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float rel_err_bound, uint32_t ulp_bound) 11 | { 12 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 13 | 14 | const auto error = test_helpers::compute_error (y_exact, y_approx); 15 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 16 | const auto ulp_error = [&] 17 | { 18 | if constexpr (std::is_same_v) 19 | return test_helpers::compute_ulp_error (y_exact, y_approx); 20 | else 21 | return std::vector {}; 22 | }(); 23 | 24 | const auto max_rel_error = test_helpers::abs_max (rel_error); 25 | const auto max_ulp_error = std::is_same_v ? *std::max_element (ulp_error.begin(), ulp_error.end()) : 0; 26 | 27 | std::cout << max_rel_error << ", " << max_ulp_error << std::endl; 28 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 29 | if (ulp_bound > 0) 30 | REQUIRE (max_ulp_error < ulp_bound); 31 | } 32 | 33 | TEMPLATE_TEST_CASE ("Exp Approx Test", "", float, double) 34 | { 35 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 2.5e-1f); 36 | const auto y_exact = test_helpers::compute_all (all_floats, 37 | [] (auto x) 38 | { 39 | return std::exp (x); 40 | }); 41 | 42 | SECTION ("6th-Order") 43 | { 44 | test_approx (all_floats, y_exact, [] (auto x) 45 | { return math_approx::exp<6> (x); }, 46 | 6.0e-7f, 47 | 10); 48 | } 49 | SECTION ("6th-Order (C1-cont)") 50 | { 51 | test_approx (all_floats, y_exact, [] (auto x) 52 | { return math_approx::exp<6, true> (x); }, 53 | 6.0e-7f, 54 | 10); 55 | } 56 | SECTION ("5th-Order") 57 | { 58 | test_approx (all_floats, y_exact, [] (auto x) 59 | { return math_approx::exp<5> (x); }, 60 | 7.5e-7f, 61 | 15); 62 | } 63 | SECTION 
("5th-Order (C1-cont)") 64 | { 65 | test_approx (all_floats, y_exact, [] (auto x) 66 | { return math_approx::exp<5, true> (x); }, 67 | 9.0e-7f, 68 | 15); 69 | } 70 | SECTION ("4th-Order") 71 | { 72 | test_approx (all_floats, y_exact, [] (auto x) 73 | { return math_approx::exp<4> (x); }, 74 | 4.0e-6f, 75 | 80); 76 | } 77 | SECTION ("4th-Order (C1-cont)") 78 | { 79 | test_approx (all_floats, y_exact, [] (auto x) 80 | { return math_approx::exp<4, true> (x); }, 81 | 1.5e-5f, 82 | 180); 83 | } 84 | SECTION ("3th-Order") 85 | { 86 | test_approx (all_floats, y_exact, [] (auto x) 87 | { return math_approx::exp<3> (x); }, 88 | 1.5e-4f, 89 | 0); 90 | } 91 | SECTION ("3th-Order (C1-cont)") 92 | { 93 | test_approx (all_floats, y_exact, [] (auto x) 94 | { return math_approx::exp<3, true> (x); }, 95 | 6.5e-4f, 96 | 0); 97 | } 98 | } 99 | 100 | TEMPLATE_TEST_CASE ("Exp2 Approx Test", "", float, double) 101 | { 102 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 2.5e-1f); 103 | const auto y_exact = test_helpers::compute_all (all_floats, 104 | [] (auto x) 105 | { 106 | return std::exp2 (x); 107 | }); 108 | 109 | SECTION ("6th-Order") 110 | { 111 | test_approx (all_floats, y_exact, [] (auto x) 112 | { return math_approx::exp2<6> (x); }, 113 | 3.0e-7f, 114 | 4); 115 | } 116 | SECTION ("6th-Order (C1-cont)") 117 | { 118 | test_approx (all_floats, y_exact, [] (auto x) 119 | { return math_approx::exp2<6, true> (x); }, 120 | 3.0e-7f, 121 | 4); 122 | } 123 | SECTION ("5th-Order") 124 | { 125 | test_approx (all_floats, y_exact, [] (auto x) 126 | { return math_approx::exp2<5> (x); }, 127 | 4.0e-7f, 128 | 5); 129 | } 130 | SECTION ("5th-Order (C1-cont)") 131 | { 132 | test_approx (all_floats, y_exact, [] (auto x) 133 | { return math_approx::exp2<5, true> (x); }, 134 | 5.0e-7f, 135 | 8); 136 | } 137 | SECTION ("4th-Order") 138 | { 139 | test_approx (all_floats, y_exact, [] (auto x) 140 | { return math_approx::exp2<4> (x); }, 141 | 4.0e-6f, 142 | 70); 143 | } 144 | 
SECTION ("4th-Order (C1-cont)") 145 | { 146 | test_approx (all_floats, y_exact, [] (auto x) 147 | { return math_approx::exp2<4, true> (x); }, 148 | 1.5e-5f, 149 | 175); 150 | } 151 | SECTION ("3th-Order") 152 | { 153 | test_approx (all_floats, y_exact, [] (auto x) 154 | { return math_approx::exp2<3> (x); }, 155 | 1.5e-4f, 156 | 0); 157 | } 158 | SECTION ("3th-Order (C1-cont)") 159 | { 160 | test_approx (all_floats, y_exact, [] (auto x) 161 | { return math_approx::exp2<3, true> (x); }, 162 | 6.5e-4f, 163 | 0); 164 | } 165 | } 166 | 167 | TEMPLATE_TEST_CASE ("Exp10 Approx Test", "", float, double) 168 | { 169 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 2.5e-1f); 170 | const auto y_exact = test_helpers::compute_all (all_floats, 171 | [] (auto x) 172 | { 173 | return std::pow (10.0f, x); 174 | }); 175 | 176 | SECTION ("6th-Order") 177 | { 178 | test_approx (all_floats, y_exact, [] (auto x) 179 | { return math_approx::exp10<6> (x); }, 180 | 2.0e-6f, 181 | 32); 182 | } 183 | SECTION ("6th-Order (C1-cont)") 184 | { 185 | test_approx (all_floats, y_exact, [] (auto x) 186 | { return math_approx::exp10<6, true> (x); }, 187 | 6.0e-6f, 188 | 32); 189 | } 190 | SECTION ("5th-Order") 191 | { 192 | test_approx (all_floats, y_exact, [] (auto x) 193 | { return math_approx::exp10<5> (x); }, 194 | 2.5e-6f, 195 | 35); 196 | } 197 | SECTION ("5th-Order (C1-cont)") 198 | { 199 | test_approx (all_floats, y_exact, [] (auto x) 200 | { return math_approx::exp10<5, true> (x); }, 201 | 2.5e-6f, 202 | 35); 203 | } 204 | SECTION ("4th-Order") 205 | { 206 | test_approx (all_floats, y_exact, [] (auto x) 207 | { return math_approx::exp10<4> (x); }, 208 | 5.5e-6f, 209 | 90); 210 | } 211 | SECTION ("4th-Order (C1-cont)") 212 | { 213 | test_approx (all_floats, y_exact, [] (auto x) 214 | { return math_approx::exp10<4, true> (x); }, 215 | 1.5e-5f, 216 | 200); 217 | } 218 | SECTION ("3th-Order") 219 | { 220 | test_approx (all_floats, y_exact, [] (auto x) 221 | { return 
math_approx::exp10<3> (x); }, 222 | 1.5e-4f, 223 | 0); 224 | } 225 | SECTION ("3th-Order (C1-cont)") 226 | { 227 | test_approx (all_floats, y_exact, [] (auto x) 228 | { return math_approx::exp10<3, true> (x); }, 229 | 6.5e-4f, 230 | 0); 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /test/src/reference/polylogarithm.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /** 6 | * Implementations of polylogarithm functions. 7 | * 8 | * Based on the implementations found at: https://github.com/Expander/polylogarithm 9 | */ 10 | namespace polylogarithm 11 | { 12 | /** real polylogarithm with n=2 (dilogarithm). */ 13 | template 14 | inline T Li2 (T x) noexcept 15 | { 16 | constexpr auto PI_ = static_cast (M_PI); 17 | constexpr T P[] = { 18 | (T) 0.9999999999999999502e+0, 19 | (T) -2.6883926818565423430e+0, 20 | (T) 2.6477222699473109692e+0, 21 | (T) -1.1538559607887416355e+0, 22 | (T) 2.0886077795020607837e-1, 23 | (T) -1.0859777134152463084e-2 24 | }; 25 | constexpr T Q[] = { 26 | (T) 1.0000000000000000000e+0, 27 | (T) -2.9383926818565635485e+0, 28 | (T) 3.2712093293018635389e+0, 29 | (T) -1.7076702173954289421e+0, 30 | (T) 4.1596017228400603836e-1, 31 | (T) -3.9801343754084482956e-2, 32 | (T) 8.2743668974466659035e-4 33 | }; 34 | 35 | T y = 0, r = 0, s = 1; 36 | 37 | // transform to [0, 1/2] 38 | if (x < (T) -1) 39 | { 40 | const auto l = std::log ((T) 1 - x); 41 | y = (T) 1 / ((T) 1 - x); 42 | r = -PI_ * PI_ / (T) 6 + l * ((T) 0.5 * l - std::log (-x)); 43 | s = (T) 1; 44 | } 45 | else if (x == (T) -1) 46 | { 47 | return -PI_ * PI_ / (T) 12; 48 | } 49 | else if (x < (T) 0) 50 | { 51 | const auto l = std::log1p (-x); 52 | y = x / (x - (T) 1); 53 | r = (T) -0.5 * l * l; 54 | s = (T) -1; 55 | } 56 | else if (x == (T) 0) 57 | { 58 | return (T) 0; 59 | } 60 | else if (x < (T) 0.5) 61 | { 62 | y = x; 63 | r = (T) 0; 64 | s = (T) 1; 65 | } 66 | 
else if (x < (T) 1) 67 | { 68 | y = (T) 1 - x; 69 | r = PI_ * PI_ / (T) 6 - std::log (x) * std::log (y); 70 | s = (T) -1; 71 | } 72 | else if (x == (T) 1) 73 | { 74 | return PI_ * PI_ / (T) 6; 75 | } 76 | else if (x < (T) 2) 77 | { 78 | const auto l = std::log (x); 79 | y = (T) 1 - (T) 1 / x; 80 | r = PI_ * PI_ / (T) 6 - l * (std::log (y) + (T) 0.5 * l); 81 | s = (T) 1; 82 | } 83 | else 84 | { 85 | const auto l = std::log (x); 86 | y = (T) 1 / x; 87 | r = PI_ * PI_ / (T) 3 - (T) 0.5 * l * l; 88 | s = (T) -1; 89 | } 90 | 91 | const auto y2 = y * y; 92 | const auto y4 = y2 * y2; 93 | const auto p = P[0] + y * P[1] + y2 * (P[2] + y * P[3]) + y4 * (P[4] + y * P[5]); 94 | const auto q = Q[0] + y * Q[1] + y2 * (Q[2] + y * Q[3]) + y4 * (Q[4] + y * Q[5] + y2 * Q[6]); 95 | 96 | return r + s * y * p / q; 97 | } 98 | } // namespace polylogarithm 99 | -------------------------------------------------------------------------------- /test/src/reference/sincospi.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace sincospi 6 | { 7 | /* Writes result sine result sin(πa) to the location pointed to by sp 8 | Writes result cosine result cos(πa) to the location pointed to by cp 9 | 10 | In exhaustive testing, the maximum error in sine results was 0.96677 ulp, 11 | the maximum error in cosine results was 0.96563 ulp, meaning results are 12 | faithfully rounded. 13 | 14 | Copied from: https://stackoverflow.com/questions/42792939/implementation-of-sinpi-and-cospi-using-standard-c-math-library 15 | */ 16 | void sincospif (float a, float *sp, float *cp) 17 | { 18 | float az, t, c, r, s; 19 | int32_t i; 20 | 21 | az = a * 0.0f; // must be evaluated with IEEE-754 semantics 22 | /* for |a| > 2**24, cospi(a) = 1.0f, but cospi(Inf) = NaN */ 23 | a = (fabsf (a) < 0x1.0p24f) ? 
a : az; 24 | r = nearbyintf (a + a); // must use IEEE-754 "to nearest" rounding 25 | i = (int32_t)r; 26 | t = fmaf (-0.5f, r, a); 27 | /* compute core approximations */ 28 | s = t * t; 29 | /* Approximate cos(pi*x) for x in [-0.25,0.25] */ 30 | r = 0x1.d9e000p-3f; 31 | r = fmaf (r, s, -0x1.55c400p+0f); 32 | r = fmaf (r, s, 0x1.03c1cep+2f); 33 | r = fmaf (r, s, -0x1.3bd3ccp+2f); 34 | c = fmaf (r, s, 0x1.000000p+0f); 35 | /* Approximate sin(pi*x) for x in [-0.25,0.25] */ 36 | r = -0x1.310000p-1f; 37 | r = fmaf (r, s, 0x1.46737ep+1f); 38 | r = fmaf (r, s, -0x1.4abbfep+2f); 39 | r = (t * s) * r; 40 | s = fmaf (t, 0x1.921fb6p+1f, r); 41 | if (i & 2) { 42 | s = 0.0f - s; // must be evaluated with IEEE-754 semantics 43 | c = 0.0f - c; // must be evaluated with IEEE-754 semantics 44 | } 45 | if (i & 1) { 46 | t = 0.0f - s; // must be evaluated with IEEE-754 semantics 47 | s = c; 48 | c = t; 49 | } 50 | /* IEEE-754: sinPi(+n) is +0 and sinPi(-n) is -0 for positive integers n */ 51 | if (a == floorf (a)) s = az; 52 | *sp = s; 53 | *cp = c; 54 | } 55 | 56 | float sin2pi (float x) 57 | { 58 | float s, c; 59 | sincospif (2.0f * x, &s, &c); 60 | return s; 61 | } 62 | 63 | float cos2pi (float x) 64 | { 65 | float s, c; 66 | sincospif (2.0f * x, &s, &c); 67 | return c; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /test/src/reference/toms917.hpp: -------------------------------------------------------------------------------- 1 | # include 2 | # include 3 | # include 4 | # include 5 | # include 6 | # include 7 | # include 8 | # include 9 | 10 | namespace toms917 11 | { 12 | using namespace std; 13 | // 14 | // DBL_EPSILON, provided by , is twice the machine epsilon for 15 | // double precision arithmetic. 
16 | // 17 | # define TWOITERTOL DBL_EPSILON 18 | 19 | //****************************************************************************80 20 | 21 | inline int wrightomega_ext ( complex z, complex &w, 22 | complex &e, complex &r, complex &cond ) 23 | 24 | //****************************************************************************80 25 | // 26 | // Purpose: 27 | // 28 | // WRIGHTOMEGA_EXT computes the Wright Omega function with extra information. 29 | // 30 | // Discussion: 31 | // 32 | // WRIGHTOMEGA_EXT is the extended routine for evaluating the Wright 33 | // Omega function with the option of extracting the last update step, 34 | // the penultimate residual and the condition number estimate. 35 | // 36 | // Modified: 37 | // 38 | // 14 May 2016 39 | // 40 | // Author: 41 | // 42 | // Piers Lawrence, Robert Corless, David Jeffrey 43 | // 44 | // Reference: 45 | // 46 | // Piers Lawrence, Robert Corless, David Jeffrey, 47 | // Algorithm 917: Complex Double-Precision Evaluation of the Wright Omega 48 | // Function, 49 | // ACM Transactions on Mathematical Software, 50 | // Volume 38, Number 3, Article 20, April 2012, 17 pages. 51 | // 52 | // Parameters: 53 | // 54 | // Input, complex Z, value at which to evaluate Wrightomega(). 55 | // 56 | // Output, complex &W, the value of Wrightomega(z). 57 | // 58 | // Output, complex &E, the last update step in the iterative scheme. 59 | // 60 | // Output, complex &R, the penultimate residual, 61 | // r_k = z - w_k - log(w_k) 62 | // 63 | // Output, complex &COND, the condition number estimate. 64 | // 65 | // Output, int WRIGHTOMEGA_EXT, error flag; 66 | // 0, successful computation. 67 | // nonzero, the computation failed. 68 | // 69 | { 70 | double near; 71 | double pi = M_PI; 72 | complex pz; 73 | double s = 1.0; 74 | complex t; 75 | complex wp1; 76 | double x; 77 | double y; 78 | double ympi; 79 | double yppi; 80 | // 81 | // Extract real and imaginary parts of Z. 
82 | // 83 | x = real ( z ); 84 | y = imag ( z ); 85 | // 86 | // Compute if we are near the branch cuts. 87 | // 88 | ympi = y - pi; 89 | yppi = y + pi; 90 | near = 0.01; 91 | // 92 | // Test for floating point exceptions: 93 | // 94 | 95 | // 96 | // NaN output for NaN input. 97 | // 98 | if ( isnan ( x ) || isnan ( y ) ) 99 | { 100 | // w = complex ( ( 0.0 / 0.0 ), ( 0.0 / 0.0 ) ); 101 | // e = complex ( 0.0, 0.0 ); 102 | // r = complex ( 0.0, 0.0 ); 103 | return 0; 104 | } 105 | // 106 | // Signed zeros between branches. 107 | // 108 | else if ( isinf ( x ) && ( x < 0.0 ) && ( - pi < y ) && ( y <= pi ) ) 109 | { 110 | if ( fabs ( y ) <= pi / 2.0 ) 111 | { 112 | w = + 0.0; 113 | } 114 | else 115 | { 116 | w = - 0.0; 117 | } 118 | 119 | if ( 0.0 <= y ) 120 | { 121 | w = w + complex ( 0.0, 0.0 ); 122 | } 123 | else 124 | { 125 | w = w + complex ( 0.0, - 1.0 * 0.0 ); 126 | } 127 | 128 | e = complex ( 0.0, 0.0 ); 129 | r = complex ( 0.0, 0.0 ); 130 | return 0; 131 | } 132 | // 133 | // Asymptotic for large z. 134 | // 135 | else if ( isinf ( x ) || isinf ( y ) ) 136 | { 137 | w = complex ( x, y ); 138 | e = complex ( 0.0, 0.0 ); 139 | r = complex ( 0.0, 0.0 ); 140 | return 0; 141 | } 142 | // 143 | // Test if exactly on the singular points. 144 | // 145 | if ( ( x == - 1.0 ) && ( fabs ( y ) == pi ) ) 146 | { 147 | w = complex ( - 1.0, 0.0 ); 148 | e = complex ( 0.0, 0.0 ); 149 | r = complex ( 0.0, 0.0 ); 150 | return 0; 151 | } 152 | // 153 | // Choose approximation based on region. 154 | // 155 | 156 | // 157 | // Region 1: upper branch point. 158 | // Series about z=-1+Pi*I. 
159 | // 160 | if ( ( - 2.0 < x && x <= 1.0 && 1.0 < y && y < 2.0 * pi ) ) 161 | { 162 | pz = conj ( sqrt ( conj ( 2.0 * ( z + complex ( 1.0, - pi ) ) ) ) ); 163 | 164 | w = - 1.0 165 | + ( complex ( 0.0, 1.0 ) 166 | + ( 1.0 / 3.0 167 | + ( - 1.0 / 36.0 * complex ( 0.0, 1.0 ) 168 | + ( 1.0 / 270.0 + 1.0 / 4320.0 * complex ( 0.0, 1.0 ) * pz ) 169 | * pz ) * pz ) * pz ) * pz; 170 | } 171 | // 172 | // Region 2: lower branch point. 173 | // Series about z=-1-Pi*I. 174 | // 175 | else if ( ( - 2.0 < x && x <= 1.0 && - 2.0 * pi < y && y <- 1.0 ) ) 176 | { 177 | pz = conj ( sqrt ( conj ( 2.0 * ( z + 1.0 + complex ( 0.0, pi ) ) ) ) ); 178 | 179 | w = - 1.0 180 | + ( - complex ( 0.0, 1.0 ) + ( 1.0 / 3.0 181 | + ( 1.0 / 36.0 * complex ( 0.0, 1.0 ) 182 | + ( 1.0 / 270.0 - 1.0 / 4320.0 * complex ( 0.0, 1.0 ) * pz ) 183 | * pz ) * pz ) * pz ) * pz; 184 | } 185 | // 186 | // Region 3: between branch cuts. 187 | // Series: About -infinity. 188 | // 189 | else if ( x <= - 2.0 && - pi < y && y <= pi ) 190 | { 191 | pz = exp ( z ); 192 | w = ( 1.0 193 | + ( - 1.0 194 | + ( 3.0 / 2.0 195 | + ( - 8.0 / 3.0 196 | + 125.0 / 24.0 * pz ) * pz ) * pz ) * pz ) * pz; 197 | } 198 | // 199 | // Region 4: Mushroom. 200 | // Series about z=1. 201 | // 202 | else if ( ( ( - 2.0 < x ) && ( x <= 1.0 ) && ( - 1.0 <= y ) && ( y <= 1.0 ) ) 203 | || ( ( - 2.0 < x ) && ( x - 1.0 ) * ( x - 1.0 ) + y * y <= pi * pi ) ) 204 | { 205 | pz = z - 1.0; 206 | w = 1.0 / 2.0 + 1.0 / 2.0 * z 207 | + ( 1.0 / 16.0 208 | + ( - 1.0 / 192.0 209 | + ( - 1.0 / 3072.0 + 13.0 / 61440.0 * pz ) * pz ) * pz ) * pz * pz; 210 | } 211 | // 212 | // Region 5: Top wing. 213 | // Negative log series. 
214 | // 215 | else if ( x <= - 1.05 && pi < y && y - pi <= - 0.75 * ( x + 1.0 ) ) 216 | { 217 | t = z - complex ( 0.0, pi ); 218 | pz = log ( - t ); 219 | w = ( ( 1.0 + ( - 3.0 / 2.0 + 1.0 / 3.0 * pz ) * pz ) * pz 220 | + ( ( -1.0 + 1.0 / 2.0 * pz ) * pz + ( pz + ( - pz + t ) * t ) * t ) * t ) 221 | / ( t * t * t ); 222 | } 223 | // 224 | // Region 6: Bottom wing. 225 | // Negative log series. 226 | // 227 | else if ( x <= - 1.05 && 0.75 * ( x + 1.0 ) < y + pi && y + pi <= 0.0 ) 228 | { 229 | t = z + complex ( 0.0, pi ); 230 | pz = log ( - t ); 231 | w = ( ( 1.0 + ( - 3.0 / 2.0 + 1.0 / 3.0 * pz ) * pz ) * pz 232 | + ( ( - 1.0 + 1.0 / 2.0 * pz ) * pz + ( pz + ( - pz + t ) * t ) * t ) * t ) 233 | / ( t * t * t ); 234 | } 235 | // 236 | // Region 7: Everywhere else. 237 | // Series solution about infinity. 238 | // 239 | else 240 | { 241 | pz = log ( z ); 242 | w = ( ( 1.0 + ( - 3.0 / 2.0 + 1.0 / 3.0 * pz ) * pz ) * pz 243 | + ( ( - 1.0 + 1.0 / 2.0 * pz ) * pz + ( pz + ( - pz + z ) * z ) * z ) * z ) 244 | / ( z * z * z ); 245 | } 246 | // 247 | // Regularize if near branch cuts. 248 | /// 249 | if ( x <= - 1.0 + near && ( fabs ( ympi ) <= near || fabs ( yppi ) <= near ) ) 250 | { 251 | s = - 1.0; 252 | if ( fabs ( ympi ) <= near ) 253 | { 254 | // 255 | // Recompute ympi with directed rounding. 256 | // 257 | fesetround ( FE_UPWARD ); 258 | ympi = y - pi; 259 | 260 | if ( ympi <= 0.0 ) 261 | { 262 | fesetround ( FE_DOWNWARD ); 263 | ympi = y - pi; 264 | } 265 | 266 | z = complex ( x, ympi ); 267 | // 268 | // Return rounding to default. 269 | // 270 | fesetround ( FE_TONEAREST ); 271 | } 272 | else 273 | { 274 | // 275 | // Recompute yppi with directed rounding. 276 | // 277 | fesetround ( FE_UPWARD ); 278 | yppi = y + pi; 279 | 280 | if ( yppi <= 0.0 ) 281 | { 282 | fesetround ( FE_DOWNWARD ); 283 | yppi = y + pi; 284 | } 285 | 286 | z = complex ( x, yppi ); 287 | // 288 | // Return rounding to default. 
289 | // 290 | fesetround ( FE_TONEAREST ); 291 | } 292 | } 293 | // 294 | // Iteration one. 295 | // 296 | w = s * w; 297 | r = z - s * w - log ( w ); 298 | wp1 = s * w + 1.0; 299 | e = r / wp1 * ( 2.0 * wp1 * ( wp1 + 2.0 / 3.0 * r ) - r ) 300 | / ( 2.0 * wp1 * ( wp1 + 2.0 / 3.0 * r ) - 2.0 * r ); 301 | w = w * ( 1.0 + e ); 302 | // 303 | // Iteration two. 304 | // 305 | if ( abs ( ( 2.0 * w * w - 8.0 * w - 1.0 ) * pow ( abs ( r ), 4.0 ) ) 306 | >= TWOITERTOL * 72.0 * pow ( abs ( wp1 ), 6.0 ) ) 307 | { 308 | r = z - s * w - log ( w ); 309 | wp1 = s * w + 1.0; 310 | e = r / wp1 * ( 2.0 * wp1 * ( wp1 + 2.0 / 3.0 * r ) - r ) 311 | / ( 2.0 * wp1 * ( wp1 + 2.0 / 3.0 * r ) - 2.0 * r ); 312 | w = w * ( 1.0 + e ); 313 | } 314 | // 315 | // Undo regularization. 316 | // 317 | w = s * w; 318 | // 319 | // Provide condition number estimate. 320 | // 321 | cond = z / ( 1.0 + w ); 322 | 323 | return 0; 324 | } 325 | //****************************************************************************80 326 | 327 | //****************************************************************************80 328 | 329 | inline complex wrightomega ( complex z ) 330 | 331 | //****************************************************************************80 332 | // 333 | // Purpose: 334 | // 335 | // WRIGHTOMEGA is the simple routine for evaluating the Wright Omega function. 336 | // 337 | // Discussion: 338 | // 339 | // This function is called by: 340 | // 341 | // w = wrightomega ( z ) 342 | // 343 | // This function makes a call to the more powerful wrightomega_ext() function. 
344 | // 345 | // Modified: 346 | // 347 | // 14 May 2016 348 | // 349 | // Author: 350 | // 351 | // Piers Lawrence, Robert Corless, David Jeffrey 352 | // 353 | // Reference: 354 | // 355 | // Piers Lawrence, Robert Corless, David Jeffrey, 356 | // Algorithm 917: Complex Double-Precision Evaluation of the Wright Omega 357 | // Function, 358 | // ACM Transactions on Mathematical Software, 359 | // Volume 38, Number 3, Article 20, April 2012, 17 pages. 360 | // 361 | // Parameters: 362 | // 363 | // Input, complex Z, the argument. 364 | // 365 | // Output, complex WRIGHTOMEGA, the value of the Wright Omega 366 | // function of Z. 367 | // 368 | { 369 | complex cond; 370 | complex e; 371 | complex r; 372 | complex w; 373 | 374 | wrightomega_ext ( z, w, e, r, cond ); 375 | 376 | return w; 377 | } 378 | 379 | inline float wrightomega ( float z ) 380 | { 381 | return (float) std::real (wrightomega (std::complex { double (z), 0.0 })); 382 | } 383 | } 384 | -------------------------------------------------------------------------------- /test/src/sigmoid_approx_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test_helpers.hpp" 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | TEST_CASE ("Sigmoid Approx Test") 8 | { 9 | #if ! 
defined(WIN32) 10 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f); 11 | #else 12 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 13 | #endif 14 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 15 | { return 1.0f / (1.0f + std::exp (-x)); }); 16 | 17 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound) 18 | { 19 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 20 | 21 | const auto error = test_helpers::compute_error (y_exact, y_approx); 22 | const auto max_error = test_helpers::abs_max (error); 23 | 24 | std::cout << max_error << std::endl; 25 | REQUIRE (std::abs (max_error) < err_bound); 26 | }; 27 | 28 | SECTION ("9th-Order") 29 | { 30 | test_approx ([] (auto x) 31 | { return math_approx::sigmoid<9> (x); }, 32 | 6.5e-7f); 33 | } 34 | SECTION ("7th-Order") 35 | { 36 | test_approx ([] (auto x) 37 | { return math_approx::sigmoid<7> (x); }, 38 | 7.0e-6f); 39 | } 40 | SECTION ("5th-Order") 41 | { 42 | test_approx ([] (auto x) 43 | { return math_approx::sigmoid<5> (x); }, 44 | 1.0e-4f); 45 | } 46 | SECTION ("3th-Order") 47 | { 48 | test_approx ([] (auto x) 49 | { return math_approx::sigmoid<3> (x); }, 50 | 2.0e-3f); 51 | } 52 | } 53 | 54 | TEST_CASE ("Sigmoid (Exp) Approx Test") 55 | { 56 | #if ! 
defined(WIN32) 57 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f); 58 | #else 59 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 60 | #endif 61 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 62 | { return 1.0f / (1.0f + std::exp (-x)); }); 63 | 64 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) 65 | { 66 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 67 | 68 | const auto error = test_helpers::compute_error (y_exact, y_approx); 69 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 70 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 71 | 72 | const auto max_error = test_helpers::abs_max (error); 73 | const auto max_rel_error = test_helpers::abs_max (rel_error); 74 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 75 | 76 | std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; 77 | REQUIRE (std::abs (max_error) < err_bound); 78 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 79 | if (ulp_bound > 0) 80 | REQUIRE (max_ulp_error < ulp_bound); 81 | }; 82 | 83 | SECTION ("6th-Order (Exp)") 84 | { 85 | test_approx ([] (auto x) 86 | { return math_approx::sigmoid_exp<6> (x); }, 87 | 1.5e-7f, 88 | 6.5e-7f, 89 | 12); 90 | } 91 | 92 | SECTION ("5th-Order (Exp)") 93 | { 94 | test_approx ([] (auto x) 95 | { return math_approx::sigmoid_exp<5> (x); }, 96 | 1.5e-7f, 97 | 7.5e-7f, 98 | 12); 99 | } 100 | 101 | SECTION ("4th-Order (Exp)") 102 | { 103 | test_approx ([] (auto x) 104 | { return math_approx::sigmoid_exp<4> (x); }, 105 | 9.5e-7f, 106 | 4.5e-6f, 107 | 65); 108 | } 109 | 110 | SECTION ("3rd-Order (Exp)") 111 | { 112 | test_approx ([] (auto x) 113 | { return math_approx::sigmoid_exp<3> (x); }, 114 | 3.0e-4f, 115 | 1.5e-4f, 116 | 0); 117 | } 118 | } 
119 | -------------------------------------------------------------------------------- /test/src/test_helpers.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace test_helpers 12 | { 13 | /* Collects the floats reached by stepping std::nextafter from begin toward end. Once the current value has magnitude below tol, 0 and tol are appended and stepping resumes from tol. */ template 14 | inline auto all_32_bit_floats (float begin, float end, float tol = 1.0e-10f) 15 | { 16 | std::vector vec; 17 | vec.reserve (1 << 20); 18 | begin = (float) vec.emplace_back (static_cast (begin)); 19 | while (begin < end) 20 | { 21 | if (std::abs (begin) < tol) 22 | { 23 | begin = (float) vec.emplace_back (static_cast (0)); 24 | begin = (float) vec.emplace_back (static_cast (tol)); 25 | } 26 | begin = (float) vec.emplace_back (static_cast (std::nextafter (begin, end))); 27 | } 28 | 29 | return vec; 30 | } 31 | 32 | /* Applies f to every input and returns the results in the same order. */ template 33 | auto compute_all (std::span all_floats, 34 | F&& f) 35 | { 36 | std::vector y; 37 | y.resize (all_floats.size()); 38 | for (size_t i = 0; i < all_floats.size(); ++i) 39 | y[i] = f (all_floats[i]); 40 | 41 | return y; 42 | } 43 | 44 | /* Element-wise (actual - approx). Indexes approx up to actual.size(), so approx must be at least that long. */ template 45 | inline std::vector compute_error (std::span actual, std::span approx) 46 | { 47 | std::vector err; 48 | err.resize (actual.size()); 49 | for (size_t i = 0; i < actual.size(); ++i) 50 | err[i] = (actual[i] - approx[i]); 51 | return err; 52 | } 53 | 54 | /* Element-wise (actual - approx) / (actual + epsilon). Indexes approx up to actual.size(), so approx must be at least that long. */ template 55 | inline std::vector compute_rel_error (std::span actual, std::span approx) 56 | { 57 | std::vector err; 58 | err.resize (actual.size()); 59 | for (size_t i = 0; i < actual.size(); ++i) 60 | err[i] = (actual[i] - approx[i]) / (actual[i] + std::numeric_limits::epsilon()); 61 | return err; 62 | } 63 | 64 | // mostly borrowed from Catch2 65 | inline uint32_t f32_ulp_dist (float lhs, float rhs) // NOLINT 66 | { 67 | // We want X == Y to imply 0 ULP distance even if X and Y aren't 68 | // bit-equal (-0 and 0), or X - Y != 0 (same sign infinities). 
69 | if (lhs == rhs) 70 | return 0; 71 | 72 | // We need a properly typed positive zero for type inference. 73 | static constexpr float positive_zero {}; 74 | 75 | // We want to ensure that +/- 0 is always represented as positive zero 76 | if (lhs == positive_zero) 77 | lhs = positive_zero; 78 | if (rhs == positive_zero) 79 | rhs = positive_zero; 80 | 81 | // If arguments have different signs, we can handle them by summing 82 | // how far are they from 0 each. 83 | if (std::signbit (lhs) != std::signbit (rhs)) 84 | { 85 | return f32_ulp_dist (std::abs (lhs), positive_zero) 86 | + f32_ulp_dist (std::abs (rhs), positive_zero); 87 | } 88 | 89 | // get the bit pattern of 'x' 90 | const auto f32_to_bits = [] (float x) -> uint32_t 91 | { 92 | uint32_t u; 93 | memcpy (&u, &x, sizeof (u)); 94 | return u; 95 | }; 96 | 97 | // When both lhs and rhs are of the same sign, we can just 98 | // read the numbers bitwise as integers, and then subtract them 99 | // (assuming IEEE). 100 | uint32_t lc = f32_to_bits (lhs); 101 | uint32_t rc = f32_to_bits (rhs); 102 | 103 | // The ulp distance between two numbers is symmetric, so to avoid 104 | // dealing with overflows we want the bigger converted number on the lhs 105 | if (lc < rc) 106 | std::swap (lc, rc); 107 | 108 | return lc - rc; 109 | } 110 | 111 | /* Element-wise ULP distance between actual and approx, computed with f32_ulp_dist. */ inline auto compute_ulp_error (std::span actual, std::span approx) 112 | { 113 | 114 | 115 | std::vector err; 116 | err.resize (actual.size()); 117 | for (size_t i = 0; i < actual.size(); ++i) 118 | err[i] = f32_ulp_dist (actual[i], approx[i]); 119 | return err; 120 | } 121 | 122 | /* Returns the element of x with the largest magnitude, sign preserved. An empty span yields T {} rather than dereferencing the end iterators that std::minmax_element returns for an empty range. */ template 123 | inline T abs_max (std::span x) 124 | { 125 | if (x.empty()) 126 | return T {}; 127 | 128 | const auto [min, max] = std::minmax_element (x.begin(), x.end()); 129 | 130 | if (std::abs (*min) > std::abs (*max)) 131 | return *min; 132 | return *max; 133 | } 134 | } // namespace test_helpers 135 | -------------------------------------------------------------------------------- /test/src/trig_approx_test.cpp: 
-------------------------------------------------------------------------------- 1 | #include "test_helpers.hpp" 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | /* Compares math_approx::sin<N> with std::sin over the sampled floats in [-10, 10]; each section bounds the max absolute error. */ TEST_CASE ("Sine Approx Test") 8 | { 9 | #if ! defined(WIN32) 10 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f); 11 | #else 12 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 13 | #endif 14 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 15 | { return std::sin (x); }); 16 | 17 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound) 18 | { 19 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 20 | 21 | const auto error = test_helpers::compute_error (y_exact, y_approx); 22 | const auto max_error = test_helpers::abs_max (error); 23 | 24 | std::cout << max_error << std::endl; 25 | REQUIRE (std::abs (max_error) < err_bound); 26 | }; 27 | 28 | SECTION ("9th-Order") 29 | { 30 | test_approx ([] (auto x) 31 | { return math_approx::sin<9> (x); }, 32 | 8.5e-7f); 33 | } 34 | SECTION ("7th-Order") 35 | { 36 | test_approx ([] (auto x) 37 | { return math_approx::sin<7> (x); }, 38 | 1.8e-5f); 39 | } 40 | SECTION ("5th-Order") 41 | { 42 | test_approx ([] (auto x) 43 | { return math_approx::sin<5> (x); }, 44 | 7.5e-4f); 45 | } 46 | } 47 | 48 | /* Compares math_approx::cos<N> with std::cos over the sampled floats in [-10, 10]; each section bounds the max absolute error. */ TEST_CASE ("Cosine Approx Test") 49 | { 50 | #if ! 
defined(WIN32) 51 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f); 52 | #else 53 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 54 | #endif 55 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 56 | { return std::cos (x); }); 57 | 58 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound) 59 | { 60 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 61 | 62 | const auto error = test_helpers::compute_error (y_exact, y_approx); 63 | const auto max_error = test_helpers::abs_max (error); 64 | 65 | std::cout << max_error << std::endl; 66 | REQUIRE (std::abs (max_error) < err_bound); 67 | }; 68 | 69 | SECTION ("9th-Order") 70 | { 71 | test_approx ([] (auto x) 72 | { return math_approx::cos<9> (x); }, 73 | 7.5e-7f); 74 | } 75 | SECTION ("7th-Order") 76 | { 77 | test_approx ([] (auto x) 78 | { return math_approx::cos<7> (x); }, 79 | 1.8e-5f); 80 | } 81 | SECTION ("5th-Order") 82 | { 83 | test_approx ([] (auto x) 84 | { return math_approx::cos<5> (x); }, 85 | 7.5e-4f); 86 | } 87 | } 88 | 89 | /* Compares math_approx::tan<N> with std::tan over the sampled floats in [-1.5, 1.5]; each section bounds the max absolute and relative error, and the max ULP distance unless ulp_bound is 0. */ TEST_CASE ("Tan Approx Test") 90 | { 91 | #if ! 
defined(WIN32) 92 | const auto all_floats = test_helpers::all_32_bit_floats (-1.5f, 1.5f, 1.0e-3f); 93 | #else 94 | const auto all_floats = test_helpers::all_32_bit_floats (-1.5f, 1.5f, 1.0e-1f); 95 | #endif 96 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 97 | { return std::tan (x); }); 98 | 99 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) 100 | { 101 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 102 | 103 | const auto error = test_helpers::compute_error (y_exact, y_approx); 104 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 105 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 106 | 107 | const auto max_error = test_helpers::abs_max (error); 108 | const auto max_rel_error = test_helpers::abs_max (rel_error); 109 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 110 | 111 | std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; 112 | REQUIRE (std::abs (max_error) < err_bound); 113 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 114 | if (ulp_bound > 0) 115 | REQUIRE (max_ulp_error < ulp_bound); 116 | }; 117 | 118 | SECTION ("13th-Order") 119 | { 120 | test_approx ([] (auto x) 121 | { return math_approx::tan<13> (x); }, 122 | 5.5e-5f, 123 | 6.0e-5f, 124 | 520); 125 | } 126 | SECTION ("11th-Order") 127 | { 128 | test_approx ([] (auto x) 129 | { return math_approx::tan<11> (x); }, 130 | 9.5e-5f, 131 | 6.0e-5f, 132 | 520); 133 | } 134 | SECTION ("9th-Order") 135 | { 136 | test_approx ([] (auto x) 137 | { return math_approx::tan<9> (x); }, 138 | 0.0009f, 139 | 6.0e-5f, 140 | 900); 141 | } 142 | SECTION ("7th-Order") 143 | { 144 | test_approx ([] (auto x) 145 | { return math_approx::tan<7> (x); }, 146 | 0.015f, 147 | 0.0009f, 148 | 0); 149 | } 150 | SECTION ("5th-Order") 151 | { 152 | test_approx ([] (auto 
x) 153 | { return math_approx::tan<5> (x); }, 154 | 0.14f, 155 | 0.01f, 156 | 0); 157 | } 158 | SECTION ("3rd-Order") 159 | { 160 | test_approx ([] (auto x) 161 | { return math_approx::tan<3> (x); }, 162 | 1.5f, 163 | 0.09f, 164 | 0); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /test/src/trig_turns_approx_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test_helpers.hpp" 2 | #include 3 | #include 4 | 5 | #include 6 | #include "reference/sincospi.hpp" 7 | 8 | /* Compares math_approx::sin_turns<N> with the sincospi::sin2pi reference over the sampled floats in [-10, 10]; each section bounds the max relative error and the max ULP distance. */ TEST_CASE ("Sine Approx Test") 9 | { 10 | #if ! defined(WIN32) 11 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f); 12 | #else 13 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 14 | #endif 15 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 16 | { return sincospi::sin2pi (x); }); 17 | 18 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float rel_err_bound, uint32_t ulp_err_bound) 19 | { 20 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 21 | 22 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 23 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 24 | 25 | const auto max_rel_error = test_helpers::abs_max (rel_error); 26 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 27 | 28 | std::cout << max_rel_error << ", " << max_ulp_error << std::endl; 29 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 30 | REQUIRE (max_ulp_error < ulp_err_bound); 31 | }; 32 | 33 | SECTION ("11th-Order") 34 | { 35 | test_approx ([] (auto x) 36 | { return math_approx::sin_turns<11> (x); }, 37 | 5.0e-7f, 38 | 6); 39 | } 40 | SECTION ("9th-Order") 41 | { 42 | test_approx ([] (auto x) 43 | { return math_approx::sin_turns<9> (x); }, 44 | 2.0e-6f, 45 | 14); 46 | } 47 | SECTION ("7th-Order") 48 | 
{ 49 | test_approx ([] (auto x) 50 | { return math_approx::sin_turns<7> (x); }, 51 | 9.0e-5f, 52 | 490); 53 | } 54 | SECTION ("5th-Order") 55 | { 56 | test_approx ([] (auto x) 57 | { return math_approx::sin_turns<5> (x); }, 58 | 5.0e-3f, 59 | 22'000); 60 | } 61 | } 62 | 63 | /* Compares math_approx::cos_turns<N> with the sincospi::cos2pi reference over the sampled floats in [-10, 10]; each section bounds the max relative error and the max ULP distance. */ TEST_CASE ("Cosine Approx Test") 64 | { 65 | #if ! defined(WIN32) 66 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-3f); 67 | #else 68 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); 69 | #endif 70 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 71 | { return sincospi::cos2pi (x); }); 72 | 73 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float rel_err_bound, uint32_t ulp_err_bound) 74 | { 75 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 76 | 77 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 78 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 79 | 80 | const auto max_rel_error = test_helpers::abs_max (rel_error); 81 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 82 | 83 | std::cout << max_rel_error << ", " << max_ulp_error << std::endl; 84 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 85 | REQUIRE (max_ulp_error < ulp_err_bound); 86 | }; 87 | 88 | SECTION ("11th-Order") 89 | { 90 | test_approx ([] (auto x) 91 | { return math_approx::cos_turns<11> (x); }, 92 | 5.0e-7f, 93 | 6); 94 | } 95 | SECTION ("9th-Order") 96 | { 97 | test_approx ([] (auto x) 98 | { return math_approx::cos_turns<9> (x); }, 99 | 2.0e-6f, 100 | 10); 101 | } 102 | SECTION ("7th-Order") 103 | { 104 | test_approx ([] (auto x) 105 | { return math_approx::cos_turns<7> (x); }, 106 | 6.0e-5f, 107 | 270); 108 | } 109 | SECTION ("5th-Order") 110 | { 111 | test_approx ([] (auto x) 112 | { return math_approx::cos_turns<5> (x); }, 113 | 3.0e-3f, 114 | 14'000); 115 | } 116 | } 117 | 
-------------------------------------------------------------------------------- /test/src/wright_omega_approx_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test_helpers.hpp" 2 | #include 3 | #include 4 | 5 | #include 6 | #include "reference/toms917.hpp" 7 | 8 | /* Compares math_approx::wright_omega variants with the toms917::wrightomega reference over the sampled floats in [-10, 30]; each section bounds the max absolute and relative error, and the max ULP distance unless ulp_bound is 0. */ TEST_CASE ("Wright-Omega Approx Test") 9 | { 10 | #if ! defined(WIN32) 11 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 30.0f, 1.0e-1f); 12 | #else 13 | const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 30.0f, 5.0e-1f); 14 | #endif 15 | const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) 16 | { return toms917::wrightomega (x); }); 17 | 18 | const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) 19 | { 20 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 21 | 22 | const auto error = test_helpers::compute_error (y_exact, y_approx); 23 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 24 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 25 | 26 | const auto max_error = test_helpers::abs_max (error); 27 | const auto max_rel_error = test_helpers::abs_max (rel_error); 28 | const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); 29 | 30 | std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; 31 | REQUIRE (std::abs (max_error) < err_bound); 32 | REQUIRE (std::abs (max_rel_error) < rel_err_bound); 33 | if (ulp_bound > 0) 34 | REQUIRE (max_ulp_error < ulp_bound); 35 | }; 36 | 37 | SECTION ("Iter-3_Poly-3_LogExp-5") 38 | { 39 | test_approx ([] (auto x) 40 | { return math_approx::wright_omega<3, 3, 5> (x); }, 41 | 2.0e-6f, 42 | 1.5e-6f, 43 | 20); 44 | } 45 | SECTION ("Iter-3_Poly-3") 46 | { 47 | test_approx ([] (auto x) 48 | { return math_approx::wright_omega<3, 3> (x); }, 49 | 4.0e-6f, 50 | 4.5e-6f, 
51 | 70); 52 | } 53 | SECTION ("Iter-2_Poly-5") 54 | { 55 | test_approx ([] (auto x) 56 | { return math_approx::wright_omega<2, 5> (x); }, 57 | 7.0e-6f, 58 | 1.5e-4f, 59 | 0); 60 | } 61 | SECTION ("Iter-2_Poly-3") 62 | { 63 | test_approx ([] (auto x) 64 | { return math_approx::wright_omega<2, 3> (x); }, 65 | 1.5e-5f, 66 | 2.0e-4f, 67 | 0); 68 | } 69 | SECTION ("Iter-2_Poly-3_LogExp-3") 70 | { 71 | test_approx ([] (auto x) 72 | { return math_approx::wright_omega<2, 3, 3> (x); }, 73 | 1.0e-4f, 74 | 3.0e-4f, 75 | 0); 76 | } 77 | SECTION ("Iter-1_Poly-5") 78 | { 79 | test_approx ([] (auto x) 80 | { return math_approx::wright_omega<1, 5> (x); }, 81 | 3.0e-3f, 82 | 5.1e-2f, 83 | 0); 84 | } 85 | SECTION ("Iter-1_Poly-3") 86 | { 87 | test_approx ([] (auto x) 88 | { return math_approx::wright_omega<1, 3> (x); }, 89 | 3.5e-3f, 90 | 5.5e-2f, 91 | 0); 92 | } 93 | SECTION ("Iter-0_Poly-5") 94 | { 95 | test_approx ([] (auto x) 96 | { return math_approx::wright_omega<0, 5> (x); }, 97 | 5.5e-2f, 98 | 2.0f, 99 | 0); 100 | } 101 | SECTION ("Iter-0_Poly-3") 102 | { 103 | test_approx ([] (auto x) 104 | { return math_approx::wright_omega<0, 3> (x); }, 105 | 6.0e-2f, 106 | 2.0f, 107 | 0); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(plotter) 2 | add_subdirectory(bench) 3 | -------------------------------------------------------------------------------- /tools/bench/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if($ENV{CI}) 2 | message(STATUS "Configuring in CI, skipping benchmarks...") 3 | return() 4 | endif() 5 | 6 | CPMAddPackage( 7 | NAME benchmark 8 | GITHUB_REPOSITORY google/benchmark 9 | VERSION 1.9.4 10 | OPTIONS "BENCHMARK_ENABLE_TESTING Off" 11 | ) 12 | 13 | function(setup_bench target src) 14 | add_executable(${target} ${src}) 15 | 
target_link_libraries(${target} PRIVATE benchmark::benchmark math_approx) 16 | target_compile_features(${target} PUBLIC cxx_std_20) 17 | target_compile_definitions(${target} PUBLIC _USE_MATH_DEFINES=1) 18 | endfunction(setup_bench) 19 | 20 | setup_bench(trig_approx_bench trig_bench.cpp) 21 | setup_bench(inverse_trig_approx_bench inverse_trig_bench.cpp) 22 | setup_bench(pow_approx_bench pow_bench.cpp) 23 | setup_bench(log_approx_bench log_bench.cpp) 24 | setup_bench(hyperbolic_trig_approx_bench hyperbolic_trig_bench.cpp) 25 | setup_bench(inverse_hyperbolic_trig_approx_bench inverse_hyperbolic_trig_bench.cpp) 26 | setup_bench(sigmoid_approx_bench sigmoid_bench.cpp) 27 | setup_bench(wright_omega_approx_bench wright_omega_bench.cpp) 28 | setup_bench(polylog_approx_bench polylog_bench.cpp) 29 | setup_bench(trig_turns_approx_bench trig_turns_bench.cpp) 30 | -------------------------------------------------------------------------------- /tools/bench/hyperbolic_trig_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static constexpr size_t N = 2000; 5 | /* N evenly spaced samples in [-10, 10), shared by every benchmark in this file. */ const auto data = [] 6 | { 7 | std::vector x; 8 | x.resize (N, 0.0f); 9 | for (size_t i = 0; i < N; ++i) 10 | x[i] = -10.0f + 20.0f * (float) i / (float) N; 11 | return x; 12 | }(); 13 | 14 | /* Defines and registers a benchmark called name that evaluates func on every sample in data. */ #define HTRIG_BENCH(name, func) \ 15 | void name (benchmark::State& state) \ 16 | { \ 17 | for (auto _ : state) \ 18 | { \ 19 | for (auto& x : data) \ 20 | { \ 21 | auto y = func (x); \ 22 | benchmark::DoNotOptimize (y); \ 23 | } \ 24 | } \ 25 | } \ 26 | BENCHMARK (name); 27 | HTRIG_BENCH (sinh_std, std::sinh) 28 | HTRIG_BENCH (sinh_approx6, math_approx::sinh<6>) 29 | HTRIG_BENCH (sinh_approx5, math_approx::sinh<5>) 30 | HTRIG_BENCH (sinh_approx4, math_approx::sinh<4>) 31 | HTRIG_BENCH (sinh_approx3, math_approx::sinh<3>) 32 | 33 | HTRIG_BENCH (cosh_std, std::cosh) /* baseline for the cosh approximations must be cosh, not sinh */ 34 | HTRIG_BENCH (cosh_approx6, math_approx::cosh<6>) 35 | HTRIG_BENCH (cosh_approx5, 
math_approx::cosh<5>) 36 | HTRIG_BENCH (cosh_approx4, math_approx::cosh<4>) 37 | HTRIG_BENCH (cosh_approx3, math_approx::cosh<3>) 38 | 39 | HTRIG_BENCH (tanh_std, std::tanh) 40 | HTRIG_BENCH (tanh_approx11, math_approx::tanh<11>) 41 | HTRIG_BENCH (tanh_approx9, math_approx::tanh<9>) 42 | HTRIG_BENCH (tanh_approx7, math_approx::tanh<7>) 43 | HTRIG_BENCH (tanh_approx5, math_approx::tanh<5>) 44 | 45 | /* SIMD variant of HTRIG_BENCH: each sample is broadcast into an xsimd batch before func is called. */ #define HTRIG_SIMD_BENCH(name, func) \ 46 | void name (benchmark::State& state) \ 47 | { \ 48 | for (auto _ : state) \ 49 | { \ 50 | for (auto& x : data) \ 51 | { \ 52 | auto y = func (xsimd::broadcast (x)); \ 53 | static_assert (std::is_same_v, decltype(y)>); \ 54 | benchmark::DoNotOptimize (y); \ 55 | } \ 56 | } \ 57 | } \ 58 | BENCHMARK (name); 59 | HTRIG_SIMD_BENCH (sinh_xsimd, xsimd::sinh) /* baseline must match the function being approximated */ 60 | HTRIG_SIMD_BENCH (sinh_simd_approx6, math_approx::sinh<6>) 61 | HTRIG_SIMD_BENCH (sinh_simd_approx5, math_approx::sinh<5>) 62 | HTRIG_SIMD_BENCH (sinh_simd_approx4, math_approx::sinh<4>) 63 | HTRIG_SIMD_BENCH (sinh_simd_approx3, math_approx::sinh<3>) 64 | 65 | HTRIG_SIMD_BENCH (cosh_xsimd, xsimd::cosh) /* baseline must match the function being approximated */ 66 | HTRIG_SIMD_BENCH (cosh_simd_approx6, math_approx::cosh<6>) 67 | HTRIG_SIMD_BENCH (cosh_simd_approx5, math_approx::cosh<5>) 68 | HTRIG_SIMD_BENCH (cosh_simd_approx4, math_approx::cosh<4>) 69 | HTRIG_SIMD_BENCH (cosh_simd_approx3, math_approx::cosh<3>) 70 | 71 | HTRIG_SIMD_BENCH (tanh_xsimd, xsimd::tanh) 72 | HTRIG_SIMD_BENCH (tanh_simd_approx11, math_approx::tanh<11>) 73 | HTRIG_SIMD_BENCH (tanh_simd_approx9, math_approx::tanh<9>) 74 | HTRIG_SIMD_BENCH (tanh_simd_approx7, math_approx::tanh<7>) 75 | HTRIG_SIMD_BENCH (tanh_simd_approx5, math_approx::tanh<5>) 76 | 77 | BENCHMARK_MAIN(); 78 | -------------------------------------------------------------------------------- /tools/bench/inverse_hyperbolic_trig_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static constexpr size_t N = 2000; 5 | const 
/* N evenly spaced samples in [-10, 10) for the asinh benchmarks. */ auto data_asinh = [] 6 | { 7 | std::vector x; 8 | x.resize (N, 0.0f); 9 | for (size_t i = 0; i < N; ++i) 10 | x[i] = -10.0f + 20.0f * (float) i / (float) N; 11 | return x; 12 | }(); 13 | 14 | /* N evenly spaced samples in [1, 10) for the acosh benchmarks. */ const auto data_acosh = [] 15 | { 16 | std::vector x; 17 | x.resize (N, 0.0f); 18 | for (size_t i = 0; i < N; ++i) 19 | x[i] = 1.0f + 9.0f * (float) i / (float) N; 20 | return x; 21 | }(); 22 | 23 | /* N evenly spaced samples in [-1, 1) for the atanh benchmarks; the first sample is exactly -1. */ const auto data_atanh = [] 24 | { 25 | std::vector x; 26 | x.resize (N, 0.0f); 27 | for (size_t i = 0; i < N; ++i) 28 | x[i] = -1.0f + 2.0f * (float) i / (float) N; 29 | return x; 30 | }(); 31 | 32 | /* Defines and registers a benchmark called name that evaluates func on every sample in data. */ #define INV_HTRIG_BENCH(name, func, data) \ 33 | void name (benchmark::State& state) \ 34 | { \ 35 | for (auto _ : state) \ 36 | { \ 37 | for (auto& x : data) \ 38 | { \ 39 | auto y = func (x); \ 40 | benchmark::DoNotOptimize (y); \ 41 | } \ 42 | } \ 43 | } \ 44 | BENCHMARK (name); 45 | 46 | INV_HTRIG_BENCH (asinh_std, std::asinh, data_asinh) 47 | INV_HTRIG_BENCH (asinh_approx7, math_approx::asinh<7>, data_asinh) 48 | INV_HTRIG_BENCH (asinh_approx6, math_approx::asinh<6>, data_asinh) 49 | INV_HTRIG_BENCH (asinh_approx5, math_approx::asinh<5>, data_asinh) 50 | INV_HTRIG_BENCH (asinh_approx4, math_approx::asinh<4>, data_asinh) 51 | INV_HTRIG_BENCH (asinh_approx3, math_approx::asinh<3>, data_asinh) 52 | 53 | INV_HTRIG_BENCH (acosh_std, std::acosh, data_acosh) 54 | INV_HTRIG_BENCH (acosh_approx6, math_approx::acosh<6>, data_acosh) 55 | INV_HTRIG_BENCH (acosh_approx5, math_approx::acosh<5>, data_acosh) 56 | INV_HTRIG_BENCH (acosh_approx4, math_approx::acosh<4>, data_acosh) 57 | INV_HTRIG_BENCH (acosh_approx3, math_approx::acosh<3>, data_acosh) 58 | 59 | INV_HTRIG_BENCH (atanh_std, std::atanh, data_atanh) 60 | INV_HTRIG_BENCH (atanh_approx6, math_approx::atanh<6>, data_atanh) 61 | INV_HTRIG_BENCH (atanh_approx5, math_approx::atanh<5>, data_atanh) 62 | INV_HTRIG_BENCH (atanh_approx4, math_approx::atanh<4>, data_atanh) 63 | INV_HTRIG_BENCH (atanh_approx3, math_approx::atanh<3>, data_atanh) 64 
| 65 | /* SIMD variant of INV_HTRIG_BENCH: each sample is broadcast into an xsimd batch before func is called. */ #define INV_HTRIG_SIMD_BENCH(name, func, data) \ 66 | void name (benchmark::State& state) \ 67 | { \ 68 | for (auto _ : state) \ 69 | { \ 70 | for (auto& x : data) \ 71 | { \ 72 | auto y = func (xsimd::broadcast (x)); \ 73 | static_assert (std::is_same_v, decltype(y)>); \ 74 | benchmark::DoNotOptimize (y); \ 75 | } \ 76 | } \ 77 | } \ 78 | BENCHMARK (name); 79 | 80 | INV_HTRIG_SIMD_BENCH (asinh_xsimd, xsimd::asinh, data_asinh) 81 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx7, math_approx::asinh<7>, data_asinh) 82 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx6, math_approx::asinh<6>, data_asinh) 83 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx5, math_approx::asinh<5>, data_asinh) 84 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx4, math_approx::asinh<4>, data_asinh) 85 | INV_HTRIG_SIMD_BENCH (asinh_simd_approx3, math_approx::asinh<3>, data_asinh) 86 | 87 | INV_HTRIG_SIMD_BENCH (acosh_xsimd, xsimd::acosh, data_acosh) 88 | INV_HTRIG_SIMD_BENCH (acosh_simd_approx6, math_approx::acosh<6>, data_acosh) 89 | INV_HTRIG_SIMD_BENCH (acosh_simd_approx5, math_approx::acosh<5>, data_acosh) 90 | INV_HTRIG_SIMD_BENCH (acosh_simd_approx4, math_approx::acosh<4>, data_acosh) 91 | INV_HTRIG_SIMD_BENCH (acosh_simd_approx3, math_approx::acosh<3>, data_acosh) 92 | 93 | INV_HTRIG_SIMD_BENCH (atanh_xsimd, xsimd::atanh, data_atanh) 94 | INV_HTRIG_SIMD_BENCH (atanh_simd_approx6, math_approx::atanh<6>, data_atanh) 95 | INV_HTRIG_SIMD_BENCH (atanh_simd_approx5, math_approx::atanh<5>, data_atanh) 96 | INV_HTRIG_SIMD_BENCH (atanh_simd_approx4, math_approx::atanh<4>, data_atanh) 97 | INV_HTRIG_SIMD_BENCH (atanh_simd_approx3, math_approx::atanh<3>, data_atanh) 98 | 99 | BENCHMARK_MAIN(); 100 | -------------------------------------------------------------------------------- /tools/bench/inverse_trig_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static constexpr size_t N = 1000; 5 | /* N evenly spaced samples in [-1, 1), shared by every benchmark in this file. */ const auto data = [] 6 | { 7 | std::vector x; 8 
| x.resize (N, 0.0f); 9 | for (size_t i = 0; i < N; ++i) 10 | x[i] = -1.0f + 2.0f * (float) i / (float) N; 11 | return x; 12 | }(); 13 | 14 | /* Defines and registers a benchmark called name that evaluates func on every sample in data. */ #define TRIG_BENCH(name, func) \ 15 | void name (benchmark::State& state) \ 16 | { \ 17 | for (auto _ : state) \ 18 | { \ 19 | for (auto& x : data) \ 20 | { \ 21 | auto y = func (x); \ 22 | benchmark::DoNotOptimize (y); \ 23 | } \ 24 | } \ 25 | } \ 26 | BENCHMARK (name); 27 | 28 | TRIG_BENCH (asin_std, std::asin) 29 | TRIG_BENCH (asin_approx4, math_approx::asin<4>) 30 | TRIG_BENCH (asin_approx3, math_approx::asin<3>) 31 | TRIG_BENCH (asin_approx2, math_approx::asin<2>) 32 | TRIG_BENCH (asin_approx1, math_approx::asin<1>) 33 | 34 | TRIG_BENCH (acos_std, std::acos) 35 | TRIG_BENCH (acos_approx5, math_approx::acos<5>) 36 | TRIG_BENCH (acos_approx4, math_approx::acos<4>) 37 | TRIG_BENCH (acos_approx3, math_approx::acos<3>) 38 | TRIG_BENCH (acos_approx2, math_approx::acos<2>) 39 | TRIG_BENCH (acos_approx1, math_approx::acos<1>) 40 | 41 | TRIG_BENCH (atan_std, std::atan) 42 | TRIG_BENCH (atan_approx7, math_approx::atan<7>) 43 | TRIG_BENCH (atan_approx5, math_approx::atan<5>) 44 | TRIG_BENCH (atan_approx4, math_approx::atan<4>) 45 | 46 | /* SIMD variant of TRIG_BENCH: each sample is broadcast into an xsimd batch before func is called. */ #define TRIG_SIMD_BENCH(name, func) \ 47 | void name (benchmark::State& state) \ 48 | { \ 49 | for (auto _ : state) \ 50 | { \ 51 | for (auto& x : data) \ 52 | { \ 53 | auto y = func (xsimd::broadcast (x)); \ 54 | static_assert (std::is_same_v, decltype(y)>); \ 55 | benchmark::DoNotOptimize (y); \ 56 | } \ 57 | } \ 58 | } \ 59 | BENCHMARK (name); 60 | 61 | TRIG_SIMD_BENCH (asin_xsimd, xsimd::asin) 62 | TRIG_SIMD_BENCH (asin_simd_approx4, math_approx::asin<4>) 63 | TRIG_SIMD_BENCH (asin_simd_approx3, math_approx::asin<3>) 64 | TRIG_SIMD_BENCH (asin_simd_approx2, math_approx::asin<2>) 65 | TRIG_SIMD_BENCH (asin_simd_approx1, math_approx::asin<1>) 66 | 67 | TRIG_SIMD_BENCH (acos_xsimd, xsimd::acos) 68 | TRIG_SIMD_BENCH (acos_simd_approx5, math_approx::acos<5>) 69 | TRIG_SIMD_BENCH (acos_simd_approx4, 
math_approx::acos<4>) 70 | TRIG_SIMD_BENCH (acos_simd_approx3, math_approx::acos<3>) 71 | TRIG_SIMD_BENCH (acos_simd_approx2, math_approx::acos<2>) 72 | TRIG_SIMD_BENCH (acos_simd_approx1, math_approx::acos<1>) 73 | 74 | TRIG_SIMD_BENCH (atan_xsimd, xsimd::atan) 75 | TRIG_SIMD_BENCH (atan_simd_approx7, math_approx::atan<7>) 76 | TRIG_SIMD_BENCH (atan_simd_approx5, math_approx::atan<5>) 77 | TRIG_SIMD_BENCH (atan_simd_approx4, math_approx::atan<4>) 78 | 79 | BENCHMARK_MAIN(); 80 | -------------------------------------------------------------------------------- /tools/bench/log_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static constexpr size_t N = 2000; 5 | /* N evenly spaced, strictly positive samples in [0.01, 20.01), shared by every benchmark in this file. */ const auto data = [] 6 | { 7 | std::vector x; 8 | x.resize (N, 0.0f); 9 | for (size_t i = 0; i < N; ++i) 10 | x[i] = 0.01f + 20.0f * (float) i / (float) N; /* keep inputs > 0: log of zero or a negative value is -inf or NaN and would skew the timings */ 11 | return x; 12 | }(); 13 | 14 | /* Defines and registers a benchmark called name that evaluates func on every sample in data. */ #define LOG_BENCH(name, func) \ 15 | void name (benchmark::State& state) \ 16 | { \ 17 | for (auto _ : state) \ 18 | { \ 19 | for (auto& x : data) \ 20 | { \ 21 | auto y = func (x); \ 22 | benchmark::DoNotOptimize (y); \ 23 | } \ 24 | } \ 25 | } \ 26 | BENCHMARK (name); 27 | LOG_BENCH (log_std, std::log) 28 | LOG_BENCH (log_approx6, math_approx::log<6>) 29 | LOG_BENCH (log_approx5, math_approx::log<5>) 30 | LOG_BENCH (log_approx4, math_approx::log<4>) 31 | LOG_BENCH (log_approx3, math_approx::log<3>) 32 | 33 | LOG_BENCH (log2_std, std::log2) 34 | LOG_BENCH (log2_approx6, math_approx::log2<6>) 35 | LOG_BENCH (log2_approx5, math_approx::log2<5>) 36 | LOG_BENCH (log2_approx4, math_approx::log2<4>) 37 | LOG_BENCH (log2_approx3, math_approx::log2<3>) 38 | 39 | LOG_BENCH (log10_std, std::log10) 40 | LOG_BENCH (log10_approx6, math_approx::log10<6>) 41 | LOG_BENCH (log10_approx5, math_approx::log10<5>) 42 | LOG_BENCH (log10_approx4, math_approx::log10<4>) 43 | LOG_BENCH (log10_approx3, math_approx::log10<3>) 44 | 45 | /* SIMD variant of LOG_BENCH: each sample is broadcast into an xsimd batch before func is called. */ #define LOG_SIMD_BENCH(name, func) 
\ 46 | void name (benchmark::State& state) \ 47 | { \ 48 | for (auto _ : state) \ 49 | { \ 50 | for (auto& x : data) \ 51 | { \ 52 | auto y = func (xsimd::broadcast (x)); \ 53 | static_assert (std::is_same_v, decltype(y)>); \ 54 | benchmark::DoNotOptimize (y); \ 55 | } \ 56 | } \ 57 | } \ 58 | BENCHMARK (name); 59 | LOG_SIMD_BENCH (log_xsimd, xsimd::log) 60 | LOG_SIMD_BENCH (log_simd_approx6, math_approx::log<6>) 61 | LOG_SIMD_BENCH (log_simd_approx5, math_approx::log<5>) 62 | LOG_SIMD_BENCH (log_simd_approx4, math_approx::log<4>) 63 | LOG_SIMD_BENCH (log_simd_approx3, math_approx::log<3>) 64 | 65 | LOG_SIMD_BENCH (log2_xsimd, xsimd::log2) 66 | LOG_SIMD_BENCH (log2_simd_approx6, math_approx::log2<6>) 67 | LOG_SIMD_BENCH (log2_simd_approx5, math_approx::log2<5>) 68 | LOG_SIMD_BENCH (log2_simd_approx4, math_approx::log2<4>) 69 | LOG_SIMD_BENCH (log2_simd_approx3, math_approx::log2<3>) 70 | 71 | LOG_SIMD_BENCH (log10_xsimd, xsimd::log10) 72 | LOG_SIMD_BENCH (log10_simd_approx6, math_approx::log10<6>) 73 | LOG_SIMD_BENCH (log10_simd_approx5, math_approx::log10<5>) 74 | LOG_SIMD_BENCH (log10_simd_approx4, math_approx::log10<4>) 75 | LOG_SIMD_BENCH (log10_simd_approx3, math_approx::log10<3>) 76 | 77 | BENCHMARK_MAIN(); 78 | -------------------------------------------------------------------------------- /tools/bench/polylog_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../test/src/reference/polylogarithm.hpp" 4 | 5 | static constexpr size_t N = 2000; 6 | /* N evenly spaced samples in [-10, 10), shared by every benchmark in this file. */ const auto data = [] 7 | { 8 | std::vector x; 9 | x.resize (N, 0.0f); 10 | for (size_t i = 0; i < N; ++i) 11 | x[i] = -10.0f + 20.0f * (float) i / (float) N; 12 | return x; 13 | }(); 14 | 15 | /* Defines and registers a benchmark called name that evaluates func on every sample in data. */ #define POLYLOG_BENCH(name, func) \ 16 | void name (benchmark::State& state) \ 17 | { \ 18 | for (auto _ : state) \ 19 | { \ 20 | for (auto& x : data) \ 21 | { \ 22 | auto y = func (x); \ 23 | benchmark::DoNotOptimize (y); \ 24 | } \ 25 | } \ 26 | 
} \ 27 | BENCHMARK (name); 28 | POLYLOG_BENCH (li2_ref, polylogarithm::Li2) 29 | POLYLOG_BENCH (li2_approx3_log6, (math_approx::li2<3,6>)) 30 | POLYLOG_BENCH (li2_approx3, math_approx::li2<3>) 31 | POLYLOG_BENCH (li2_approx2, math_approx::li2<2>) 32 | POLYLOG_BENCH (li2_approx1, math_approx::li2<1>) 33 | 34 | /* SIMD variant of POLYLOG_BENCH: each sample is broadcast into an xsimd batch before func is called. */ #define POLYLOG_SIMD_BENCH(name, func) \ 35 | void name (benchmark::State& state) \ 36 | { \ 37 | for (auto _ : state) \ 38 | { \ 39 | for (auto& x : data) \ 40 | { \ 41 | auto y = func (xsimd::broadcast (x)); \ 42 | static_assert (std::is_same_v, decltype(y)>); \ 43 | benchmark::DoNotOptimize (y); \ 44 | } \ 45 | } \ 46 | } \ 47 | BENCHMARK (name); 48 | POLYLOG_SIMD_BENCH (li2_simd_approx3_log6, (math_approx::li2<3,6>)) 49 | POLYLOG_SIMD_BENCH (li2_simd_approx3, math_approx::li2<3>) 50 | POLYLOG_SIMD_BENCH (li2_simd_approx2, math_approx::li2<2>) 51 | POLYLOG_SIMD_BENCH (li2_simd_approx1, math_approx::li2<1>) 52 | 53 | BENCHMARK_MAIN(); 54 | -------------------------------------------------------------------------------- /tools/bench/pow_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static constexpr size_t N = 2000; 5 | /* N evenly spaced samples in [-10, 10), shared by every benchmark in this file. */ const auto data = [] 6 | { 7 | std::vector x; 8 | x.resize (N, 0.0f); 9 | for (size_t i = 0; i < N; ++i) 10 | x[i] = -10.0f + 20.0f * (float) i / (float) N; 11 | return x; 12 | }(); 13 | 14 | /* Defines and registers a benchmark called name that evaluates func on every sample in data. */ #define POW_BENCH(name, func) \ 15 | void name (benchmark::State& state) \ 16 | { \ 17 | for (auto _ : state) \ 18 | { \ 19 | for (auto& x : data) \ 20 | { \ 21 | auto y = func (x); \ 22 | benchmark::DoNotOptimize (y); \ 23 | } \ 24 | } \ 25 | } \ 26 | BENCHMARK (name); 27 | POW_BENCH (exp_std, std::exp) 28 | POW_BENCH (exp_approx6, math_approx::exp<6>) 29 | POW_BENCH (exp_approx5, math_approx::exp<5>) 30 | POW_BENCH (exp_approx4, math_approx::exp<4>) 31 | POW_BENCH (exp_approx3, math_approx::exp<3>) 32 | 33 | POW_BENCH (exp2_std, std::exp2) 34 | POW_BENCH (exp2_approx6, 
math_approx::exp2<6>) 35 | POW_BENCH (exp2_approx5, math_approx::exp2<5>) 36 | POW_BENCH (exp2_approx4, math_approx::exp2<4>) 37 | POW_BENCH (exp2_approx3, math_approx::exp2<3>) 38 | 39 | /* Computes 10^x with std::pow; used below as the exp10_std baseline. */ float stdpow_exp10 (float x) 40 | { 41 | return std::pow (10.0f, x); 42 | } 43 | POW_BENCH (exp10_std, stdpow_exp10) 44 | POW_BENCH (exp10_approx6, math_approx::exp10<6>) 45 | POW_BENCH (exp10_approx5, math_approx::exp10<5>) 46 | POW_BENCH (exp10_approx4, math_approx::exp10<4>) 47 | POW_BENCH (exp10_approx3, math_approx::exp10<3>) 48 | 49 | /* SIMD variant of POW_BENCH: each sample is broadcast into an xsimd batch before func is called. */ #define POW_SIMD_BENCH(name, func) \ 50 | void name (benchmark::State& state) \ 51 | { \ 52 | for (auto _ : state) \ 53 | { \ 54 | for (auto& x : data) \ 55 | { \ 56 | auto y = func (xsimd::broadcast (x)); \ 57 | static_assert (std::is_same_v, decltype(y)>); \ 58 | benchmark::DoNotOptimize (y); \ 59 | } \ 60 | } \ 61 | } \ 62 | BENCHMARK (name); 63 | POW_SIMD_BENCH (exp_xsimd, xsimd::exp) 64 | POW_SIMD_BENCH (exp_simd_approx6, math_approx::exp<6>) 65 | POW_SIMD_BENCH (exp_simd_approx5, math_approx::exp<5>) 66 | POW_SIMD_BENCH (exp_simd_approx4, math_approx::exp<4>) 67 | POW_SIMD_BENCH (exp_simd_approx3, math_approx::exp<3>) 68 | 69 | POW_SIMD_BENCH (exp2_xsimd, xsimd::exp2) 70 | POW_SIMD_BENCH (exp2_simd_approx6, math_approx::exp2<6>) 71 | POW_SIMD_BENCH (exp2_simd_approx5, math_approx::exp2<5>) 72 | POW_SIMD_BENCH (exp2_simd_approx4, math_approx::exp2<4>) 73 | POW_SIMD_BENCH (exp2_simd_approx3, math_approx::exp2<3>) 74 | 75 | POW_SIMD_BENCH (exp10_xsimd, xsimd::exp10) 76 | POW_SIMD_BENCH (exp10_simd_approx6, math_approx::exp10<6>) 77 | POW_SIMD_BENCH (exp10_simd_approx5, math_approx::exp10<5>) 78 | POW_SIMD_BENCH (exp10_simd_approx4, math_approx::exp10<4>) 79 | POW_SIMD_BENCH (exp10_simd_approx3, math_approx::exp10<3>) 80 | 81 | BENCHMARK_MAIN(); 82 | -------------------------------------------------------------------------------- /tools/bench/sigmoid_bench.cpp: -------------------------------------------------------------------------------- 1 
| #include 2 | #include 3 | 4 | static constexpr size_t N = 2000; 5 | const auto data = [] 6 | { 7 | std::vector x; 8 | x.resize (N, 0.0f); 9 | for (size_t i = 0; i < N; ++i) 10 | x[i] = -10.0f + 20.0f * (float) i / (float) N; 11 | return x; 12 | }(); 13 | 14 | #define SIGMOID_BENCH(name, func) \ 15 | void name (benchmark::State& state) \ 16 | { \ 17 | for (auto _ : state) \ 18 | { \ 19 | for (auto& x : data) \ 20 | { \ 21 | auto y = func (x); \ 22 | benchmark::DoNotOptimize (y); \ 23 | } \ 24 | } \ 25 | } \ 26 | BENCHMARK (name); 27 | SIGMOID_BENCH (sigmoid_std, [] (auto x) { return 1.0f / (1.0f + std::exp (-x)); }) 28 | SIGMOID_BENCH (sigmoid_approx9, math_approx::sigmoid<9>) 29 | SIGMOID_BENCH (sigmoid_approx7, math_approx::sigmoid<7>) 30 | SIGMOID_BENCH (sigmoid_approx5, math_approx::sigmoid<5>) 31 | SIGMOID_BENCH (sigmoid_exp_approx6, math_approx::sigmoid_exp<6>) 32 | SIGMOID_BENCH (sigmoid_exp_approx5, math_approx::sigmoid_exp<5>) 33 | SIGMOID_BENCH (sigmoid_exp_approx4, math_approx::sigmoid_exp<4>) 34 | 35 | #define SIGMOID_SIMD_BENCH(name, func) \ 36 | void name (benchmark::State& state) \ 37 | { \ 38 | for (auto _ : state) \ 39 | { \ 40 | for (auto& x : data) \ 41 | { \ 42 | auto y = func (xsimd::broadcast (x)); \ 43 | static_assert (std::is_same_v, decltype(y)>); \ 44 | benchmark::DoNotOptimize (y); \ 45 | } \ 46 | } \ 47 | } \ 48 | BENCHMARK (name); 49 | SIGMOID_SIMD_BENCH (sigmoid_xsimd, [] (auto x) { return 1.0f / (1.0f + xsimd::exp (-x)); }) 50 | SIGMOID_SIMD_BENCH (sigmoid_simd_approx9, math_approx::tanh<9>) 51 | SIGMOID_SIMD_BENCH (sigmoid_simd_approx7, math_approx::tanh<7>) 52 | SIGMOID_SIMD_BENCH (sigmoid_simd_approx5, math_approx::tanh<5>) 53 | SIGMOID_SIMD_BENCH (sigmoid_exp_simd_approx6, math_approx::sigmoid_exp<6>) 54 | SIGMOID_SIMD_BENCH (sigmoid_exp_simd_approx5, math_approx::sigmoid_exp<5>) 55 | SIGMOID_SIMD_BENCH (sigmoid_exp_simd_approx4, math_approx::sigmoid_exp<4>) 56 | 57 | BENCHMARK_MAIN(); 58 | 
-------------------------------------------------------------------------------- /tools/bench/trig_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static constexpr size_t N = 2000; 5 | const auto data = [] 6 | { 7 | std::vector x; 8 | x.resize (N, 0.0f); 9 | for (size_t i = 0; i < N; ++i) 10 | x[i] = -10.0f + 20.0f * (float) i / (float) N; 11 | return x; 12 | }(); 13 | 14 | #define TRIG_BENCH(name, func) \ 15 | void name (benchmark::State& state) \ 16 | { \ 17 | for (auto _ : state) \ 18 | { \ 19 | for (auto& x : data) \ 20 | { \ 21 | auto y = func (x); \ 22 | benchmark::DoNotOptimize (y); \ 23 | } \ 24 | } \ 25 | } \ 26 | BENCHMARK (name); 27 | 28 | TRIG_BENCH (cos_std, std::cos) 29 | TRIG_BENCH (cos_approx9, math_approx::cos<9>) 30 | TRIG_BENCH (cos_approx7, math_approx::cos<7>) 31 | TRIG_BENCH (cos_approx5, math_approx::cos<5>) 32 | 33 | TRIG_BENCH (sin_std, std::sin) 34 | TRIG_BENCH (sin_approx9, math_approx::sin<9>) 35 | TRIG_BENCH (sin_approx7, math_approx::sin<7>) 36 | TRIG_BENCH (sin_approx5, math_approx::sin<5>) 37 | 38 | TRIG_BENCH (tan_std, std::tan) 39 | TRIG_BENCH (tan_approx13, math_approx::tan<13>) 40 | TRIG_BENCH (tan_approx11, math_approx::tan<11>) 41 | TRIG_BENCH (tan_approx9, math_approx::tan<9>) 42 | TRIG_BENCH (tan_approx7, math_approx::tan<7>) 43 | TRIG_BENCH (tan_approx5, math_approx::tan<5>) 44 | TRIG_BENCH (tan_approx3, math_approx::tan<3>) 45 | 46 | #define TRIG_SIMD_BENCH(name, func) \ 47 | void name (benchmark::State& state) \ 48 | { \ 49 | for (auto _ : state) \ 50 | { \ 51 | for (auto& x : data) \ 52 | { \ 53 | auto y = func (xsimd::broadcast (x)); \ 54 | static_assert (std::is_same_v, decltype(y)>); \ 55 | benchmark::DoNotOptimize (y); \ 56 | } \ 57 | } \ 58 | } \ 59 | BENCHMARK (name); 60 | 61 | TRIG_SIMD_BENCH (sin_xsimd, xsimd::sin) 62 | TRIG_SIMD_BENCH (sin_simd_approx9, math_approx::sin<9>) 63 | TRIG_SIMD_BENCH (sin_simd_approx7, math_approx::sin<7>) 64 
| TRIG_SIMD_BENCH (sin_simd_approx5, math_approx::sin<5>) 65 | 66 | TRIG_SIMD_BENCH (cos_xsimd, xsimd::cos) 67 | TRIG_SIMD_BENCH (cos_simd_approx9, math_approx::cos<9>) 68 | TRIG_SIMD_BENCH (cos_simd_approx7, math_approx::cos<7>) 69 | TRIG_SIMD_BENCH (cos_simd_approx5, math_approx::cos<5>) 70 | 71 | TRIG_SIMD_BENCH (tan_xsimd, xsimd::tan) 72 | TRIG_SIMD_BENCH (tan_simd_approx13, math_approx::tan<13>) 73 | TRIG_SIMD_BENCH (tan_simd_approx11, math_approx::tan<11>) 74 | TRIG_SIMD_BENCH (tan_simd_approx9, math_approx::tan<9>) 75 | TRIG_SIMD_BENCH (tan_simd_approx7, math_approx::tan<7>) 76 | TRIG_SIMD_BENCH (tan_simd_approx5, math_approx::tan<5>) 77 | TRIG_SIMD_BENCH (tan_simd_approx3, math_approx::tan<3>) 78 | 79 | BENCHMARK_MAIN(); 80 | -------------------------------------------------------------------------------- /tools/bench/trig_turns_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../test/src/reference/sincospi.hpp" 4 | 5 | static constexpr size_t N = 2000; 6 | const auto data = [] 7 | { 8 | std::vector x; 9 | x.resize (N, 0.0f); 10 | for (size_t i = 0; i < N; ++i) 11 | x[i] = -10.0f + 20.0f * (float) i / (float) N; 12 | return x; 13 | }(); 14 | 15 | #define TRIG_BENCH(name, func) \ 16 | void name (benchmark::State& state) \ 17 | { \ 18 | for (auto _ : state) \ 19 | { \ 20 | for (auto& x : data) \ 21 | { \ 22 | auto y = func (x); \ 23 | benchmark::DoNotOptimize (y); \ 24 | } \ 25 | } \ 26 | } \ 27 | BENCHMARK (name); 28 | 29 | TRIG_BENCH (cos_std, std::cos) 30 | TRIG_BENCH (cos_ref, sincospi::cos2pi) 31 | TRIG_BENCH (cos_turns_approx11, math_approx::cos_turns<11>) 32 | TRIG_BENCH (cos_turns_approx9, math_approx::cos_turns<9>) 33 | TRIG_BENCH (cos_turns_approx7, math_approx::cos_turns<7>) 34 | TRIG_BENCH (cos_turns_approx5, math_approx::cos_turns<5>) 35 | 36 | TRIG_BENCH (sin_std, std::sin) 37 | TRIG_BENCH (sin_turns_ref, sincospi::sin2pi) 38 | TRIG_BENCH (sin_turns_approx11, 
math_approx::sin_turns<11>) 39 | TRIG_BENCH (sin_turns_approx9, math_approx::sin_turns<9>) 40 | TRIG_BENCH (sin_turns_approx7, math_approx::sin_turns<7>) 41 | TRIG_BENCH (sin_turns_approx5, math_approx::sin_turns<5>) 42 | 43 | #define TRIG_SIMD_BENCH(name, func) \ 44 | void name (benchmark::State& state) \ 45 | { \ 46 | for (auto _ : state) \ 47 | { \ 48 | for (auto& x : data) \ 49 | { \ 50 | auto y = func (xsimd::broadcast (x)); \ 51 | static_assert (std::is_same_v, decltype(y)>); \ 52 | benchmark::DoNotOptimize (y); \ 53 | } \ 54 | } \ 55 | } \ 56 | BENCHMARK (name); 57 | 58 | TRIG_SIMD_BENCH (sin_xsimd, xsimd::sin) 59 | TRIG_SIMD_BENCH (sin_turns_simd_approx11, math_approx::sin_turns<11>) 60 | TRIG_SIMD_BENCH (sin_turns_simd_approx9, math_approx::sin_turns<9>) 61 | TRIG_SIMD_BENCH (sin_turns_simd_approx7, math_approx::sin_turns<7>) 62 | TRIG_SIMD_BENCH (sin_turns_simd_approx5, math_approx::sin_turns<5>) 63 | 64 | TRIG_SIMD_BENCH (cos_xsimd, xsimd::cos) 65 | TRIG_SIMD_BENCH (cos_turns_simd_approx11, math_approx::cos_turns<11>) 66 | TRIG_SIMD_BENCH (cos_turns_simd_approx9, math_approx::cos_turns<9>) 67 | TRIG_SIMD_BENCH (cos_turns_simd_approx7, math_approx::cos_turns<7>) 68 | TRIG_SIMD_BENCH (cos_turns_simd_approx5, math_approx::cos_turns<5>) 69 | 70 | BENCHMARK_MAIN(); 71 | -------------------------------------------------------------------------------- /tools/bench/wright_omega_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "../../test/src/reference/toms917.hpp" 5 | 6 | static constexpr size_t N = 2000; 7 | const auto data = [] 8 | { 9 | std::vector x; 10 | x.resize (N, 0.0f); 11 | for (size_t i = 0; i < N; ++i) 12 | x[i] = -10.0f + 40.0f * (float) i / (float) N; 13 | return x; 14 | }(); 15 | 16 | #define WO_BENCH(name, func) \ 17 | void name (benchmark::State& state) \ 18 | { \ 19 | for (auto _ : state) \ 20 | { \ 21 | for (auto& x : data) \ 22 | { \ 23 | auto y = func (x); 
\ 24 | benchmark::DoNotOptimize (y); \ 25 | } \ 26 | } \ 27 | } \ 28 | BENCHMARK (name); 29 | WO_BENCH (wright_omega_toms917, toms917::wrightomega) 30 | WO_BENCH (wright_omega_iter3_poly3_logexp5, (math_approx::wright_omega<3, 3, 5>)) 31 | WO_BENCH (wright_omega_iter3_poly3, (math_approx::wright_omega<3, 3>)) 32 | WO_BENCH (wright_omega_iter2_poly5, (math_approx::wright_omega<2, 5>)) 33 | WO_BENCH (wright_omega_iter2_poly3, (math_approx::wright_omega<2, 3>)) 34 | WO_BENCH (wright_omega_iter2_poly3_logexp3, (math_approx::wright_omega<2, 3, 3>)) 35 | WO_BENCH (wright_omega_iter1_poly5, (math_approx::wright_omega<1, 5>)) 36 | WO_BENCH (wright_omega_iter1_poly3, (math_approx::wright_omega<1, 3>)) 37 | WO_BENCH (wright_omega_iter0_poly5, (math_approx::wright_omega<0, 5>)) 38 | WO_BENCH (wright_omega_iter0_poly3, (math_approx::wright_omega<0, 3>)) 39 | WO_BENCH (wright_omega_dangelo2, (math_approx::wright_omega_dangelo<2>)) 40 | WO_BENCH (wright_omega_dangelo1, (math_approx::wright_omega_dangelo<1>)) 41 | WO_BENCH (wright_omega_dangelo0, (math_approx::wright_omega_dangelo<0>)) 42 | 43 | #define WO_SIMD_BENCH(name, func) \ 44 | void name (benchmark::State& state) \ 45 | { \ 46 | for (auto _ : state) \ 47 | { \ 48 | for (auto& x : data) \ 49 | { \ 50 | auto y = func (xsimd::broadcast (x)); \ 51 | static_assert (std::is_same_v, decltype(y)>); \ 52 | benchmark::DoNotOptimize (y); \ 53 | } \ 54 | } \ 55 | } \ 56 | BENCHMARK (name); 57 | WO_SIMD_BENCH (wright_omega_simd_iter3_poly3_logexp5, (math_approx::wright_omega<3, 3, 5>)) 58 | WO_SIMD_BENCH (wright_omega_simd_iter3_poly3, (math_approx::wright_omega<3, 3>)) 59 | WO_SIMD_BENCH (wright_omega_simd_iter2_poly5, (math_approx::wright_omega<2, 5>)) 60 | WO_SIMD_BENCH (wright_omega_simd_iter2_poly3, (math_approx::wright_omega<2, 3>)) 61 | WO_SIMD_BENCH (wright_omega_simd_iter2_poly3_logexp3, (math_approx::wright_omega<2, 3, 3>)) 62 | WO_SIMD_BENCH (wright_omega_simd_iter1_poly5, (math_approx::wright_omega<1, 5>)) 63 | 
WO_SIMD_BENCH (wright_omega_simd_iter1_poly3, (math_approx::wright_omega<1, 3>)) 64 | WO_SIMD_BENCH (wright_omega_simd_iter0_poly5, (math_approx::wright_omega<0, 5>)) 65 | WO_SIMD_BENCH (wright_omega_simd_iter0_poly3, (math_approx::wright_omega<0, 3>)) 66 | 67 | BENCHMARK_MAIN(); 68 | -------------------------------------------------------------------------------- /tools/plotter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if($ENV{CI}) 2 | message(STATUS "Configuring in CI, skipping plotter tool...") 3 | return() 4 | endif() 5 | 6 | CPMAddPackage( 7 | NAME matplotlib-cpp 8 | GIT_REPOSITORY https://github.com/jatinchowdhury18/matplotlib-cpp 9 | GIT_TAG main 10 | ) 11 | 12 | add_executable(math_approx_plotter plotter.cpp) 13 | target_link_libraries(math_approx_plotter PRIVATE matplotlib-cpp math_approx) 14 | target_compile_features(math_approx_plotter PUBLIC cxx_std_20) 15 | target_compile_definitions(math_approx_plotter PUBLIC _USE_MATH_DEFINES=1) 16 | -------------------------------------------------------------------------------- /tools/plotter/plotter.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | namespace plt = matplotlibcpp; 8 | 9 | #include "../../test/src/reference/polylogarithm.hpp" 10 | #include "../../test/src/reference/toms917.hpp" 11 | #include "../../test/src/reference/sincospi.hpp" 12 | #include "../../test/src/test_helpers.hpp" 13 | #include 14 | 15 | template 16 | void plot_error (std::span all_floats, 17 | std::span y_exact, 18 | F_Approx&& f_approx, 19 | const std::string& name) 20 | { 21 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 22 | const auto error = test_helpers::compute_error (y_exact, y_approx); 23 | std::cout << "Max Error: " << test_helpers::abs_max (error) << std::endl; 24 | plt::named_plot (name, all_floats, error); 25 | } 26 | 27 | template 
28 | void plot_rel_error (std::span all_floats, 29 | std::span y_exact, 30 | F_Approx&& f_approx, 31 | const std::string& name) 32 | { 33 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 34 | const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); 35 | std::cout << "Max Relative Error: " << test_helpers::abs_max (rel_error) << std::endl; 36 | plt::named_plot (name, all_floats, rel_error); 37 | } 38 | 39 | template 40 | void plot_ulp_error (std::span all_floats, 41 | std::span y_exact, 42 | F_Approx&& f_approx, 43 | const std::string& name) 44 | { 45 | const auto y_approx = test_helpers::compute_all (all_floats, f_approx); 46 | const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); 47 | std::cout << "Max Relative Error: " << *std::max_element (ulp_error.begin(), ulp_error.end()) << std::endl; 48 | plt::named_plot (name, all_floats, std::vector { ulp_error.begin(), ulp_error.end() }); 49 | } 50 | 51 | template 52 | void plot_function (std::span all_floats, 53 | F&& f, 54 | const std::string& name) 55 | { 56 | const auto y_approx = test_helpers::compute_all (all_floats, f); 57 | plt::named_plot (name, all_floats, y_approx); 58 | } 59 | 60 | template 61 | T sigmoid_ref (T x) 62 | { 63 | return (T) 1 / ((T) 1 + std::exp (-x)); 64 | } 65 | 66 | template 67 | T std_sin_turns (T x) 68 | { 69 | return std::sin ((T) 2 * (T) M_PI * x); 70 | } 71 | 72 | #define FLOAT_FUNC(func) [] (float x) { return func (x); } 73 | 74 | int main() 75 | { 76 | plt::figure(); 77 | const auto range = std::make_pair (-0.5f, 0.5f); 78 | static constexpr auto tol = 1.0e-3f; 79 | 80 | const auto all_floats = test_helpers::all_32_bit_floats (range.first, range.second, tol); 81 | const auto y_exact = test_helpers::compute_all (all_floats, FLOAT_FUNC (sincospi::cos2pi)); 82 | // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::sin_turns<5>) ), "sint-5"); 83 | // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC 
((math_approx::sin_turns<7>) ), "sint-7"); 84 | // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::sin_turns<9>) ), "sint-9"); 85 | plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::cos_turns<11>) ), "cost-11"); 86 | 87 | plt::legend ({ { "loc", "upper right" } }); 88 | plt::xlim (range.first, range.second); 89 | plt::grid (true); 90 | plt::show(); 91 | 92 | return 0; 93 | } 94 | --------------------------------------------------------------------------------