├── .clang-format
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── extern
    └── amd-libm
    │   ├── LICENSE
    │   └── lib
    │       └── libalm.so
├── include
    ├── sf_benchmarks.hpp
    ├── sf_libraries.hpp
    └── sf_utils.hpp
├── misc
    ├── join_for_readme.sql
    └── sf_benchmarks.sql
└── src
    ├── bessel.f
    ├── bind_af.cpp
    ├── bind_amdlibm.cpp
    ├── bind_baobzi.cpp
    ├── bind_boost.cpp
    ├── bind_eigen.cpp
    ├── bind_fort.cpp
    ├── bind_gsl.cpp
    ├── bind_misc.cpp
    ├── bind_sctl.cpp
    ├── bind_sleef.cpp
    ├── bind_stl.cpp
    ├── hank103.f
    ├── hank106.f
    ├── main.cpp
    └── utils.cpp


/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language:        Cpp
3 | BasedOnStyle:  LLVM
4 | TabWidth:        4
5 | ColumnLimit:    120
6 | IndentWidth: 4
7 | AlwaysBreakTemplateDeclarations: true
8 | ...
9 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "extern/sleef"]
 2 | 	path = extern/sleef
 3 | 	url = https://github.com/shibatch/sleef
 4 | [submodule "extern/baobzi"]
 5 | 	path = extern/baobzi
 6 | 	url = https://github.com/flatironinstitute/baobzi
 7 | [submodule "extern/SCTL"]
 8 | 	path = extern/SCTL
 9 | 	url = https://github.com/dmalhotra/SCTL.git
10 | [submodule "extern/eigen"]
11 | 	path = extern/eigen
12 | 	url = https://gitlab.com/libeigen/eigen.git
13 | [submodule "extern/toml11"]
14 | 	path = extern/toml11
15 | 	url = https://github.com/ToruNiina/toml11
16 | [submodule "extern/vectorclass2"]
17 | 	path = extern/vectorclass2
18 | 	url = https://github.com/vectorclass/version2
19 | [submodule "extern/sqlite_orm"]
20 | 	path = extern/sqlite_orm
21 | 	url = https://github.com/fnc12/sqlite_orm
22 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | project(
 3 |   sf_benchmarks
 4 |   LANGUAGES C CXX Fortran
 5 |   )
 6 | include(ExternalProject)
 7 | set(CMAKE_CXX_STANDARD 17)
 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 9 | 
10 | # All three packages are used unconditionally below (GSL::gsl and SQLite::SQLite3 are linked and
11 | # the Boost include directories are consumed), so a missing package must fail at configure time.
12 | find_package(GSL REQUIRED)
13 | find_package(Boost REQUIRED)
14 | find_package(SQLite3 REQUIRED)
15 | 
16 | # Single-config generators fall back to an optimized build when no build type was requested.
17 | set (default_build_type "Release")
18 | if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
19 |   message (STATUS "No build type specified. Setting build type to ${default_build_type}.")
20 |   set (CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Valid options: Debug, RelWithDebInfo, Release" FORCE)
21 | endif()
22 | 
23 | set(
24 |   SF_INCLUDES
25 |   ${PROJECT_SOURCE_DIR}/include
26 | )
27 | 
28 | # sleef and baobzi are built from their submodules and installed under <build>/contrib.
29 | ExternalProject_Add(
30 |   libsleef
31 |   SOURCE_DIR ${PROJECT_SOURCE_DIR}/extern/sleef
32 |   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/contrib
33 | )
34 | 
35 | ExternalProject_Add(libbaobzi
36 |   SOURCE_DIR ${PROJECT_SOURCE_DIR}/extern/baobzi
37 |   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/contrib -DBAOBZI_BUILD_TESTS=OFF
38 |              -DBAOBZI_BUILD_SHARED=OFF -DBAOBZI_BUILD_EXAMPLES=OFF -DBAOBZI_BUILD_STATIC=ON
39 | )
40 | 
41 | include_directories(
42 |   ${CMAKE_BINARY_DIR}/contrib/include
43 |   ${PROJECT_SOURCE_DIR}/extern/baobzi/extern/msgpack-c/include
44 |   ${PROJECT_SOURCE_DIR}/extern/SCTL/include
45 |   ${PROJECT_SOURCE_DIR}/extern/eigen
46 |   ${PROJECT_SOURCE_DIR}/extern/toml11
47 |   ${PROJECT_SOURCE_DIR}/extern/vectorclass2
48 |   ${PROJECT_SOURCE_DIR}/extern/sqlite_orm/include
49 | )
50 | # NOTE(review): the external projects may install into lib rather than lib64 depending on the
51 | # platform's install-dir convention, so both are searched - confirm on the target distributions.
52 | link_directories(
53 |   ${CMAKE_BINARY_DIR}/contrib/lib64
54 |   ${CMAKE_BINARY_DIR}/contrib/lib
55 |   ${PROJECT_SOURCE_DIR}/extern/amd-libm/lib
56 | )
57 | 
58 | # CONFIGURE_DEPENDS re-checks the glob at build time so added or removed sources trigger a reconfigure.
59 | file(GLOB SF_SOURCES CONFIGURE_DEPENDS "src/*.cpp" "src/*.f")
60 | add_executable(sf_benchmarks ${SF_SOURCES})
61 | target_include_directories(sf_benchmarks PRIVATE ${SF_INCLUDES} ${GSL_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
62 | target_link_libraries(sf_benchmarks PRIVATE sleef GSL::gsl baobzi ${CMAKE_DL_LIBS} SQLite::SQLite3)
63 | # sleef and baobzi are linked by plain name, so the build order has to be stated explicitly.
64 | add_dependencies(sf_benchmarks libsleef libbaobzi)
65 | target_compile_options(sf_benchmarks PRIVATE -march=native -ftree-loop-vectorize -ffast-math -DSCTL_HAVE_LIBMVEC
66 |   $<$<COMPILE_LANGUAGE:Fortran>:-fallow-argument-mismatch>)
67 | 
68 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2022 Robert Blackwell
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/extern/amd-libm/LICENSE:
--------------------------------------------------------------------------------
 1 |  Copyright (C) 2008-2021 Advanced Micro Devices, Inc. All rights reserved.
 2 | 
 3 |  Redistribution and use in source and binary forms, with or without modification,
 4 |  are permitted provided that the following conditions are met:
 5 |  1. Redistributions of source code must retain the above copyright notice,
 6 |     this list of conditions and the following disclaimer.
 7 |  2. Redistributions in binary form must reproduce the above copyright notice,
 8 |     this list of conditions and the following disclaimer in the documentation
 9 |     and/or other materials provided with the distribution.
10 |  3. Neither the name of the copyright holder nor the names of its contributors
11 |     may be used to endorse or promote products derived from this software without
12 |     specific prior written permission.
13 | 
14 |  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 |  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 |  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 |  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
18 |  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 |  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
20 |  OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21 |  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 |  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
23 |  POSSIBILITY OF SUCH DAMAGE.
24 | 
25 | 


--------------------------------------------------------------------------------
/extern/amd-libm/lib/libalm.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flatironinstitute/sf_benchmarks/02aea37608d5acef56330e4d1be972784e69b065/extern/amd-libm/lib/libalm.so


--------------------------------------------------------------------------------
/include/sf_benchmarks.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SF_BENCHMARKS_HPP
 2 | #define SF_BENCHMARKS_HPP
 3 | 
 4 | #include <complex>
 5 | #include <cstddef>
 6 | #include <functional>
 7 | #include <sctl.hpp>
 8 | #include <string>
 9 | #include <sys/cdefs.h>
10 | #include <utility>
11 | #include <vectorclass.h>
12 | 
13 | // Attempt to force non-aliased pointers actually seems to slow things down...
14 | //#define RESTRICT __restrict
15 | #define RESTRICT
16 | 
17 | typedef std::complex<double> cdouble;
18 | typedef sctl::Vec<double, 4> sctl_dx4;
19 | typedef sctl::Vec<double, 8> sctl_dx8;
20 | 
21 | typedef sctl::Vec<float, 8> sctl_fx8;
22 | typedef sctl::Vec<float, 16> sctl_fx16;
23 | 
24 | // Callable mapping one complex argument to a pair of complex results.
25 | typedef std::function<std::pair<cdouble, cdouble>(cdouble)> fun_cdx1_x2;
26 | 
27 | // Batched evaluator signature: (input values, output buffer, element count).
28 | template <class VAL_T>
29 | using multi_eval_func = std::function<void(const VAL_T *, VAL_T *, size_t)>;
30 | 
31 | // Wrap `f`, a callable taking and returning sctl::Vec<VAL_T, VecLen>, as a batched evaluator.
32 | // The returned function steps through the input VecLen elements at a time with
33 | // LoadAligned/StoreAligned and has no scalar tail: if N is not a multiple of VecLen the last
34 | // step touches elements past N.
35 | // The closure is built afresh on every call. A function-local static would pin the first `f`
36 | // seen for a given F, which is wrong whenever two different callables share one type
37 | // (function pointers, std::function).
38 | template <class VAL_T, int VecLen, class F>
39 | std::function<void(const VAL_T *RESTRICT, VAL_T *RESTRICT, size_t)> sctl_apply(const F &f) {
40 |     return [f](const VAL_T *RESTRICT vals, VAL_T *RESTRICT res, size_t N) {
41 |         using Vec = sctl::Vec<VAL_T, VecLen>;
42 |         for (size_t i = 0; i < N; i += VecLen) {
43 |             f(Vec::LoadAligned(vals + i)).StoreAligned(res + i);
44 |         }
45 |     };
46 | }
47 | 
48 | // Wrap `f`, a callable on the vector type VEC_T, as a batched evaluator over VAL_T arrays.
49 | // Elements are processed VEC_T::size() at a time through load_a/store_a with no scalar tail,
50 | // so N is expected to be a multiple of VEC_T::size().
51 | // The closure is built afresh on every call so that the returned evaluator always wraps the
52 | // `f` that was passed in, even when several callables share the type F.
53 | template <class VEC_T, class VAL_T, class F>
54 | std::function<void(const VAL_T *RESTRICT, VAL_T *RESTRICT, size_t)> vec_func_apply(const F &f) {
55 |     return [f](const VAL_T *RESTRICT vals, VAL_T *RESTRICT res, size_t N) {
56 |         for (size_t i = 0; i < N; i += VEC_T::size()) {
57 |             f(VEC_T().load_a(vals + i)).store_a(res + i);
58 |         }
59 |     };
60 | }
61 | 
62 | // Wrap the scalar callable `f` as a batched evaluator computing res[i] = f(vals[i]) for i < N.
63 | // The closure is built afresh on every call so that the returned evaluator always wraps the
64 | // `f` that was passed in, even when several callables share the type F.
65 | template <class VAL_T, class F>
66 | std::function<void(const VAL_T *RESTRICT, VAL_T *RESTRICT, size_t)> scalar_func_apply(const F &f) {
67 |     return [f](const VAL_T *RESTRICT vals, VAL_T *RESTRICT res, size_t N) {
68 |         for (size_t i = 0; i < N; i += 1) {
69 |             res[i] = f(vals[i]);
70 |         }
71 |     };
72 | }
73 | 
74 | // One benchmark configuration: a function name, a type tag and the input bounds.
75 | // NOTE(review): ilbound/iubound presumably bound the imaginary part of complex inputs - confirm.
76 | struct configuration_t {
77 |     int id;
78 |     std::string func;
79 |     std::string ftype;
80 |     double lbound = 0.0;
81 |     double ubound = 1.0;
82 |     double ilbound = 0.0;
83 |     double iubound = 0.0;
84 | };
85 | 
86 | #undef RESTRICT
87 | #endif
88 | 


--------------------------------------------------------------------------------
/include/sf_libraries.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef SF_LIBRARIES_HPP
  2 | #define SF_LIBRARIES_HPP
  3 | 
  4 | #include <sf_benchmarks.hpp>
  5 | 
  6 | #include <memory>
  7 | #include <set>
  8 | #include <string>
  9 | #include <unordered_map>
 10 | 
 11 | #include <Eigen/Core>
 12 | #include <baobzi.hpp>
 13 | #include <boost/math/special_functions.hpp>
 14 | #include <gsl/gsl_sf.h>
 15 | #include <sctl.hpp>
 16 | #include <sleef.h>
 17 | #include <unsupported/Eigen/SpecialFunctions>
 18 | #include <vectorclass.h>
 19 | #include <vectormath_exp.h>
 20 | #include <vectormath_hyp.h>
 21 | #include <vectormath_trig.h>
 22 | 
 23 | #include <boost/version.hpp>
 24 | #include <gnu/libc-version.h>
 25 | #include <gsl/gsl_version.h>
 26 | 
 27 | // Fortran routines declared with trailing-underscore symbol names; every argument is a pointer.
 28 | extern "C" {
 29 | void hank103_(double _Complex *, double _Complex *, double _Complex *, int *);
 30 | void fort_bessel_jn_(int *, double *, double *);
 31 | void fort_bessel_yn_(int *, double *, double *);
 32 | }
 33 | 
 34 | // Per-library registries: each get_funs_*() returns a reference to a map keyed by function name.
 35 | // NOTE(review): the suffix presumably encodes element type and vector width (fx8 = 8 floats,
 36 | // dx4 = 4 doubles, cdx1 = one complex double), mirroring the sctl_* typedefs - confirm.
 37 | namespace sf::functions {
 38 | namespace af {
 39 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx8();
 40 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx4();
 41 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx16();
 42 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx8();
 43 | } // namespace af
 44 | 
 45 | namespace amd {
 46 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx1();
 47 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx8();
 48 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx1();
 49 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx4();
 50 | } // namespace amd
 51 | 
 52 | namespace baobzi {
 53 | // Unlike the other registries this one maps names to Baobzi objects and takes the set of keys
 54 | // to evaluate together with the per-key configurations.
 55 | std::unordered_map<std::string, std::shared_ptr<::baobzi::Baobzi>> &
 56 | get_funs_dx1(std::set<std::string> &keys_to_eval, std::unordered_map<std::string, configuration_t> &configs);
 57 | } // namespace baobzi
 58 | 
 59 | namespace boost {
 60 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx1();
 61 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx1();
 62 | } // namespace boost
 63 | 
 64 | // https://eigen.tuxfamily.org/dox/group__CoeffwiseMathFunctions.html
 65 | namespace eigen {
 66 | // Operation tags; get_funs() maps function names to these tags rather than to callables.
 67 | enum OPS {
 68 |     cos,
 69 |     sin,
 70 |     tan,
 71 |     cosh,
 72 |     sinh,
 73 |     tanh,
 74 |     exp,
 75 |     log,
 76 |     log10,
 77 |     pow35,
 78 |     pow13,
 79 |     asin,
 80 |     acos,
 81 |     atan,
 82 |     asinh,
 83 |     acosh,
 84 |     atanh,
 85 |     erf,
 86 |     erfc,
 87 |     lgamma,
 88 |     digamma,
 89 |     ndtri,
 90 |     sqrt,
 91 |     rsqrt
 92 | };
 93 | 
 94 | std::unordered_map<std::string, OPS> &get_funs();
 95 | } // namespace eigen
 96 | 
 97 | namespace fort {
 98 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx1();
 99 | } // namespace fort
100 | 
101 | namespace gsl {
102 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx1();
103 | std::unordered_map<std::string, multi_eval_func<cdouble>> &get_funs_cdx1();
104 | } // namespace gsl
105 | 
106 | namespace misc {
107 | std::unordered_map<std::string, fun_cdx1_x2> &get_funs_cdx1_x2();
108 | } // namespace misc
109 | 
110 | namespace SCTL {
111 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx8();
112 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx4();
113 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx16();
114 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx8();
115 | } // namespace SCTL
116 | 
117 | namespace sleef {
118 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx1();
119 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx1();
120 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx8();
121 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx4();
122 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx16();
123 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx8();
124 | } // namespace sleef
125 | 
126 | namespace stl {
127 | std::unordered_map<::std::string, multi_eval_func<float>> &get_funs_fx1();
128 | std::unordered_map<::std::string, multi_eval_func<double>> &get_funs_dx1();
129 | } // namespace stl
130 | 
131 | } // namespace sf::functions
132 | 
133 | #endif
134 | 


--------------------------------------------------------------------------------
/include/sf_utils.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SF_UTILS_HPP
 2 | #define SF_UTILS_HPP
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <ctime>
 6 | #include <string>
 7 | #include <time.h>
 8 | 
 9 | #include <x86intrin.h>
10 | 
11 | namespace sf::utils {
12 | 
13 | // Compiler and libc identification; the constructor is defined out of line.
14 | struct toolchain_info_t {
15 |     int id;
16 |     std::string compiler;
17 |     std::string compilervers;
18 |     std::string libcvers;
19 | 
20 |     toolchain_info_t();
21 | };
22 | 
23 | // CPU, clock and cache description of the host, all kept as text; the constructor is
24 | // defined out of line.
25 | struct host_info_t {
26 |     int id;
27 |     std::string cpuname;
28 |     std::string cpuclock;
29 |     std::string cpuclockmax;
30 |     std::string memclock;
31 |     std::string L1d;
32 |     std::string L1i;
33 |     std::string L2;
34 |     std::string L3;
35 | 
36 |     host_info_t();
37 | };
38 | 
39 | // Name/version pair identifying a benchmarked library.
40 | struct library_info_t {
41 |     int id;
42 |     std::string name;
43 |     std::string version;
44 | };
45 | 
46 | // Stopwatch recording both CLOCK_MONOTONIC time and the TSC; construction starts it.
47 | // The stop-side fields are zero-initialized so that reading them before stop() is well defined
48 | // (the value is meaningless until stop() has been called).
49 | struct timer {
50 |     struct timespec ts {}; // monotonic clock at the last start()
51 |     struct timespec tf {}; // monotonic clock at the last stop()
52 | 
53 |     unsigned long long tscs = 0; // __rdtsc() at the last start()
54 |     unsigned long long tscf = 0; // __rdtsc() at the last stop()
55 | 
56 |     timer() { start(); }
57 |     void start() { clock_gettime(CLOCK_MONOTONIC, &ts); tscs = __rdtsc(); }
58 |     void stop() { clock_gettime(CLOCK_MONOTONIC, &tf); tscf = __rdtsc(); }
59 |     // Seconds between the last start() and the last stop().
60 |     double elapsed() const { return (tf.tv_sec - ts.tv_sec) + (tf.tv_nsec - ts.tv_nsec) * 1E-9; }
61 |     // TSC ticks between the last start() and the last stop().
62 |     unsigned long long ticks_elapsed() const { return tscf - tscs; }
63 | };
64 | 
65 | std::string exec(const char *cmd);
66 | std::string get_alm_version();
67 | std::string get_sleef_version();
68 | std::string get_af_version();
69 | std::string get_boost_version();
70 | std::string get_gsl_version();
71 | std::string get_sctl_version();
72 | std::string get_baobzi_version();
73 | std::string get_eigen_version();
74 | 
75 | // Affine map vals * (upper - lower) + lower, applied element-wise; inputs in [0, 1] land in
76 | // [lower, upper]. The width is narrowed to VAL_T before the multiplication.
77 | template <typename VAL_T>
78 | Eigen::VectorX<VAL_T> transform_domain(const Eigen::Ref<const Eigen::VectorX<VAL_T>> &vals, double lower, double upper) {
79 |     VAL_T delta = upper - lower;
80 |     return vals.array() * delta + lower;
81 | }
82 | 
83 | } // namespace sf::utils
84 | 
85 | #endif
86 | 


--------------------------------------------------------------------------------
/misc/join_for_readme.sql:
--------------------------------------------------------------------------------
 1 | .mode html
 2 | .headers on
 3 | -- One row per measurement, joined with its configuration and library and printed as an HTML table.
 4 | SELECT
 5 |     configurations.func,
 6 |     libraries.name,
 7 |     configurations.ftype,
 8 |     measurements.nelem,
 9 |     measurements.veclev, -- NOTE(review): misc/sf_benchmarks.sql spells this column vectlev; confirm
10 |     ROUND(configurations.lbound, 2),
11 |     ROUND(configurations.ubound, 2),
12 |     ROUND(measurements.megaevalspersec, 1), -- NOTE(review): this and cyclespereval are not in misc/sf_benchmarks.sql
13 |     ROUND(measurements.cyclespereval, 1)
14 | FROM
15 |     configurations
16 | JOIN measurements
17 | ON   configurations.id=measurements.configuration
18 | JOIN libraries
19 | ON   libraries.id=measurements.library
20 | WHERE
21 |      (measurements.nelem=1024 OR measurements.nrepeat=1) AND
22 |      measurements.run=(SELECT MIN(id) FROM runs) -- only the run with the smallest id
23 | ORDER BY configurations.func, configurations.ftype, measurements.nelem, measurements.megaevalspersec DESC;
24 | 


--------------------------------------------------------------------------------
/misc/sf_benchmarks.sql:
--------------------------------------------------------------------------------
 1 | -- One row per host CPU; cpuname is the natural key, the clock and cache fields are free-form text.
 2 | create table hosts (
 3 |   id integer primary key autoincrement,
 4 |   cpuname text not null unique,
 5 |   cpuclock text null,
 6 |   cpuclockmax text null,
 7 |   memclock text null,
 8 |   l1dcache text null,
 9 |   l1icache text null,
10 |   l2cache text null,
11 |   l3cache text null
12 | );
13 | 
14 | -- One row per (library, version) pair.
15 | create table libraries (
16 |   id integer primary key autoincrement,
17 |   name text,
18 |   version text,
19 |   unique(name, version)
20 | );
21 | 
22 | -- One row per (compiler, compiler version, libc version) combination.
23 | create table toolchains (
24 |   id integer primary key autoincrement,
25 |   compiler text,
26 |   compilervers text,
27 |   libcvers text,
28 |   unique(compiler, compilervers, libcvers)
29 | );
30 | 
31 | -- One row per benchmarked function, type tag and input domain.
32 | -- The uniqueness constraint may only name columns of this table; nelem, nrepeat and vectlev
33 | -- are per-measurement values stored in the measurements table.
34 | create table configurations (
35 |   id integer primary key autoincrement,
36 |   func text not null,
37 |   ftype text not null,
38 |   lbound real not null,
39 |   ubound real not null,
40 |   ilbound real null,
41 |   iubound real null,
42 |   unique(func, ftype, lbound, ubound, ilbound, iubound)
43 | );
44 | 
45 | -- One row per benchmark run, tied to a host and a toolchain and timestamped on insertion.
46 | create table runs (
47 |   id integer primary key autoincrement,
48 |   time timestamp not null default current_timestamp,
49 |   host integer not null references hosts,
50 |   toolchain integer not null references toolchains
51 | );
52 | 
53 | -- One row per (run, library, configuration) timing result.
54 | create table measurements (
55 |   id integer primary key autoincrement,
56 |   run integer references runs,
57 |   library integer not null references libraries,
58 |   configuration integer not null references configurations,
59 |   nelem integer not null,
60 |   nrepeat integer not null,
61 |   vectlev integer not null,
62 |   evalspersec real not null,
63 |   meanevaltime real not null,
64 |   stddev real not null,
65 |   istddev real not null,
66 |   maxerr real,
67 |   imaxerr real
68 | );
69 | 


--------------------------------------------------------------------------------
/src/bessel.f:
--------------------------------------------------------------------------------
 1 | !     Subroutine wrapper around the BESSEL_JN intrinsic: y = J_n(x).
 2 |       subroutine fort_bessel_jn(n, x, y)
 3 |       implicit none
 4 |       integer(kind=4) :: n
 5 |       real(kind=8) :: x, y
 6 |       y = bessel_jn(n, x)
 7 |       end subroutine fort_bessel_jn
 8 | 
 9 | !     Subroutine wrapper around the BESSEL_YN intrinsic: y = Y_n(x).
10 |       subroutine fort_bessel_yn(n, x, y)
11 |       implicit none
12 |       integer(kind=4) :: n
13 |       real(kind=8) :: x, y
14 |       y = bessel_yn(n, x)
15 |       end subroutine fort_bessel_yn
16 | 


--------------------------------------------------------------------------------
/src/bind_af.cpp:
--------------------------------------------------------------------------------
  1 | #include <sf_libraries.hpp>
  2 | 
  3 | namespace sf::functions::af {
  4 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx8 = {
  5 |     {"sqrt", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return sqrt(x); })},
  6 |     {"sin", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return sin(x); })},
  7 |     {"cos", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return cos(x); })},
  8 |     {"tan", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return tan(x); })},
  9 |     {"sinh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return sinh(x); })},
 10 |     {"cosh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return cosh(x); })},
 11 |     {"tanh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return tanh(x); })},
 12 |     {"asinh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return asinh(x); })},
 13 |     {"acosh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return acosh(x); })},
 14 |     {"atanh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return atanh(x); })},
 15 |     {"asin", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return asin(x); })},
 16 |     {"acos", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return acos(x); })},
 17 |     {"atan", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return atan(x); })},
 18 |     {"exp", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return exp(x); })},
 19 |     {"exp2", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return exp2(x); })},
 20 |     {"exp10", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return exp10(x); })},
 21 |     {"log", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return log(x); })},
 22 |     {"log2", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return log2(x); })},
 23 |     {"log10", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return log10(x); })},
 24 |     {"pow3.5", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return pow(x, 3.5); })},
 25 |     {"pow13", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return pow_const(x, 13); })},
 26 | };
 27 | 
 28 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx4 = {
 29 |     {"sqrt", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return sqrt(x); })},
 30 |     {"sin", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return sin(x); })},
 31 |     {"cos", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return cos(x); })},
 32 |     {"tan", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return tan(x); })},
 33 |     {"sinh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return sinh(x); })},
 34 |     {"cosh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return cosh(x); })},
 35 |     {"tanh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return tanh(x); })},
 36 |     {"asinh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return asinh(x); })},
 37 |     {"acosh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return acosh(x); })},
 38 |     {"atanh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return atanh(x); })},
 39 |     {"asin", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return asin(x); })},
 40 |     {"acos", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return acos(x); })},
 41 |     {"atan", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return atan(x); })},
 42 |     {"exp", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return exp(x); })},
 43 |     {"exp2", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return exp2(x); })},
 44 |     {"exp10", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return exp10(x); })},
 45 |     {"log", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return log(x); })},
 46 |     {"log2", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return log2(x); })},
 47 |     {"log10", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return log10(x); })},
 48 |     {"pow3.5", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return pow(x, 3.5); })},
 49 |     {"pow13", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return pow_const(x, 13); })},
 50 | };
 51 | 
 52 | #ifdef __AVX512F__
 53 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx16 = {
 54 |     {"memcpy", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return x; })},
 55 |     {"memset", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Vec16f{0.0}; })},
 56 |     {"sqrt", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return sqrt(x); })},
 57 |     {"sin", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return sin(x); })},
 58 |     {"cos", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return cos(x); })},
 59 |     {"tan", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return tan(x); })},
 60 |     {"sinh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return sinh(x); })},
 61 |     {"cosh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return cosh(x); })},
 62 |     {"tanh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return tanh(x); })},
 63 |     {"asinh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return asinh(x); })},
 64 |     {"acosh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return acosh(x); })},
 65 |     {"atanh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return atanh(x); })},
 66 |     {"asin", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return asin(x); })},
 67 |     {"acos", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return acos(x); })},
 68 |     {"atan", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return atan(x); })},
 69 |     {"exp", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return exp(x); })},
 70 |     {"exp2", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return exp2(x); })},
 71 |     {"exp10", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return exp10(x); })},
 72 |     {"log", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return log(x); })},
 73 |     {"log2", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return log2(x); })},
 74 |     {"log10", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return log10(x); })},
 75 |     {"pow3.5", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return pow(x, 3.5); })},
 76 |     {"pow13", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return pow_const(x, 13); })},
 77 | };
 78 | 
 79 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx8 = {
 80 |     {"memset", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Vec8d{0.0}; })},
 81 |     {"memcpy", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return x; })},
 82 |     {"sqrt", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return sqrt(x); })},
 83 |     {"sin", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return sin(x); })},
 84 |     {"cos", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return cos(x); })},
 85 |     {"tan", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return tan(x); })},
 86 |     {"sinh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return sinh(x); })},
 87 |     {"cosh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return cosh(x); })},
 88 |     {"tanh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return tanh(x); })},
 89 |     {"asinh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return asinh(x); })},
 90 |     {"acosh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return acosh(x); })},
 91 |     {"atanh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return atanh(x); })},
 92 |     {"asin", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return asin(x); })},
 93 |     {"acos", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return acos(x); })},
 94 |     {"atan", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return atan(x); })},
 95 |     {"exp", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return exp(x); })},
 96 |     {"exp2", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return exp2(x); })},
 97 |     {"exp10", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return exp10(x); })},
 98 |     {"log", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return log(x); })},
 99 |     {"log2", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return log2(x); })},
100 |     {"log10", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return log10(x); })},
101 |     {"pow3.5", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return pow(x, 3.5); })},
102 |     {"pow13", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return pow_const(x, 13); })},
103 | };
104 | #else
105 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx16;
106 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx8;
107 | #endif
108 | 
109 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx8() { return funs_fx8; }
110 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx4() { return funs_dx4; }
111 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx16() { return funs_fx16; }
112 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx8() { return funs_dx8; }
113 | 
114 | } // namespace sf::functions::af
115 | 


--------------------------------------------------------------------------------
/src/bind_amdlibm.cpp:
--------------------------------------------------------------------------------
#include <dlfcn.h>
#include <iostream>
#include <sf_libraries.hpp>
  3 | 
  4 | namespace sf::functions::amd {
  5 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx1;
  6 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx8;
  7 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx1;
  8 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx4;
  9 | using C_FUN1F = float (*)(float);
 10 | using C_FUN2F = float (*)(float, float);
 11 | using C_FUN1D = double (*)(double);
 12 | using C_FUN2D = double (*)(double, double);
 13 | using C_FX8_FUN1F = Vec8f (*)(Vec8f);
 14 | using C_FX8_FUN2F = Vec8f (*)(Vec8f, Vec8f);
 15 | using C_DX4_FUN1D = Vec4d (*)(Vec4d);
 16 | using C_DX4_FUN2D = Vec4d (*)(Vec4d, Vec4d);
 17 | 
 18 | void *handle = NULL;
 19 | 
 20 | C_FUN1F amd_sinf, amd_cosf, amd_tanf, amd_sinhf, amd_coshf, amd_tanhf, amd_asinf, amd_acosf, amd_atanf, amd_asinhf,
 21 |     amd_acoshf, amd_atanhf, amd_logf, amd_log2f, amd_log10f, amd_expf, amd_exp2f, amd_exp10f, amd_sqrtf;
 22 | C_FUN2F amd_powf;
 23 | 
 24 | C_FUN1D amd_sin, amd_cos, amd_tan, amd_sinh, amd_cosh, amd_tanh, amd_asin, amd_acos, amd_atan, amd_asinh, amd_acosh,
 25 |     amd_atanh, amd_log, amd_log2, amd_log10, amd_exp, amd_exp2, amd_exp10, amd_sqrt;
 26 | C_FUN2D amd_pow;
 27 | 
 28 | C_FX8_FUN1F amd_vrs8_sinf, amd_vrs8_cosf, amd_vrs8_tanf, amd_vrs8_logf, amd_vrs8_log2f, amd_vrs8_expf, amd_vrs8_exp2f;
 29 | C_FX8_FUN2F amd_vrs8_powf;
 30 | 
 31 | C_DX4_FUN1D amd_vrd4_sin, amd_vrd4_cos, amd_vrd4_tan, amd_vrd4_log, amd_vrd4_log2, amd_vrd4_exp, amd_vrd4_exp2;
 32 | C_DX4_FUN2D amd_vrd4_pow;
 33 | 
 34 | void load_functions() {
 35 |     if (handle)
 36 |         return;
 37 |     void *handle = dlopen("libalm.so", RTLD_NOW);
 38 | 
 39 |     amd_sinf = (C_FUN1F)dlsym(handle, "amd_sinf");
 40 |     amd_cosf = (C_FUN1F)dlsym(handle, "amd_cosf");
 41 |     amd_tanf = (C_FUN1F)dlsym(handle, "amd_tanf");
 42 |     amd_sinhf = (C_FUN1F)dlsym(handle, "amd_sinhf");
 43 |     amd_coshf = (C_FUN1F)dlsym(handle, "amd_coshf");
 44 |     amd_tanhf = (C_FUN1F)dlsym(handle, "amd_tanhf");
 45 |     amd_asinf = (C_FUN1F)dlsym(handle, "amd_asinf");
 46 |     amd_acosf = (C_FUN1F)dlsym(handle, "amd_acosf");
 47 |     amd_atanf = (C_FUN1F)dlsym(handle, "amd_atanf");
 48 |     amd_asinhf = (C_FUN1F)dlsym(handle, "amd_asinhf");
 49 |     amd_acoshf = (C_FUN1F)dlsym(handle, "amd_acoshf");
 50 |     amd_atanhf = (C_FUN1F)dlsym(handle, "amd_atanhf");
 51 |     amd_logf = (C_FUN1F)dlsym(handle, "amd_logf");
 52 |     amd_log2f = (C_FUN1F)dlsym(handle, "amd_log2f");
 53 |     amd_log10f = (C_FUN1F)dlsym(handle, "amd_log10f");
 54 |     amd_expf = (C_FUN1F)dlsym(handle, "amd_expf");
 55 |     amd_exp2f = (C_FUN1F)dlsym(handle, "amd_exp2f");
 56 |     amd_exp10f = (C_FUN1F)dlsym(handle, "amd_exp10f");
 57 |     amd_sqrtf = (C_FUN1F)dlsym(handle, "amd_sqrtf");
 58 |     amd_powf = (C_FUN2F)dlsym(handle, "amd_powf");
 59 | 
 60 |     amd_sin = (C_FUN1D)dlsym(handle, "amd_sin");
 61 |     amd_cos = (C_FUN1D)dlsym(handle, "amd_cos");
 62 |     amd_tan = (C_FUN1D)dlsym(handle, "amd_tan");
 63 |     amd_sinh = (C_FUN1D)dlsym(handle, "amd_sinh");
 64 |     amd_cosh = (C_FUN1D)dlsym(handle, "amd_cosh");
 65 |     amd_tanh = (C_FUN1D)dlsym(handle, "amd_tanh");
 66 |     amd_asin = (C_FUN1D)dlsym(handle, "amd_asin");
 67 |     amd_acos = (C_FUN1D)dlsym(handle, "amd_acos");
 68 |     amd_atan = (C_FUN1D)dlsym(handle, "amd_atan");
 69 |     amd_asinh = (C_FUN1D)dlsym(handle, "amd_asinh");
 70 |     amd_acosh = (C_FUN1D)dlsym(handle, "amd_acosh");
 71 |     amd_atanh = (C_FUN1D)dlsym(handle, "amd_atanh");
 72 |     amd_log = (C_FUN1D)dlsym(handle, "amd_log");
 73 |     amd_log2 = (C_FUN1D)dlsym(handle, "amd_log2");
 74 |     amd_log10 = (C_FUN1D)dlsym(handle, "amd_log10");
 75 |     amd_exp = (C_FUN1D)dlsym(handle, "amd_exp");
 76 |     amd_exp2 = (C_FUN1D)dlsym(handle, "amd_exp2");
 77 |     amd_exp10 = (C_FUN1D)dlsym(handle, "amd_exp10");
 78 |     amd_sqrt = (C_FUN1D)dlsym(handle, "amd_sqrt");
 79 |     amd_pow = (C_FUN2D)dlsym(handle, "amd_pow");
 80 | 
 81 |     amd_vrs8_sinf = (C_FX8_FUN1F)dlsym(handle, "amd_vrs8_sinf");
 82 |     amd_vrs8_cosf = (C_FX8_FUN1F)dlsym(handle, "amd_vrs8_cosf");
 83 |     amd_vrs8_tanf = (C_FX8_FUN1F)dlsym(handle, "amd_vrs8_tanf");
 84 |     amd_vrs8_logf = (C_FX8_FUN1F)dlsym(handle, "amd_vrs8_logf");
 85 |     amd_vrs8_log2f = (C_FX8_FUN1F)dlsym(handle, "amd_vrs8_log2f");
 86 |     amd_vrs8_expf = (C_FX8_FUN1F)dlsym(handle, "amd_vrs8_expf");
 87 |     amd_vrs8_exp2f = (C_FX8_FUN1F)dlsym(handle, "amd_vrs8_exp2f");
 88 |     amd_vrs8_powf = (C_FX8_FUN2F)dlsym(handle, "amd_vrs8_powf");
 89 | 
 90 |     amd_vrd4_sin = (C_DX4_FUN1D)dlsym(handle, "amd_vrd4_sin");
 91 |     amd_vrd4_cos = (C_DX4_FUN1D)dlsym(handle, "amd_vrd4_cos");
 92 |     amd_vrd4_tan = (C_DX4_FUN1D)dlsym(handle, "amd_vrd4_tan");
 93 |     amd_vrd4_log = (C_DX4_FUN1D)dlsym(handle, "amd_vrd4_log");
 94 |     amd_vrd4_log2 = (C_DX4_FUN1D)dlsym(handle, "amd_vrd4_log2");
 95 |     amd_vrd4_exp = (C_DX4_FUN1D)dlsym(handle, "amd_vrd4_exp");
 96 |     amd_vrd4_exp2 = (C_DX4_FUN1D)dlsym(handle, "amd_vrd4_exp2");
 97 |     amd_vrd4_pow = (C_DX4_FUN2D)dlsym(handle, "amd_vrd4_pow");
 98 | 
 99 |     funs_fx1 = {
100 |         {"sin", scalar_func_apply<float>([](float x) -> float { return amd_sinf(x); })},
101 |         {"cos", scalar_func_apply<float>([](float x) -> float { return amd_cosf(x); })},
102 |         {"tan", scalar_func_apply<float>([](float x) -> float { return amd_tanf(x); })},
103 |         {"sinh", scalar_func_apply<float>([](float x) -> float { return amd_sinhf(x); })},
104 |         {"cosh", scalar_func_apply<float>([](float x) -> float { return amd_coshf(x); })},
105 |         {"tanh", scalar_func_apply<float>([](float x) -> float { return amd_tanhf(x); })},
106 |         {"asin", scalar_func_apply<float>([](float x) -> float { return amd_asinf(x); })},
107 |         {"acos", scalar_func_apply<float>([](float x) -> float { return amd_acosf(x); })},
108 |         {"atan", scalar_func_apply<float>([](float x) -> float { return amd_atanf(x); })},
109 |         {"asinh", scalar_func_apply<float>([](float x) -> float { return amd_asinhf(x); })},
110 |         {"acosh", scalar_func_apply<float>([](float x) -> float { return amd_acoshf(x); })},
111 |         {"atanh", scalar_func_apply<float>([](float x) -> float { return amd_atanhf(x); })},
112 |         {"log", scalar_func_apply<float>([](float x) -> float { return amd_logf(x); })},
113 |         {"log2", scalar_func_apply<float>([](float x) -> float { return amd_log2f(x); })},
114 |         {"log10", scalar_func_apply<float>([](float x) -> float { return amd_log10f(x); })},
115 |         {"exp", scalar_func_apply<float>([](float x) -> float { return amd_expf(x); })},
116 |         {"exp2", scalar_func_apply<float>([](float x) -> float { return amd_exp2f(x); })},
117 |         {"exp10", scalar_func_apply<float>([](float x) -> float { return amd_exp10f(x); })},
118 |         {"sqrt", scalar_func_apply<float>([](float x) -> float { return amd_sqrtf(x); })},
119 |         {"pow3.5", scalar_func_apply<float>([](float x) -> float { return amd_powf(x, 3.5); })},
120 |         {"pow13", scalar_func_apply<float>([](float x) -> float { return amd_powf(x, 13); })},
121 |     };
122 | 
123 |     funs_dx1 = {
124 |         {"sin", scalar_func_apply<double>([](double x) -> double { return amd_sin(x); })},
125 |         {"cos", scalar_func_apply<double>([](double x) -> double { return amd_cos(x); })},
126 |         {"tan", scalar_func_apply<double>([](double x) -> double { return amd_tan(x); })},
127 |         {"sinh", scalar_func_apply<double>([](double x) -> double { return amd_sinh(x); })},
128 |         {"cosh", scalar_func_apply<double>([](double x) -> double { return amd_cosh(x); })},
129 |         {"tanh", scalar_func_apply<double>([](double x) -> double { return amd_tanh(x); })},
130 |         {"asin", scalar_func_apply<double>([](double x) -> double { return amd_asin(x); })},
131 |         {"acos", scalar_func_apply<double>([](double x) -> double { return amd_acos(x); })},
132 |         {"atan", scalar_func_apply<double>([](double x) -> double { return amd_atan(x); })},
133 |         {"asinh", scalar_func_apply<double>([](double x) -> double { return amd_asinh(x); })},
134 |         {"acosh", scalar_func_apply<double>([](double x) -> double { return amd_acosh(x); })},
135 |         {"atanh", scalar_func_apply<double>([](double x) -> double { return amd_atanh(x); })},
136 |         {"log", scalar_func_apply<double>([](double x) -> double { return amd_log(x); })},
137 |         {"log2", scalar_func_apply<double>([](double x) -> double { return amd_log2(x); })},
138 |         {"log10", scalar_func_apply<double>([](double x) -> double { return amd_log10(x); })},
139 |         {"exp", scalar_func_apply<double>([](double x) -> double { return amd_exp(x); })},
140 |         {"exp2", scalar_func_apply<double>([](double x) -> double { return amd_exp2(x); })},
141 |         {"exp10", scalar_func_apply<double>([](double x) -> double { return amd_exp10(x); })},
142 |         {"sqrt", scalar_func_apply<double>([](double x) -> double { return amd_sqrt(x); })},
143 |         {"pow3.5", scalar_func_apply<double>([](double x) -> double { return amd_pow(x, 3.5); })},
144 |         {"pow13", scalar_func_apply<double>([](double x) -> double { return amd_pow(x, 13); })},
145 |     };
146 | 
147 |     funs_dx4 = {
148 |         {"sin", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_sin(x); })},
149 |         {"cos", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_cos(x); })},
150 |         {"tan", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_tan(x); })},
151 |         {"log", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_log(x); })},
152 |         {"log2", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_log2(x); })},
153 |         {"exp", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_exp(x); })},
154 |         {"exp2", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_exp2(x); })},
155 |         {"pow3.5", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_pow(x, Vec4d{3.5}); })},
156 |         {"pow13", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return amd_vrd4_pow(x, Vec4d{13}); })},
157 |     };
158 | 
159 |     funs_fx8 = {
160 |         {"sin", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_sinf(x); })},
161 |         {"cos", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_cosf(x); })},
162 |         {"tan", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_tanf(x); })},
163 |         {"log", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_logf(x); })},
164 |         {"log2", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_log2f(x); })},
165 |         {"exp", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_expf(x); })},
166 |         {"exp2", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_exp2f(x); })},
167 |         {"pow3.5", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_powf(x, Vec8f{3.5}); })},
168 |         {"pow13", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return amd_vrs8_powf(x, Vec8f{13}); })},
169 |     };
170 | }
171 | 
172 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx1() {
173 |     load_functions();
174 |     return funs_fx1;
175 | }
176 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx8() {
177 |     load_functions();
178 |     return funs_fx8;
179 | }
180 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx1() {
181 |     load_functions();
182 |     return funs_dx1;
183 | }
184 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx4() {
185 |     load_functions();
186 |     return funs_dx4;
187 | }
188 | } // namespace sf::functions::amd
189 | 


--------------------------------------------------------------------------------
/src/bind_baobzi.cpp:
--------------------------------------------------------------------------------
 1 | #include <baobzi.hpp>
 2 | #include <sf_libraries.hpp>
 3 | 
 4 | namespace sf::functions::baobzi {
 5 | using ::baobzi::Baobzi;
 6 | 
 7 | double baobzi_fun_wrapper(const double *x, const void *data) {
 8 |     auto *myfun = (std::function<double(double)> *)data;
 9 |     return (*myfun)(*x);
10 | }
11 | 
12 | std::shared_ptr<Baobzi> create_baobzi_func(void *infun, const std::pair<double, double> &domain) {
13 |     baobzi_input_t input = {.func = baobzi_fun_wrapper,
14 |                             .data = infun,
15 |                             .dim = 1,
16 |                             .order = 8,
17 |                             .tol = 1E-10,
18 |                             .minimum_leaf_fraction = 0.6,
19 |                             .split_multi_eval = 0};
20 |     double hl = 0.5 * (domain.second - domain.first);
21 |     double center = domain.first + hl;
22 | 
23 |     return std::shared_ptr<Baobzi>(new Baobzi(&input, &center, &hl));
24 | }
25 | 
26 | std::unordered_map<std::string, std::shared_ptr<::baobzi::Baobzi>> baobzi_funs;
27 | std::unordered_map<std::string, std::function<double(double)>> potential_baobzi_funs{
28 |     {"bessel_Y0", [](double x) -> double { return gsl_sf_bessel_Y0(x); }},
29 |     {"bessel_Y1", [](double x) -> double { return gsl_sf_bessel_Y1(x); }},
30 |     {"bessel_Y2", [](double x) -> double { return gsl_sf_bessel_Yn(2, x); }},
31 |     {"bessel_I0", [](double x) -> double { return gsl_sf_bessel_I0(x); }},
32 |     {"bessel_I1", [](double x) -> double { return gsl_sf_bessel_I1(x); }},
33 |     {"bessel_I2", [](double x) -> double { return gsl_sf_bessel_In(2, x); }},
34 |     {"bessel_J0", [](double x) -> double { return gsl_sf_bessel_J0(x); }},
35 |     {"bessel_J1", [](double x) -> double { return gsl_sf_bessel_J1(x); }},
36 |     {"bessel_J2", [](double x) -> double { return gsl_sf_bessel_Jn(2, x); }},
37 |     {"hermite_0", [](double x) -> double { return gsl_sf_hermite(0, x); }},
38 |     {"hermite_1", [](double x) -> double { return gsl_sf_hermite(1, x); }},
39 |     {"hermite_2", [](double x) -> double { return gsl_sf_hermite(2, x); }},
40 |     {"hermite_3", [](double x) -> double { return gsl_sf_hermite(3, x); }},
41 | };
42 | 
43 | std::unordered_map<std::string, std::shared_ptr<::baobzi::Baobzi>> &
44 | get_funs_dx1(std::set<std::string> &keys_to_eval, std::unordered_map<std::string, configuration_t> &params) {
45 |     for (auto &key : keys_to_eval) {
46 |         if (potential_baobzi_funs.count(key) && !baobzi_funs.count(key)) {
47 |             std::cerr << "Creating baobzi function '" + key + "'.\n";
48 |             auto &param = params[key];
49 |             std::pair domain = std::make_pair(param.lbound, param.ubound);
50 |             baobzi_funs[key] = create_baobzi_func((void *)(&potential_baobzi_funs.at(key)), domain);
51 |         }
52 |     }
53 | 
54 |     return baobzi_funs;
55 | }
56 | 
57 | } // namespace sf::functions::baobzi
58 | 


--------------------------------------------------------------------------------
/src/bind_boost.cpp:
--------------------------------------------------------------------------------
 1 | #include <sf_libraries.hpp>
 2 | 
 3 | namespace sf::functions::boost {
 4 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx1 = {
 5 |     {"sin_pi", scalar_func_apply<float>([](float x) -> float { return ::boost::math::sin_pi(x); })},
 6 |     {"cos_pi", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cos_pi(x); })},
 7 |     {"tgamma", scalar_func_apply<float>([](float x) -> float { return ::boost::math::tgamma<float>(x); })},
 8 |     {"lgamma", scalar_func_apply<float>([](float x) -> float { return ::boost::math::lgamma<float>(x); })},
 9 |     {"digamma", scalar_func_apply<float>([](float x) -> float { return ::boost::math::digamma<float>(x); })},
10 |     {"pow13", scalar_func_apply<float>([](float x) -> float { return ::boost::math::pow<13>(x); })},
11 |     {"erf", scalar_func_apply<float>([](float x) -> float { return ::boost::math::erf(x); })},
12 |     {"erfc", scalar_func_apply<float>([](float x) -> float { return ::boost::math::erfc(x); })},
13 |     {"sinc_pi", scalar_func_apply<float>([](float x) -> float { return ::boost::math::sinc_pi(x); })},
14 |     {"bessel_Y0", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_neumann(0, x); })},
15 |     {"bessel_Y1", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_neumann(1, x); })},
16 |     {"bessel_Y2", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_neumann(2, x); })},
17 |     {"bessel_I0", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_i(0, x); })},
18 |     {"bessel_I1", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_i(1, x); })},
19 |     {"bessel_I2", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_i(2, x); })},
20 |     {"bessel_J0", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_j(0, x); })},
21 |     {"bessel_J1", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_j(1, x); })},
22 |     {"bessel_J2", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_j(2, x); })},
23 |     {"bessel_K0", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_k(0, x); })},
24 |     {"bessel_K1", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_k(1, x); })},
25 |     {"bessel_K2", scalar_func_apply<float>([](float x) -> float { return ::boost::math::cyl_bessel_k(2, x); })},
26 |     {"bessel_j0", scalar_func_apply<float>([](float x) -> float { return ::boost::math::sph_bessel(0, x); })},
27 |     {"bessel_j1", scalar_func_apply<float>([](float x) -> float { return ::boost::math::sph_bessel(1, x); })},
28 |     {"bessel_j2", scalar_func_apply<float>([](float x) -> float { return ::boost::math::sph_bessel(2, x); })},
29 |     {"bessel_y0", scalar_func_apply<float>([](float x) -> float { return ::boost::math::sph_neumann(0, x); })},
30 |     {"bessel_y1", scalar_func_apply<float>([](float x) -> float { return ::boost::math::sph_neumann(1, x); })},
31 |     {"bessel_y2", scalar_func_apply<float>([](float x) -> float { return ::boost::math::sph_neumann(2, x); })},
32 |     {"hermite_0", scalar_func_apply<float>([](float x) -> float { return ::boost::math::hermite(0, x); })},
33 |     {"hermite_1", scalar_func_apply<float>([](float x) -> float { return ::boost::math::hermite(1, x); })},
34 |     {"hermite_2", scalar_func_apply<float>([](float x) -> float { return ::boost::math::hermite(2, x); })},
35 |     {"hermite_3", scalar_func_apply<float>([](float x) -> float { return ::boost::math::hermite(3, x); })},
36 |     {"riemann_zeta", scalar_func_apply<float>([](float x) -> float { return ::boost::math::zeta(x); })},
37 | };
38 | 
39 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx1 = {
40 |     {"sin_pi", scalar_func_apply<double>([](double x) -> double { return ::boost::math::sin_pi(x); })},
41 |     {"cos_pi", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cos_pi(x); })},
42 |     {"tgamma", scalar_func_apply<double>([](double x) -> double { return ::boost::math::tgamma<double>(x); })},
43 |     {"lgamma", scalar_func_apply<double>([](double x) -> double { return ::boost::math::lgamma<double>(x); })},
44 |     {"digamma", scalar_func_apply<double>([](double x) -> double { return ::boost::math::digamma<double>(x); })},
45 |     {"pow13", scalar_func_apply<double>([](double x) -> double { return ::boost::math::pow<13>(x); })},
46 |     {"erf", scalar_func_apply<double>([](double x) -> double { return ::boost::math::erf(x); })},
47 |     {"erfc", scalar_func_apply<double>([](double x) -> double { return ::boost::math::erfc(x); })},
48 |     {"sinc_pi", scalar_func_apply<double>([](double x) -> double { return ::boost::math::sinc_pi(x); })},
49 |     {"bessel_Y0", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_neumann(0, x); })},
50 |     {"bessel_Y1", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_neumann(1, x); })},
51 |     {"bessel_Y2", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_neumann(2, x); })},
52 |     {"bessel_I0", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_i(0, x); })},
53 |     {"bessel_I1", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_i(1, x); })},
54 |     {"bessel_I2", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_i(2, x); })},
55 |     {"bessel_J0", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_j(0, x); })},
56 |     {"bessel_J1", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_j(1, x); })},
57 |     {"bessel_J2", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_j(2, x); })},
58 |     {"bessel_K0", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_k(0, x); })},
59 |     {"bessel_K1", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_k(1, x); })},
60 |     {"bessel_K2", scalar_func_apply<double>([](double x) -> double { return ::boost::math::cyl_bessel_k(2, x); })},
61 |     {"bessel_j0", scalar_func_apply<double>([](double x) -> double { return ::boost::math::sph_bessel(0, x); })},
62 |     {"bessel_j1", scalar_func_apply<double>([](double x) -> double { return ::boost::math::sph_bessel(1, x); })},
63 |     {"bessel_j2", scalar_func_apply<double>([](double x) -> double { return ::boost::math::sph_bessel(2, x); })},
64 |     {"bessel_y0", scalar_func_apply<double>([](double x) -> double { return ::boost::math::sph_neumann(0, x); })},
65 |     {"bessel_y1", scalar_func_apply<double>([](double x) -> double { return ::boost::math::sph_neumann(1, x); })},
66 |     {"bessel_y2", scalar_func_apply<double>([](double x) -> double { return ::boost::math::sph_neumann(2, x); })},
67 |     {"hermite_0", scalar_func_apply<double>([](double x) -> double { return ::boost::math::hermite(0, x); })},
68 |     {"hermite_1", scalar_func_apply<double>([](double x) -> double { return ::boost::math::hermite(1, x); })},
69 |     {"hermite_2", scalar_func_apply<double>([](double x) -> double { return ::boost::math::hermite(2, x); })},
70 |     {"hermite_3", scalar_func_apply<double>([](double x) -> double { return ::boost::math::hermite(3, x); })},
71 |     {"riemann_zeta", scalar_func_apply<double>([](double x) -> double { return ::boost::math::zeta(x); })},
72 | };
73 | 
74 | auto get_funs_fx1() -> std::unordered_map<std::string, multi_eval_func<float>> & { return funs_fx1; } // float table
75 | auto get_funs_dx1() -> std::unordered_map<std::string, multi_eval_func<double>> & { return funs_dx1; } // double table
76 | } // namespace sf::functions::boost
77 | 


--------------------------------------------------------------------------------
/src/bind_eigen.cpp:
--------------------------------------------------------------------------------
 1 | #include <sf_libraries.hpp>
 2 | 
 3 | namespace sf::functions::eigen { // Eigen backend: maps a function name to an OPS value
 4 | std::unordered_map<std::string, OPS> funs{
 5 |     {"sin", OPS::sin},         {"cos", OPS::cos},      {"tan", OPS::tan},     {"sinh", OPS::sinh},
 6 |     {"cosh", OPS::cosh},       {"tanh", OPS::tanh},    {"exp", OPS::exp},     {"log", OPS::log},
 7 |     {"log10", OPS::log10},     {"pow3.5", OPS::pow35}, {"pow13", OPS::pow13}, {"asin", OPS::asin},
 8 |     {"acos", OPS::acos},       {"atan", OPS::atan},    {"asinh", OPS::asinh}, {"atanh", OPS::atanh},
 9 |     {"acosh", OPS::acosh},     {"erf", OPS::erf},      {"erfc", OPS::erfc},   {"lgamma", OPS::lgamma},
10 |     {"digamma", OPS::digamma}, {"ndtri", OPS::ndtri},  {"sqrt", OPS::sqrt},   {"rsqrt", OPS::rsqrt},
11 | };
12 | 
13 | auto get_funs() -> std::unordered_map<std::string, OPS> & { return funs; } // mutable reference to the table above
14 | } // namespace sf::functions::eigen
15 | 


--------------------------------------------------------------------------------
/src/bind_fort.cpp:
--------------------------------------------------------------------------------
 1 | #include <sf_libraries.hpp>
 2 | 
 3 | namespace sf::functions::fort { // double-precision wrappers over the fort_bessel_* routines
 4 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx1{
 5 |     {"bessel_Y0", scalar_func_apply<double>([](double arg) -> double {
 6 |          int order = 0; // first argument of the routine, passed by pointer (presumably the order)
 7 |          double value;
 8 |          fort_bessel_yn_(&order, &arg, &value); // result is read back from the third pointer
 9 |          return value;
10 |      })},
11 |     {"bessel_J0", scalar_func_apply<double>([](double arg) -> double {
12 |          int order = 0;
13 |          double value;
14 |          fort_bessel_jn_(&order, &arg, &value);
15 |          return value;
16 |      })},
17 | };
18 | 
19 | auto get_funs_dx1() -> std::unordered_map<std::string, multi_eval_func<double>> & { return funs_dx1; }
20 | } // namespace sf::functions::fort
21 | 


--------------------------------------------------------------------------------
/src/bind_gsl.cpp:
--------------------------------------------------------------------------------
 1 | #include <sf_libraries.hpp>
 2 | 
 3 | namespace sf::functions::gsl {
 4 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx1;   // real-argument table, filled by load_functions()
 5 | std::unordered_map<std::string, multi_eval_func<cdouble>> funs_cdx1; // complex-argument table, filled the same way
 6 | bool initialized{false};                                             // true once load_functions() has run
 7 | 
 8 | inline cdouble gsl_complex_wrapper(cdouble z, int (*f)(double, double, gsl_sf_result *, gsl_sf_result *)) {
 9 |     gsl_sf_result real_part, imag_part; // output slots handed to f
10 |     (void)f(z.real(), z.imag(), &real_part, &imag_part); // the int status returned by f is not inspected
11 |     return cdouble{real_part.val, imag_part.val};
12 | }
13 | 
14 | void load_functions() { // one-time population of funs_dx1 and funs_cdx1
15 |     if (initialized) {
16 |         return; // tables were already built by an earlier call
17 |     }
18 |     initialized = true;
19 |     funs_dx1 = {
20 |         {"sin_pi", scalar_func_apply<double>([](double v) -> double { return gsl_sf_sin_pi(v); })},
21 |         {"cos_pi", scalar_func_apply<double>([](double v) -> double { return gsl_sf_cos_pi(v); })},
22 |         {"sin", scalar_func_apply<double>([](double v) -> double { return gsl_sf_sin(v); })},
23 |         {"cos", scalar_func_apply<double>([](double v) -> double { return gsl_sf_cos(v); })},
24 |         {"sinc", scalar_func_apply<double>([](double v) -> double { return gsl_sf_sinc(v / M_PI); })},
25 |         {"sinc_pi", scalar_func_apply<double>([](double v) -> double { return gsl_sf_sinc(v); })},
26 |         {"erf", scalar_func_apply<double>([](double v) -> double { return gsl_sf_erf(v); })},
27 |         {"erfc", scalar_func_apply<double>([](double v) -> double { return gsl_sf_erfc(v); })},
28 |         {"tgamma", scalar_func_apply<double>([](double v) -> double { return gsl_sf_gamma(v); })},
29 |         {"lgamma", scalar_func_apply<double>([](double v) -> double { return gsl_sf_lngamma(v); })},
30 |         {"log", scalar_func_apply<double>([](double v) -> double { return gsl_sf_log(v); })},
31 |         {"exp", scalar_func_apply<double>([](double v) -> double { return gsl_sf_exp(v); })},
32 |         {"pow13", scalar_func_apply<double>([](double v) -> double { return gsl_sf_pow_int(v, 13); })},
33 |         {"bessel_Y0", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_Y0(v); })},
34 |         {"bessel_Y1", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_Y1(v); })},
35 |         {"bessel_Y2", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_Yn(2, v); })},
36 |         {"bessel_I0", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_I0(v); })},
37 |         {"bessel_I1", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_I1(v); })},
38 |         {"bessel_I2", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_In(2, v); })},
39 |         {"bessel_J0", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_J0(v); })},
40 |         {"bessel_J1", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_J1(v); })},
41 |         {"bessel_J2", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_Jn(2, v); })},
42 |         {"bessel_K0", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_K0(v); })},
43 |         {"bessel_K1", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_K1(v); })},
44 |         {"bessel_K2", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_Kn(2, v); })},
45 |         {"bessel_j0", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_j0(v); })},
46 |         {"bessel_j1", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_j1(v); })},
47 |         {"bessel_j2", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_j2(v); })},
48 |         {"bessel_y0", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_y0(v); })},
49 |         {"bessel_y1", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_y1(v); })},
50 |         {"bessel_y2", scalar_func_apply<double>([](double v) -> double { return gsl_sf_bessel_y2(v); })},
51 |         {"hermite_0", scalar_func_apply<double>([](double v) -> double { return gsl_sf_hermite(0, v); })},
52 |         {"hermite_1", scalar_func_apply<double>([](double v) -> double { return gsl_sf_hermite(1, v); })},
53 |         {"hermite_2", scalar_func_apply<double>([](double v) -> double { return gsl_sf_hermite(2, v); })},
54 |         {"hermite_3", scalar_func_apply<double>([](double v) -> double { return gsl_sf_hermite(3, v); })},
55 |         {"riemann_zeta", scalar_func_apply<double>([](double v) -> double { return gsl_sf_zeta(v); })},
56 |     };
57 | 
58 |     // FIXME: check accuracy of this and this+test_func
59 |     funs_cdx1 = {
60 |         {"sin",
61 |          scalar_func_apply<cdouble>([](cdouble w) -> cdouble { return gsl_complex_wrapper(w, gsl_sf_complex_sin_e); })},
62 |         {"cos",
63 |          scalar_func_apply<cdouble>([](cdouble w) -> cdouble { return gsl_complex_wrapper(w, gsl_sf_complex_cos_e); })},
64 |         {"log",
65 |          scalar_func_apply<cdouble>([](cdouble w) -> cdouble { return gsl_complex_wrapper(w, gsl_sf_complex_log_e); })},
66 |         {"dilog", scalar_func_apply<cdouble>(
67 |                       [](cdouble w) -> cdouble { return gsl_complex_wrapper(w, gsl_sf_complex_dilog_e); })},
68 |         {"lgamma", scalar_func_apply<cdouble>(
69 |                        [](cdouble w) -> cdouble { return gsl_complex_wrapper(w, gsl_sf_lngamma_complex_e); })},
70 |     };
71 | }
72 | 
73 | auto get_funs_dx1() -> std::unordered_map<std::string, multi_eval_func<double>> & {
74 |     load_functions(); // builds the tables on first use, returns immediately afterwards
75 |     return funs_dx1;
76 | }
77 | 
78 | auto get_funs_cdx1() -> std::unordered_map<std::string, multi_eval_func<cdouble>> & {
79 |     load_functions(); // builds the tables on first use, returns immediately afterwards
80 |     return funs_cdx1;
81 | }
82 | 
83 | } // namespace sf::functions::gsl
84 | 


--------------------------------------------------------------------------------
/src/bind_misc.cpp:
--------------------------------------------------------------------------------
 1 | #include <sf_libraries.hpp>
 2 | 
 3 | namespace sf::functions::misc { // one complex input -> pair of complex outputs
 4 | std::unordered_map<std::string, fun_cdx1_x2> funs_cdx1_x2{
 5 |     {"hank103", [](cdouble z) -> std::pair<cdouble, cdouble> {
 6 |          cdouble first, second; // the two outputs written by hank103_
 7 |          int expon_flag = 1;    // passed as ifexpon; NOTE(review): meaning is defined in hank103.f - confirm
 8 |          hank103_((double _Complex *)&z, (double _Complex *)&first, (double _Complex *)&second, &expon_flag);
 9 |          return {first, second};
10 |      }}};
11 | 
12 | auto get_funs_cdx1_x2() -> std::unordered_map<std::string, fun_cdx1_x2> & { return funs_cdx1_x2; }
13 | } // namespace sf::functions::misc
14 | 


--------------------------------------------------------------------------------
/src/bind_sctl.cpp:
--------------------------------------------------------------------------------
 1 | #include <sf_libraries.hpp>
 2 | 
 3 | namespace sf::functions::SCTL {
 4 | 
 5 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx8 = { // SCTL float kernels, vector length 8
 6 |     {"memcpy", sctl_apply<float, 8>([](const sctl_fx8 &v) { return v; })},
 7 |     {"memset", sctl_apply<float, 8>([](const sctl_fx8 &) -> sctl_fx8 { return (sctl_fx8::VData)Vec8f{0.0}; })},
 8 |     {"exp", sctl_apply<float, 8>([](const sctl_fx8 &v) { return exp(v); })},
 9 |     {"log", sctl_apply<float, 8>([](const sctl_fx8 &v) { return log(v); })},
10 |     {"sin", sctl_apply<float, 8>([](const sctl_fx8 &v) {
11 |          sctl_fx8 s, c; // sincos fills both; only one is returned
12 |          sincos(s, c, v);
13 |          return s;
14 |      })},
15 |     {"cos", sctl_apply<float, 8>([](const sctl_fx8 &v) {
16 |          sctl_fx8 s, c;
17 |          sincos(s, c, v);
18 |          return c;
19 |      })},
20 |     {"rsqrt", sctl_apply<float, 8>([](const sctl_fx8 &v) { return sctl::approx_rsqrt<7>(v); })},
21 | };
22 | 
23 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx4 = { // SCTL double kernels, vector length 4
24 |     {"memcpy", sctl_apply<double, 4>([](const sctl_dx4 &v) { return v; })},
25 |     {"memset", sctl_apply<double, 4>([](const sctl_dx4 &) -> sctl_dx4 { return (sctl_dx4::VData)Vec4d{0.0}; })},
26 |     {"exp", sctl_apply<double, 4>([](const sctl_dx4 &v) { return exp(v); })},
27 |     {"log", sctl_apply<double, 4>([](const sctl_dx4 &v) { return log(v); })},
28 |     {"sin", sctl_apply<double, 4>([](const sctl_dx4 &v) {
29 |          sctl_dx4 s, c; // sincos fills both; only one is returned
30 |          sincos(s, c, v);
31 |          return s;
32 |      })},
33 |     {"cos", sctl_apply<double, 4>([](const sctl_dx4 &v) {
34 |          sctl_dx4 s, c;
35 |          sincos(s, c, v);
36 |          return c;
37 |      })},
38 |     {"rsqrt", sctl_apply<double, 4>([](const sctl_dx4 &v) { return sctl::approx_rsqrt<16>(v); })},
39 | };
40 | 
41 | #ifdef __AVX512F__
42 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx16 = { // SCTL float kernels, vector length 16
43 |     {"memcpy", sctl_apply<float, 16>([](const sctl_fx16 &v) { return v; })},
44 |     {"memset", sctl_apply<float, 16>([](const sctl_fx16 &) -> sctl_fx16 { return (sctl_fx16::VData)Vec16f{0.0}; })},
45 |     {"exp", sctl_apply<float, 16>([](const sctl_fx16 &v) { return exp(v); })},
46 |     {"log", sctl_apply<float, 16>([](const sctl_fx16 &v) { return log(v); })},
47 |     {"sin", sctl_apply<float, 16>([](const sctl_fx16 &v) {
48 |          sctl_fx16 s, c; // sincos fills both; only one is returned
49 |          sincos(s, c, v);
50 |          return s;
51 |      })},
52 |     {"cos", sctl_apply<float, 16>([](const sctl_fx16 &v) {
53 |          sctl_fx16 s, c;
54 |          sincos(s, c, v);
55 |          return c;
56 |      })},
57 |     {"rsqrt", sctl_apply<float, 16>([](const sctl_fx16 &v) { return sctl::approx_rsqrt<7>(v); })},
58 | };
59 | 
60 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx8 = { // SCTL double kernels, vector length 8
61 |     {"memcpy", sctl_apply<double, 8>([](const sctl_dx8 &v) { return v; })},
62 |     {"memset", sctl_apply<double, 8>([](const sctl_dx8 &) -> sctl_dx8 { return (sctl_dx8::VData)Vec8d{0.0}; })},
63 |     {"exp", sctl_apply<double, 8>([](const sctl_dx8 &v) { return exp(v); })},
64 |     {"log", sctl_apply<double, 8>([](const sctl_dx8 &v) { return log(v); })},
65 |     {"sin", sctl_apply<double, 8>([](const sctl_dx8 &v) {
66 |          sctl_dx8 s, c; // sincos fills both; only one is returned
67 |          sincos(s, c, v);
68 |          return s;
69 |      })},
70 |     {"cos", sctl_apply<double, 8>([](const sctl_dx8 &v) {
71 |          sctl_dx8 s, c;
72 |          sincos(s, c, v);
73 |          return c;
74 |      })},
75 |     {"rsqrt", sctl_apply<double, 8>([](const sctl_dx8 &v) { return sctl::approx_rsqrt<16>(v); })},
76 | };
77 | #else
78 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx16{}; // left empty when __AVX512F__ is not defined
79 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx8{}; // left empty when __AVX512F__ is not defined
80 | #endif
81 | 
82 | auto get_funs_fx8() -> std::unordered_map<std::string, multi_eval_func<float>> & { return funs_fx8; }
83 | auto get_funs_dx4() -> std::unordered_map<std::string, multi_eval_func<double>> & { return funs_dx4; }
84 | auto get_funs_fx16() -> std::unordered_map<std::string, multi_eval_func<float>> & { return funs_fx16; }
85 | auto get_funs_dx8() -> std::unordered_map<std::string, multi_eval_func<double>> & { return funs_dx8; }
86 | 
87 | } // namespace sf::functions::SCTL
88 | 


--------------------------------------------------------------------------------
/src/bind_sleef.cpp:
--------------------------------------------------------------------------------
  1 | #include <sf_libraries.hpp>
  2 | 
  3 | namespace sf::functions::sleef {
  4 | 
  5 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx1 = { // scalar float SLEEF (purecfma) kernels
  6 |     {"sin_pi", scalar_func_apply<float>([](float v) -> float { return Sleef_sinpif1_u05purecfma(v); })},
  7 |     {"cos_pi", scalar_func_apply<float>([](float v) -> float { return Sleef_cospif1_u05purecfma(v); })},
  8 |     {"sin", scalar_func_apply<float>([](float v) -> float { return Sleef_sinf1_u10purecfma(v); })},
  9 |     {"cos", scalar_func_apply<float>([](float v) -> float { return Sleef_cosf1_u10purecfma(v); })},
 10 |     {"tan", scalar_func_apply<float>([](float v) -> float { return Sleef_tanf1_u10purecfma(v); })},
 11 |     {"sinh", scalar_func_apply<float>([](float v) -> float { return Sleef_sinhf1_u10purecfma(v); })},
 12 |     {"cosh", scalar_func_apply<float>([](float v) -> float { return Sleef_coshf1_u10purecfma(v); })},
 13 |     {"tanh", scalar_func_apply<float>([](float v) -> float { return Sleef_tanhf1_u10purecfma(v); })},
 14 |     {"asin", scalar_func_apply<float>([](float v) -> float { return Sleef_asinf1_u10purecfma(v); })},
 15 |     {"acos", scalar_func_apply<float>([](float v) -> float { return Sleef_acosf1_u10purecfma(v); })},
 16 |     {"atan", scalar_func_apply<float>([](float v) -> float { return Sleef_atanf1_u10purecfma(v); })},
 17 |     {"asinh", scalar_func_apply<float>([](float v) -> float { return Sleef_asinhf1_u10purecfma(v); })},
 18 |     {"acosh", scalar_func_apply<float>([](float v) -> float { return Sleef_acoshf1_u10purecfma(v); })},
 19 |     {"atanh", scalar_func_apply<float>([](float v) -> float { return Sleef_atanhf1_u10purecfma(v); })},
 20 |     {"log", scalar_func_apply<float>([](float v) -> float { return Sleef_logf1_u10purecfma(v); })},
 21 |     {"log2", scalar_func_apply<float>([](float v) -> float { return Sleef_log2f1_u10purecfma(v); })},
 22 |     {"log10", scalar_func_apply<float>([](float v) -> float { return Sleef_log10f1_u10purecfma(v); })},
 23 |     {"exp", scalar_func_apply<float>([](float v) -> float { return Sleef_expf1_u10purecfma(v); })},
 24 |     {"exp2", scalar_func_apply<float>([](float v) -> float { return Sleef_exp2f1_u10purecfma(v); })},
 25 |     {"exp10", scalar_func_apply<float>([](float v) -> float { return Sleef_exp10f1_u10purecfma(v); })},
 26 |     {"erf", scalar_func_apply<float>([](float v) -> float { return Sleef_erff1_u10purecfma(v); })},
 27 |     {"erfc", scalar_func_apply<float>([](float v) -> float { return Sleef_erfcf1_u15purecfma(v); })},
 28 |     {"lgamma", scalar_func_apply<float>([](float v) -> float { return Sleef_lgammaf1_u10purecfma(v); })},
 29 |     {"tgamma", scalar_func_apply<float>([](float v) -> float { return Sleef_tgammaf1_u10purecfma(v); })},
 30 |     {"sqrt", scalar_func_apply<float>([](float v) -> float { return Sleef_sqrtf1_u05purecfma(v); })},
 31 |     {"pow3.5", scalar_func_apply<float>([](float v) -> float { return Sleef_powf1_u10purecfma(v, 3.5); })},
 32 |     {"pow13", scalar_func_apply<float>([](float v) -> float { return Sleef_powf1_u10purecfma(v, 13); })},
 33 | };
 34 | 
 35 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx1 = { // scalar double SLEEF (purecfma) kernels
 36 |     {"sin_pi", scalar_func_apply<double>([](double v) -> double { return Sleef_sinpid1_u05purecfma(v); })},
 37 |     {"cos_pi", scalar_func_apply<double>([](double v) -> double { return Sleef_cospid1_u05purecfma(v); })},
 38 |     {"sin", scalar_func_apply<double>([](double v) -> double { return Sleef_sind1_u10purecfma(v); })},
 39 |     {"cos", scalar_func_apply<double>([](double v) -> double { return Sleef_cosd1_u10purecfma(v); })},
 40 |     {"tan", scalar_func_apply<double>([](double v) -> double { return Sleef_tand1_u10purecfma(v); })},
 41 |     {"sinh", scalar_func_apply<double>([](double v) -> double { return Sleef_sinhd1_u10purecfma(v); })},
 42 |     {"cosh", scalar_func_apply<double>([](double v) -> double { return Sleef_coshd1_u10purecfma(v); })},
 43 |     {"tanh", scalar_func_apply<double>([](double v) -> double { return Sleef_tanhd1_u10purecfma(v); })},
 44 |     {"asin", scalar_func_apply<double>([](double v) -> double { return Sleef_asind1_u10purecfma(v); })},
 45 |     {"acos", scalar_func_apply<double>([](double v) -> double { return Sleef_acosd1_u10purecfma(v); })},
 46 |     {"atan", scalar_func_apply<double>([](double v) -> double { return Sleef_atand1_u10purecfma(v); })},
 47 |     {"asinh", scalar_func_apply<double>([](double v) -> double { return Sleef_asinhd1_u10purecfma(v); })},
 48 |     {"acosh", scalar_func_apply<double>([](double v) -> double { return Sleef_acoshd1_u10purecfma(v); })},
 49 |     {"atanh", scalar_func_apply<double>([](double v) -> double { return Sleef_atanhd1_u10purecfma(v); })},
 50 |     {"log", scalar_func_apply<double>([](double v) -> double { return Sleef_logd1_u10purecfma(v); })},
 51 |     {"log2", scalar_func_apply<double>([](double v) -> double { return Sleef_log2d1_u10purecfma(v); })},
 52 |     {"log10", scalar_func_apply<double>([](double v) -> double { return Sleef_log10d1_u10purecfma(v); })},
 53 |     {"exp", scalar_func_apply<double>([](double v) -> double { return Sleef_expd1_u10purecfma(v); })},
 54 |     {"exp2", scalar_func_apply<double>([](double v) -> double { return Sleef_exp2d1_u10purecfma(v); })},
 55 |     {"exp10", scalar_func_apply<double>([](double v) -> double { return Sleef_exp10d1_u10purecfma(v); })},
 56 |     {"erf", scalar_func_apply<double>([](double v) -> double { return Sleef_erfd1_u10purecfma(v); })},
 57 |     {"erfc", scalar_func_apply<double>([](double v) -> double { return Sleef_erfcd1_u15purecfma(v); })},
 58 |     {"lgamma", scalar_func_apply<double>([](double v) -> double { return Sleef_lgammad1_u10purecfma(v); })},
 59 |     {"tgamma", scalar_func_apply<double>([](double v) -> double { return Sleef_tgammad1_u10purecfma(v); })},
 60 |     {"sqrt", scalar_func_apply<double>([](double v) -> double { return Sleef_sqrtd1_u05purecfma(v); })},
 61 |     {"pow3.5", scalar_func_apply<double>([](double v) -> double { return Sleef_powd1_u10purecfma(v, 3.5); })},
 62 |     {"pow13", scalar_func_apply<double>([](double v) -> double { return Sleef_powd1_u10purecfma(v, 13); })},
 63 | };
 64 | 
 65 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx8 = { // Vec8f AVX2 kernels, keyed like funs_fx1
 66 |     {"sin_pi", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_sinpif8_u05avx2(x); })},
 67 |     {"cos_pi", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_cospif8_u05avx2(x); })},
 68 |     {"sin", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_sinf8_u10avx2(x); })},
 69 |     {"cos", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_cosf8_u10avx2(x); })},
 70 |     {"tan", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_tanf8_u10avx2(x); })},
 71 |     {"sinh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_sinhf8_u10avx2(x); })},
 72 |     {"cosh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_coshf8_u10avx2(x); })},
 73 |     {"tanh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_tanhf8_u10avx2(x); })},
 74 |     {"asin", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_asinf8_u10avx2(x); })},
 75 |     {"acos", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_acosf8_u10avx2(x); })},
 76 |     {"atan", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_atanf8_u10avx2(x); })},
 77 |     {"asinh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_asinhf8_u10avx2(x); })},
 78 |     {"acosh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_acoshf8_u10avx2(x); })},
 79 |     {"atanh", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_atanhf8_u10avx2(x); })},
 80 |     {"log", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_logf8_u10avx2(x); })},
 81 |     {"log2", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_log2f8_u10avx2(x); })},
 82 |     {"log10", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_log10f8_u10avx2(x); })},
 83 |     {"exp", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_expf8_u10avx2(x); })},
 84 |     {"exp2", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_exp2f8_u10avx2(x); })},
 85 |     {"exp10", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_exp10f8_u10avx2(x); })},
 86 |     {"erf", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_erff8_u10avx2(x); })},
 87 |     {"erfc", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_erfcf8_u15avx2(x); })},
 88 |     {"lgamma", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_lgammaf8_u10avx2(x); })},
 89 |     {"tgamma", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_tgammaf8_u10avx2(x); })},
 90 |     {"sqrt", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_sqrtf8_u05avx2(x); })},
 91 |     {"pow3.5", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_powf8_u10avx2(x, Vec8f{3.5}); })},
 92 |     {"pow13", vec_func_apply<Vec8f, float>([](Vec8f x) -> Vec8f { return Sleef_powf8_u10avx2(x, Vec8f{13}); })},
 93 | };
 94 | 
 95 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx4 = { // Vec4d AVX2 kernels, keyed like funs_dx1
 96 |     {"sin_pi", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_sinpid4_u05avx2(x); })},
 97 |     {"cos_pi", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_cospid4_u05avx2(x); })},
 98 |     {"sin", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_sind4_u10avx2(x); })},
 99 |     {"cos", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_cosd4_u10avx2(x); })},
100 |     {"tan", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_tand4_u10avx2(x); })},
101 |     {"sinh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_sinhd4_u10avx2(x); })},
102 |     {"cosh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_coshd4_u10avx2(x); })},
103 |     {"tanh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_tanhd4_u10avx2(x); })},
104 |     {"asin", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_asind4_u10avx2(x); })},
105 |     {"acos", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_acosd4_u10avx2(x); })},
106 |     {"atan", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_atand4_u10avx2(x); })},
107 |     {"asinh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_asinhd4_u10avx2(x); })},
108 |     {"acosh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_acoshd4_u10avx2(x); })},
109 |     {"atanh", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_atanhd4_u10avx2(x); })},
110 |     {"log", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_logd4_u10avx2(x); })},
111 |     {"log2", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_log2d4_u10avx2(x); })},
112 |     {"log10", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_log10d4_u10avx2(x); })},
113 |     {"exp", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_expd4_u10avx2(x); })},
114 |     {"exp2", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_exp2d4_u10avx2(x); })},
115 |     {"exp10", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_exp10d4_u10avx2(x); })},
116 |     {"erf", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_erfd4_u10avx2(x); })},
117 |     {"erfc", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_erfcd4_u15avx2(x); })},
118 |     {"lgamma", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_lgammad4_u10avx2(x); })},
119 |     {"tgamma", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_tgammad4_u10avx2(x); })},
120 |     {"sqrt", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_sqrtd4_u05avx2(x); })},
121 |     {"pow3.5", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_powd4_u10avx2(x, Vec4d{3.5}); })},
122 |     {"pow13", vec_func_apply<Vec4d, double>([](Vec4d x) -> Vec4d { return Sleef_powd4_u10avx2(x, Vec4d{13}); })},
123 | };
124 | 
125 | #ifdef __AVX512F__
126 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx16 = { // Vec16f AVX-512F, keyed like funs_fx1
127 |     {"sin_pi", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_sinpif16_u05avx512f(x); })},
128 |     {"cos_pi", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_cospif16_u05avx512f(x); })},
129 |     {"sin", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_sinf16_u10avx512f(x); })},
130 |     {"cos", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_cosf16_u10avx512f(x); })},
131 |     {"tan", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_tanf16_u10avx512f(x); })},
132 |     {"sinh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_sinhf16_u10avx512f(x); })},
133 |     {"cosh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_coshf16_u10avx512f(x); })},
134 |     {"tanh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_tanhf16_u10avx512f(x); })},
135 |     {"asin", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_asinf16_u10avx512f(x); })},
136 |     {"acos", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_acosf16_u10avx512f(x); })},
137 |     {"atan", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_atanf16_u10avx512f(x); })},
138 |     {"asinh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_asinhf16_u10avx512f(x); })},
139 |     {"acosh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_acoshf16_u10avx512f(x); })},
140 |     {"atanh", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_atanhf16_u10avx512f(x); })},
141 |     {"log", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_logf16_u10avx512f(x); })},
142 |     {"log2", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_log2f16_u10avx512f(x); })},
143 |     {"log10", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_log10f16_u10avx512f(x); })},
144 |     {"exp", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_expf16_u10avx512f(x); })},
145 |     {"exp2", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_exp2f16_u10avx512f(x); })},
146 |     {"exp10", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_exp10f16_u10avx512f(x); })},
147 |     {"erf", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_erff16_u10avx512f(x); })},
148 |     {"erfc", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_erfcf16_u15avx512f(x); })},
149 |     {"lgamma", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_lgammaf16_u10avx512f(x); })},
150 |     {"tgamma", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_tgammaf16_u10avx512f(x); })},
151 |     {"sqrt", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_sqrtf16_u05avx512f(x); })},
152 |     {"pow3.5",
153 |      vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_powf16_u10avx512f(x, Vec16f{3.5}); })},
154 |     {"pow13", vec_func_apply<Vec16f, float>([](Vec16f x) -> Vec16f { return Sleef_powf16_u10avx512f(x, Vec16f{13}); })},
155 | };
156 | 
157 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx8 = { // Vec8d AVX-512F, keyed like funs_dx1
158 |     {"sin_pi", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_sinpid8_u05avx512f(x); })},
159 |     {"cos_pi", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_cospid8_u05avx512f(x); })},
160 |     {"sin", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_sind8_u10avx512f(x); })},
161 |     {"cos", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_cosd8_u10avx512f(x); })},
162 |     {"tan", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_tand8_u10avx512f(x); })},
163 |     {"sinh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_sinhd8_u10avx512f(x); })},
164 |     {"cosh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_coshd8_u10avx512f(x); })},
165 |     {"tanh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_tanhd8_u10avx512f(x); })},
166 |     {"asin", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_asind8_u10avx512f(x); })},
167 |     {"acos", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_acosd8_u10avx512f(x); })},
168 |     {"atan", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_atand8_u10avx512f(x); })},
169 |     {"asinh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_asinhd8_u10avx512f(x); })},
170 |     {"acosh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_acoshd8_u10avx512f(x); })},
171 |     {"atanh", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_atanhd8_u10avx512f(x); })},
172 |     {"log", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_logd8_u10avx512f(x); })},
173 |     {"log2", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_log2d8_u10avx512f(x); })},
174 |     {"log10", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_log10d8_u10avx512f(x); })},
175 |     {"exp", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_expd8_u10avx512f(x); })},
176 |     {"exp2", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_exp2d8_u10avx512f(x); })},
177 |     {"exp10", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_exp10d8_u10avx512f(x); })},
178 |     {"erf", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_erfd8_u10avx512f(x); })},
179 |     {"erfc", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_erfcd8_u15avx512f(x); })},
180 |     {"lgamma", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_lgammad8_u10avx512f(x); })},
181 |     {"tgamma", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_tgammad8_u10avx512f(x); })},
182 |     {"sqrt", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_sqrtd8_u05avx512f(x); })},
183 |     {"pow3.5", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_powd8_u10avx512f(x, Vec8d{3.5}); })},
184 |     {"pow13", vec_func_apply<Vec8d, double>([](Vec8d x) -> Vec8d { return Sleef_powd8_u10avx512f(x, Vec8d{13}); })},
185 | };
186 | #else
187 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx16;
188 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx8;
189 | #endif
190 | 
191 | auto get_funs_fx1() -> std::unordered_map<std::string, multi_eval_func<float>> & { return funs_fx1; }
192 | auto get_funs_dx1() -> std::unordered_map<std::string, multi_eval_func<double>> & { return funs_dx1; }
193 | auto get_funs_fx8() -> std::unordered_map<std::string, multi_eval_func<float>> & { return funs_fx8; }
194 | auto get_funs_dx4() -> std::unordered_map<std::string, multi_eval_func<double>> & { return funs_dx4; }
195 | auto get_funs_fx16() -> std::unordered_map<std::string, multi_eval_func<float>> & { return funs_fx16; }
196 | auto get_funs_dx8() -> std::unordered_map<std::string, multi_eval_func<double>> & { return funs_dx8; }
197 | 
198 | } // namespace sf::functions::sleef
199 | 


--------------------------------------------------------------------------------
/src/bind_stl.cpp:
--------------------------------------------------------------------------------
 1 | #include <sf_libraries.hpp>
 2 | 
 3 | namespace sf::functions::stl {
 4 | // Single-precision scalar kernels from the C++ standard library, keyed by benchmark name. Every entry stays in
 5 | // float (float pi, 1.0f, 3.5f, 13.0f, exp10f) so this table never falls through to the double-precision overloads.
 6 | // memcpy/memset act on the whole buffer at once (memset ignores src); exp10f is a GNU extension, not part of std::.
 7 | std::unordered_map<std::string, multi_eval_func<float>> funs_fx1 = {
 8 |     {"memcpy", [](const float *src, float *dst, size_t N) { std::memcpy(dst, src, N * sizeof(float)); }},
 9 |     {"memset", [](const float *src, float *dst, size_t N) { std::memset(dst, 0, N * sizeof(float)); }},
10 |     {"tgamma", scalar_func_apply<float>([](float x) -> float { return std::tgamma(x); })},
11 |     {"lgamma", scalar_func_apply<float>([](float x) -> float { return std::lgamma(x); })},
12 |     {"sin", scalar_func_apply<float>([](float x) -> float { return std::sin(x); })},
13 |     {"cos", scalar_func_apply<float>([](float x) -> float { return std::cos(x); })},
14 |     {"tan", scalar_func_apply<float>([](float x) -> float { return std::tan(x); })},
15 |     {"asin", scalar_func_apply<float>([](float x) -> float { return std::asin(x); })},
16 |     {"acos", scalar_func_apply<float>([](float x) -> float { return std::acos(x); })},
17 |     {"atan", scalar_func_apply<float>([](float x) -> float { return std::atan(x); })},
18 |     {"sinh", scalar_func_apply<float>([](float x) -> float { return std::sinh(x); })},
19 |     {"cosh", scalar_func_apply<float>([](float x) -> float { return std::cosh(x); })},
20 |     {"tanh", scalar_func_apply<float>([](float x) -> float { return std::tanh(x); })},
21 |     {"asinh", scalar_func_apply<float>([](float x) -> float { return std::asinh(x); })},
22 |     {"acosh", scalar_func_apply<float>([](float x) -> float { return std::acosh(x); })},
23 |     {"atanh", scalar_func_apply<float>([](float x) -> float { return std::atanh(x); })},
24 |     {"sin_pi", scalar_func_apply<float>([](float x) -> float { return std::sin(static_cast<float>(M_PI) * x); })},
25 |     {"cos_pi", scalar_func_apply<float>([](float x) -> float { return std::cos(static_cast<float>(M_PI) * x); })},
26 |     {"erf", scalar_func_apply<float>([](float x) -> float { return std::erf(x); })},
27 |     {"erfc", scalar_func_apply<float>([](float x) -> float { return std::erfc(x); })},
28 |     {"log", scalar_func_apply<float>([](float x) -> float { return std::log(x); })},
29 |     {"log2", scalar_func_apply<float>([](float x) -> float { return std::log2(x); })},
30 |     {"log10", scalar_func_apply<float>([](float x) -> float { return std::log10(x); })},
31 |     {"exp", scalar_func_apply<float>([](float x) -> float { return std::exp(x); })},
32 |     {"exp2", scalar_func_apply<float>([](float x) -> float { return std::exp2(x); })},
33 |     {"exp10", scalar_func_apply<float>([](float x) -> float { return exp10f(x); })},
34 |     {"sqrt", scalar_func_apply<float>([](float x) -> float { return std::sqrt(x); })},
35 |     {"rsqrt", scalar_func_apply<float>([](float x) -> float { return 1.0f / std::sqrt(x); })},
36 |     {"pow3.5", scalar_func_apply<float>([](float x) -> float { return std::pow(x, 3.5f); })},
37 |     {"pow13", scalar_func_apply<float>([](float x) -> float { return std::pow(x, 13.0f); })},
38 | };
39 | 
40 | // Double-precision counterparts of funs_fx1 with the same keys. An unordered_map built from an initializer list
41 | // keeps only the first of several equal keys, so each name is listed exactly once; exp10 is the GNU extension.
42 | // memcpy/memset again act on the whole buffer at once (memset ignores src).
43 | std::unordered_map<std::string, multi_eval_func<double>> funs_dx1 = {
44 |     {"memcpy", [](const double *src, double *dst, size_t N) { std::memcpy(dst, src, N * sizeof(double)); }},
45 |     {"memset", [](const double *src, double *dst, size_t N) { std::memset(dst, 0, N * sizeof(double)); }},
46 |     {"tgamma", scalar_func_apply<double>([](double x) -> double { return std::tgamma(x); })},
47 |     {"lgamma", scalar_func_apply<double>([](double x) -> double { return std::lgamma(x); })},
48 |     {"sin", scalar_func_apply<double>([](double x) -> double { return std::sin(x); })},
49 |     {"cos", scalar_func_apply<double>([](double x) -> double { return std::cos(x); })},
50 |     {"tan", scalar_func_apply<double>([](double x) -> double { return std::tan(x); })},
51 |     {"asin", scalar_func_apply<double>([](double x) -> double { return std::asin(x); })},
52 |     {"acos", scalar_func_apply<double>([](double x) -> double { return std::acos(x); })},
53 |     {"atan", scalar_func_apply<double>([](double x) -> double { return std::atan(x); })},
54 |     {"sinh", scalar_func_apply<double>([](double x) -> double { return std::sinh(x); })},
55 |     {"cosh", scalar_func_apply<double>([](double x) -> double { return std::cosh(x); })},
56 |     {"tanh", scalar_func_apply<double>([](double x) -> double { return std::tanh(x); })},
57 |     {"asinh", scalar_func_apply<double>([](double x) -> double { return std::asinh(x); })},
58 |     {"acosh", scalar_func_apply<double>([](double x) -> double { return std::acosh(x); })},
59 |     {"atanh", scalar_func_apply<double>([](double x) -> double { return std::atanh(x); })},
60 |     {"sin_pi", scalar_func_apply<double>([](double x) -> double { return std::sin(M_PI * x); })},
61 |     {"cos_pi", scalar_func_apply<double>([](double x) -> double { return std::cos(M_PI * x); })},
62 |     {"erf", scalar_func_apply<double>([](double x) -> double { return std::erf(x); })},
63 |     {"erfc", scalar_func_apply<double>([](double x) -> double { return std::erfc(x); })},
64 |     {"log", scalar_func_apply<double>([](double x) -> double { return std::log(x); })},
65 |     {"log2", scalar_func_apply<double>([](double x) -> double { return std::log2(x); })},
66 |     {"log10", scalar_func_apply<double>([](double x) -> double { return std::log10(x); })},
67 |     {"exp", scalar_func_apply<double>([](double x) -> double { return std::exp(x); })},
68 |     {"exp2", scalar_func_apply<double>([](double x) -> double { return std::exp2(x); })},
69 |     {"exp10", scalar_func_apply<double>([](double x) -> double { return exp10(x); })},
70 |     {"sqrt", scalar_func_apply<double>([](double x) -> double { return std::sqrt(x); })},
71 |     {"rsqrt", scalar_func_apply<double>([](double x) -> double { return 1.0 / std::sqrt(x); })},
72 |     {"pow3.5", scalar_func_apply<double>([](double x) -> double { return std::pow(x, 3.5); })},
73 |     {"pow13", scalar_func_apply<double>([](double x) -> double { return std::pow(x, 13); })},
74 | };
75 | 
76 | std::unordered_map<std::string, multi_eval_func<float>> &get_funs_fx1() { return funs_fx1; }
77 | std::unordered_map<std::string, multi_eval_func<double>> &get_funs_dx1() { return funs_dx1; }
78 | } // namespace sf::functions::stl
79 | 


--------------------------------------------------------------------------------
/src/hank103.f:
--------------------------------------------------------------------------------
  1 | ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
  2 | c
  3 | c
  4 | c        this is the end of the debugging code and the beginning of the
  5 | c        hankel function code proper.
  6 | c
  7 | c
  8 | ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
  9 | c
 10 | c
 11 | c
 12 | c
 13 | c     code yanked with author's permission from
 14 | c     https://github.com/ahbarnett/mpspack
 15 |         subroutine hanks103(z,hanks,n,ifexpon)
 16 |         implicit real *8 (a-h,o-z)
 17 |         complex *16 z,hanks(1),cd,cdd,h1tmp
 18 | c
 19 | c       This subroutine evaluates the Hankel functions H_0, ..., H_n
 20 | c       of the complex argument z. H_0, H_1 come from hank103; higher
 21 | c       orders use H_{m+1}(z) = (2*m/z)*H_m(z) - H_{m-1}(z). The values
 22 | c       may be scaled by e^{-i \cdot z} via ifexpon (see below).
 23 | c
 24 | c                      input parameters:
 25 | c
 26 | c  z - the complex number at which the hankel functions
 27 | c        H_0, ..., H_n are to be evaluated
 28 | c  n - the highest order of any Hankel function to be evaluated;
 29 | c        n .lt. 1 is treated as n = 0 (only hanks(1) is written)
 30 | c  ifexpon - the integer parameter telling the subroutine whether
 31 | c        to calculate the actual values of the hankel functions,
 32 | c        or the values of Hankel functions scaled by e^{-i \cdot z}.
 33 | c        Permitted values: 0 and 1.
 34 | c    ifexpon = 1 will cause the subroutine to evaluate the Hankel functions
 35 | c        honestly
 36 | c    ifexpon = 0 will cause the subroutine to scale the Hankel functions
 37 | c        by e^{-i \cdot z}.
 38 | c
 39 | c                      output parameters:
 40 | c
 41 | c  hanks - the first n+1 Hankel functions of the (complex) argument z.
 42 | c        Please note that hanks(1) is the Hankel function of order 0,
 43 | c        hanks(2) is the Hankel function of order 1, ..., hanks(n+1)
 44 | c        is the Hankel function of order n
 45 | c
 46 | c       . . . for n < 1 only hanks(1) may be written: send H_1 to a
 47 | c       scratch variable so that a one-element array is not overrun
 48 |         if(n .ge. 1) goto 1100
 49 |         call hank103(z,hanks(1),h1tmp,ifexpon)
 50 |         return
 51 |  1100 continue
 52 | c
 53 | c       . . . evaluate the functions h0,h1
 54 | c
 55 |         call hank103(z,hanks(1),hanks(2),ifexpon)
 56 | c
 57 | c       conduct recursion; cdd equals 2*(i1-1)/z at the top of each pass
 58 | c
 59 |         cd=2/z
 60 |         cdd=cd
 61 |         do 1200 i1=2,n
 62 |         hanks(i1+1)=cdd*hanks(i1)-hanks(i1-1)
 63 |         cdd=cdd+cd
 64 |  1200 continue
 65 |         return
 66 |         end
 67 | c
 68 | c
 69 | c
 70 | c
 71 | c
 72 |         subroutine hank103(z,h0,h1,ifexpon)
 73 |         implicit real *8 (a-h,o-z)
 74 |         complex *16 z,h0,h1,h0u,h0r,h1u,h1r,
 75 |      1      fj0,fj1,y0,y1,com,zu,zr,ima,ser2,ser3,z2,
 76 |      2      cclog,cd
 77 |         real *8 rea(2)
 78 |         equivalence (rea(1),com)
 79 |         data ima/(0.0d0,1.0d0)/,pi/0.31415926535897932D+01/
 80 | c
 81 | c        this subroutine evaluates the hankel functions H_0^1, H_1^1
 82 | c        for an arbitrary user-specified complex number z. The user
 83 | c        also has the option of evaluating the functions h0, h1
 84 | c        scaled by the (complex) coefficient e^{-i \cdot z}. This
 85 | c        subroutine is a modification of the subroutine hank102
 86 | c        (see), different from the latter by having the parameter
 87 | c        ifexpon. Please note that the subroutine hank102 is in
 88 | c        turn a slightly accelerated version of the old hank101
 89 | c        (see). The principal claim to fame of all three is that
 90 | c        they are valid on the whole  complex plane, and are
 91 | c        reasonably accurate (14-digit relative accuracy) and
 92 | c        reasonably fast. Also, please note that all three have not
 93 | c        been carefully tested in the third quadrant (both x and y
 94 | c        negative); some sort of numerical trouble is possible
 95 | c        (though has not been observed) for LARGE z in the third
 96 | c        quadrant.
 97 | c
 98 | c                      input parameters:
 99 | c
100 | c  z - the complex number for which the hankel functions
101 | c        H_0, H_1 are to be evaluated
102 | c  ifexpon - the integer parameter telling the subroutine whether
103 | c        to calculate the actual values of the hankel functions,
104 | c        or the values of Hankel functions scaled by e^{-i \cdot z}.
105 | c        Permitted values: 0 and 1.
106 | c    ifexpon = 1 will cause the subroutine to evaluate the Hankel functions
107 | c        honestly
108 | c    ifexpon = 0 will cause the subroutine to scale the Hankel functions
109 | c        by e^{-i \cdot z}.
110 | c
111 | c                      output parameters:
112 | c
113 | c  h0, h1 - the said Hankel functions
114 | c        rea is equivalenced to com: after com=z, rea(1) and rea(2) are
115 | c        the real and imaginary parts of z. ier is not examined here.
116 | c        . . . if z in the upper half-plane - act accordingly
117 | c
118 |         com=z
119 |         if(rea(2) .lt. 0) goto 1400
120 |         call hank103u(z,ier,h0,h1,ifexpon)
121 |         return
122 |  1400 continue
123 | c
124 | c       if z is in the right lower quadrant - act accordingly
125 | c
126 |         if(rea(1) .lt. 0) goto 2000
127 |         call hank103r(z,ier,h0,h1,ifexpon)
128 |         return
129 |  2000 continue
130 | c
131 | c       z is in the left lower quadrant. compute
132 | c       h0, h1 at the points zu, zr obtained from z by reflection
133 | c       in the x and y axis, respectively
134 | c
135 |         zu=dconjg(z)
136 |         zr=-zu
137 | c       zu is in the upper half-plane, zr in the right lower quadrant
138 |         call hank103u(zu,ier,h0u,h1u,ifexpon)
139 |         call hank103r(zr,ier,h0r,h1r,ifexpon)
140 | c       when ifexpon=1 the rescaling that follows is skipped
141 |         if(ifexpon .eq. 1) goto 3000
142 | c       subt=|Im zu|; scale results by exp(i*zu-subt), exp(i*zr-subt)
143 |         com=zu
144 |         subt=abs(rea(2))
145 | 
146 |         cd=exp(ima*zu-subt)
147 |         h0u=h0u*cd
148 |         h1u=h1u*cd
149 | 
150 |         cd=exp(ima*zr-subt)
151 |         h0r=h0r*cd
152 |         h1r=h1r*cd
153 |  3000 continue
154 | c
155 | c       compute the functions j0, j1, y0, y1
156 | c       at the point zr
157 | c       (the two statements below set half to 0.5)
158 |         half=1
159 |         half=half/2
160 |         y0=(h0u+h0r)*half/ima
161 |         fj0=-(h0u-h0r)*half
162 | c
163 |         y1=-(h1u-h1r)*half/ima
164 |         fj1=(h1u+h1r)*half
165 | c
166 | c        finally, compute h0, h1
167 | c
168 | c       . . . calculate ser2, ser3
169 | c
170 |          z2=-dconjg(z)
171 |          cclog=cdlog(z2)
172 |          ser2=y0-fj0*2/pi*cclog
173 |          ser3=y1-fj1*2/pi*cclog
174 | c
175 | c       reflect all of these in the imaginary axis
176 | c
177 |         fj0=dconjg(fj0)
178 |         fj1=-dconjg(fj1)
179 | c
180 |         ser2=dconjg(ser2)
181 |         ser3=-dconjg(ser3)
182 | c
183 | c       reconstitute y0, y1
184 | c
185 |         cclog=cdlog(z)
186 |         y0=ser2+fj0*2/pi*cclog
187 |         y1=ser3+fj1*2/pi*cclog
188 | c
189 |         h0=fj0+ima*y0
190 |         h1=fj1+ima*y1
191 | c       for ifexpon=1 the values are returned as they are; otherwise
192 |         if(ifexpon .eq. 1) return
193 | c       they are multiplied by exp(-i*z+subt) (subt was set above)
194 |         cd=exp(-ima*z+subt)
195 |         h0=h0*cd
196 |         h1=h1*cd
197 | 
198 |         return
199 |         end
200 | c
201 | c
202 | c
203 | c
204 | c
205 |         subroutine hank103u(z,ier,h0,h1,ifexpon)
206 |         implicit real *8 (a-h,o-z)
207 |         complex *16 z,com,ima,cd,h0,h1,ccex,zzz9
208 |         dimension rea(2)
209 |         real *8 c0p1(34),c0p1b(36),buf01(2)
210 |         equivalence (c0p1(34),buf01(1)),
211 |      1      (c0p1b(1),buf01(2)),(rea(1),com)
212 |         real *8 c1p1(34),c1p1b(36),buf11(2)
213 |         equivalence (c1p1(34),buf11(1)),
214 |      1      (c1p1b(1),buf11(2))
215 |         real *8 c0p2(34),c0p2b(28),buf02(2)
216 |         equivalence (c0p2(34),buf02(1)),
217 |      1      (c0p2b(1),buf02(2))
218 |         real *8 c1p2(34),c1p2b(28),buf12(2)
219 |         equivalence (c1p2(34),buf12(1)),
220 |      1      (c1p2b(1),buf12(2))
221 |         data ima/(0.0d0,1.0d0)/
222 | c
223 | c        this subroutine evaluates the hankel functions H_0^1, H_1^1
224 | c        for a user-specified complex number z in the upper half-plane.
225 | c        it is reasonably accurate (14-digit relative accuracy)
226 | c        and reasonably fast.
227 | c
228 | c                      input parameters:
229 | c
230 | c  z - the complex number for which the hankel functions
231 | c        H_0, H_1 are to be evaluated
232 | c  ifexpon - 1: include the factor cdexp(ima*z) in h0,h1; else omit it
233 | c
234 | c                      output parameters:
235 | c
236 | c  ier - error return code.
237 | c         ier=0 means successful conclusion
238 | c         ier=4 means that z is not in the upper half-plane
239 | c  h0, h1 - the said Hankel functions
240 | c        c0p1, c0p1b: coefficients for h0, first intermediate regime
241 |         data c0p1/
242 |      1     -.6619836118357782D-12,  -.6619836118612709D-12,
243 |      2     -.7307514264754200D-21,  0.3928160926261892D-10,
244 |      3     0.5712712520172854D-09,  -.5712712519967086D-09,
245 |      4     -.1083820384008718D-07,  -.1894529309455499D-18,
246 |      5     0.7528123700585197D-07,  0.7528123700841491D-07,
247 |      6     0.1356544045548053D-16,  -.8147940452202855D-06,
248 |      7     -.3568198575016769D-05,  0.3568198574899888D-05,
249 |      8     0.2592083111345422D-04,  0.4209074870019400D-15,
250 |      9     -.7935843289157352D-04,  -.7935843289415642D-04,
251 |      a     -.6848330800445365D-14,  0.4136028298630129D-03,
252 |      1     0.9210433149997867D-03,  -.9210433149680665D-03,
253 |      2     -.3495306809056563D-02,  -.6469844672213905D-13,
254 |      3     0.5573890502766937D-02,  0.5573890503000873D-02,
255 |      4     0.3767341857978150D-12,  -.1439178509436339D-01,
256 |      5     -.1342403524448708D-01,  0.1342403524340215D-01,
257 |      6     0.8733016209933828D-02,  0.1400653553627576D-11,
258 |      7     0.2987361261932706D-01,  0.2987361261607835D-01/
259 |         data c0p1b/
260 |      8     -.3388096836339433D-11,  -.1690673895793793D+00,
261 |      9     0.2838366762606121D+00,  -.2838366762542546D+00,
262 |      a     0.7045107746587499D+00,  -.5363893133864181D-11,
263 |      1     -.7788044738211666D+00,  -.7788044738130360D+00,
264 |      2     0.5524779104964783D-11,  0.1146003459721775D+01,
265 |      3     0.6930697486173089D+00,  -.6930697486240221D+00,
266 |      4     -.7218270272305891D+00,  0.3633022466839301D-11,
267 |      5     0.3280924142354455D+00,  0.3280924142319602D+00,
268 |      6     -.1472323059106612D-11,  -.2608421334424268D+00,
269 |      7     -.9031397649230536D-01,  0.9031397649339185D-01,
270 |      8     0.5401342784296321D-01,  -.3464095071668884D-12,
271 |      9     -.1377057052946721D-01,  -.1377057052927901D-01,
272 |      a     0.4273263742980154D-13,  0.5877224130705015D-02,
273 |      1     0.1022508471962664D-02,  -.1022508471978459D-02,
274 |      2     -.2789107903871137D-03,  0.2283984571396129D-14,
275 |      3     0.2799719727019427D-04,  0.2799719726970900D-04,
276 |      4     -.3371218242141487D-16,  -.3682310515545645D-05,
277 |      5     -.1191412910090512D-06,  0.1191412910113518D-06/
278 | c        c1p1, c1p1b: coefficients for h1, first intermediate regime
279 |         data c1p1/
280 | 
281 |      1     0.4428361927253983D-12,  -.4428361927153559D-12,
282 |      2     -.2575693161635231D-10,  -.2878656317479645D-21,
283 |      3     0.3658696304107867D-09,  0.3658696304188925D-09,
284 |      4     0.7463138750413651D-19,  -.6748894854135266D-08,
285 |      5     -.4530098210372099D-07,  0.4530098210271137D-07,
286 |      6     0.4698787882823243D-06,  0.5343848349451927D-17,
287 |      7     -.1948662942158171D-05,  -.1948662942204214D-05,
288 |      8     -.1658085463182409D-15,  0.1316906100496570D-04,
289 |      9     0.3645368564036497D-04,  -.3645368563934748D-04,
290 |      a     -.1633458547818390D-03,  -.2697770638600506D-14,
291 |      1     0.2816784976551660D-03,  0.2816784976676616D-03,
292 |      2     0.2548673351180060D-13,  -.6106478245116582D-03,
293 |      3     0.2054057459296899D-03,  -.2054057460218446D-03,
294 |      4     -.6254962367291260D-02,  0.1484073406594994D-12,
295 |      5     0.1952900562500057D-01,  0.1952900562457318D-01,
296 |      6     -.5517611343746895D-12,  -.8528074392467523D-01,
297 |      7     -.1495138141086974D+00,  0.1495138141099772D+00/
298 | c
299 |         data c1p1b/
300 |      8     0.4394907314508377D+00,  -.1334677126491326D-11,
301 |      9     -.1113740586940341D+01,  -.1113740586937837D+01,
302 |      a     0.2113005088866033D-11,  0.1170212831401968D+01,
303 |      1     0.1262152242318805D+01,  -.1262152242322008D+01,
304 |      2     -.1557810619605511D+01,  0.2176383208521897D-11,
305 |      3     0.8560741701626648D+00,  0.8560741701600203D+00,
306 |      4     -.1431161194996653D-11,  -.8386735092525187D+00,
307 |      5     -.3651819176599290D+00,  0.3651819176613019D+00,
308 |      6     0.2811692367666517D+00,  -.5799941348040361D-12,
309 |      7     -.9494630182937280D-01,  -.9494630182894480D-01,
310 |      8     0.1364615527772751D-12,  0.5564896498129176D-01,
311 |      9     0.1395239688792536D-01,  -.1395239688799950D-01,
312 |      a     -.5871314703753967D-02,  0.1683372473682212D-13,
313 |      1     0.1009157100083457D-02,  0.1009157100077235D-02,
314 |      2     -.8997331160162008D-15,  -.2723724213360371D-03,
315 |      3     -.2708696587599713D-04,  0.2708696587618830D-04,
316 |      4     0.3533092798326666D-05,  -.1328028586935163D-16,
317 |      5     -.1134616446885126D-06,  -.1134616446876064D-06/
318 | c        c0p2, c0p2b: coefficients for h0, second intermediate regime
319 |         data c0p2/
320 |      1     0.5641895835516786D+00,  -.5641895835516010D+00,
321 |      2     -.3902447089770041D-09,  -.3334441074447365D-11,
322 |      3     -.7052368835911731D-01,  -.7052368821797083D-01,
323 |      4     0.1957299315085370D-08,  -.3126801711815631D-06,
324 |      5     -.3967331737107949D-01,  0.3967327747706934D-01,
325 |      6     0.6902866639752817D-04,  0.3178420816292497D-06,
326 |      7     0.4080457166061280D-01,  0.4080045784614144D-01,
327 |      8     -.2218731025620065D-04,  0.6518438331871517D-02,
328 |      9     0.9798339748600499D-01,  -.9778028374972253D-01,
329 |      a     -.3151825524811773D+00,  -.7995603166188139D-03,
330 |      1     0.1111323666639636D+01,  0.1116791178994330D+01,
331 |      2     0.1635711249533488D-01,  -.8527067497983841D+01,
332 |      3     -.2595553689471247D+02,  0.2586942834408207D+02,
333 |      4     0.1345583522428299D+03,  0.2002017907999571D+00,
334 |      5     -.3086364384881525D+03,  -.3094609382885628D+03,
335 |      6     -.1505974589617013D+01,  0.1250150715797207D+04,
336 |      7     0.2205210257679573D+04,  -.2200328091885836D+04/
337 |         data c0p2b/
338 |      8     -.6724941072552172D+04,  -.7018887749450317D+01,
339 |      9     0.8873498980910335D+04,  0.8891369384353965D+04,
340 |      a     0.2008805099643591D+02,  -.2030681426035686D+05,
341 |      1     -.2010017782384992D+05,  0.2006046282661137D+05,
342 |      2     0.3427941581102808D+05,  0.3432892927181724D+02,
343 |      3     -.2511417407338804D+05,  -.2516567363193558D+05,
344 |      4     -.3318253740485142D+02,  0.3143940826027085D+05,
345 |      5     0.1658466564673543D+05,  -.1654843151976437D+05,
346 |      6     -.1446345041326510D+05,  -.1645433213663233D+02,
347 |      7     0.5094709396573681D+04,  0.5106816671258367D+04,
348 |      8     0.3470692471612145D+01,  -.2797902324245621D+04,
349 |      9     -.5615581955514127D+03,  0.5601021281020627D+03,
350 |      a     0.1463856702925587D+03,  0.1990076422327786D+00,
351 |      1     -.9334741618922085D+01,  -.9361368967669095D+01/
352 | c        c1p2, c1p2b: coefficients for h1, second intermediate regime
353 |         data c1p2/
354 |      1     -.5641895835446003D+00,  -.5641895835437973D+00,
355 |      2     0.3473016376419171D-10,  -.3710264617214559D-09,
356 |      3     0.2115710836381847D+00,  -.2115710851180242D+00,
357 |      4     0.3132928887334847D-06,  0.2064187785625558D-07,
358 |      5     -.6611954881267806D-01,  -.6611997176900310D-01,
359 |      6     -.3386004893181560D-05,  0.7146557892862998D-04,
360 |      7     -.5728505088320786D-01,  0.5732906930408979D-01,
361 |      8     -.6884187195973806D-02,  -.2383737409286457D-03,
362 |      9     0.1170452203794729D+00,  0.1192356405185651D+00,
363 |      a     0.8652871239920498D-02,  -.3366165876561572D+00,
364 |      1     -.1203989383538728D+01,  0.1144625888281483D+01,
365 |      2     0.9153684260534125D+01,  0.1781426600949249D+00,
366 |      3     -.2740411284066946D+02,  -.2834461441294877D+02,
367 |      4     -.2192611071606340D+01,  0.1445470231392735D+03,
368 |      5     0.3361116314072906D+03,  -.3270584743216529D+03,
369 |      6     -.1339254798224146D+04,  -.1657618537130453D+02,
370 |      7     0.2327097844591252D+04,  0.2380960024514808D+04/
371 |         data c1p2b/
372 |      8     0.7760611776965994D+02,  -.7162513471480693D+04,
373 |      9     -.9520608696419367D+04,  0.9322604506839242D+04,
374 |      a     0.2144033447577134D+05,  0.2230232555182369D+03,
375 |      1     -.2087584364240919D+05,  -.2131762020653283D+05,
376 |      2     -.3825699231499171D+03,  0.3582976792594737D+05,
377 |      3     0.2642632405857713D+05,  -.2585137938787267D+05,
378 |      4     -.3251446505037506D+05,  -.3710875194432116D+03,
379 |      5     0.1683805377643986D+05,  0.1724393921722052D+05,
380 |      6     0.1846128226280221D+03,  -.1479735877145448D+05,
381 |      7     -.5258288893282565D+04,  0.5122237462705988D+04,
382 |      8     0.2831540486197358D+04,  0.3905972651440027D+02,
383 |      9     -.5562781548969544D+03,  -.5726891190727206D+03,
384 |      a     -.2246192560136119D+01,  0.1465347141877978D+03,
385 |      1     0.9456733342595993D+01,  -.9155767836700837D+01/
386 | c
387 | c        if the user-specified z is in the lower half-plane
388 | c        - bomb out
389 | c        (rea is equivalenced to com, so rea(2) is the imaginary part)
390 |         ier=0
391 |         com=z
392 |         if(rea(2) .ge. 0) goto 1200
393 |         ier=4
394 |         return
395 |  1200 continue
396 | c       thresh1..3 are squared radii: d below is z*dconjg(z)=|z|**2
397 |         done=1
398 |         thresh1=1**2
399 |         thresh2=3.7**2
400 |         thresh3=20**2
401 | c
402 | c       check if the user-specified z is in one of the
403 | c       intermediate regimes
404 | c
405 |         d=z*dconjg(z)
406 |         if( (d .lt. thresh1) .or. (d .gt. thresh3) ) goto 3000
407 | c
408 | c        the user-specified z is in one of the intermediate regimes.
409 | c        act accordingly
410 | c
411 | c
412 |         if(d .gt. thresh2) goto 2000
413 | c
414 | c       z is in the first intermediate regime: its absolute value is
415 | c       between 1 and 3.7. act accordingly
416 | c       . . . evaluate the expansion: the equivalences make c0p1,c0p1b
417 | c       (and c1p1,c1p1b) contiguous, i.e. 70 reals that hank103p reads
418 | c       as m=35 complex coefficients of a polynomial in 1/cdsqrt(z)
419 |         cd=done/cdsqrt(z)
420 | c
421 |         ccex=cd
422 |         if(ifexpon .eq. 1) ccex=ccex*cdexp(ima*z)
423 | c       the polynomial values are multiplied by z**9 and by ccex
424 |         zzz9=z**9
425 |         m=35
426 |         call hank103p(c0p1,m,cd,h0)
427 |         h0=h0*ccex * zzz9
428 | c
429 |         call hank103p(c1p1,m,cd,h1)
430 |         h1=h1*ccex * zzz9
431 |         return
432 |  2000 continue
433 | c
434 | c       z is in the second intermediate regime: its absolute value is
435 | c       between 3.7 and 20. act accordingly.
436 | c       (c0p2,c0p2b and c1p2,c1p2b hold 62 reals = 31 complex each)
437 |         cd=done/cdsqrt(z)
438 | c
439 |         ccex=cd
440 |         if(ifexpon .eq. 1) ccex=ccex*cdexp(ima*z)
441 | 
442 |         m=31
443 |         call hank103p(c0p2,m,cd,h0)
444 |         h0=h0*ccex
445 | c
446 |         m=31
447 |         call hank103p(c1p2,m,cd,h1)
448 |         h1=h1*ccex
449 |         return
450 |  3000 continue
451 | c
452 | c        z is either in the local regime or the asymptotic one.
453 | c        if it is in the local regime - act accordingly.
454 | c        (here d .lt. thresh1 or d .gt. thresh3, see the test above)
455 |         if(d .gt. 50.d0) goto 4000
456 |         call hank103l(z,h0,h1,ifexpon)
457 |         return
458 | c
459 | c        z is in the asymptotic regime. act accordingly.
460 | c
461 |  4000 continue
462 |         call hank103a(z,h0,h1,ifexpon)
463 |         return
464 |         end
465 | c
466 | c
467 | c
468 | c
469 |         subroutine hank103p(p,m,z,f)
470 |         implicit real *8 (a-h,o-z)
471 |         complex *16 p(1),z,f
472 | c       Horner evaluation: f = p(1)+p(2)*z+...+p(m)*z**(m-1)
473 | c       (k is the coefficient index, stepping from m-1 down to 1)
474 |         f=p(m)
475 |         k=m
476 |         do 1400 j=2,m
477 |         k=k-1
478 |         f=f*z+p(k)
479 |  1400 continue
480 |         end
481 | 
482 | 
483 | 
484 | 
485 | c
486 | c
487 | c
488 | c
489 | c
490 |         subroutine hank103a(z,h0,h1,ifexpon)
491 |         implicit real *8 (a-h,o-z)
492 |         dimension p(18),q(18),p1(18),q1(18),rea(2)
493 |         complex *16 z,zinv,pp,qq,ima,h0,h1,pp1,qq1,
494 |      1      com,cccexp,cdd,cdumb,zinv22
495 |         equivalence (rea(1),com)
496 |         data ima/(0.0d0,1.0d0)/,pi/0.31415926535897932D+01/,
497 |      1      done/1.0d0/,cdumb/
498 |      2      (0.70710678118654757D+00,-.70710678118654746D+00)/
499 | c        input: z, ifexpon (1: include cdexp(ima*z), else omit it)
500 |          data p/
501 |      1     0.1000000000000000D+01,  -.7031250000000000D-01,
502 |      2     0.1121520996093750D+00,  -.5725014209747314D+00,
503 |      3     0.6074042001273483D+01,  -.1100171402692467D+03,
504 |      4     0.3038090510922384D+04,  -.1188384262567833D+06,
505 |      5     0.6252951493434797D+07,  -.4259392165047669D+09,
506 |      6     0.3646840080706556D+11,  -.3833534661393944D+13,
507 |      7     0.4854014686852901D+15,  -.7286857349377657D+17,
508 |      8     0.1279721941975975D+20,  -.2599382102726235D+22,
509 |      9     0.6046711487532401D+24,  -.1597065525294211D+27/
510 | c        output: h0 (from tables p, q) and h1 (from tables p1, q1)
511 |          data q/
512 |      1     -.1250000000000000D+00,  0.7324218750000000D-01,
513 |      2     -.2271080017089844D+00,  0.1727727502584457D+01,
514 |      3     -.2438052969955606D+02,  0.5513358961220206D+03,
515 |      4     -.1825775547429317D+05,  0.8328593040162893D+06,
516 |      5     -.5006958953198893D+08,  0.3836255180230434D+10,
517 |      6     -.3649010818849834D+12,  0.4218971570284096D+14,
518 |      7     -.5827244631566907D+16,  0.9476288099260110D+18,
519 |      8     -.1792162323051699D+21,  0.3900121292034000D+23,
520 |      9     -.9677028801069847D+25,  0.2715581773544907D+28/
521 | 
522 |          data p1/
523 |      1     0.1000000000000000D+01,  0.1171875000000000D+00,
524 |      2     -.1441955566406250D+00,  0.6765925884246826D+00,
525 |      3     -.6883914268109947D+01,  0.1215978918765359D+03,
526 |      4     -.3302272294480852D+04,  0.1276412726461746D+06,
527 |      5     -.6656367718817687D+07,  0.4502786003050393D+09,
528 |      6     -.3833857520742789D+11,  0.4011838599133198D+13,
529 |      7     -.5060568503314726D+15,  0.7572616461117957D+17,
530 |      8     -.1326257285320556D+20,  0.2687496750276277D+22,
531 |      9     -.6238670582374700D+24,  0.1644739123064188D+27/
532 | c
533 |          data q1/
534 |      1     0.3750000000000000D+00,  -.1025390625000000D+00,
535 |      2     0.2775764465332031D+00,  -.1993531733751297D+01,
536 |      3     0.2724882731126854D+02,  -.6038440767050702D+03,
537 |      4     0.1971837591223663D+05,  -.8902978767070679D+06,
538 |      5     0.5310411010968522D+08,  -.4043620325107754D+10,
539 |      6     0.3827011346598606D+12,  -.4406481417852279D+14,
540 |      7     0.6065091351222699D+16,  -.9833883876590680D+18,
541 |      8     0.1855045211579829D+21,  -.4027994121281017D+23,
542 |      9     0.9974783533410457D+25,  -.2794294288720121D+28/
543 | c
544 | c        evaluate the asymptotic expansion for h0,h1 at
545 | c        the user-supplied point z, provided it is not
546 | c        in the fourth quadrant
547 | c        (only the first m=10 of the 18 tabulated terms are used)
548 |         m=10
549 |         zinv=done/z
550 | c
551 |         pp=p(m)
552 |         pp1=p1(m)
553 |         zinv22=zinv**2
554 | c
555 |         qq=q(m)
556 |         qq1=q1(m)
557 | c        Horner evaluation of the four sums in powers of zinv**2
558 |         do 1600 i=m-1,1,-1
559 | 
560 |         pp=pp* zinv22+p(i)
561 |         pp1=pp1* zinv22+p1(i)
562 | 
563 |         qq=qq* zinv22+q(i)
564 |         qq1=qq1* zinv22+q1(i)
565 |  1600 continue
566 | c        the q sums carry one more factor of zinv
567 |         qq=qq*zinv
568 |         qq1=qq1*zinv
569 | c        cccexp is cdexp(ima*z) if ifexpon=1 and 1 otherwise
570 |         cccexp=1
571 |         if(ifexpon .eq. 1) cccexp=cdexp(ima*z)
572 | c        cdd=cdsqrt(2/(pi*z)); cdumb is (1-i)/sqrt(2)
573 |         cdd=cdsqrt(2/pi*zinv)
574 | c
575 |         h0=pp+ima*qq
576 |         h0=cdd*cdumb*cccexp * h0
577 | c
578 |         h1=pp1+ima*qq1
579 |         h1=-cdd*cccexp*cdumb* h1*ima
580 | c
581 |         return
582 |         end
583 | c
584 | c
585 | c
586 | c
587 | c
588 |         subroutine hank103l(z,h0,h1,ifexpon)
589 |         implicit real *8 (a-h,o-z)
590 |         dimension cj0(16),cj1(16),ser2(16),ser2der(16)
591 |         complex *16 z,fj0,fj1,y0,y1,h0,h1,z2,cd,ima,cdddlog
592 | c
593 |         data gamma/0.5772156649015328606d+00/
594 |         data ima/(0.0d0,1.0d0)/,pi/0.31415926535897932D+01/,
595 |      1      two/2.0d0/
596 | c
597 | c        this subroutine evaluates the hankel functions H_0^1, H_1^1
598 | c        for a user-specified complex number z in the local regime,
599 | c        i. e. for cdabs(z) < 1 in the upper half-plane,
600 | c        and for cdabs(z) < 4 in the lower half-plane,
601 | c        it is reasonably accurate (14-digit relative accuracy) and
602 | c        reasonably fast.
603 | c
604 | c                      input parameters:
605 | c
606 | c  z - the complex number for which the hankel functions
607 | c        H_0, H_1 are to be evaluated
608 | c  ifexpon - if not 1, h0 and h1 are multiplied by exp(-i*z)
609 | c                      output parameters:
610 | c
611 | c  h0, h1 - the said Hankel functions
612 | c
613 |         data cj0/
614 |      1     0.1000000000000000D+01,  -.2500000000000000D+00,
615 |      2     0.1562500000000000D-01,  -.4340277777777778D-03,
616 |      3     0.6781684027777778D-05,  -.6781684027777778D-07,
617 |      4     0.4709502797067901D-09,  -.2402807549524439D-11,
618 |      5     0.9385966990329841D-14,  -.2896903392077112D-16,
619 |      6     0.7242258480192779D-19,  -.1496334396734045D-21,
620 |      7     0.2597802772107717D-24,  -.3842903509035085D-27,
621 |      8     0.4901662639075363D-30,  -.5446291821194848D-33/
622 |         data cj1/
623 |      1     -.5000000000000000D+00,  0.6250000000000000D-01,
624 |      2     -.2604166666666667D-02,  0.5425347222222222D-04,
625 |      3     -.6781684027777778D-06,  0.5651403356481481D-08,
626 |      4     -.3363930569334215D-10,  0.1501754718452775D-12,
627 |      5     -.5214426105738801D-15,  0.1448451696038556D-17,
628 |      6     -.3291935672814899D-20,  0.6234726653058522D-23,
629 |      7     -.9991549123491221D-26,  0.1372465538941102D-28,
630 |      8     -.1633887546358454D-31,  0.1701966194123390D-34/
631 |         data ser2/
632 |      1     0.2500000000000000D+00,  -.2343750000000000D-01,
633 |      2     0.7957175925925926D-03,  -.1412850839120370D-04,
634 |      3     0.1548484519675926D-06,  -.1153828185281636D-08,
635 |      4     0.6230136717695511D-11,  -.2550971742728932D-13,
636 |      5     0.8195247730999099D-16,  -.2121234517551702D-18,
637 |      6     0.4518746345057852D-21,  -.8061529302289970D-24,
638 |      7     0.1222094716680443D-26,  -.1593806157473552D-29,
639 |      8     0.1807204342667468D-32,  -.1798089518115172D-35/
640 |         data ser2der/
641 |      1     0.5000000000000000D+00,  -.9375000000000000D-01,
642 |      2     0.4774305555555556D-02,  -.1130280671296296D-03,
643 |      3     0.1548484519675926D-05,  -.1384593822337963D-07,
644 |      4     0.8722191404773715D-10,  -.4081554788366291D-12,
645 |      5     0.1475144591579838D-14,  -.4242469035103405D-17,
646 |      6     0.9941241959127275D-20,  -.1934767032549593D-22,
647 |      7     0.3177446263369152D-25,  -.4462657240925946D-28,
648 |      8     0.5421613028002404D-31,  -.5753886457968550D-34/
649 | c
650 | c        evaluate j0, j1
651 | c
652 |         m=16
653 |         fj0=0
654 |         fj1=0
655 |         y0=0
656 |         y1=0
657 |         z2=z**2
658 |         cd=1
659 | c        sum the series; cd steps through the powers of z**2
660 |         do 1800 i=1,m
661 |         fj0=fj0+cj0(i)*cd
662 |         fj1=fj1+cj1(i)*cd
663 |         y1=y1+ser2der(i)*cd
664 |         cd=cd*z2
665 |         y0=y0+ser2(i)*cd
666 |  1800 continue
667 |         fj1=-fj1*z
668 | c        add the logarithmic parts to complete y0 and y1
669 |         cdddlog=cdlog(z/two)+gamma
670 |         y0=cdddlog*fj0+y0
671 |         y0=two/pi*y0
672 | c
673 |         y1=y1*z
674 | c
675 |         y1=-cdddlog*fj1+fj0/z+y1
676 |         y1=-y1*two/pi
677 | c        assemble h0=fj0+i*y0 and h1=fj1+i*y1
678 |         h0=fj0+ima*y0
679 |         h1=fj1+ima*y1
680 | c
681 |         if(ifexpon .eq. 1) return
682 | c        ifexpon is not 1: scale both results by exp(-i*z)
683 |         cd=exp(-ima*z)
684 |         h0=h0*cd
685 |         h1=h1*cd
686 | c
687 |         return
688 |         end
689 | c
690 | c
691 | c
692 | c
693 | c
694 |         subroutine hank103r(z,ier,h0,h1,ifexpon)
695 |         implicit real *8 (a-h,o-z)
696 |         complex *16 z,com,ima,cd,h0,h1,cccexp,cdd,zz18
697 |         dimension rea(2)
698 |         real *8 c0p1(34),c0p1b(36),buf01(2)
699 |         equivalence (c0p1(34),buf01(1)),
700 |      1      (c0p1b(1),buf01(2)),(rea(1),com)
701 |         real *8 c1p1(34),c1p1b(36),buf11(2)
702 |         equivalence (c1p1(34),buf11(1)),
703 |      1      (c1p1b(1),buf11(2))
704 |         real *8 c0p2(34),c0p2b(20),buf02(2)
705 |         equivalence (c0p2(34),buf02(1)),
706 |      1      (c0p2b(1),buf02(2))
707 |         real *8 c1p2(34),c1p2b(28),buf12(2)
708 |         equivalence (c1p2(34),buf12(1)),
709 |      1      (c1p2b(1),buf12(2))
710 |         data ima/(0.0d0,1.0d0)/
711 | c        the equivalences make c0p1b continue c0p1, and so on
712 | c        this subroutine evaluates the hankel functions H_0^1, H_1^1
713 | c        for a user-specified complex number z in the right lower
714 | c        quadrant. it is reasonably accurate (14-digit relative
715 | c        accuracy) and reasonably fast.
716 | c
717 | c
718 | c                      input parameters:
719 | c
720 | c  z - the complex number for which the hankel functions
721 | c        H_0, H_1 are to be evaluated
722 | c  ifexpon - if 1, h0 and h1 include the factor exp(i*z)
723 | c                      output parameters:
724 | c
725 | c  ier - error return code.
726 | c         ier=0 means successful conclusion
727 | c         ier=4 means that z is not in the right lower quadrant
728 | c  h0, h1 - the said Hankel functions
729 | c
730 |         data c0p1/
731 |      1     -.4268441995428495D-23,  0.4374027848105921D-23,
732 |      2     0.9876152216238049D-23,  -.1065264808278614D-20,
733 |      3     0.6240598085551175D-19,  0.6658529985490110D-19,
734 |      4     -.5107210870050163D-17,  -.2931746613593983D-18,
735 |      5     0.1611018217758854D-15,  -.1359809022054077D-15,
736 |      6     -.7718746693707326D-15,  0.6759496139812828D-14,
737 |      7     -.1067620915195442D-12,  -.1434699000145826D-12,
738 |      8     0.3868453040754264D-11,  0.7061853392585180D-12,
739 |      9     -.6220133527871203D-10,  0.3957226744337817D-10,
740 |      a     0.3080863675628417D-09,  -.1154618431281900D-08,
741 |      1     0.7793319486868695D-08,  0.1502570745460228D-07,
742 |      2     -.1978090852638430D-06,  -.7396691873499030D-07,
743 |      3     0.2175857247417038D-05,  -.8473534855334919D-06,
744 |      4     -.1053381327609720D-04,  0.2042555121261223D-04,
745 |      5     -.4812568848956982D-04,  -.1961519090873697D-03,
746 |      6     0.1291714391689374D-02,  0.9234422384950050D-03,
747 |      7     -.1113890671502769D-01,  0.9053687375483149D-03/
748 |         data c0p1b/
749 |      8     0.5030666896877862D-01,  -.4923119348218356D-01,
750 |      9     0.5202355973926321D+00,  -.1705244841954454D+00,
751 |      a     -.1134990486611273D+01,  -.1747542851820576D+01,
752 |      1     0.8308174484970718D+01,  0.2952358687641577D+01,
753 |      2     -.3286074510100263D+02,  0.1126542966971545D+02,
754 |      3     0.6576015458463394D+02,  -.1006116996293757D+03,
755 |      4     0.3216834899377392D+02,  0.3614005342307463D+03,
756 |      5     -.6653878500833375D+03,  -.6883582242804924D+03,
757 |      6     0.2193362007156572D+04,  0.2423724600546293D+03,
758 |      7     -.3665925878308203D+04,  0.2474933189642588D+04,
759 |      8     0.1987663383445796D+04,  -.7382586600895061D+04,
760 |      9     0.4991253411017503D+04,  0.1008505017740918D+05,
761 |      a     -.1285284928905621D+05,  -.5153674821668470D+04,
762 |      1     0.1301656757246985D+05,  -.4821250366504323D+04,
763 |      2     -.4982112643422311D+04,  0.9694070195648748D+04,
764 |      3     -.1685723189234701D+04,  -.6065143678129265D+04,
765 |      4     0.2029510635584355D+04,  0.1244402339119502D+04,
766 |      5     -.4336682903961364D+03,  0.8923209875101459D+02/
767 | c
768 |         data c1p1/
769 |      1     -.4019450270734195D-23,  -.4819240943285824D-23,
770 |      2     0.1087220822839791D-20,  0.1219058342725899D-21,
771 |      3     -.7458149572694168D-19,  0.5677825613414602D-19,
772 |      4     0.8351815799518541D-18,  -.5188585543982425D-17,
773 |      5     0.1221075065755962D-15,  0.1789261470637227D-15,
774 |      6     -.6829972121890858D-14,  -.1497462301804588D-14,
775 |      7     0.1579028042950957D-12,  -.9414960303758800D-13,
776 |      8     -.1127570848999746D-11,  0.3883137940932639D-11,
777 |      9     -.3397569083776586D-10,  -.6779059427459179D-10,
778 |      a     0.1149529442506273D-08,  0.4363087909873751D-09,
779 |      1     -.1620182360840298D-07,  0.6404695607668289D-08,
780 |      2     0.9651461037419628D-07,  -.1948572160668177D-06,
781 |      3     0.6397881896749446D-06,  0.2318661930507743D-05,
782 |      4     -.1983192412396578D-04,  -.1294811208715315D-04,
783 |      5     0.2062663873080766D-03,  -.2867633324735777D-04,
784 |      6     -.1084309075952914D-02,  0.1227880935969686D-02,
785 |      7     0.2538406015667726D-03,  -.1153316815955356D-01/
786 | c
787 |         data c1p1b/
788 |      8     0.4520140008266983D-01,  0.5693944718258218D-01,
789 |      9     -.9640790976658534D+00,  -.6517135574036008D+00,
790 |      a     0.2051491829570049D+01,  -.1124151010077572D+01,
791 |      1     -.3977380460328048D+01,  0.8200665483661009D+01,
792 |      2     -.7950131652215817D+01,  -.3503037697046647D+02,
793 |      3     0.9607320812492044D+02,  0.7894079689858070D+02,
794 |      4     -.3749002890488298D+03,  -.8153831134140778D+01,
795 |      5     0.7824282518763973D+03,  -.6035276543352174D+03,
796 |      6     -.5004685759675768D+03,  0.2219009060854551D+04,
797 |      7     -.2111301101664672D+04,  -.4035632271617418D+04,
798 |      8     0.7319737262526823D+04,  0.2878734389521922D+04,
799 |      9     -.1087404934318719D+05,  0.3945740567322783D+04,
800 |      a     0.6727823761148537D+04,  -.1253555346597302D+05,
801 |      1     0.3440468371829973D+04,  0.1383240926370073D+05,
802 |      2     -.9324927373036743D+04,  -.6181580304530313D+04,
803 |      3     0.6376198146666679D+04,  -.1033615527971958D+04,
804 |      4     -.1497604891055181D+04,  0.1929025541588262D+04,
805 |      5     -.4219760183545219D+02,  -.4521162915353207D+03/
806 | c
807 |         data c0p2/
808 |      1     0.5641895835569398D+00,  -.5641895835321127D+00,
809 |      2     -.7052370223565544D-01,  -.7052369923405479D-01,
810 |      3     -.3966909368581382D-01,  0.3966934297088857D-01,
811 |      4     0.4130698137268744D-01,  0.4136196771522681D-01,
812 |      5     0.6240742346896508D-01,  -.6553556513852438D-01,
813 |      6     -.3258849904760676D-01,  -.7998036854222177D-01,
814 |      7     -.3988006311955270D+01,  0.1327373751674479D+01,
815 |      8     0.6121789346915312D+02,  -.9251865216627577D+02,
816 |      9     0.4247064992018806D+03,  0.2692553333489150D+04,
817 |      a     -.4374691601489926D+05,  -.3625248208112831D+05,
818 |      1     0.1010975818048476D+07,  -.2859360062580096D+05,
819 |      2     -.1138970241206912D+08,  0.1051097979526042D+08,
820 |      3     0.2284038899211195D+08,  -.2038012515235694D+09,
821 |      4     0.1325194353842857D+10,  0.1937443530361381D+10,
822 |      5     -.2245999018652171D+11,  -.5998903865344352D+10,
823 |      6     0.1793237054876609D+12,  -.8625159882306147D+11,
824 |      7     -.5887763042735203D+12,  0.1345331284205280D+13/
825 | c
826 |         data c0p2b/
827 |      8     -.2743432269370813D+13,  -.8894942160272255D+13,
828 |      9     0.4276463113794564D+14,  0.2665019886647781D+14,
829 |      a     -.2280727423955498D+15,  0.3686908790553973D+14,
830 |      1     0.5639846318168615D+15,  -.6841529051615703D+15,
831 |      2     0.9901426799966038D+14,  0.2798406605978152D+16,
832 |      3     -.4910062244008171D+16,  -.5126937967581805D+16,
833 |      4     0.1387292951936756D+17,  0.1043295727224325D+16,
834 |      5     -.1565204120687265D+17,  0.1215262806973577D+17,
835 |      6     0.3133802397107054D+16,  -.1801394550807078D+17,
836 |      7     0.4427598668012807D+16,  0.6923499968336864D+16/
837 | c
838 | c
839 |         data c1p2/
840 |      1     -.5641895835431980D+00,  -.5641895835508094D+00,
841 |      2     0.2115710934750869D+00,  -.2115710923186134D+00,
842 |      3     -.6611607335011594D-01,  -.6611615414079688D-01,
843 |      4     -.5783289433408652D-01,  0.5785737744023628D-01,
844 |      5     0.8018419623822896D-01,  0.8189816020440689D-01,
845 |      6     0.1821045296781145D+00,  -.2179738973008740D+00,
846 |      7     0.5544705668143094D+00,  0.2224466316444440D+01,
847 |      8     -.8563271248520645D+02,  -.4394325758429441D+02,
848 |      9     0.2720627547071340D+04,  -.6705390850875292D+03,
849 |      a     -.3936221960600770D+05,  0.5791730432605451D+05,
850 |      1     -.1976787738827811D+06,  -.1502498631245144D+07,
851 |      2     0.2155317823990686D+08,  0.1870953796705298D+08,
852 |      3     -.4703995711098311D+09,  0.3716595906453190D+07,
853 |      4     0.5080557859012385D+10,  -.4534199223888966D+10,
854 |      5     -.1064438211647413D+11,  0.8612243893745942D+11,
855 |      6     -.5466017687785078D+12,  -.8070950386640701D+12,
856 |      7     0.9337074941225827D+13,  0.2458379240643264D+13/
857 | c
858 |         data c1p2b/
859 |      8     -.7548692171244579D+14,  0.3751093169954336D+14,
860 |      9     0.2460677431350039D+15,  -.5991919372881911D+15,
861 |      a     0.1425679408434606D+16,  0.4132221939781502D+16,
862 |      1     -.2247506469468969D+17,  -.1269771078165026D+17,
863 |      2     0.1297336292749026D+18,  -.2802626909791308D+17,
864 |      3     -.3467137222813017D+18,  0.4773955215582192D+18,
865 |      4     -.2347165776580206D+18,  -.2233638097535785D+19,
866 |      5     0.5382350866778548D+19,  0.4820328886922998D+19,
867 |      6     -.1928978948099345D+20,  0.1575498747750907D+18,
868 |      7     0.3049162180215152D+20,  -.2837046201123502D+20,
869 |      8     -.5429391644354291D+19,  0.6974653380104308D+20,
870 |      9     -.5322120857794536D+20,  -.6739879079691706D+20,
871 |      a     0.6780343087166473D+20,  0.1053455984204666D+20,
872 |      1     -.2218784058435737D+20,  0.1505391868530062D+20/
873 | c
874 | c        if z is not in the right lower quadrant - bomb out
875 | c
876 |         ier=0
877 |         com=z
878 |         if( (rea(1) .ge. 0) .and. (rea(2) .le. 0) ) goto 1400
879 |         ier=4
880 |         return
881 |  1400 continue
882 | c
883 |         done=1
884 |         thresh1=4**2
885 |         thresh2=8**2
886 |         thresh3=20**2
887 | c
888 | c       check if the user-specified z is in one of the
889 | c       intermediate regimes
890 | c
891 |         d=z*dconjg(z)
892 |         if( (d .lt. thresh1) .or. (d .gt. thresh3) ) goto 3000
893 | c
894 | c        if the user-specified z is in the first intermediate regime
895 | c        (i.e. if its absolute value is between 4 and 8), act accordingly
896 | c
897 |         if(d .gt. thresh2) goto 2000
898 | c        scale the hank103p output by z**18/sqrt(z) and cccexp
899 |         cccexp=1
900 |         if(ifexpon .eq. 1) cccexp=cdexp(ima*z)
901 |         cdd=done/cdsqrt(z)
902 |         cd=done/z
903 |         zz18=z**18
904 |         m=35
905 |         call hank103p(c0p1,m,cd,h0)
906 |         h0=h0*cdd*cccexp*zz18
907 | c
908 |         call hank103p(c1p1,m,cd,h1)
909 |         h1=h1*cdd*cccexp*zz18
910 |         return
911 |  2000 continue
912 | c
913 | c       z is in the second intermediate regime (i.e. its
914 | c       absolute value is between 8 and 20). act accordingly.
915 | c
916 |         cd=done/z
917 |         cdd=sqrt(cd)
918 | 
919 |         cccexp=1
920 |         if(ifexpon .eq. 1) cccexp=cdexp(ima*z)
921 | 
922 |         m=27
923 | c
924 |         call hank103p(c0p2,m,cd,h0)
925 |         h0=h0*cccexp*cdd
926 | c
927 |         m=31
928 |         call hank103p(c1p2,m,cd,h1)
929 |         h1=h1*cccexp*cdd
930 |         return
931 |  3000 continue
932 | c
933 | c
934 | c        z is either in the local regime or the asymptotic one.
935 | c        if it is in the local regime - act accordingly.
936 | c
937 |         if(d .gt. 50.d0) goto 4000
938 |         call hank103l(z,h0,h1,ifexpon)
939 |         return
940 | c
941 | c        z is in the asymptotic regime. act accordingly.
942 | c
943 |  4000 continue
944 |         call hank103a(z,h0,h1,ifexpon)
945 |         return
946 |         end
947 | 


--------------------------------------------------------------------------------
/src/hank106.f:
--------------------------------------------------------------------------------
  1 | cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
  2 | c
  3 | c        This is the end of the debugging code, and the beginning of
  4 | c        the Hankel function code proper
  5 | c
  6 | cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
  7 | c
  8 | c
  9 | ccc        subroutine hank106a(r,h0,h1,w)
 10 |         subroutine hank106a(r,h0,h1,w,ninterv)
 11 |         implicit real *8 (a-h,o-z)
 12 |         integer *4 ninterv
 13 | c       thin wrapper: forwards its arguments to entry hank106b
 14 | c       ninterv is number of intervals used in equispaced
 15 | c       subdivision of current interval
 16 | c
 17 |         complex *16 rk,h0,h1
 18 |         dimension w(1)
 19 |         save
 20 | c       w(11) is handed to hank106b as its integer array ijw
 21 | ccc        call hank106b(r,h0,h1,w,w(11) )
 22 |         call hank106b(r,h0,h1,w,w(11),ninterv)
 23 | c
 24 |         return
 25 |         end
 26 | c
 27 | c
 28 | c
 29 | c
 30 | c
 31 | ccc        subroutine hank106init(rk7,rmin,rmax,w,keep)
 32 |         subroutine hank106init(rk7,rmin,rmax,w,keep,ninterv)
 33 |         implicit real *8 (a-h,o-z)
 34 |         complex *16 rk,h0,h1,ima,z,rk7,u0,u07,com
 35 |         dimension w(1),rea(2),rws(10)
 36 |         integer ijw(1),iws(20)
 37 |         
 38 | c
 39 |         equivalence (rea(1),com),(rws(1),iws(1))
 40 |         save
 41 | c
 42 |         data ima/(0.0d0,1.0d0)/
 43 | c
 44 | c        This subroutine evaluates the Hankel functions 
 45 | c        H_0^1, H^1_1 of a complex argument, the argument 
 46 | c        living on a ray. The subroutine you are looking at is
 47 | c        the initialization subroutine; the evaluation 
 48 | c        subroutine is hank106 (see).
 49 | c
 50 | c
 51 | c         8/28/02 - added ninterv to calling sequence  (LG)
 52 | c
 53 | c
 54 | c   PLEASE NOTE THAT THE USE OF THIS SUBROUTINE IS NOT COMPLETELY
 55 | C   STRAIGHTFORWARD: A STRAIGHTFORWARD SUBROUTINE TO USE IS HANK103
 56 | C   (SEE). HOWEVER, THIS SUBROUTINE IS ABOUT 4 TIMES FASTER THAN 
 57 | C   HANK103.
 58 | c
 59 | c        Recommended pairs rmin,rmax (assuming that rk \sim 1):
 60 | c
 61 | c
 62 | c
 63 | c        rmin       rmax
 64 | c
 65 | c        0.062      0.125
 66 | c        0.031      0.062
 67 | c        0.062       0.125
 68 | c        0.125       0.25
 69 | c        0.25        0.5
 70 | c        0.5         1
 71 | c        1           2
 72 | c        2           5
 73 | c        5           10
 74 | c        10          100
 75 | c        10          200
 76 | c
 77 | c  GENERALLY, WITH ABS(RMIN*RK) > 10, THERE IS NO SHARP LIMIT ON
 78 | C  RMAX. HOWEVER, FOR SUFFICIENTLY LARGE (RMAX-RMIN)*RK, THE 
 79 | C  code loses speed due to caching problems. THE DETERIORATION
 80 | C  BECOMES NOTICEABLE AT SOME POINT AFTER |(RMAX-RMIN)*RK| > 100
 81 | C  (ON THE PENTIUM-IV DESKTOP).
 82 | C
 83 | c
 84 | c                   Input parameters:
 85 | c
 86 | c  rk7 - the Helmholtz coefficient
 87 | c  rmin - the minimum r for which this subroutine will evaluate 
 88 | c       the Hankel functions
 89 | c  rmax - the maximum r for which this subroutine will evaluate 
 90 | c       the Hankel functions
 91 | c
 92 | c                   Output parameters:
 93 | c
 94 | c  w - contains various data to be used by the entry hank106 (see)
 95 | c  keep - the first keep elements of the array w should not be 
 96 | c       changed between the call to this entry, and the subsequent
 97 | c       calls to the entry hank106.
 98 | c  ninterv - number of intervals used in equispaced
 99 | c       subdivision of current interval
100 | c
101 | c
102 |         rk=rk7
103 | c        perturb rk if the real part of rk/abs(rk) is below 1d-30
104 |         dfool=rk/abs(rk)
105 |         if(abs(dfool) .lt. 1.0d-30) rk=rk+2*1.0d-30*abs(rk)
106 | c        d is real: it receives the real part of rk
107 |         d=rk
108 | c
109 |         x1=d*rmin
110 |         x2=d*rmax
111 | c
112 | c        initialize the evaluation of H_0, H_1
113 | c
114 |         n=11
115 |         ddd=(abs(rmax*rk)-abs(rmin*rk))*2
116 |         i=ddd
117 | c        ninterv = int(ddd), but never fewer than 10
118 | 
119 | ccc        call prin2('ddd=*',ddd,1)
120 |         if(i .lt. 10) i=10
121 |         ninterv=i
122 | c
123 | ccc        call prinf('ninterv as calculated*',ninterv,1)
124 | c
125 |         d=rk
126 |         d2=-ima*rk
127 |         coef=d2/d
128 | c
129 | c       allocate memory for the initialization
130 | c
131 |         icenters=21
132 |         lcenters=ninterv*2+4
133 | c
134 |         ih0s=icenters+lcenters
135 |         lh0s=ninterv+4
136 |         lh0s=lh0s*2
137 | c
138 |         ih1s=ih0s+lh0s
139 |         lh1s=ninterv+4
140 |         lh1s=lh1s*2
141 | c
142 |         ih0derss=ih1s+lh1s
143 |         lh0derss=(ninterv*n+4)*2
144 | c
145 |         ih1derss=ih0derss+lh0derss
146 |         lh1derss=(ninterv*n+4)*2
147 | c
148 |         keep=ih1derss+lh1derss
149 | c
150 |         call hank106ini(coef,x1,x2,ninterv,n,
151 |      1      w(icenters),w(ih0derss),w(ih0s),w(ih1s),h,
152 |      2      w(ih1derss),u07)
153 | c
154 |         u0=1/u07
155 | c
156 | c       store in the beginning of the array w various types of data 
157 | c
158 |         ix1=1
159 |         ih=2
160 |         iu0=3
161 |         in=5
162 | c
163 |         irk=6
164 | c
165 |         w(ix1)=x1
166 |         w(ih)=h
167 |         w(ih)=1/h
168 | c        (the second store wins: w(ih) holds 1/h)
169 |         com=u0
170 |         w(iu0)=rea(1)
171 |         w(iu0+1)=rea(2)
172 | c
173 |         w(in)=n+0.1
174 | c
175 |         com=rk
176 | c
177 |         w(irk)=rea(1)
178 |         w(irk+1)=rea(2)
179 | c
180 | c        store integer data in the array w
181 | c
182 |         iws(1)=ih0derss
183 |         iws(2)=ih0s
184 |         iws(3)=ih1derss
185 |         iws(4)=ih1s
186 | c
187 |         do 3200 j=1,8
188 | c        rws shares storage with iws: copies the integers raw
189 |         w(10+j)=rws(j)
190 |  3200 continue
191 | c
192 |         return
193 | c
194 | c
195 | c
196 | c        entry below reuses the saved ix1,n,icenters,ih,iu0,irk
197 | ccc        entry hank106b(r,h0,h1,w,ijw)
198 |         entry hank106b(r,h0,h1,w,ijw,ninterv)
199 | c
200 |         call hank106eva(r,w(ix1),n,w(icenters),w(ijw(1)),
201 |      1      w(ijw(2)),w(ijw(4)),h0,h1,w(ih),w(ijw(3)),
202 | ccc     2      w(iu0),w(irk) )
203 |      2      w(iu0),w(irk),ninterv)
204 | c
205 |         return
206 |         end
207 | c
208 | c
209 | c
210 | c
211 | c
212 |         subroutine hank106eva(r,x1,n,centers,h0derss,
213 | ccc     1      h0s,h1s,h0,h1,h,h1derss,u0,rk)
214 |      1      h0s,h1s,h0,h1,h,h1derss,u0,rk,ninterv)
215 |         implicit real *8 (a-h,o-z)
216 |         complex *16 h0s(1),h0derss(n,1),h0,h1,
217 |      1      h1derss(n,1),h1s(1),u0,rk,zcom,centers(1),zh,z
218 | c
219 |         save
220 | c        evaluates h0,h1 at z=rk*r from the tables of one interval
221 | c   input:
222 | c   ninterv - number of intervals used in equispaced
223 | c       subdivision of current interval
224 | c   h - multiplies (z-x1) to give the zero-based subinterval index
225 | c-----------------------------------
226 | c
227 | c       find the subinterval where the point z lives
228 | c
229 |         z=rk*r
230 | c
231 | ccccc        d = (z-x1)*h
232 | ccc        if (d.lt.0) d = 1.0d-12
233 | ccccc       if (d.lt.0) d = 0
234 | ccccc        i = d+1
235 | c        clamp i to 1..ninterv (i=0 would index before centers)
236 |         i=(z-x1)*h +1
237 | ccc        call prinf(' i = *',i,1)
238 |         if (i.lt.1) then 
239 |             i = 1
240 |         else if (i.gt.ninterv) then 
241 |             i = ninterv
242 |         endif
243 | c
244 | c        evaluate the functions h0 and h1 at the point z
245 | c
246 |         zh=z-centers(i)
247 |         t=zh*u0
248 | ccc        call prin2(' zh = *',zh,2)
249 | ccc        call prin2(' t = *',t,1)
250 | c        degree-10 Horner form in the real parameter t
251 |         h0=(((((((((h0derss(10,i)*t+h0derss(9,i))*t+h0derss(8,i) ) 
252 |      1    * t+h0derss(7,i))*t+h0derss(6,i))*t+h0derss(5,i))*t 
253 |      2    +h0derss(4,i))*t+h0derss(3,i))*t+h0derss(2,i))
254 |      3    *t+h0derss(1,i) ) * t + h0s(i)
255 | c
256 |         h1=(((((((((h1derss(10,i)*t+h1derss(9,i))*t+h1derss(8,i) ) 
257 |      a    * t+h1derss(7,i))*t+h1derss(6,i))*t+h1derss(5,i))*t
258 |      2    +h1derss(4,i) ) *t + h1derss(3,i) )* t +h1derss(2,i))
259 |      3    *t+h1derss(1,i) ) * t +h1s(i)
260 | c
261 |         return
262 |         end
263 | c
264 | c
265 | c
266 | c
267 | c
268 |         subroutine hank106ini(coef,x1,x2,ninterv,n,
269 |      1      centers,h0derss,h0s,h1s,h,h1derss,u0)
270 |         implicit real *8 (a-h,o-z)
271 |         complex *16 h0s(1),h0derss(n,1),h1derss(n,1),ima,u0,
272 |      1      us(22),h1s(1)
273 |         dimension centers(2,1)
274 | c        centers(1,i),centers(2,i) form one complex center each
275 |         data ima/(0.0d0,1.0d0)/
276 | c 
277 | c        construct the subintervals
278 | c
279 |         h=(x2-x1)/ninterv
280 | c
281 |         do 1200 i=1,ninterv
282 | c        ab1i,ab2i are the ends of subinterval i
283 |         ab1i=(i-1)*h+x1
284 |         ab2i=(i-1)*h+x1 +h
285 |         centers(1,i)=(ab2i+ab1i)/2
286 |         centers(2,i)=coef*centers(1,i)
287 |  1200 continue
288 | c
289 | c        construct the values of Hankel functions and their
290 | c        derivatives at the centers
291 | c
292 |         do 1400 i=1,ninterv
293 | c
294 |         call hank0ders(centers(1,i),n,h0s(i),h1s(i),
295 |      1      h0derss(1,i),h1derss(1,i) )
296 |  1400 continue
297 | c
298 | c       scale the derivatives by 1/j! and by powers of u0
299 | c
300 |         u0=1+ima*coef
301 |         u0=u0/abs(u0)
302 | c        us(j) = u0**j
303 |         us(1)=u0
304 |         do 1500 i=1,20
305 | c
306 |         us(i+1)=us(i)*u0
307 |  1500 continue
308 | c
309 |         do 1800 i=1,ninterv
310 |         fact=1
311 |         do 1600 j=1,n-1
312 |         h0derss(j,i)=h0derss(j,i)*fact * us(j)
313 |         h1derss(j,i)=h1derss(j,i)*fact * us(j)
314 | c        fact becomes 1/(j+1)! for the next pass
315 |         fact=fact/(j+1)
316 |  1600 continue
317 | c
318 |  1800  continue
319 | c
320 |         return
321 |         end
322 | c
323 | c
324 | c
325 | c
326 | c
327 |         subroutine hank0ders(z,n,h0,h1,h0ders,h1ders)
328 |         implicit real *8 (a-h,o-z)
329 |         complex *16 z,h0,h0ders(1),h1,h1ders(1)
330 | c        fills h0ders(1..n) and h1ders(1..n-1) from h0,h1 at z
331 |         data ifexpon/1/
332 | c 
333 | c        evaluate h0 and h1
334 | c
335 |         call hank103(z,h0,h1,ifexpon)
336 | c        first four entries, starting from h0ders(1) = -h1
337 |         h0ders(1)=-h1
338 |         h0ders(2)=-(h0ders(1)/z+h0)
339 |         h0ders(3)=-(2*h0ders(2)+h0ders(1)*z+h0)/z
340 |         h0ders(4)=-(3*h0ders(3)+h0ders(2)*z+2*h0ders(1))/z
341 | c        (note: for n .le. 4 h1ders is left untouched)
342 |         if(n .le. 4) return
343 | c
344 |         do 1400 m=2,n-2
345 | c        three-term recurrence for the remaining entries
346 |         h0ders(m+2)=-( (m+1)*h0ders(m+1)+z*h0ders(m)+
347 |      1      m*h0ders(m-1) )/z
348 |  1400 continue
349 | c        h1ders is the negated h0ders shifted by one place
350 |         do 1600 i=1,n-1
351 | c
352 |         h1ders(i)=-h0ders(i+1)
353 |  1600 continue
354 | c
355 |         return
356 |         end
357 | 
358 | 
359 | ccc        subroutine hank106datagen(rk,rmin,rmax,ab,nab,ninterval,
360 | ccc     1             w,lw,istart,ier)
361 |         subroutine hank106datagen(rk,ier)
362 |         implicit real *8 (a-h,o-z)
363 | ccc        integer *4 istart(1)
364 |         integer *4 istart(29),nintervec(28)
365 | ccc        dimension w(1),ab(2,nab)
366 |         dimension w(50000),ab(2,28)
367 |         complex *16 rk,h0,h1,z,rksav
368 |         data nab/28/
369 |         data lw/50000/
370 |         data rmin/1.0d-6/
371 |         data rmax/200/
372 |         save nab,ninterval,lw,istart,nintervec
373 |         save w,ab,rmin,rmax,rksav
374 | c
375 | c     INPUT PARAMETERS:    ----> Now hidden as local vars.....
376 | c
377 | c       create top level (dyadic) intervals for hank106init, which 
378 | c       then uses equisized subintervals to precompute interpolation
379 | c       polynomials
380 | c
381 | c     rk (complex *16) frequency parameter
382 | c
383 | c     rmin, rmax (real *8)  desired range of argument to hank106
384 | c                           [rmin*rk,...,rmax*rk]
385 | c 
386 | c     ab(2,nab) (real *8)   blank array of length 2*nab
387 | c     w(lw)     (real *8)   work array of length lw
388 | c
389 | c     OUTPUT PARAMETERS:
390 | c
391 | c     ninterval (integer *4)   number of subintervals created
392 | c     ab(2,ninterval) (real *8) boundary of ith interval is
393 | c                               (ab(1,i),ab(2,i))
394 | c     nintervec(ninterval)    number of equispaced subintervals
395 | c                             used for ith interval
396 | c     istart  (integer *4)     istart(i) is pointer into workspace for
397 | c                              data pertaining to ith interval
398 | c     ier (integer *4) error flag
399 | c             ier = 0 upon normal execution.
400 | c             ier = 1 if length (nab) of array ab is of 
401 | c                     insufficient length 
402 | c             ier = 2 if length (lw) of workspace w is of 
403 | c                     insufficient length 
404 | c-----------------------------------------------------------------
405 | c
406 | c
407 |         ier = 0
408 |         ninterval = 1
409 |         istart(1) = 1
410 |         rmaxloc = rmin
411 | ccc        rksav = rk
412 |         rksav = rk/cdabs(rk)
413 |         rminsav = rmin
414 |         do i = 1,1000
415 | c          stop before ab, nintervec or istart would be overrun
416 |            if (i.gt.nab) then
417 |               ier = 1
418 |               return
419 |            endif
420 |            rminloc = rmaxloc 
421 |            rmaxloc = 2*rminloc 
422 |            if (rminloc.gt.100) rmaxloc = rminloc+100
423 |            ab(1,i) = rminloc
424 |            ab(2,i) = rmaxloc
425 | ccc           call prin2(' rminloc = *',rminloc,1)
426 | ccc           call prin2(' rmaxloc = *',rmaxloc,1)
427 | ccc           call prinf(' istart(i) = *',istart(i),1)
428 |            call hank106init(rksav,rminloc,rmaxloc,w(istart(i)),
429 |      1        keep,ninterv)
430 |            nintervec(i) = ninterv
431 | ccc           call prinf(' keep = *',keep,1)
432 |            istart(i+1) = istart(i) + keep + 1
433 |            if (rmaxloc.ge.rmax) goto 1111
434 | c          NOTE(review): this test runs only after hank106init
435 | c          has already been called on w(istart(i)) for interval i
436 |            if (istart(i+1).gt.lw) then
437 |               ier = 2
438 |               return
439 |            endif
440 |            ninterval = ninterval + 1
441 |         enddo
442 | 1111    continue
443 | ccc        call prinf(' istart(i+1) = *',istart(i+1),1)
444 | ccc        call prinf(' i+1 = *',i+1,1)
445 | ccc        call prinf(' ninterval = *',ninterval,1)
446 | ccc        call prinf(' istart(ninterval) = *',istart(ninterval),1)
447 |         return
448 | c
449 | ccc        entry hank106(z,h0,h1,ab,ninterval,w,istart)
450 |         entry hank106(z,h0,h1,ifexpon)
451 | c
452 | c
453 | c       z   (complex *16) argument for Hankel function evaluation.
454 | c       h0,h1 (complex *16) H_0(z) and H_1(z) where frequency 
455 | c                           parameter is ASSUMED TO BE that from 
456 | c                           previous call to hank106datagen.
457 | c
458 | c       ab,ninterval,w,istart   defined above.
459 | c       ifexpon (integer) if 0, hank103 is called directly.
460 | c-------------------------------------------------------------
461 | c
462 | c       determine subinterval and call hank106a.
463 | c
464 | ccc        call prinf(' ninterval = *',ninterval,1)
465 | ccc        call prin2(' rminsav = *',rminsav,1)
466 | ccc        call prin2(' rksav = *',rksav,2)
467 | ccc        x = dreal(z/(rksav*rminsav))
468 | ccc        call prin2(' x = *',x,1)
469 | ccc        call prinf(' ifexpon = *',ifexpon,1)
470 |         if (ifexpon.eq.0) then
471 |            call hank103(z,h0,h1,ifexpon)
472 |            return
473 |         endif
474 |         r = dreal(z/rksav)
475 |         call findinte(r,ab,ninterval,i)
476 | ccc        call prin2(' z = *',z,2)
477 | cc        call prin2(' rksav = *',rksav,2)
478 | ccc        call prin2(' r = *',r,1)
479 | ccc        call prinf(' i = *',i,1)
480 |         if (i.le.ninterval) then
481 | cc        call prin2(' w is = *',w(istart(i)),10)
482 | cc        call prinf(' int part is = *',w(istart(i+10)),10)
483 | ccc           call hank106a(r,h0,h1,w(istart(i)))
484 |            call hank106a(r,h0,h1,w(istart(i)),nintervec(i))
485 | ccc        call prin2(' 106 gives h0 is = *',h0,2)
486 | ccc        call hank103(z,h0,h1,ifexpon)
487 | ccc        call prin2(' 103 gives h0 is = *',h0,2)
488 |         else
489 |            call hank103(z,h0,h1,ifexpon)
490 |         endif
491 |         return
492 |         end
493 | c
494 | c
495 | c
        subroutine findinte(x,ab,nn,intnum)
c
c       Find the index of the subinterval of the table ab that
c       contains the point x.
c
c       input parameters:
c
c       x      - the point to be located
c       ab     - ab(1,j) and ab(2,j) are compared against x as the
c                lower and upper ends of the j-th subinterval
c       nn     - the number of subintervals stored in ab
c
c       output parameters:
c
c       intnum - the index of the subinterval found, or the value
c                777 if x is less than ab(1,1) or greater than
c                ab(2,nn)
c
c       The index found is remembered in intold between calls, so
c       that a point on the same subinterval as the preceding one
c       is located without any search.  This relies on intold
c       keeping its value from one call to the next (it is
c       initialized by a data statement, and no save statement is
c       present - presumably the compiler retains it; confirm).
c
c       NOTE(review): the value 777 is only distinguishable from a
c       valid index while nn is less than 777 - confirm that callers
c       never build that many subintervals.
c
c       NOTE(review): the bisection branch appears to assume that
c       the subintervals are stored in increasing order - confirm
c       against the routine that fills ab.
c
        implicit real *8 (a-h,o-z)
        integer *4 intold,ithresh
        dimension ab(2,nn)
c
c       intold  - subinterval returned by the preceding successful
c                 search (-10 means "no preceding search")
c       ithresh - largest nn for which the direct scan is used
c
        data intold/-10/
        data ithresh/10/
c
c       check if the point is on the same subinterval as the
c       preceding one; skip the check if the remembered index is
c       not a valid index into the current table
c
        if(intold .le. 0) goto 2000
        if(intold .gt. nn) goto 2000
c
        intnum=intold
ccc        call prinf(' intnum is *',intnum,1)
ccc        call prin2(' ab is *',ab(1,intnum),2)
        if( (x .ge. ab(1,intnum) ) .and. (x .le. ab(2,intnum) ) ) return
c
 2000 continue
c
c      the point is outside the range covered by the table: return
c      the out-of-range marker (intold is left unchanged)
c
       if(x .lt. ab(1,1)) then
           intnum = 777
           return
       else if(x .gt. ab(2,nn)) then
           intnum = 777
           return
       endif
c
c      the point is not on the same subinterval as the preceding one.
c      if nn does not exceed ithresh, use direct scan to find the
c      proper interval
c
       if(nn .gt. ithresh) goto 3000
c
c      direct scan: stop at the first subinterval whose upper end
c      is not less than x
c
        do 2200 j=1,nn
c
           intnum=j
c
        if(ab(2,j) .ge. x) goto 2400
 2200 continue
c
 2400 continue
c
        intold=intnum
        return
c
 3000 continue
c
c      The point is not on the same subinterval as the preceding one,
c      and nn is greater than ithresh; use bisection to find the proper 
c      interval
c
       i1=1
       i2=nn
       i3=(i1+i2)/2
c
cccc       nsteps=0
c
c      at most 100 bisection steps; i1 and i2 bracket the answer and
c      i3 is the current midpoint
c
       do 3400 i=1,100
c
       if(x .ge. ab(1,i3)) i1=i3
       if(x .le. ab(2,i3)) i2=i3
c
       if(i2 .eq. i1) goto 3600
c
       i3=(i1+i2)/2
 3400 continue
c
 3600 continue

c
c      final adjustment by one subinterval in case the loop stopped
c      with i3 next to the subinterval containing x
c
       if(x .lt. ab(1,i3)) i3=i3-1
       if(x .gt. ab(2,i3)) i3=i3+1

       intnum=i3
       intold=intnum
c       
        return
        end
573 | c
574 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <filesystem>
  2 | #include <functional>
  3 | #include <iostream>
  4 | #include <memory>
  5 | #include <set>
  6 | #include <string>
  7 | #include <tuple>
  8 | #include <type_traits>
  9 | #include <unordered_map>
 10 | 
 11 | #include <sys/mman.h>
 12 | 
 13 | #include <sf_benchmarks.hpp>
 14 | #include <sf_libraries.hpp>
 15 | #include <sf_utils.hpp>
 16 | 
 17 | #include <sqlite_orm/sqlite_orm.h>
 18 | 
// One row of the "runs" table: a single invocation of the benchmark program.
struct run_t {
    int id;                          // primary key, assigned from storage.insert() in init_storage()
    std::string time;                // timestamp string obtained from the database (datetime("now"))
    std::unique_ptr<int> host;       // foreign key -> hosts.id
    std::unique_ptr<int> toolchain;  // foreign key -> toolchains.id
};
 25 | 
// Process-wide metadata describing this benchmark run. init_storage() fills in the
// `id` fields with the matching database row ids.
run_t run_info;
// NOTE(review): presumably the default constructors of these two types gather the
// compiler/libc and CPU information -- confirm in sf_utils.
sf::utils::toolchain_info_t toolchain_info;
sf::utils::host_info_t host_info;
// Benchmarked libraries keyed by the short name used for lookups in main().
// Libraries without a version query report "NA".
std::unordered_map<std::string, sf::utils::library_info_t> libraries_info = {
    {"agnerfog", {.name = "agnerfog", .version = sf::utils::get_af_version()}},
    {"amdlibm", {.name = "amdlibm", .version = sf::utils::get_alm_version()}},
    {"baobzi", {.name = "baobzi", .version = sf::utils::get_baobzi_version()}},
    {"boost", {.name = "boost", .version = sf::utils::get_boost_version()}},
    {"eigen", {.name = "eigen", .version = sf::utils::get_eigen_version()}},
    {"gsl", {.name = "gsl", .version = sf::utils::get_gsl_version()}},
    {"fort", {.name = "fort", .version = "NA"}},
    {"misc", {.name = "misc", .version = "NA"}},
    {"sctl", {.name = "sctl", .version = sf::utils::get_sctl_version()}},
    {"sleef", {.name = "sleef", .version = sf::utils::get_sleef_version()}},
    {"stl", {.name = "stl", .version = "NA"}},
};
 42 | 
 43 | struct measurement_t {
 44 |     int id;
 45 |     std::unique_ptr<int> run;
 46 |     std::unique_ptr<int> library;
 47 |     std::unique_ptr<int> configuration;
 48 |     sf::utils::library_info_t library_copy;
 49 |     configuration_t config_copy;
 50 |     int nelem = 0;
 51 |     int nrepeat = 0;
 52 |     int veclev = 0;
 53 |     double megaevalspersec = 0;
 54 |     double cyclespereval = 0;
 55 |     double meanevaltime = 0;
 56 |     double stddev = 0;
 57 |     double maxerr = 0;
 58 |     double maxrelerr = 0;
 59 | 
 60 |     explicit operator bool() const { return nrepeat; }
 61 |     friend std::ostream &operator<<(std::ostream &, const measurement_t &);
 62 | };
 63 | 
 64 | std::ostream &operator<<(std::ostream &os, const measurement_t &meas) {
 65 | 
 66 |     using std::left;
 67 |     using std::setw;
 68 | 
 69 |     if (meas) {
 70 |         std::string label = meas.config_copy.func + "_" + meas.library_copy.name + "_" + meas.config_copy.ftype + "x" +
 71 |                             std::to_string(meas.veclev);
 72 | 
 73 |         os.precision(6);
 74 |         os << left << setw(25) << label + ": " << left << setw(15) << meas.megaevalspersec;
 75 |         os.precision(15);
 76 |         os << left << setw(15) << meas.cyclespereval << left << setw(5) << " ";
 77 |         os.precision(5);
 78 |         os << "[" << meas.config_copy.lbound << ", " << meas.config_copy.ubound << "]" << std::endl;
 79 |     }
 80 |     return os;
 81 | }
 82 | 
 83 | #define EIGEN_CASE(OP)                                                                                                 \
 84 |     case sf::functions::eigen::OPS::OP: {                                                                              \
 85 |         res = x.array().OP();                                                                                          \
 86 |         break;                                                                                                         \
 87 |     }
 88 | 
 89 | template <typename VAL_T, typename FUN_T>
 90 | measurement_t test_func(const FUN_T &f, int veclev, sf::utils::library_info_t &library_info, configuration_t &config,
 91 |                         const Eigen::Ref<const Eigen::VectorX<VAL_T>> &x_in,
 92 |                         const Eigen::Ref<const Eigen::VectorXd> &y_ref, int n_repeat) {
 93 |     if (!f)
 94 |         return measurement_t();
 95 |     const std::string label = library_info.name + "_" + config.func;
 96 | 
 97 |     Eigen::VectorX<VAL_T> x = sf::utils::transform_domain<VAL_T>(x_in, config.lbound, config.ubound);
 98 | 
 99 |     size_t res_size = x.size();
100 |     size_t n_evals = x.size() * n_repeat;
101 |     if constexpr (std::is_same_v<FUN_T, fun_cdx1_x2>)
102 |         res_size *= 2;
103 | 
104 |     Eigen::VectorX<VAL_T> res(res_size);
105 |     // Force virtual memory to RAM (to force malloc to do its thing)
106 |     mlock(res.data(), res_size * sizeof(VAL_T));
107 |     VAL_T *resptr = res.data();
108 | 
109 |     sf::utils::timer timer;
110 | 
111 |     for (long k = 0; k < n_repeat; k++) {
112 |         if constexpr (std::is_same_v<FUN_T, fun_cdx1_x2>) {
113 |             for (std::size_t i = 0; i < x.size(); ++i) {
114 |                 std::tie(resptr[i * 2], resptr[i * 2 + 1]) = f(x[i]);
115 |             }
116 |         } else if constexpr (std::is_same_v<FUN_T, std::shared_ptr<baobzi::Baobzi>>) {
117 |             (*f)(x.data(), resptr, x.size());
118 |         } else if constexpr (std::is_same_v<FUN_T, sf::functions::eigen::OPS>) {
119 |             switch (f) {
120 |                 EIGEN_CASE(cos)
121 |                 EIGEN_CASE(sin)
122 |                 EIGEN_CASE(tan)
123 |                 EIGEN_CASE(cosh)
124 |                 EIGEN_CASE(sinh)
125 |                 EIGEN_CASE(tanh)
126 |                 EIGEN_CASE(exp)
127 |                 EIGEN_CASE(log)
128 |                 EIGEN_CASE(log10)
129 |                 EIGEN_CASE(asin)
130 |                 EIGEN_CASE(acos)
131 |                 EIGEN_CASE(atan)
132 |                 EIGEN_CASE(asinh)
133 |                 EIGEN_CASE(acosh)
134 |                 EIGEN_CASE(atanh)
135 |                 EIGEN_CASE(erf)
136 |                 EIGEN_CASE(erfc)
137 |                 EIGEN_CASE(lgamma)
138 |                 EIGEN_CASE(digamma)
139 |                 EIGEN_CASE(ndtri)
140 |                 EIGEN_CASE(sqrt)
141 |                 EIGEN_CASE(rsqrt)
142 |             case sf::functions::eigen::OPS::pow35: {
143 |                 res = x.array().pow(3.5);
144 |                 break;
145 |             }
146 |             case sf::functions::eigen::OPS::pow13: {
147 |                 res = x.array().pow(13);
148 |                 break;
149 |             }
150 |             }
151 |         } else {
152 |             f(x.data(), resptr, x.size());
153 |         }
154 |     }
155 |     timer.stop();
156 | 
157 |     measurement_t meas;
158 |     meas.config_copy = config;
159 |     meas.library_copy = library_info;
160 | 
161 |     meas.run = std::make_unique<int>(run_info.id);
162 |     meas.configuration = std::make_unique<int>(config.id);
163 |     meas.library = std::make_unique<int>(library_info.id);
164 |     meas.nelem = x.size();
165 |     meas.nrepeat = n_repeat;
166 |     meas.cyclespereval = timer.ticks_elapsed() / (double)n_evals;
167 |     meas.megaevalspersec = n_evals / timer.elapsed() / 1E6;
168 |     meas.meanevaltime = timer.elapsed() / n_evals / 1E-9;
169 |     meas.veclev = veclev;
170 | 
171 |     if (y_ref.size() && (std::is_same_v<VAL_T, float> || std::is_same_v<VAL_T, double>)) {
172 |         Eigen::VectorXd delta = res.template cast<double>() - y_ref;
173 |         meas.maxerr = delta.array().abs().maxCoeff();
174 |         meas.maxrelerr = (delta.array().abs() / y_ref.array().abs()).maxCoeff();
175 |         meas.stddev = std::sqrt((delta.array() - delta.mean()).square().sum() / (delta.size() - 1));
176 | 
177 |         meas.maxerr = std::isnan(meas.maxerr) ? -2.0 : meas.maxerr;
178 |         meas.maxrelerr = std::isnan(meas.maxrelerr) ? -2.0 : meas.maxrelerr;
179 |         meas.stddev = std::isnan(meas.stddev) ? -2.0 : meas.stddev;
180 |     } else {
181 |         meas.stddev = -1.0;
182 |         meas.maxerr = -1.0;
183 |         meas.maxrelerr = -1.0;
184 |     }
185 | 
186 |     munlock(res.data(), res_size * sizeof(VAL_T));
187 |     return meas;
188 | }
189 | #undef EIGEN_CASE
190 | 
// Collect the first `argc` entries of `argv` into a set of distinct strings.
// No option syntax is interpreted; every argument is taken verbatim as a key.
std::set<std::string> parse_args(int argc, char *argv[]) {
    std::set<std::string> unique_args;
    // A non-positive count yields an empty set.
    if (argc > 0)
        unique_args.insert(argv, argv + argc);
    return unique_args;
}
199 | 
200 | inline auto init_storage(const std::string &path) {
201 |     using namespace sqlite_orm;
202 |     using sf::utils::host_info_t;
203 |     using sf::utils::library_info_t;
204 |     using sf::utils::toolchain_info_t;
205 | 
206 |     auto storage = make_storage(
207 |         "db.sqlite",
208 |         make_table(
209 |             "hosts", make_column("id", &host_info_t::id, autoincrement(), primary_key()),
210 |             make_column("cpuname", &host_info_t::cpuname, unique()), make_column("cpuclock", &host_info_t::cpuclock),
211 |             make_column("cpuclockmax", &host_info_t::cpuclockmax), make_column("memclock", &host_info_t::memclock),
212 |             make_column("l1dcache", &host_info_t::L1d), make_column("l1icache", &host_info_t::L1i),
213 |             make_column("l2cache", &host_info_t::L2), make_column("l3cache", &host_info_t::L3)),
214 |         make_table("configurations", make_column("id", &configuration_t::id, autoincrement(), primary_key()),
215 |                    make_column("func", &configuration_t::func), make_column("ftype", &configuration_t::ftype),
216 |                    make_column("lbound", &configuration_t::lbound), make_column("ubound", &configuration_t::ubound),
217 |                    make_column("ilbound", &configuration_t::ilbound), make_column("iubound", &configuration_t::iubound),
218 |                    sqlite_orm::unique(&configuration_t::func, &configuration_t::ftype, &configuration_t::lbound,
219 |                                       &configuration_t::ubound, &configuration_t::ilbound, &configuration_t::iubound)),
220 |         make_table("toolchains", make_column("id", &toolchain_info_t::id, autoincrement(), primary_key()),
221 |                    make_column("compiler", &toolchain_info_t::compiler),
222 |                    make_column("compilervers", &toolchain_info_t::compilervers),
223 |                    make_column("libcvers", &toolchain_info_t::libcvers),
224 |                    sqlite_orm::unique(&toolchain_info_t::compiler, &toolchain_info_t::compilervers,
225 |                                       &toolchain_info_t::libcvers)),
226 |         make_table("libraries", make_column("id", &library_info_t::id, autoincrement(), primary_key()),
227 |                    make_column("name", &library_info_t::name), make_column("version", &library_info_t::version),
228 |                    sqlite_orm::unique(&library_info_t::name, &library_info_t::version)),
229 |         make_table("runs", make_column("id", &run_t::id, autoincrement(), primary_key()),
230 |                    make_column("time", &run_t::time), make_column("host", &run_t::host),
231 |                    make_column("toolchain", &run_t::toolchain), foreign_key(&run_t::host).references(&host_info_t::id),
232 |                    foreign_key(&run_t::toolchain).references(&toolchain_info_t::id)),
233 |         make_table(
234 |             "measurements", make_column("id", &measurement_t::id, autoincrement(), primary_key()),
235 |             make_column("run", &measurement_t::run), make_column("library", &measurement_t::library),
236 |             make_column("configuration", &measurement_t::configuration), make_column("nelem", &measurement_t::nelem),
237 |             make_column("nrepeat", &measurement_t::nrepeat), make_column("veclev", &measurement_t::veclev),
238 |             make_column("megaevalspersec", &measurement_t::megaevalspersec),
239 |             make_column("cyclespereval", &measurement_t::cyclespereval),
240 |             make_column("meanevaltime", &measurement_t::meanevaltime), make_column("stddev", &measurement_t::stddev),
241 |             make_column("maxrelerr", &measurement_t::maxrelerr), make_column("maxerr", &measurement_t::maxerr),
242 |             foreign_key(&measurement_t::run).references(&run_t::id),
243 |             foreign_key(&measurement_t::library).references(&library_info_t::id),
244 |             foreign_key(&measurement_t::configuration).references(&configuration_t::id)));
245 | 
246 |     storage.sync_schema();
247 |     auto host_ids =
248 |         storage.select(columns(&host_info_t::id), where(is_equal(&host_info_t::cpuname, host_info.cpuname)));
249 |     if (host_ids.size() == 0)
250 |         host_info.id = storage.insert(host_info);
251 |     else
252 |         host_info.id = std::get<int>(host_ids[0]);
253 | 
254 |     auto toolchain_ids = storage.select(columns(&toolchain_info_t::id),
255 |                                         where(is_equal(&toolchain_info_t::compiler, toolchain_info.compiler) and
256 |                                               is_equal(&toolchain_info_t::compilervers, toolchain_info.compilervers) and
257 |                                               is_equal(&toolchain_info_t::libcvers, toolchain_info.libcvers)));
258 |     if (toolchain_ids.size() == 0)
259 |         toolchain_info.id = storage.insert(toolchain_info);
260 |     else
261 |         toolchain_info.id = std::get<int>(toolchain_ids[0]);
262 | 
263 |     for (auto &[name, lib] : libraries_info) {
264 |         auto library_ids =
265 |             storage.select(columns(&library_info_t::id), where(is_equal(&library_info_t::name, lib.name) and
266 |                                                                is_equal(&library_info_t::version, lib.version)));
267 |         if (library_ids.size() == 0)
268 |             lib.id = storage.insert(lib);
269 |         else
270 |             lib.id = std::get<int>(library_ids[0]);
271 |     }
272 | 
273 |     run_info.time = storage.select(datetime("now")).front();
274 |     run_info.toolchain = std::make_unique<int>(toolchain_info.id);
275 |     run_info.host = std::make_unique<int>(host_info.id);
276 |     run_info.id = storage.insert(run_info);
277 | 
278 |     return storage;
279 | }
280 | 
281 | using Storage = decltype(init_storage(""));
282 | 
283 | int main(int argc, char *argv[]) {
284 |     Storage storage = init_storage("db.sqlite");
285 | 
286 |     std::cout << host_info.cpuname << std::endl;
287 |     std::cout << "    " + toolchain_info.compiler + ": " + toolchain_info.compilervers << std::endl;
288 |     std::cout << "    libc: " + toolchain_info.libcvers << std::endl;
289 |     for (auto &[key, lib] : libraries_info)
290 |         std::cout << "    " + lib.name + ": " + lib.version << std::endl;
291 | 
292 |     std::set<std::string> input_keys = parse_args(argc - 1, argv + 1);
293 | 
294 |     auto &af_funs_dx4 = sf::functions::af::get_funs_dx4();
295 |     auto &af_funs_dx8 = sf::functions::af::get_funs_dx8();
296 |     auto &af_funs_fx8 = sf::functions::af::get_funs_fx8();
297 |     auto &af_funs_fx16 = sf::functions::af::get_funs_fx16();
298 | 
299 |     auto &amdlibm_funs_dx1 = sf::functions::amd::get_funs_dx1();
300 |     auto &amdlibm_funs_dx4 = sf::functions::amd::get_funs_dx4();
301 |     auto &amdlibm_funs_fx1 = sf::functions::amd::get_funs_fx1();
302 |     auto &amdlibm_funs_fx8 = sf::functions::amd::get_funs_fx8();
303 | 
304 |     auto &boost_funs_fx1 = sf::functions::boost::get_funs_fx1();
305 |     auto &boost_funs_dx1 = sf::functions::boost::get_funs_dx1();
306 | 
307 |     auto &eigen_funs = sf::functions::eigen::get_funs();
308 | 
309 |     auto &fort_funs = sf::functions::fort::get_funs_dx1();
310 | 
311 |     auto &gsl_funs = sf::functions::gsl::get_funs_dx1();
312 |     auto &gsl_complex_funs = sf::functions::gsl::get_funs_cdx1();
313 | 
314 |     auto &misc_funs_cdx1_x2 = sf::functions::misc::get_funs_cdx1_x2();
315 | 
316 |     auto &sctl_funs_dx4 = sf::functions::SCTL::get_funs_dx4();
317 |     auto &sctl_funs_dx8 = sf::functions::SCTL::get_funs_dx8();
318 |     auto &sctl_funs_fx8 = sf::functions::SCTL::get_funs_fx8();
319 |     auto &sctl_funs_fx16 = sf::functions::SCTL::get_funs_fx16();
320 | 
321 |     auto &sleef_funs_dx1 = sf::functions::sleef::get_funs_dx1();
322 |     auto &sleef_funs_dx4 = sf::functions::sleef::get_funs_dx4();
323 |     auto &sleef_funs_dx8 = sf::functions::sleef::get_funs_dx8();
324 |     auto &sleef_funs_fx1 = sf::functions::sleef::get_funs_fx1();
325 |     auto &sleef_funs_fx8 = sf::functions::sleef::get_funs_fx8();
326 |     auto &sleef_funs_fx16 = sf::functions::sleef::get_funs_fx16();
327 | 
328 |     auto &stl_funs_fx1 = sf::functions::stl::get_funs_fx1();
329 |     auto &stl_funs_dx1 = sf::functions::stl::get_funs_dx1();
330 | 
331 |     std::set<std::string> fun_union;
332 | #define merge_into_set(FUNS)                                                                                           \
333 |     for (auto kv : FUNS)                                                                                               \
334 |         fun_union.insert(kv.first);
335 | 
336 |     merge_into_set(af_funs_fx8);
337 |     merge_into_set(amdlibm_funs_fx1);
338 |     merge_into_set(boost_funs_fx1);
339 |     merge_into_set(eigen_funs);
340 |     merge_into_set(fort_funs);
341 |     merge_into_set(gsl_funs);
342 |     merge_into_set(misc_funs_cdx1_x2);
343 |     merge_into_set(sctl_funs_fx8);
344 |     merge_into_set(sleef_funs_fx1);
345 |     merge_into_set(stl_funs_fx1);
346 | #undef merge_into_set
347 | 
348 |     std::set<std::string> keys_to_eval;
349 |     if (input_keys.size() > 0)
350 |         std::set_intersection(fun_union.begin(), fun_union.end(), input_keys.begin(), input_keys.end(),
351 |                               std::inserter(keys_to_eval, keys_to_eval.end()));
352 |     else
353 |         keys_to_eval = fun_union;
354 | 
355 |     std::vector<std::pair<int, int>> run_sets;
356 |     for (uint8_t shift = 0; shift <= 14; shift += 14)
357 |         run_sets.push_back({1 << (11 + shift), 1 << (14 - shift)});
358 | 
359 |     std::unordered_map<std::string, configuration_t> base_configurations = {
360 |         {"acos", {.lbound = -1.0, .ubound = 1.0}},
361 |         {"acosh", {.lbound = 1.0, .ubound = 1000.0}},
362 |         {"asin", {.lbound = -1.0, .ubound = 1.0}},
363 |         {"asinh", {.lbound = -100.0, .ubound = 100.0}},
364 |         {"atan", {.lbound = -100.0, .ubound = 100.0}},
365 |         {"atanh", {.lbound = -0.9, .ubound = 0.9}},
366 |         {"bessel_I0", {.lbound = 0.1, .ubound = 30.0}},
367 |         {"bessel_I1", {.lbound = 0.1, .ubound = 30.0}},
368 |         {"bessel_I2", {.lbound = 0.1, .ubound = 30.0}},
369 |         {"bessel_J0", {.lbound = 0.1, .ubound = 30.0}},
370 |         {"bessel_J1", {.lbound = 0.1, .ubound = 30.0}},
371 |         {"bessel_J2", {.lbound = 0.1, .ubound = 30.0}},
372 |         {"bessel_K0", {.lbound = 0.1, .ubound = 30.0}},
373 |         {"bessel_K1", {.lbound = 0.1, .ubound = 30.0}},
374 |         {"bessel_K2", {.lbound = 0.1, .ubound = 30.0}},
375 |         {"bessel_Y0", {.lbound = 0.1, .ubound = 30.0}},
376 |         {"bessel_Y1", {.lbound = 0.1, .ubound = 30.0}},
377 |         {"bessel_Y2", {.lbound = 0.1, .ubound = 30.0}},
378 |         {"bessel_j0", {.lbound = 0.1, .ubound = 30.0}},
379 |         {"bessel_j1", {.lbound = 0.1, .ubound = 30.0}},
380 |         {"bessel_j2", {.lbound = 0.1, .ubound = 30.0}},
381 |         {"bessel_y0", {.lbound = 0.1, .ubound = 30.0}},
382 |         {"bessel_y1", {.lbound = 0.1, .ubound = 30.0}},
383 |         {"bessel_y2", {.lbound = 0.1, .ubound = 30.0}},
384 |         {"cos", {.lbound = 0.0, .ubound = 2 * M_PI, .ilbound = 0.0, .iubound = 2 * M_PI}},
385 |         {"cos_pi", {.lbound = 0.0, .ubound = 2.0}},
386 |         {"cosh", {.lbound = 0.0, .ubound = 1.0}},
387 |         {"digamma", {.lbound = 0.0, .ubound = 1.0}},
388 |         {"erf", {.lbound = -1.0, .ubound = 1.0}},
389 |         {"erfc", {.lbound = -1.0, .ubound = 1.0}},
390 |         {"exp", {.lbound = -1.0, .ubound = 1.0}},
391 |         {"exp10", {.lbound = -1.0, .ubound = 1.0}},
392 |         {"exp2", {.lbound = -1.0, .ubound = 1.0}},
393 |         {"hank103", {.lbound = 0.0, .ubound = 10.0, .ilbound = 0.0, .iubound = 10.0}},
394 |         {"hermite_0", {.lbound = 0.0, .ubound = 10.0}},
395 |         {"hermite_1", {.lbound = 0.0, .ubound = 10.0}},
396 |         {"hermite_2", {.lbound = 0.0, .ubound = 10.0}},
397 |         {"hermite_3", {.lbound = 0.0, .ubound = 10.0}},
398 |         {"lgamma", {.lbound = 0.0, .ubound = 10.0}},
399 |         {"log", {.lbound = 0.0, .ubound = 10.0}},
400 |         {"log10", {.lbound = 0.0, .ubound = 10.0}},
401 |         {"log2", {.lbound = 0.0, .ubound = 10.0}},
402 |         {"memcpy", {.lbound = 0.0, .ubound = 1.0}},
403 |         {"memset", {.lbound = 0.0, .ubound = 1.0}},
404 |         {"ndtri", {.lbound = 0.0, .ubound = 1.0}},
405 |         {"pow13", {.lbound = 0.0, .ubound = 1.0}},
406 |         {"pow3.5", {.lbound = 0.0, .ubound = 1.0}},
407 |         {"riemann_zeta", {.lbound = 0.0, .ubound = 10.0}},
408 |         {"rsqrt", {.lbound = 0.0, .ubound = 10.0}},
409 |         {"sin", {.lbound = 0.0, .ubound = 2 * M_PI, .ilbound = 0.0, .iubound = 2 * M_PI}},
410 |         {"sin_pi", {.lbound = 0.0, .ubound = 2.0}},
411 |         {"sinc", {.lbound = 0.0, .ubound = 2 * M_PI, .ilbound = 0.0, .iubound = 2 * M_PI}},
412 |         {"sinc_pi", {.lbound = 0.0, .ubound = 2.0}},
413 |         {"sinh", {.lbound = 0.0, .ubound = 2.0}},
414 |         {"sqrt", {.lbound = 0.0, .ubound = 10.0}},
415 |         {"tan", {.lbound = 0.0, .ubound = 2 * M_PI}},
416 |         {"tanh", {.lbound = -1.0, .ubound = 1.0}},
417 |         {"tgamma", {.lbound = -0.0, .ubound = 1.0}},
418 |     };
419 | 
420 |     std::unordered_map<std::string, multi_eval_func<double>> double_refs = {
421 |         {"acos", stl_funs_dx1["acos"]},
422 |         {"acosh", stl_funs_dx1["acosh"]},
423 |         {"asin", stl_funs_dx1["asin"]},
424 |         {"asinh", stl_funs_dx1["asinh"]},
425 |         {"atan", stl_funs_dx1["atan"]},
426 |         {"atanh", stl_funs_dx1["atanh"]},
427 |         {"bessel_I0", gsl_funs["bessel_I0"]},
428 |         {"bessel_I1", gsl_funs["bessel_I1"]},
429 |         {"bessel_I2", gsl_funs["bessel_I2"]},
430 |         {"bessel_J0", gsl_funs["bessel_J0"]},
431 |         {"bessel_J1", gsl_funs["bessel_J1"]},
432 |         {"bessel_J2", gsl_funs["bessel_J2"]},
433 |         {"bessel_K0", gsl_funs["bessel_K0"]},
434 |         {"bessel_K1", gsl_funs["bessel_K1"]},
435 |         {"bessel_K2", gsl_funs["bessel_K2"]},
436 |         {"bessel_Y0", gsl_funs["bessel_Y0"]},
437 |         {"bessel_Y1", gsl_funs["bessel_Y1"]},
438 |         {"bessel_Y2", gsl_funs["bessel_Y2"]},
439 |         {"bessel_j0", gsl_funs["bessel_j0"]},
440 |         {"bessel_j1", gsl_funs["bessel_j1"]},
441 |         {"bessel_j2", gsl_funs["bessel_j2"]},
442 |         {"bessel_y0", gsl_funs["bessel_y0"]},
443 |         {"bessel_y1", gsl_funs["bessel_y1"]},
444 |         {"bessel_y2", gsl_funs["bessel_y2"]},
445 |         {"memcpy", sctl_funs_dx4["memcpy"]},
446 |         {"cos", stl_funs_dx1["cos"]},
447 |         {"cos_pi", boost_funs_dx1["cos_pi"]},
448 |         {"cosh", stl_funs_dx1["cosh"]},
449 |         {"digamma", boost_funs_dx1["digamma"]},
450 |         {"erf", stl_funs_dx1["erf"]},
451 |         {"erfc", stl_funs_dx1["erfc"]},
452 |         {"exp", stl_funs_dx1["exp"]},
453 |         {"exp10", stl_funs_dx1["exp10"]},
454 |         {"exp2", stl_funs_dx1["exp2"]},
455 |         {"hermite_0", boost_funs_dx1["hermite_0"]},
456 |         {"hermite_1", boost_funs_dx1["hermite_1"]},
457 |         {"hermite_2", boost_funs_dx1["hermite_2"]},
458 |         {"hermite_3", boost_funs_dx1["hermite_3"]},
459 |         {"lgamma", gsl_funs["lgamma"]},
460 |         {"log", stl_funs_dx1["log"]},
461 |         {"log10", stl_funs_dx1["log10"]},
462 |         {"log2", stl_funs_dx1["log2"]},
463 |         {"pow13", stl_funs_dx1["pow13"]},
464 |         {"pow3.5", stl_funs_dx1["pow3.5"]},
465 |         {"riemann_zeta", gsl_funs["riemann_zeta"]},
466 |         {"rsqrt", stl_funs_dx1["rsqrt"]},
467 |         {"sin", stl_funs_dx1["sin"]},
468 |         {"sin_pi", boost_funs_dx1["sin_pi"]},
469 |         {"sinc", gsl_funs["sinc"]},
470 |         {"sinc_pi", gsl_funs["sinc_pi"]},
471 |         {"sinh", stl_funs_dx1["sinh"]},
472 |         {"sqrt", stl_funs_dx1["sqrt"]},
473 |         {"tan", stl_funs_dx1["tan"]},
474 |         {"tanh", stl_funs_dx1["tanh"]},
475 |         {"tgamma", stl_funs_dx1["tgamma"]},
476 |     };
477 | 
478 |     for (auto key : keys_to_eval)
479 |         std::cout << key << std::endl;
480 | 
481 |     auto &baobzi_funs = sf::functions::baobzi::get_funs_dx1(keys_to_eval, base_configurations);
482 | 
483 |     for (auto &run_set : run_sets) {
484 |         const auto &[n_eval, n_repeat] = run_set;
485 |         std::cerr << "Running benchmark with input vector of length " << n_eval << " and " << n_repeat << " repeats.\n";
486 |         Eigen::VectorXd vals = 0.5 * (Eigen::ArrayXd::Random(n_eval) + 1.0);
487 |         Eigen::VectorXf fvals = vals.cast<float>();
488 |         Eigen::VectorX<cdouble> cvals = 0.5 * (Eigen::ArrayX<cdouble>::Random(n_eval) + std::complex<double>{1.0, 1.0});
489 | 
490 |         for (auto key : keys_to_eval) {
491 |             auto insert_measurement = [&storage](measurement_t &meas) -> void {
492 |                 if (meas)
493 |                     storage.insert(meas);
494 |             };
495 | 
496 |             auto get_conf_data = [&storage, &base_configurations](const std::string &name,
497 |                                                                   const std::string &ftype) -> configuration_t {
498 |                 configuration_t config = base_configurations[name];
499 |                 config.func = name;
500 |                 config.ftype = ftype;
501 | 
502 |                 using namespace sqlite_orm;
503 |                 auto conf_ids = storage.select(columns(&configuration_t::id),
504 |                                                where(is_equal(&configuration_t::ftype, config.ftype) and
505 |                                                      is_equal(&configuration_t::func, config.func) and
506 |                                                      is_equal(&configuration_t::lbound, config.lbound) and
507 |                                                      is_equal(&configuration_t::ubound, config.ubound) and
508 |                                                      is_equal(&configuration_t::ilbound, config.ilbound) and
509 |                                                      is_equal(&configuration_t::iubound, config.iubound)));
510 |                 config.id = conf_ids.size() ? std::get<int>(conf_ids[0]) : storage.insert(config);
511 |                 return config;
512 |             };
513 | 
514 |             Eigen::VectorXd vals_ref = sf::utils::transform_domain<double>(vals, base_configurations[key].lbound,
515 |                                                                            base_configurations[key].ubound);
516 | 
517 |             Eigen::VectorXd dref;
518 |             if (double_refs.count(key)) {
519 |                 dref.resize(vals_ref.size());
520 |                 double_refs[key](vals_ref.data(), dref.data(), vals_ref.size());
521 |             }
522 | 
523 |             std::vector<measurement_t> ms;
524 |             auto &libs = libraries_info;
525 | 
526 |             auto conf_f = get_conf_data(key, "f");
527 |             ms.push_back(test_func<float>(amdlibm_funs_fx1[key], 1, libs["amdlibm"], conf_f, fvals, dref, n_repeat));
528 |             ms.push_back(test_func<float>(amdlibm_funs_fx8[key], 8, libs["amdlibm"], conf_f, fvals, dref, n_repeat));
529 |             ms.push_back(test_func<float>(af_funs_fx8[key], 8, libs["agnerfog"], conf_f, fvals, dref, n_repeat));
530 |             ms.push_back(test_func<float>(af_funs_fx16[key], 16, libs["agnerfog"], conf_f, fvals, dref, n_repeat));
531 |             ms.push_back(test_func<float>(boost_funs_fx1[key], 1, libs["boost"], conf_f, fvals, dref, n_repeat));
532 |             ms.push_back(test_func<float>(eigen_funs[key], 0, libs["eigen"], conf_f, fvals, dref, n_repeat));
533 |             ms.push_back(test_func<float>(sleef_funs_fx1[key], 1, libs["sleef"], conf_f, fvals, dref, n_repeat));
534 |             ms.push_back(test_func<float>(sleef_funs_fx8[key], 8, libs["sleef"], conf_f, fvals, dref, n_repeat));
535 |             ms.push_back(test_func<float>(sleef_funs_fx16[key], 16, libs["sleef"], conf_f, fvals, dref, n_repeat));
536 |             ms.push_back(test_func<float>(sctl_funs_fx8[key], 8, libs["sctl"], conf_f, fvals, dref, n_repeat));
537 |             ms.push_back(test_func<float>(sctl_funs_fx16[key], 16, libs["sctl"], conf_f, fvals, dref, n_repeat));
538 |             ms.push_back(test_func<float>(stl_funs_fx1[key], 1, libs["stl"], conf_f, fvals, dref, n_repeat));
539 | 
540 |             auto conf_d = get_conf_data(key, "d");
541 |             ms.push_back(test_func<double>(af_funs_dx4[key], 4, libs["agnerfog"], conf_d, vals, dref, n_repeat));
542 |             ms.push_back(test_func<double>(af_funs_dx8[key], 8, libs["agnerfog"], conf_d, vals, dref, n_repeat));
543 |             ms.push_back(test_func<double>(amdlibm_funs_dx1[key], 1, libs["amdlibm"], conf_d, vals, dref, n_repeat));
544 |             ms.push_back(test_func<double>(amdlibm_funs_dx4[key], 4, libs["amdlibm"], conf_d, vals, dref, n_repeat));
545 |             ms.push_back(test_func<double>(baobzi_funs[key], 1, libs["baobzi"], conf_d, vals, dref, n_repeat));
546 |             ms.push_back(test_func<double>(boost_funs_dx1[key], 1, libs["boost"], conf_d, vals, dref, n_repeat));
547 |             ms.push_back(test_func<double>(eigen_funs[key], 0, libs["eigen"], conf_d, vals, dref, n_repeat));
548 |             ms.push_back(test_func<double>(fort_funs[key], 1, libs["fort"], conf_d, vals, dref, n_repeat));
549 |             ms.push_back(test_func<double>(gsl_funs[key], 1, libs["gsl"], conf_d, vals, dref, n_repeat));
550 |             ms.push_back(test_func<double>(sctl_funs_dx4[key], 4, libs["sctl"], conf_d, vals, dref, n_repeat));
551 |             ms.push_back(test_func<double>(sctl_funs_dx8[key], 8, libs["sctl"], conf_d, vals, dref, n_repeat));
552 |             ms.push_back(test_func<double>(sleef_funs_dx1[key], 1, libs["sleef"], conf_d, vals, dref, n_repeat));
553 |             ms.push_back(test_func<double>(sleef_funs_dx4[key], 4, libs["sleef"], conf_d, vals, dref, n_repeat));
554 |             ms.push_back(test_func<double>(sleef_funs_dx8[key], 8, libs["sleef"], conf_d, vals, dref, n_repeat));
555 |             ms.push_back(test_func<double>(stl_funs_dx1[key], 1, libs["stl"], conf_d, vals, dref, n_repeat));
556 | 
557 |             for (auto &meas : ms) {
558 |                 if (!meas)
559 |                     continue;
560 |                 std::cout << meas;
561 |                 storage.insert(meas);
562 |             }
563 |             // test_func(gsl_complex_funs, [key], "gsl_cdx1", params, cvals, n_repeat);
564 |             // test_func(misc_funs_cdx1_x2[key], "misc_cdx1_x2", params, cvals, n_repeat);
565 | 
566 |             std::cout << "\n";
567 |         }
568 |     }
569 |     return 0;
570 | }
571 | 


--------------------------------------------------------------------------------
/src/utils.cpp:
--------------------------------------------------------------------------------
 1 | #include <sf_libraries.hpp>
 2 | #include <sf_utils.hpp>
 3 | 
 4 | namespace sf::utils {
 5 | 
 6 | host_info_t::host_info_t() {
 7 |     cpuname = exec("grep -m1 'model name' /proc/cpuinfo | cut -d' ' --complement -f1-3");
 8 |     L1d = exec("lscpu | grep L1d | awk '{print $3}'");
 9 |     L1i = exec("lscpu | grep L1i | awk '{print $3}'");
10 |     L2 = exec("lscpu | grep L2 | awk '{print $3}'");
11 |     L3 = exec("lscpu | grep L3 | awk '{print $3}'");
12 | }
13 | 
14 | toolchain_info_t::toolchain_info_t() {
15 | #ifdef __GNUC__
16 |     compiler = "gcc";
17 |     compilervers =
18 |         std::to_string(__GNUC__) + "." + std::to_string(__GNUC_MINOR__) + "." + std::to_string(__GNUC_PATCHLEVEL__);
19 | #endif
20 | 
21 |     libcvers = gnu_get_libc_version();
22 | }
23 | 
// Runs `cmd` through popen() and returns everything the command wrote to its
// standard output. A single trailing newline, if present, is removed; a command
// that prints nothing yields an empty string.
// Throws std::runtime_error when the pipe cannot be opened.
// Based on https://stackoverflow.com/a/478960
std::string exec(const char *cmd) {
    std::array<char, 128> buffer;
    std::string result;
    // The deleter type is spelled out rather than written as decltype(&pclose):
    // the latter carries pclose's attributes into the template argument, which
    // GCC reports via -Wignored-attributes.
    std::unique_ptr<FILE, int (*)(FILE *)> pipe(popen(cmd, "r"), pclose);
    if (!pipe) {
        throw std::runtime_error("popen() failed!");
    }
    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe.get()) != nullptr) {
        result += buffer.data();
    }
    // Strip only a real newline: an unconditional pop_back() is undefined
    // behaviour on empty output and would eat a genuine character whenever the
    // command does not terminate its output with '\n'.
    if (!result.empty() && result.back() == '\n') {
        result.pop_back();
    }
    return result;
}
38 | 
39 | std::string get_alm_version() {
40 |     std::string offset_str = "0x" + exec("objdump -t ../extern/amd-libm/lib/libalm.so --section=.rodata | grep -m1 "
41 |                                          "ALM_VERSION_STRING | cut -d' ' -f 1");
42 |     size_t offset = strtol(offset_str.c_str(), NULL, 0);
43 |     FILE *obj = fopen("../extern/amd-libm/lib/libalm.so", "r");
44 |     fseek(obj, offset, 0);
45 |     char buf[16];
46 |     fread(buf, sizeof(char), 16, obj);
47 |     fclose(obj);
48 |     return buf;
49 | }
50 | 
51 | std::string get_sleef_version() {
52 |     return std::to_string(SLEEF_VERSION_MAJOR) + "." + std::to_string(SLEEF_VERSION_MINOR) + "." +
53 |            std::to_string(SLEEF_VERSION_PATCHLEVEL);
54 | }
55 | 
56 | std::string get_af_version() {
57 |     return std::to_string(VECTORCLASS_H / 10000) + "." + std::to_string((VECTORCLASS_H / 100) % 100) + "." +
58 |            std::to_string(VECTORCLASS_H % 10);
59 | }
60 | 
61 | std::string get_boost_version() {
62 |     return std::to_string(BOOST_VERSION / 100000) + "." + std::to_string((BOOST_VERSION / 100) % 1000) + "." +
63 |            std::to_string(BOOST_VERSION % 100);
64 | }
65 | 
66 | std::string get_gsl_version() { return std::to_string(GSL_MAJOR_VERSION) + "." + std::to_string(GSL_MINOR_VERSION); }
67 | 
68 | std::string get_sctl_version() { return exec("cd ../extern/SCTL; git describe --tags"); }
69 | 
70 | std::string get_baobzi_version() { return exec("cd ../extern/baobzi; git describe --tags").substr(1); }
71 | 
72 | std::string get_eigen_version() {
73 |     return std::to_string(EIGEN_WORLD_VERSION) + "." + std::to_string(EIGEN_MAJOR_VERSION) + "." +
74 |            std::to_string(EIGEN_MINOR_VERSION);
75 | }
76 | 
77 | } // namespace sf::utils
78 | 


--------------------------------------------------------------------------------