├── .gitignore
├── .gitmodules
├── Doxyfile
├── LICENSE
├── README.md
├── configure.py
├── include
│   ├── fpplus.h
│   └── fpplus
│       ├── common.h
│       ├── dd.h
│       ├── eft.h
│       ├── fpaddre.h
│       └── polevl.h
├── references.bib
├── src
│   ├── code.py
│   ├── ddgemm
│   │   ├── benchmark.c
│   │   ├── common.h
│   │   ├── ddgemm.py
│   │   └── options.c
│   ├── dot
│   │   ├── benchmark.c
│   │   ├── common.h
│   │   ├── dot.py
│   │   └── options.c
│   ├── low-level
│   │   ├── benchmark.c
│   │   ├── common.h
│   │   ├── doubledouble.c
│   │   ├── options.c
│   │   ├── polevl.c
│   │   └── quad.c
│   ├── simd.py
│   ├── utils.c
│   └── utils.h
└── test
    ├── ddgemm-tester.h
    ├── dot-tester.h
    ├── double-double.cpp
    └── error-free-transform.cpp
/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.d 3 | *.pyc 4 | 5 | # Binaries 6 | bin/ 7 | build/ 8 | 9 | # Generated files 10 | doc/ 11 | src/ddgemm/ddgemm.h 12 | src/ddgemm/ddgemm-avx.c 13 | src/ddgemm/ddgemm-mic.c 14 | test/ddgemm.cpp 15 | test/dot.cpp 16 | src/dot/dot.h 17 | src/dot/dot-avx.c 18 | src/dot/dot-mic.c 19 | 20 | # Ninja files 21 | .ninja_deps 22 | .ninja_log 23 | build.ninja 24 | 25 | # System files 26 | .DS_Store 27 | .Rhistory 28 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third-party/googletest"] 2 | path = third-party/googletest 3 | url = git@github.com:google/googletest.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2016, Georgia Institute of Technology 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FPplus 2 | Scientific library for high-precision computations and research 3 | 4 | FPplus was originally developed for a research project on instructions to accelerate high-precision computations, but it is also useful as a general-purpose library.
FPplus features: 5 | 6 | - Header-only library for error-free transforms and double-double computations 7 | - Implements error-free addition, multiplication, and fused multiply-add 8 | - Implements double-double addition and multiplication in multiple variants 9 | - Compatible with C99, C++, OpenCL, and CUDA 10 | - Special versions of error-free transforms in SIMD intrinsics: 11 | - x86 SIMD (128-bit and 256-bit AVX + FMA, 512-bit wide MIC and AVX-512) 12 | - IBM VSX (POWER 7 and POWER 8) and QPX (Blue Gene/Q) 13 | - ARMv8 SIMD 14 | - Extensive documentation with references to scientific literature 15 | - Test suite based on [MPFR](http://www.mpfr.org/) and [Google Test](https://github.com/google/googletest) 16 | - Examples and code generators for high-precision algorithms: 17 | - Polynomial evaluation with compensated Horner scheme 18 | - Compensated dot product algorithm 19 | - Inner kernel of matrix multiplication (GEMM) operation in double-double precision 20 | 21 | ## Requirements 22 | 23 | ##### CPU targets: 24 | - gcc-compatible compiler (tested on gcc, clang, and icc) 25 | - Hardware FMA support 26 | - Precise floating-point semantics 27 | - No `-ffast-math` option when compiling with `gcc` or `clang` 28 | - `-fp-model precise` when compiling with `icc` 29 | 30 | ##### OpenCL targets: 31 | - `cl_khr_fp64`, `cl_amd_fp64`, or `cl_APPLE_fp64_basic_ops` extension 32 | - Hardware FMA support (`FP_FAST_FMA` must be defined by the OpenCL compiler) 33 | - Precise floating-point semantics 34 | - No `-cl-fast-relaxed-math` option 35 | 36 | ##### CUDA targets: 37 | - Compute capability 2.0 or higher 38 | 39 | ## Using FPplus 40 | 41 | ```c 42 | #include <fpplus.h> 43 | ``` 44 | 45 | ## Publications 46 | 47 | Marat Dukhan, Richard Vuduc, and Jason Riedy. ["Wanted: Floating-Point Add Round-off Error instruction"](http://arxiv.org/abs/1603.00491). arXiv preprint 1603.00491 (2016) 48 | 49 | Acknowledgements 50 | ---------------- 51 | 52 | [![HPC Garage logo](https://github.com/Maratyszcza/PeachPy/blob/master/logo/hpcgarage.png)](http://hpcgarage.org) 53 | [![Georgia Tech College of Computing logo](https://github.com/Maratyszcza/PeachPy/blob/master/logo/college-of-computing.gif)](http://www.cse.gatech.edu/) 54 | 55 | The library was developed by [Marat Dukhan](http://www.maratdukhan.com) as a research project at [Richard Vuduc](http://vuduc.org)'s HPC Garage lab at the Georgia Institute of Technology, College of Computing, School of Computational Science and Engineering. FPplus is based on algorithms from the [Handbook of Floating-Point Arithmetic](https://www.springer.com/us/book/9780817647049), [David Bailey](http://davidhbailey.com/)'s QD library, and the works of [Jonathan Shewchuk](http://www.cs.berkeley.edu/~jrs/papers/robustr.pdf), [Theodorus Dekker](https://dl.acm.org/citation.cfm?id=2717032), [Donald Knuth](https://dl.acm.org/citation.cfm?id=270146), and [Sylvie Boldo and Jean-Michel Muller](http://www.ens-lyon.fr/LIP/Pub/Rapports/RR/RR2004/RR2004-41.pdf). We thank [Jason Riedy](http://www.cc.gatech.edu/~jriedy) for his feedback and support. 56 | 57 | This material is based upon work supported by the U.S. National Science Foundation (NSF) Award Number 1339745 and the U.S. Dept. of Energy (DOE), Office of Science, Advanced Scientific Computing Research under award DE-FC02-10ER26006/DE-SC0004915. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect those of NSF or DOE.
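
## Example

A minimal sketch of the scalar API (using only functions documented in the headers below): `efadd` recovers the exact roundoff of a floating-point addition, and the `doubledouble` type accumulates sums to roughly 106 significand bits. Build with hardware FMA and value-safe floating-point settings, e.g. `gcc -std=gnu99 -O3 -mfma -ffp-contract=off -Iinclude example.c`.

```c
#include <stdio.h>
#include <fpplus.h>

int main(void) {
    /* Error-free addition: sum is the rounded result, err the exact roundoff */
    double err;
    const double sum = efadd(1e16, 1.0, &err); /* sum == 1e16, err == 1.0 */
    printf("sum = %.17g, roundoff = %.17g\n", sum, err);

    /* Accumulate in double-double precision: the small addend is not lost */
    const double x[3] = { 1e16, 1.0, -1e16 };
    doubledouble acc = { 0.0, 0.0 };
    for (int i = 0; i < 3; i++) {
        acc = ddadd(acc, ddaddl(x[i], 0.0)); /* promote each addend to double-double */
    }
    /* Prints exactly 1; plain double summation of x would give 0 */
    printf("dd sum = %.17g + %.17g\n", acc.hi, acc.lo);
    return 0;
}
```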
58 | -------------------------------------------------------------------------------- /configure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | import os 6 | import sys 7 | import glob 8 | import argparse 9 | import ninja_syntax 10 | 11 | 12 | def get_program_info(program, arguments, use_stdout=True): 13 | from subprocess import PIPE, Popen 14 | 15 | if not isinstance(arguments, list): 16 | arguments = [str(arguments)] 17 | process = Popen([program] + arguments, stdout=PIPE, stderr=PIPE, bufsize=1) 18 | outdata, errdata = process.communicate() 19 | if use_stdout: 20 | return outdata 21 | else: 22 | return errdata 23 | 24 | 25 | def detect_compiler(program): 26 | banner = get_program_info(program, "--version") 27 | if banner: 28 | identification = banner.splitlines()[0].decode("utf-8") 29 | import re 30 | intel_match = re.match(r"(icc|icpc) \(ICC\) (\d+(?:\.\d+)+)", identification) 31 | if intel_match: 32 | return "Intel", intel_match.group(2) 33 | gnu_match = re.match(r"(gcc|g\+\+)(?:\-\d+)? \(.*\) (\d+(?:\.\d+)+)", identification) 34 | if gnu_match: 35 | return "GNU", gnu_match.group(2) 36 | clang_match = re.match(r".*\bclang version (\d+(?:\.\d+)+)", identification) 37 | if clang_match: 38 | return "Clang", clang_match.group(1) 39 | apple_match = re.match(r"Apple LLVM version (\d+(?:\.\d+)+)", identification) 40 | if apple_match: 41 | return "Apple", apple_match.group(1) 42 | 43 | 44 | class Configuration: 45 | def __init__(self, options, root_dir): 46 | self.build = Configuration.detect_build_platform() 47 | print("Build platform: {build}".format(build=self.build)) 48 | self.host = Configuration.detect_host_platform(self.build, options.uarch) 49 | print("Host platform: {host}".format(host=self.host)) 50 | 51 | self.writer = ninja_syntax.Writer(open(os.path.join(root_dir, "build.ninja"), "w")) 52 | self.root_dir = root_dir 53 | self.source_dir = os.path.join(root_dir, "src") 54 | self.build_dir = os.path.join(root_dir, "build") 55 | self.include_dirs = [os.path.join(root_dir, "include")] 56 | self.binaries_dir = os.path.join(root_dir, "bin") 57 | self.mflags = [] 58 | self.cflags = [] 59 | self.cxxflags = [] 60 | self.ldflags = [] 61 | self.lddirs = [] 62 | self.ldlibs = [] 63 | self.macros = [] 64 | self.object_ext = ".o" 65 | 66 | cc, cxx = self.setup_compilers(options.cc, options.cxx) 67 | self.setup_compiler_options(options.uarch) 68 | 69 | if options.fpaddre: 70 | self.macros.append("FPPLUS_EMULATE_FPADDRE") 71 | if options.quad: 72 | self.macros.append("FPPLUS_HAVE_FLOAT128") 73 | self.macros.append("FPPLUS_UARCH_" + options.uarch.upper()) 74 | 75 | 76 | # Variables 77 | self.writer.variable("cc", cc) 78 | self.writer.variable("cxx", cxx) 79 | self.writer.variable("mflags", " ".join(self.mflags)) 80 | self.writer.variable("cflags", " ".join(self.cflags)) 81 | self.writer.variable("cxxflags", " ".join(self.cxxflags)) 82 | self.writer.variable("ldflags", " ".join(self.ldflags)) 83 | self.writer.variable("macros", " ".join("-D" + macro for macro in self.macros)) 84 | 85 | # Rules 86 | self.writer.rule("cc", "$cc $mflags $cflags $includes -o $out -c $in -MMD -MF $out.d", 87 | deps="gcc", depfile="$out.d", 88 | description="CC $descpath") 89 | self.writer.rule("cxx", "$cxx $mflags $cxxflags $includes -o $out -c $in -MMD -MF $out.d", 90 | deps="gcc", depfile="$out.d", 91 | description="CXX $descpath") 92 | self.writer.rule("ccld", "$cc $mflags $ldflags $lddirs -o $out $in
$ldlibs", 93 | description="CCLD $descpath") 94 | self.writer.rule("cxxld", "$cxx $mflags $ldflags $lddirs -o $out $in $ldlibs", 95 | description="CXXLD $descpath") 96 | self.writer.rule("dot", "python $in --unroll-min $unroll_min --unroll-max $unroll_max --simd $simd --implementation $implementation --header $header --unittest $unittest", 97 | description="GEN $descpath") 98 | self.writer.rule("gemm", "python $in --mr-min $mr_min --mr-max $mr_max --nr-min $nr_min --nr-max $nr_max --simd $simd --implementation $implementation --header $header --unittest $unittest", 99 | description="GEN $descpath") 100 | 101 | 102 | @staticmethod 103 | def detect_build_platform(): 104 | import sys 105 | if sys.platform.startswith("linux"): 106 | return "x86_64-linux-gnu" 107 | elif sys.platform == "darwin": 108 | return "x86_64-osx" 109 | else: 110 | print("Error: failed to detect build platform: sys.platform = {platform}" 111 | .format(platform=sys.platform), file=sys.stdout) 112 | sys.exit(1) 113 | 114 | 115 | @staticmethod 116 | def detect_host_platform(build, uarch): 117 | if uarch == "knc": 118 | return "k1om-linux-gnu" 119 | else: 120 | return build 121 | 122 | 123 | def setup_compilers(self, cc, cxx): 124 | if cc is None and cxx is None: 125 | cc, cxx = { 126 | "x86_64-linux-gnu": ("gcc", "g++"), 127 | "x86_64-osx": ("clang", "clang++"), 128 | "k1om-linux-gnu": ("icc", "icpc"), 129 | }[self.host] 130 | elif cc is None: 131 | import re 132 | cc = cxx 133 | cc = re.sub(r"\bclang\+\+\b", "clang", cc) 134 | cc = re.sub(r"\bicpc\b", "icc", cc) 135 | cc = re.sub(r"\bg\+\+\b", "gcc", cc) 136 | if cc == cxx: 137 | print("Error: failed to auto-detect C compiler from C++ compiler: use --with-cc to specify C compiler explicitly", file=sys.stderr) 138 | sys.exit(1) 139 | elif cxx is None: 140 | import re 141 | cxx = cc 142 | cxx = re.sub(r"\bclang\b", "clang++", cxx) 143 | cxx = re.sub(r"\bicc\b", "icpc", cxx) 144 | cxx = re.sub(r"\bgcc\b", "g++", cxx) 145 | if cxx == cc: 146 | print("Error: failed to auto-detect C++ compiler from C compiler: use --with-cxx to specify C++ compiler explicitly", file=sys.stderr) 147 | sys.exit(1) 148 | cc_name, cc_version = detect_compiler(cc) 149 | cxx_name, cxx_version = detect_compiler(cxx) 150 | print("C compiler: {name} {version}".format(name=cc_name, version=cc_version)) 151 | print("C++ compiler: {name} {version}".format(name=cxx_name, version=cxx_version)) 152 | if (cc_name, cc_version) != (cxx_name, cxx_version): 153 | print("Error: C and C++ toolchain mismatch", file=sys.stderr) 154 | sys.exit(1) 155 | self.compiler_id = cc_name 156 | return cc, cxx 157 | 158 | 159 | def setup_compiler_options(self, uarch): 160 | if uarch == "knc": 161 | self.mflags = ["-mmic"] 162 | else: 163 | self.mflags = ["-m64"] 164 | if self.compiler_id == "Intel": 165 | isaflag = { 166 | "haswell": "-xCORE-AVX2", 167 | "broadwell": "-xCORE-AVX2", 168 | "skylake": "-xCORE-AVX2", 169 | }[uarch] 170 | else: 171 | isaflag = { 172 | "haswell": "-march=core-avx2", 173 | "broadwell": "-march=broadwell", 174 | "skylake": "-mavx2 -mfma", 175 | "bulldozer": "-march=bdver1", 176 | "piledriver": "-march=bdver2", 177 | "steamroller": "-march=bdver3", 178 | }[uarch] 179 | self.cflags.append(isaflag) 180 | self.cxxflags.append(isaflag) 181 | 182 | self.cflags += ["-std=gnu99", "-g", "-O3", "-Wall", "-Wextra", "-Wno-unused-parameter"] 183 | self.cxxflags += ["-std=gnu++11", "-g", "-O3", "-Wall", "-Wextra", "-Wno-unused-parameter", "-Wno-missing-field-initializers"] 184 | if self.compiler_id == "Intel": 185 | 
self.cflags += ["-fp-model", "precise", "-no-fma"] 186 | self.cxxflags += ["-fp-model", "precise", "-no-fma"] 187 | else: 188 | self.cflags.append("-ffp-contract=off") 189 | self.cxxflags.append("-ffp-contract=off") 190 | 191 | if self.host in ["x86_64-linux-gnu", "k1om-linux-gnu"]: 192 | self.cxxflags.append("-pthread") 193 | self.ldflags.append("-pthread") 194 | 195 | 196 | def cc(self, source_file, object_file=None): 197 | if not os.path.isabs(source_file): 198 | source_file = os.path.join(self.source_dir, source_file) 199 | if object_file is None: 200 | object_file = os.path.join(self.build_dir, os.path.relpath(source_file, self.source_dir)) + self.object_ext 201 | variables = { 202 | "descpath": os.path.relpath(source_file, self.source_dir) 203 | } 204 | if self.include_dirs: 205 | variables["includes"] = " ".join(map(lambda include_dir: "-I" + include_dir, self.include_dirs)) 206 | if self.macros: 207 | variables["cflags"] = "$cflags " + " ".join(map(lambda macro: "-D" + macro, self.macros)) 208 | self.writer.build(object_file, "cc", source_file, variables=variables) 209 | return object_file 210 | 211 | 212 | def cxx(self, source_file, object_file=None): 213 | if not os.path.isabs(source_file): 214 | source_file = os.path.join(self.source_dir, source_file) 215 | if object_file is None: 216 | object_file = os.path.join(self.build_dir, os.path.relpath(source_file, self.source_dir)) + self.object_ext 217 | variables = { 218 | "descpath": os.path.relpath(source_file, self.source_dir) 219 | } 220 | if self.include_dirs: 221 | variables["includes"] = " ".join(map(lambda include_dir: "-I" + include_dir, self.include_dirs)) 222 | if self.macros: 223 | variables["cxxflags"] = "$cxxflags " + " ".join(map(lambda macro: "-D" + macro, self.macros)) 224 | self.writer.build(object_file, "cxx", source_file, variables=variables) 225 | return object_file 226 | 227 | 228 | def ccld(self, object_files, executable_file, lddirs=[], ldlibs=[]): 229 | if not os.path.isabs(executable_file): 230 | executable_file = os.path.join(self.binaries_dir, executable_file) 231 | variables = { 232 | "descpath": os.path.relpath(executable_file, self.binaries_dir) 233 | } 234 | if self.lddirs or lddirs: 235 | variables["libdirs"] = " ".join("-L" + lddir for lddir in self.lddirs + lddirs) 236 | if self.ldlibs or ldlibs: 237 | variables["ldlibs"] = " ".join("-l" + ldlib for ldlib in self.ldlibs + ldlibs) 238 | self.writer.build(executable_file, "ccld", object_files, variables=variables) 239 | return executable_file 240 | 241 | 242 | def cxxld(self, object_files, executable_file, lddirs=[], ldlibs=[]): 243 | if not os.path.isabs(executable_file): 244 | executable_file = os.path.join(self.binaries_dir, executable_file) 245 | variables = { 246 | "descpath": os.path.relpath(executable_file, self.binaries_dir) 247 | } 248 | if self.lddirs or lddirs: 249 | variables["libdirs"] = " ".join("-L" + lddir for lddir in self.lddirs + lddirs) 250 | if self.ldlibs or ldlibs: 251 | variables["ldlibs"] = " ".join("-l" + ldlib for ldlib in self.ldlibs + ldlibs) 252 | self.writer.build(executable_file, "cxxld", object_files, variables=variables) 253 | return executable_file 254 | 255 | 256 | def dot(self, unroll_min, unroll_max, simd): 257 | implementation_file = os.path.join(self.source_dir, "dot", "dot-{simd}.c".format(simd=simd)) 258 | header_file = os.path.join(self.source_dir, "dot", "dot.h") 259 | unittest_file = os.path.join(self.root_dir, "test", "dot.cpp") 260 | script_file = os.path.join(self.source_dir, "dot", "dot.py") 261 | 
variables = { 262 | "descpath": os.path.relpath(implementation_file, self.source_dir), 263 | "unroll_min": str(unroll_min), 264 | "unroll_max": str(unroll_max), 265 | "simd": simd, 266 | "implementation": implementation_file, 267 | "header": header_file, 268 | "unittest": unittest_file 269 | } 270 | self.writer.build( 271 | [implementation_file, header_file, unittest_file], 272 | "dot", script_file, variables=variables) 273 | return implementation_file, header_file, unittest_file 274 | 275 | 276 | def gemm(self, mr_min, mr_max, nr_min, nr_max, simd): 277 | implementation_file = os.path.join(self.source_dir, "ddgemm", "ddgemm-{simd}.c".format(simd=simd)) 278 | header_file = os.path.join(self.source_dir, "ddgemm", "ddgemm.h") 279 | unittest_file = os.path.join(self.root_dir, "test", "ddgemm.cpp") 280 | script_file = os.path.join(self.source_dir, "ddgemm", "ddgemm.py") 281 | variables = { 282 | "descpath": simd, 283 | "mr_min": str(mr_min), 284 | "mr_max": str(mr_max), 285 | "nr_min": str(nr_min), 286 | "nr_max": str(nr_max), 287 | "simd": simd, 288 | "implementation": implementation_file, 289 | "header": header_file, 290 | "unittest": unittest_file 291 | } 292 | self.writer.build( 293 | [implementation_file, header_file, unittest_file], 294 | "gemm", script_file, variables=variables) 295 | return implementation_file, header_file, unittest_file 296 | 297 | 298 | parser = argparse.ArgumentParser(description="FP+ configuration script") 299 | parser.add_argument("--enable-fpaddre", dest="fpaddre", action="store_true", default=False, 300 | help="Emulate FPADDRE instruction") 301 | parser.add_argument("--enable-quad", dest="quad", action="store_true", default=False, 302 | help="Enable quad-precision benchmark (requires gcc or icc)") 303 | parser.add_argument("--uarch", dest="uarch", required=True, 304 | choices=("haswell", "broadwell", "skylake", "bulldozer", "piledriver", "steamroller", "knc"), 305 | help="Target micro-architecture") 306 | parser.add_argument("--with-cc", dest="cc", default=os.getenv("CC"), 307 | help="C compiler to use") 308 | parser.add_argument("--with-cxx", dest="cxx", default=os.getenv("CXX"), 309 | help="C++ compiler to use") 310 | parser.add_argument("--with-gmp", dest="gmp", 311 | help="Path to GNU MP prefix dir") 312 | parser.add_argument("--with-mpfr", dest="mpfr", 313 | help="Path to MPFR prefix dir") 314 | 315 | 316 | def main(): 317 | options = parser.parse_args() 318 | root_dir = os.path.dirname(os.path.abspath(__file__)) 319 | config = Configuration(options, root_dir) 320 | 321 | # Build gtest 322 | gtest_dir = os.path.join(root_dir, "third-party", "googletest") 323 | config.source_dir = os.path.join(gtest_dir, "src") 324 | config.build_dir = os.path.join(root_dir, "build", "gtest") 325 | config.include_dirs = [os.path.join(gtest_dir, "include"), gtest_dir] 326 | gtest_object = config.cxx("gtest-all.cc") 327 | 328 | # Setup 329 | config.source_dir = os.path.join(root_dir, "src") 330 | config.build_dir = os.path.join(root_dir, "build") 331 | config.include_dirs = [ 332 | os.path.join(root_dir, "include"), 333 | os.path.join(root_dir, "src"), 334 | ] 335 | config.ldlibs = ["m"] 336 | if sys.platform.startswith("linux"): 337 | config.ldlibs.append("rt") 338 | 339 | # Build benchmarks 340 | simd = "avx" 341 | simd_width = 4 342 | if options.uarch == "knc": 343 | simd = "mic" 344 | simd_width = 8 345 | 346 | utils_object = config.cc("utils.c") 347 | 348 | dot_source, dot_header, dot_test_source = config.dot(1, 8, simd) 349 | dot_object = config.cc(dot_source) 350 | 
dot_sources = ["dot.c", "dot/dot.c"] 351 | config.ccld([ 352 | config.cc("dot/benchmark.c"), 353 | config.cc("dot/options.c"), 354 | dot_object, utils_object], "dot-bench") 355 | 356 | gemm_source, gemm_header, gemm_test_source = config.gemm(simd_width, simd_width * 3, 1, 8, simd) 357 | gemm_object = config.cc(gemm_source) 358 | config.ccld([ 359 | config.cc("ddgemm/benchmark.c"), 360 | config.cc("ddgemm/options.c"), 361 | gemm_object, utils_object], "ddgemm-bench") 362 | 363 | ubench_objects = [ 364 | config.cc("low-level/benchmark.c"), 365 | config.cc("low-level/options.c"), 366 | config.cc("low-level/doubledouble.c"), 367 | config.cc("low-level/polevl.c")] 368 | if options.quad: 369 | ubench_objects.append(config.cc("low-level/quad.c")) 370 | config.ccld(ubench_objects, "ubench") 371 | 372 | # Build tests 373 | config.source_dir = os.path.join(root_dir, "test") 374 | config.build_dir = os.path.join(root_dir, "build", "test") 375 | config.include_dirs = [ 376 | os.path.join(root_dir, "include"), 377 | os.path.join(root_dir, "src"), 378 | os.path.join(gtest_dir, "include") 379 | ] 380 | 381 | if config.host != config.build and (options.mpfr is None or options.gmp is None): 382 | print("Warning: cannot cross-compile tests without explicitly specified paths to MPFR and GMP, see --with-mpfr and --with-gmp options") 383 | elif not options.fpaddre: 384 | test_ldlibs, test_ldobjs = [], [] 385 | if options.mpfr: 386 | test_ldobjs.append(os.path.join(options.mpfr, "lib", "libmpfr.a")) 387 | config.include_dirs.append(os.path.join(options.mpfr, "include")) 388 | else: 389 | test_ldlibs.append("mpfr") 390 | if options.gmp: 391 | test_ldobjs.append(os.path.join(options.gmp, "lib", "libgmp.a")) 392 | config.include_dirs.append(os.path.join(options.gmp, "include")) 393 | else: 394 | test_ldlibs.append("gmp") 395 | config.cxxld([config.cxx("error-free-transform.cpp"), gtest_object] + test_ldobjs, 396 | "eft-test", ldlibs=test_ldlibs) 397 | config.cxxld([config.cxx("double-double.cpp"), gtest_object] + test_ldobjs, 398 | "dd-test", ldlibs=test_ldlibs) 399 | config.cxxld([config.cxx("dot.cpp"), dot_object, gtest_object] + test_ldobjs, 400 | "dot-test", ldlibs=test_ldlibs) 401 | config.cxxld([config.cxx("ddgemm.cpp"), gemm_object, gtest_object] + test_ldobjs, 402 | "ddgemm-test", ldlibs=test_ldlibs) 403 | 404 | 405 | if __name__ == "__main__": 406 | sys.exit(main()) 407 | -------------------------------------------------------------------------------- /include/fpplus.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FPPLUS_H 3 | #define FPPLUS_H 4 | 5 | #include <fpplus/eft.h> 6 | #include <fpplus/dd.h> 7 | #include <fpplus/polevl.h> 8 | 9 | #endif /* FPPLUS_H */ 10 | -------------------------------------------------------------------------------- /include/fpplus/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FPPLUS_COMMON_H 3 | #define FPPLUS_COMMON_H 4 | 5 | #if defined(__OPENCL_VERSION__) 6 | #if defined(cl_khr_fp64) 7 | /* 8 | * Since OpenCL 1.2 cl_khr_fp64 is an optional core feature and doesn't need a pragma to enable it. 9 | * In fact, using the pragma results in a warning on some OpenCL implementations.
10 | */ 11 | #if __OPENCL_VERSION__ < 120 12 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 13 | #endif 14 | #elif defined(cl_amd_fp64) 15 | #pragma OPENCL EXTENSION cl_amd_fp64 : enable 16 | #elif defined(cl_APPLE_fp64_basic_ops) 17 | #pragma OPENCL EXTENSION cl_APPLE_fp64_basic_ops : enable 18 | #else 19 | #error "The code must be compiled for a device with double-precision support" 20 | #endif 21 | #ifndef FP_FAST_FMA 22 | #error "The code must be compiled for a device with a performant FMA operation" 23 | #endif 24 | #elif defined(__CUDA_ARCH__) 25 | /* nvcc targeting a CUDA device */ 26 | #if __CUDA_ARCH__ < 200 27 | #error "The code must be compiled for a CUDA device with fused multiply-add (compute capability 2.0+)" 28 | #endif 29 | #elif !defined(__FP_FAST_FMA) && !defined(__FMA__) && !defined(__AVX2__) && !defined(__KNC__) 30 | #error "The code must be compiled for a processor with fused multiply-add (FMA)" 31 | #endif 32 | 33 | #if defined(__OPENCL_VERSION__) && defined(__FAST_RELAXED_MATH__) 34 | #error "The code must be compiled without the -cl-fast-relaxed-math option: the implemented algorithms depend on precise floating-point behaviour" 35 | #elif defined(__FAST_MATH__) && !defined(__CUDA_ARCH__) 36 | /* On CUDA the code uses intrinsics which guarantee floating-point behaviour regardless of optimization mode */ 37 | #error "The code must be compiled without the -ffast-math option: the implemented algorithms depend on precise floating-point behaviour" 38 | #endif 39 | 40 | #ifndef FPPLUS_USE_FPADDRE 41 | #ifdef FPPLUS_EMULATE_FPADDRE 42 | #define FPPLUS_USE_FPADDRE 1 43 | #else 44 | #define FPPLUS_USE_FPADDRE 0 45 | #endif 46 | #endif /* !defined(FPPLUS_USE_FPADDRE) */ 47 | 48 | #if defined(__GNUC__) && defined(__x86_64__) 49 | #include <immintrin.h> 50 | #elif defined(__VSX__) 51 | #include <altivec.h> 52 | #elif defined(__ARM_ARCH_8A__) 53 | #include <arm_neon.h> 54 | #endif 55 | 56 | #if defined(__STDC__) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) 57 | #define FPPLUS_C99_SYNTAX 1 58 | #else 59 | #define FPPLUS_C99_SYNTAX 0 60 | #endif 61 | 62 | #if defined(__STDC__) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__OPENCL_VERSION__) 63 | #define FPPLUS_RESTRICT restrict 64 | #elif defined(__GNUC__) 65 | #define FPPLUS_RESTRICT __restrict__ 66 | #elif defined(_MSC_VER) 67 | #define FPPLUS_RESTRICT __restrict 68 | #else 69 | #define FPPLUS_RESTRICT 70 | #endif 71 | 72 | #if defined(_MSC_VER) 73 | #define FPPLUS_STATIC_INLINE static __forceinline 74 | #elif defined(__STDC__) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__cplusplus) || defined(__OPENCL_VERSION__) 75 | #define FPPLUS_STATIC_INLINE static inline 76 | #else 77 | #define FPPLUS_STATIC_INLINE static 78 | #endif 79 | 80 | #if defined(__GNUC__) 81 | #define FPPLUS_NONNULL_POINTER_ARGUMENTS __attribute__((__nonnull__)) 82 | #else 83 | #define FPPLUS_NONNULL_POINTER_ARGUMENTS 84 | #endif 85 | 86 | #if defined(__STDC__) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__OPENCL_VERSION__) 87 | #define FPPLUS_NONNULL_POINTER(name) name[restrict static 1] 88 | #else 89 | #define FPPLUS_NONNULL_POINTER(name) *FPPLUS_RESTRICT name 90 | #endif 91 | 92 | #if defined(__STDC__) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__OPENCL_VERSION__) 93 | #define FPPLUS_ARRAY_POINTER(name, min_size) name[restrict static min_size] 94 | #else 95 | #define FPPLUS_ARRAY_POINTER(name, min_size) *FPPLUS_RESTRICT name 96 | #endif 97 | 98 | #ifdef FPPLUS_EMULATE_FPADDRE
99 | #include <fpplus/fpaddre.h> 100 | #endif 101 | 102 | #endif /* FPPLUS_COMMON_H */ 103 | -------------------------------------------------------------------------------- /include/fpplus/dd.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FPPLUS_DD_H 3 | #define FPPLUS_DD_H 4 | 5 | #include <fpplus/eft.h> 6 | 7 | /** 8 | * @defgroup DD Double-double arithmetic 9 | */ 10 | 11 | 12 | /** 13 | * @ingroup DD 14 | * @brief Double-double number. 15 | */ 16 | typedef struct { 17 | /** 18 | * @brief The high (largest in magnitude) part of the number. 19 | * @note The high part is the best double-precision approximation of the double-double number. 20 | */ 21 | double hi; 22 | /** 23 | * @brief The low (smallest in magnitude) part of the number. 24 | */ 25 | double lo; 26 | } doubledouble; 27 | 28 | 29 | /** 30 | * @ingroup DD 31 | * @brief Long addition of double-precision numbers. 32 | * @details Adds two double-precision numbers and produces a double-double result. 33 | * The algorithm is a version of error-free addition due to @cite Knuth1997. 34 | * 35 | * @par Computational complexity 36 | * 37 | * 38 | * 39 | * 40 | *
Operation | Count (default ISA) | Count (with ADDRE)
FP ADD    | 6                   | 1
FP ADDRE  | -                   | 1
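*
* Example (an illustrative sketch; the constants are chosen so that every value below is exact):
* @code
* const doubledouble s = ddaddl(1.0, 0x1.0p-60);
* // s.hi == 1.0 and s.lo == 0x1.0p-60: together they represent 1 + 2^-60 exactly,
* // while plain double addition would return 1.0 and lose the small term
* @endcode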
41 | * 42 | * @param[in] a - addend, the first double-precision number to be added. 43 | * @param[in] b - augend, the second double-precision number to be added. 44 | * @return The sum of @b a and @b b as a double-double number. 45 | */ 46 | FPPLUS_STATIC_INLINE doubledouble ddaddl(const double a, const double b) { 47 | doubledouble sum; 48 | sum.hi = efadd(a, b, &sum.lo); 49 | return sum; 50 | } 51 | 52 | /** 53 | * @ingroup DD 54 | * @brief Wide addition of a double-precision number to a double-double number. 55 | * @details Adds a double-precision number to a double-double number and produces a double-double result. 56 | * 57 | * Implementation follows @cite QD2000, Figure 7. 58 | * 59 | * @par Computational complexity 60 | * 61 | * 62 | * 63 | * 64 | *
Operation | Count (default ISA) | Count (with ADDRE)
FP ADD    | 10                  | 3
FP ADDRE  | -                   | 2
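*
* Example (an illustrative sketch; every value below is exact):
* @code
* doubledouble acc = ddaddl(1.0, 0x1.0p-60); // 1 + 2^-60
* acc = ddaddw(acc, 3.0);                    // acc.hi == 4.0, acc.lo == 0x1.0p-60
* @endcode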
65 | * 66 | * @param[in] a - addend, the double-double number to be added to. 67 | * @param[in] b - augend, the double-precision number to be added. 68 | * @return The sum of @b a and @b b as a double-double number. 69 | */ 70 | FPPLUS_STATIC_INLINE doubledouble ddaddw(const doubledouble a, const double b) { 71 | doubledouble sum = ddaddl(a.lo, b); 72 | double e; 73 | /* QD uses efaddord here. I think it is a bug (what if b > a.hi -> sum.hi > a.hi ?). */ 74 | sum.hi = efadd(a.hi, sum.hi, &e); 75 | #ifdef __CUDA_ARCH__ 76 | sum.lo = __dadd_rn(sum.lo, e); 77 | #else 78 | sum.lo += e; 79 | #endif 80 | return sum; 81 | } 82 | 83 | /** 84 | * @ingroup DD 85 | * @brief Addition of two double-double numbers. 86 | * @details Adds two double-double numbers and produces a double-double result. 87 | * 88 | * According to a comment in the source of @cite QD2000, the algorithm is due to Briggs and Kahan. 89 | * Implementation follows @cite FPHandbook2009. 90 | * 91 | * @par Computational complexity 92 | * 93 | * 94 | * 95 | *
Operation | Count
FP ADD    | 20
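*
* Example (an illustrative sketch; every value below is exact):
* @code
* const doubledouble a = ddaddl(1.0, 0x1.0p-60);   //  1 + 2^-60
* const doubledouble b = ddaddl(-1.0, 0x1.0p-61);  // -1 + 2^-61
* const doubledouble s = ddadd(a, b);              // s.hi == 0x1.8p-60 (= 2^-60 + 2^-61), s.lo == 0.0
* @endcode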
96 | * 97 | * @param[in] a - addend, the first double-double number to be added. 98 | * @param[in] b - augend, the second double-double number to be added. 99 | * @return The sum of @b a and @b b as a double-double number. 100 | */ 101 | FPPLUS_STATIC_INLINE doubledouble ddadd(const doubledouble a, const doubledouble b) { 102 | const doubledouble s = ddaddl(a.hi, b.hi); 103 | const doubledouble t = ddaddl(a.lo, b.lo); 104 | doubledouble v; 105 | #ifdef __CUDA_ARCH__ 106 | v.hi = efaddord(s.hi, __dadd_rn(s.lo, t.hi), &v.lo); 107 | #else 108 | v.hi = efaddord(s.hi, s.lo + t.hi, &v.lo); 109 | #endif 110 | doubledouble z; 111 | #ifdef __CUDA_ARCH__ 112 | z.hi = efaddord(v.hi, __dadd_rn(t.lo, v.lo), &z.lo); 113 | #else 114 | z.hi = efaddord(v.hi, t.lo + v.lo, &z.lo); 115 | #endif 116 | return z; 117 | } 118 | 119 | /** 120 | * @ingroup DD 121 | * @brief Fast addition of two double-double numbers with weaker error guarantees. 122 | * @details Adds two double-double numbers and produces a double-double result. 123 | * 124 | * Implementation based on @cite Dekker1971, Section 8, function add2. 125 | * 126 | * @par Computational complexity 127 | * 128 | * 129 | * 130 | * 131 | *
Operation | Count (default ISA) | Count (with ADDRE)
FP ADD    | 11                  | 4
FP ADDRE  | -                   | 2
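*
* Example (an illustrative sketch; in this benign case the result matches ddadd, but when the low
* parts nearly cancel the high parts the fast variant may be less accurate):
* @code
* const doubledouble a = ddaddl(1.0, 0x1.0p-60);
* const doubledouble b = ddaddl(2.0, 0x1.0p-59);
* const doubledouble s = ddadd_fast(a, b); // s.hi == 3.0, s.lo == 0x1.8p-59
* @endcode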
132 | * 133 | * @param[in] a - addend, the first double-double number to be added. 134 | * @param[in] b - augend, the second double-double number to be added. 135 | * @return The sum of @b a and @b b as a double-double number. 136 | */ 137 | FPPLUS_STATIC_INLINE doubledouble ddadd_fast(const doubledouble a, const doubledouble b) { 138 | doubledouble sum = ddaddl(a.hi, b.hi); 139 | #ifdef __CUDA_ARCH__ 140 | sum.lo = __dadd_rn(sum.lo, __dadd_rn(a.lo, b.lo)); 141 | #else 142 | sum.lo += a.lo + b.lo; 143 | #endif 144 | sum.hi = efaddord(sum.hi, sum.lo, &sum.lo); 145 | return sum; 146 | } 147 | 148 | /** 149 | * @ingroup DD 150 | * @brief Long multiplication of double-precision numbers. 151 | * @details Multiplies two double-precision numbers and produces a double-double result. 152 | * The algorithm is a version of error-free multiplication. 153 | * 154 | * @par Computational complexity 155 | * 156 | * 157 | * 158 | * 159 | *
Operation | Count (default ISA)
FP MUL    | 1
FP FMA    | 1
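*
* Example (an illustrative sketch): (2^27 + 1)^2 = 2^54 + 2^28 + 1 needs 55 significand bits, one
* more than a double provides, so the last bit ends up in the low part:
* @code
* const doubledouble p = ddmull(0x1.0p27 + 1.0, 0x1.0p27 + 1.0);
* // p.hi == 2^54 + 2^28 (the rounded product), p.lo == 1.0, and p.hi + p.lo is the exact square
* @endcode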
160 | * 161 | * @param[in] a - multiplicand, the double-precision number to be multiplied. 162 | * @param[in] b - multiplier, the double-precision number to multiply by. 163 | * @return The product of @b a and @b b as a double-double number. 164 | */ 165 | FPPLUS_STATIC_INLINE doubledouble ddmull(const double a, const double b) { 166 | doubledouble product; 167 | product.hi = efmul(a, b, &product.lo); 168 | return product; 169 | } 170 | 171 | /** 172 | * @ingroup DD 173 | * @brief Multiplication of double-double numbers. 174 | * @details Multiplies two double-double numbers and produces a double-double result. 175 | * 176 | * Implementation mostly follows @cite Dekker1971, Section 8, function mul2. 177 | * 178 | * @par Computational complexity 179 | * 180 | * 181 | * 182 | * 183 | * 184 | *
Operation | Count (default ISA)
FP ADD    | 3
FP MUL    | 1
FP FMA    | 3
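*
* Example (an illustrative sketch; terms below double-double precision are rounded away):
* @code
* const doubledouble a = ddaddl(1.0, 0x1.0p-60);  // 1 + 2^-60
* const doubledouble p = ddmul(a, a);             // p.hi == 1.0, p.lo == 0x1.0p-59
* // the exact square also contains a 2^-120 term, which is beyond double-double precision
* @endcode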
185 | * 186 | * @param[in] a - multiplicand, the double-double number to be multiplied. 187 | * @param[in] b - multiplier, the double-double number to multiply by. 188 | * @return The product of @b a and @b b as a double-double number. 189 | */ 190 | FPPLUS_STATIC_INLINE doubledouble ddmul(const doubledouble a, const doubledouble b) { 191 | doubledouble product = ddmull(a.hi, b.hi); 192 | 193 | /* 194 | * Dekker's paper used product.lo += (a.lo * b.hi) + (a.hi * b.lo) here, 195 | * but an FMA-based implementation should be slightly faster and more accurate 196 | */ 197 | #if defined(__CUDA_ARCH__) 198 | product.lo = __fma_rn(a.lo, b.hi, product.lo); 199 | product.lo = __fma_rn(a.hi, b.lo, product.lo); 200 | #elif defined(__GNUC__) 201 | product.lo = __builtin_fma(a.lo, b.hi, product.lo); 202 | product.lo = __builtin_fma(a.hi, b.lo, product.lo); 203 | #else 204 | product.lo = fma(a.lo, b.hi, product.lo); 205 | product.lo = fma(a.hi, b.lo, product.lo); 206 | #endif 207 | 208 | product.hi = efaddord(product.hi, product.lo, &product.lo); 209 | return product; 210 | } 211 | 212 | #if defined(__AVX__) && (defined(__FMA__) || defined(__FMA4__) || defined(__AVX2__)) 213 | 214 | typedef struct { 215 | __m128d hi; 216 | __m128d lo; 217 | } __m128dd; 218 | 219 | FPPLUS_STATIC_INLINE __m128dd _mm_setzero_pdd(void) { 220 | return (__m128dd) { _mm_setzero_pd(), _mm_setzero_pd() }; 221 | } 222 | 223 | FPPLUS_STATIC_INLINE __m128dd _mm_broadcast_sdd( 224 | const doubledouble FPPLUS_NONNULL_POINTER(pointer)) 225 | { 226 | return (__m128dd) { _mm_loaddup_pd(&pointer->hi), _mm_loaddup_pd(&pointer->lo) }; 227 | } 228 | 229 | FPPLUS_STATIC_INLINE __m128dd _mm_loaddeinterleave_pdd( 230 | const doubledouble FPPLUS_ARRAY_POINTER(pointer, 2)) 231 | { 232 | const __m128d number0 = _mm_load_pd(&pointer[0].hi); 233 | const __m128d number1 = _mm_load_pd(&pointer[1].hi); 234 | return (__m128dd) { 235 | _mm_unpacklo_pd(number0, number1), 236 | _mm_unpackhi_pd(number0, number1) 237 | }; 238 | } 239 | 240 | FPPLUS_STATIC_INLINE __m128dd _mm_loaddeinterleaveu_pdd( 241 | const doubledouble FPPLUS_ARRAY_POINTER(pointer, 2)) 242 | { 243 | const __m128d number0 = _mm_loadu_pd(&pointer[0].hi); 244 | const __m128d number1 = _mm_loadu_pd(&pointer[1].hi); 245 | return (__m128dd) { 246 | _mm_unpacklo_pd(number0, number1), 247 | _mm_unpackhi_pd(number0, number1) 248 | }; 249 | } 250 | 251 | FPPLUS_STATIC_INLINE __m128dd _mm_addl_sd(const __m128d a, const __m128d b) { 252 | __m128dd sum; 253 | sum.hi = _mm_efadd_sd(a, b, &sum.lo); 254 | return sum; 255 | } 256 | 257 | FPPLUS_STATIC_INLINE __m128dd _mm_addl_pd(const __m128d a, const __m128d b) { 258 | __m128dd sum; 259 | sum.hi = _mm_efadd_pd(a, b, &sum.lo); 260 | return sum; 261 | } 262 | 263 | FPPLUS_STATIC_INLINE __m128dd _mm_addw_sdd(const __m128dd a, const __m128d b) { 264 | __m128dd sum = _mm_addl_sd(a.lo, b); 265 | __m128d e; 266 | sum.hi = _mm_efadd_sd(a.hi, sum.hi, &e); 267 | sum.lo = _mm_add_sd(sum.lo, e); 268 | return sum; 269 | } 270 | 271 | FPPLUS_STATIC_INLINE __m128dd _mm_addw_pdd(const __m128dd a, const __m128d b) { 272 | __m128dd sum = _mm_addl_pd(a.lo, b); 273 | __m128d e; 274 | sum.hi = _mm_efadd_pd(a.hi, sum.hi, &e); 275 | sum.lo = _mm_add_pd(sum.lo, e); 276 | return sum; 277 | } 278 | 279 | FPPLUS_STATIC_INLINE __m128dd _mm_add_sdd(const __m128dd a, const __m128dd b) { 280 | const __m128dd s = _mm_addl_sd(a.hi, b.hi); 281 | const __m128dd t = _mm_addl_sd(a.lo, b.lo); 282 | __m128dd v; 283 | v.hi = _mm_efaddord_sd(s.hi, s.lo + t.hi, &v.lo); 284 | __m128dd z; 285 |
z.hi = _mm_efaddord_sd(v.hi, t.lo + v.lo, &z.lo); 286 | return z; 287 | } 288 | 289 | FPPLUS_STATIC_INLINE __m128dd _mm_add_pdd(const __m128dd a, const __m128dd b) { 290 | const __m128dd s = _mm_addl_pd(a.hi, b.hi); 291 | const __m128dd t = _mm_addl_pd(a.lo, b.lo); 292 | __m128dd v; 293 | v.hi = _mm_efaddord_pd(s.hi, s.lo + t.hi, &v.lo); 294 | __m128dd z; 295 | z.hi = _mm_efaddord_pd(v.hi, t.lo + v.lo, &z.lo); 296 | return z; 297 | } 298 | 299 | FPPLUS_STATIC_INLINE __m128dd _mm_add_fast_sdd(const __m128dd a, const __m128dd b) { 300 | __m128dd sum = _mm_addl_sd(a.hi, b.hi); 301 | sum.lo += a.lo + b.lo; 302 | sum.hi = _mm_efaddord_sd(sum.hi, sum.lo, &sum.lo); 303 | return sum; 304 | } 305 | 306 | FPPLUS_STATIC_INLINE __m128dd _mm_add_fast_pdd(const __m128dd a, const __m128dd b) { 307 | __m128dd sum = _mm_addl_pd(a.hi, b.hi); 308 | sum.lo += a.lo + b.lo; 309 | sum.hi = _mm_efaddord_pd(sum.hi, sum.lo, &sum.lo); 310 | return sum; 311 | } 312 | 313 | FPPLUS_STATIC_INLINE __m128dd _mm_mull_sd(const __m128d a, const __m128d b) { 314 | __m128dd product; 315 | product.hi = _mm_efmul_sd(a, b, &product.lo); 316 | return product; 317 | } 318 | 319 | FPPLUS_STATIC_INLINE __m128dd _mm_mull_pd(const __m128d a, const __m128d b) { 320 | __m128dd product; 321 | product.hi = _mm_efmul_pd(a, b, &product.lo); 322 | return product; 323 | } 324 | 325 | FPPLUS_STATIC_INLINE __m128dd _mm_mul_sdd(const __m128dd a, const __m128dd b) { 326 | __m128dd product = _mm_mull_sd(a.hi, b.hi); 327 | #if defined(__FMA__) || defined(__AVX2__) 328 | product.lo = _mm_fmadd_sd(a.lo, b.hi, product.lo); 329 | product.lo = _mm_fmadd_sd(a.hi, b.lo, product.lo); 330 | #else 331 | product.lo = _mm_macc_sd(a.lo, b.hi, product.lo); 332 | product.lo = _mm_macc_sd(a.hi, b.lo, product.lo); 333 | #endif 334 | product.hi = _mm_efaddord_sd(product.hi, product.lo, &product.lo); 335 | return product; 336 | } 337 | 338 | FPPLUS_STATIC_INLINE __m128dd _mm_mul_pdd(const __m128dd a, const __m128dd b) { 339 | __m128dd product = _mm_mull_pd(a.hi, b.hi); 340 | #if defined(__FMA__) || defined(__AVX2__) 341 | product.lo = _mm_fmadd_pd(a.lo, b.hi, product.lo); 342 | product.lo = _mm_fmadd_pd(a.hi, b.lo, product.lo); 343 | #else 344 | product.lo = _mm_macc_pd(a.lo, b.hi, product.lo); 345 | product.lo = _mm_macc_pd(a.hi, b.lo, product.lo); 346 | #endif 347 | product.hi = _mm_efaddord_pd(product.hi, product.lo, &product.lo); 348 | return product; 349 | } 350 | 351 | FPPLUS_STATIC_INLINE doubledouble _mm_cvtsdd_f64dd(const __m128dd x) { 352 | return (doubledouble) { _mm_cvtsd_f64(x.hi), _mm_cvtsd_f64(x.lo) }; 353 | } 354 | 355 | FPPLUS_STATIC_INLINE doubledouble _mm_reduce_add_pdd(const __m128dd x) { 356 | const __m128dd x1 = { 357 | _mm_unpackhi_pd(x.hi, x.hi), 358 | _mm_unpackhi_pd(x.lo, x.lo) 359 | }; 360 | return _mm_cvtsdd_f64dd(_mm_add_sdd(x, x1)); 361 | } 362 | 363 | typedef struct { 364 | __m256d hi; 365 | __m256d lo; 366 | } __m256dd; 367 | 368 | FPPLUS_STATIC_INLINE __m256dd _mm256_setzero_pdd(void) { 369 | return (__m256dd) { _mm256_setzero_pd(), _mm256_setzero_pd() }; 370 | } 371 | 372 | FPPLUS_STATIC_INLINE __m256dd _mm256_broadcast_sdd( 373 | const doubledouble FPPLUS_NONNULL_POINTER(pointer)) 374 | { 375 | return (__m256dd) { _mm256_broadcast_sd(&pointer->hi), _mm256_broadcast_sd(&pointer->lo) }; 376 | } 377 | 378 | FPPLUS_STATIC_INLINE __m256dd _mm256_loaddeinterleave_pdd( 379 | const doubledouble FPPLUS_ARRAY_POINTER(pointer, 4)) 380 | { 381 | const __m256d numbers01 = _mm256_loadu_pd(&pointer[0].hi); 382 | const __m256d numbers23 = 
_mm256_loadu_pd(&pointer[2].hi); 383 | const __m256d numbers12 = _mm256_permute2f128_pd(numbers01, numbers23, 0x21); 384 | const __m256d numbers02 = _mm256_blend_pd(numbers01, numbers12, 0xC); 385 | const __m256d numbers13 = _mm256_blend_pd(numbers23, numbers12, 0x3); 386 | return (__m256dd) { 387 | _mm256_unpacklo_pd(numbers02, numbers13), 388 | _mm256_unpackhi_pd(numbers02, numbers13) 389 | }; 390 | } 391 | 392 | FPPLUS_STATIC_INLINE __m256dd _mm256_loaddeinterleaveu_pdd( 393 | const doubledouble FPPLUS_ARRAY_POINTER(pointer, 4)) 394 | { 395 | const __m256d numbers01 = _mm256_loadu_pd(&pointer[0].hi); 396 | const __m256d numbers23 = _mm256_loadu_pd(&pointer[2].hi); 397 | const __m256d numbers12 = _mm256_permute2f128_pd(numbers01, numbers23, 0x21); 398 | const __m256d numbers02 = _mm256_blend_pd(numbers01, numbers12, 0xC); 399 | const __m256d numbers13 = _mm256_blend_pd(numbers23, numbers12, 0x3); 400 | return (__m256dd) { 401 | _mm256_unpacklo_pd(numbers02, numbers13), 402 | _mm256_unpackhi_pd(numbers02, numbers13) 403 | }; 404 | } 405 | 406 | FPPLUS_STATIC_INLINE void _mm256_interleavestore_pdd( 407 | doubledouble FPPLUS_ARRAY_POINTER(pointer, 4), 408 | __m256dd numbers) 409 | { 410 | const __m256d numbers02 = _mm256_unpacklo_pd(numbers.lo, numbers.hi); 411 | const __m256d numbers13 = _mm256_unpackhi_pd(numbers.lo, numbers.hi); 412 | const __m256d numbers21 = _mm256_permute2f128_pd(numbers02, numbers13, 0x21); 413 | const __m256d numbers01 = _mm256_blend_pd(numbers02, numbers21, 0xC); 414 | const __m256d numbers23 = _mm256_blend_pd(numbers13, numbers21, 0x3); 415 | _mm256_store_pd(&pointer[0].hi, numbers01); 416 | _mm256_store_pd(&pointer[2].hi, numbers23); 417 | } 418 | 419 | FPPLUS_STATIC_INLINE void _mm256_interleavestoreu_pdd( 420 | doubledouble FPPLUS_ARRAY_POINTER(pointer, 4), 421 | __m256dd numbers) 422 | { 423 | const __m256d numbers02 = _mm256_unpacklo_pd(numbers.lo, numbers.hi); 424 | const __m256d numbers13 = _mm256_unpackhi_pd(numbers.lo, numbers.hi); 425 | const __m256d numbers21 = _mm256_permute2f128_pd(numbers02, numbers13, 0x21); 426 | const __m256d numbers01 = _mm256_blend_pd(numbers02, numbers21, 0xC); 427 | const __m256d numbers23 = _mm256_blend_pd(numbers13, numbers21, 0x3); 428 | _mm256_storeu_pd(&pointer[0].hi, numbers01); 429 | _mm256_storeu_pd(&pointer[2].hi, numbers23); 430 | } 431 | 432 | FPPLUS_STATIC_INLINE __m256dd _mm256_addl_pd(const __m256d a, const __m256d b) { 433 | __m256dd sum; 434 | sum.hi = _mm256_efadd_pd(a, b, &sum.lo); 435 | return sum; 436 | } 437 | 438 | FPPLUS_STATIC_INLINE __m256dd _mm256_addw_pdd(const __m256dd a, const __m256d b) { 439 | __m256dd sum = _mm256_addl_pd(a.lo, b); 440 | __m256d e; 441 | sum.hi = _mm256_efadd_pd(a.hi, sum.hi, &e); 442 | sum.lo = _mm256_add_pd(sum.lo, e); 443 | return sum; 444 | } 445 | 446 | FPPLUS_STATIC_INLINE __m256dd _mm256_add_pdd(const __m256dd a, const __m256dd b) { 447 | const __m256dd s = _mm256_addl_pd(a.hi, b.hi); 448 | const __m256dd t = _mm256_addl_pd(a.lo, b.lo); 449 | __m256dd v; 450 | v.hi = _mm256_efaddord_pd(s.hi, s.lo + t.hi, &v.lo); 451 | __m256dd z; 452 | z.hi = _mm256_efaddord_pd(v.hi, t.lo + v.lo, &z.lo); 453 | return z; 454 | } 455 | 456 | FPPLUS_STATIC_INLINE __m256dd _mm256_add_fast_pdd(const __m256dd a, const __m256dd b) { 457 | __m256dd sum = _mm256_addl_pd(a.hi, b.hi); 458 | sum.lo += a.lo + b.lo; 459 | sum.hi = _mm256_efaddord_pd(sum.hi, sum.lo, &sum.lo); 460 | return sum; 461 | } 462 | 463 | FPPLUS_STATIC_INLINE __m256dd _mm256_mull_pd(const __m256d a, const __m256d b) { 464 | 
__m256dd product; 465 | product.hi = _mm256_efmul_pd(a, b, &product.lo); 466 | return product; 467 | } 468 | 469 | FPPLUS_STATIC_INLINE __m256dd _mm256_mul_pdd(const __m256dd a, const __m256dd b) { 470 | __m256dd product = _mm256_mull_pd(a.hi, b.hi); 471 | #if defined(__FMA__) || defined(__AVX2__) 472 | product.lo = _mm256_fmadd_pd(a.lo, b.hi, product.lo); 473 | product.lo = _mm256_fmadd_pd(a.hi, b.lo, product.lo); 474 | #else 475 | product.lo = _mm256_macc_pd(a.lo, b.hi, product.lo); 476 | product.lo = _mm256_macc_pd(a.hi, b.lo, product.lo); 477 | #endif 478 | product.hi = _mm256_efaddord_pd(product.hi, product.lo, &product.lo); 479 | return product; 480 | } 481 | 482 | FPPLUS_STATIC_INLINE doubledouble _mm256_reduce_add_pdd(const __m256dd x) { 483 | const __m128dd x01 = { 484 | _mm256_castpd256_pd128(x.hi), 485 | _mm256_castpd256_pd128(x.lo) 486 | }; 487 | const __m128dd x23 = { 488 | _mm256_extractf128_pd(x.hi, 1), 489 | _mm256_extractf128_pd(x.lo, 1) 490 | }; 491 | return _mm_reduce_add_pdd(_mm_add_pdd(x01, x23)); 492 | } 493 | 494 | #endif /* AVX */ 495 | 496 | #if defined(__AVX512F__) || defined(__KNC__) 497 | 498 | typedef struct { 499 | __m512d hi; 500 | __m512d lo; 501 | } __m512dd; 502 | 503 | FPPLUS_STATIC_INLINE __m512dd _mm512_setzero_pdd(void) { 504 | return (__m512dd) { _mm512_setzero_pd(), _mm512_setzero_pd() }; 505 | } 506 | 507 | FPPLUS_STATIC_INLINE __m512dd _mm512_broadcast_sdd( 508 | const doubledouble FPPLUS_ARRAY_POINTER(pointer, 8)) 509 | { 510 | return (__m512dd) { 511 | _mm512_extload_pd(&pointer->hi, _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE), 512 | _mm512_extload_pd(&pointer->lo, _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE), 513 | }; 514 | } 515 | 516 | FPPLUS_STATIC_INLINE __m512dd _mm512_loaddeinterleave_pdd( 517 | const doubledouble FPPLUS_ARRAY_POINTER(pointer, 8)) 518 | { 519 | const __m512d numbers0123 = _mm512_load_pd(&pointer[0].hi); 520 | const __m512d numbers4567 = _mm512_load_pd(&pointer[4].hi); 521 | const __mmask16 mask_lo = _mm512_int2mask(0xAAAA); 522 | const __mmask16 mask_hi = _mm512_knot(mask_lo); 523 | const __m512d hi04152637 = _mm512_mask_swizzle_pd(numbers0123, mask_lo, numbers4567, _MM_SWIZ_REG_CDAB); 524 | const __m512d lo04152637 = _mm512_mask_swizzle_pd(numbers4567, mask_hi, numbers0123, _MM_SWIZ_REG_CDAB); 525 | const __m512i mask_shuffle = _mm512_setr_epi32(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); 526 | const __m512d hi01234567 = _mm512_castsi512_pd(_mm512_permutevar_epi32(mask_shuffle, _mm512_castpd_si512(hi04152637))); 527 | const __m512d lo01234567 = _mm512_castsi512_pd(_mm512_permutevar_epi32(mask_shuffle, _mm512_castpd_si512(lo04152637))); 528 | return (__m512dd) { hi01234567, lo01234567 }; 529 | } 530 | 531 | FPPLUS_STATIC_INLINE __m512dd _mm512_loaddeinterleaveu_pdd( 532 | const doubledouble FPPLUS_ARRAY_POINTER(pointer, 8)) 533 | { 534 | const __m512i index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, 0, 16, 32, 48, 64, 80, 96, 112); 535 | return (__m512dd) { 536 | _mm512_i32loextgather_pd(index, &pointer->hi, _MM_UPCONV_PD_NONE, 1, _MM_HINT_NONE), 537 | _mm512_i32loextgather_pd(index, &pointer->lo, _MM_UPCONV_PD_NONE, 1, _MM_HINT_NONE) 538 | }; 539 | } 540 | 541 | FPPLUS_STATIC_INLINE void _mm512_interleavestore_pdd( 542 | doubledouble FPPLUS_ARRAY_POINTER(pointer, 8), 543 | __m512dd numbers) 544 | { 545 | const __m512i mask_shuffle = _mm512_setr_epi32(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15); 546 | const __m512d hi04152637 =
_mm512_castsi512_pd(_mm512_permutevar_epi32(mask_shuffle, _mm512_castpd_si512(numbers.hi))); 547 | const __m512d lo04152637 = _mm512_castsi512_pd(_mm512_permutevar_epi32(mask_shuffle, _mm512_castpd_si512(numbers.lo))); 548 | 549 | const __mmask16 mask_lo = _mm512_int2mask(0xAAAA); 550 | const __mmask16 mask_hi = _mm512_knot(mask_lo); 551 | const __m512d numbers0123 = _mm512_mask_swizzle_pd(hi04152637, mask_lo, lo04152637, _MM_SWIZ_REG_CDAB); 552 | const __m512d numbers4567 = _mm512_mask_swizzle_pd(lo04152637, mask_hi, hi04152637, _MM_SWIZ_REG_CDAB); 553 | _mm512_store_pd(&pointer[0].hi, numbers0123); 554 | _mm512_store_pd(&pointer[4].hi, numbers4567); 555 | } 556 | 557 | FPPLUS_STATIC_INLINE void _mm512_interleavestoreu_pdd( 558 | doubledouble FPPLUS_ARRAY_POINTER(pointer, 8), 559 | __m512dd numbers) 560 | { 561 | const __m512i index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, 0, 16, 32, 48, 64, 80, 96, 112); 562 | _mm512_i32loextscatter_pd(&pointer->hi, index, numbers.hi, _MM_DOWNCONV_PD_NONE, 1, _MM_HINT_NONE); 563 | _mm512_i32loextscatter_pd(&pointer->lo, index, numbers.lo, _MM_DOWNCONV_PD_NONE, 1, _MM_HINT_NONE); 564 | } 565 | 566 | FPPLUS_STATIC_INLINE __m512dd _mm512_addl_pd(const __m512d a, const __m512d b) { 567 | __m512dd sum; 568 | sum.hi = _mm512_efadd_pd(a, b, &sum.lo); 569 | return sum; 570 | } 571 | 572 | FPPLUS_STATIC_INLINE __m512dd _mm512_addw_pdd(const __m512dd a, const __m512d b) { 573 | __m512dd sum = _mm512_addl_pd(a.lo, b); 574 | __m512d e; 575 | sum.hi = _mm512_efadd_pd(a.hi, sum.hi, &e); 576 | sum.lo = _mm512_add_round_pd(sum.lo, e, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 577 | return sum; 578 | } 579 | 580 | FPPLUS_STATIC_INLINE __m512dd _mm512_add_pdd(const __m512dd a, const __m512dd b) { 581 | const __m512dd s = _mm512_addl_pd(a.hi, b.hi); 582 | const __m512dd t = _mm512_addl_pd(a.lo, b.lo); 583 | __m512dd v; 584 | v.hi = _mm512_efaddord_pd(s.hi, _mm512_add_round_pd(s.lo, t.hi, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), &v.lo); 585 | __m512dd z; 586 | z.hi = _mm512_efaddord_pd(v.hi, _mm512_add_round_pd(t.lo, v.lo, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), &z.lo); 587 | return z; 588 | } 589 | 590 | FPPLUS_STATIC_INLINE __m512dd _mm512_add_fast_pdd(const __m512dd a, const __m512dd b) { 591 | __m512dd sum = _mm512_addl_pd(a.hi, b.hi); 592 | sum.lo = _mm512_add_round_pd(sum.lo, _mm512_add_round_pd(a.lo, b.lo, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 593 | sum.hi = _mm512_efaddord_pd(sum.hi, sum.lo, &sum.lo); 594 | return sum; 595 | } 596 | 597 | FPPLUS_STATIC_INLINE __m512dd _mm512_mull_pd(const __m512d a, const __m512d b) { 598 | __m512dd product; 599 | product.hi = _mm512_efmul_pd(a, b, &product.lo); 600 | return product; 601 | } 602 | 603 | FPPLUS_STATIC_INLINE __m512dd _mm512_mul_pdd(const __m512dd a, const __m512dd b) { 604 | __m512dd product = _mm512_mull_pd(a.hi, b.hi); 605 | product.lo = _mm512_fmadd_round_pd(a.lo, b.hi, product.lo, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 606 | product.lo = _mm512_fmadd_round_pd(a.hi, b.lo, product.lo, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 607 | product.hi = _mm512_efaddord_pd(product.hi, product.lo, &product.lo); 608 | return product; 609 | } 610 | 611 | FPPLUS_STATIC_INLINE doubledouble _mm512_reduce_add_pdd(const __m512dd x) { 612 | const __m512dd x01234567 = x; 613 | const __m512dd x45670123 = { 614 | _mm512_castps_pd(_mm512_permute4f128_ps(_mm512_castpd_ps(x01234567.hi), _MM_PERM_BADC)), 615 |
_mm512_castps_pd(_mm512_permute4f128_ps(_mm512_castpd_ps(x01234567.lo), _MM_PERM_BADC)) 616 | }; 617 | 618 | const __m512dd y0123 = _mm512_add_pdd(x01234567, x45670123); 619 | const __m512dd y2301 = { 620 | _mm512_swizzle_pd(y0123.hi, _MM_SWIZ_REG_BADC), 621 | _mm512_swizzle_pd(y0123.lo, _MM_SWIZ_REG_BADC) 622 | }; 623 | 624 | 625 | const __m512dd z01 = _mm512_add_pdd(y0123, y2301); 626 | const __m512dd z10 = { 627 | _mm512_swizzle_pd(z01.hi, _MM_SWIZ_REG_CDAB), 628 | _mm512_swizzle_pd(z01.lo, _MM_SWIZ_REG_CDAB), 629 | }; 630 | 631 | const __m512dd r = _mm512_add_pdd(z01, z10); 632 | 633 | union { 634 | __m512d as_vector; 635 | double as_scalar; 636 | } hi, lo; 637 | 638 | hi.as_vector = r.hi; 639 | lo.as_vector = r.lo; 640 | return (doubledouble) { hi.as_scalar, lo.as_scalar }; 641 | } 642 | 643 | #endif /* Intel KNC or AVX-512 */ 644 | 645 | #endif /* FPPLUS_DD_H */ 646 | -------------------------------------------------------------------------------- /include/fpplus/eft.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FPPLUS_EFT_H 3 | #define FPPLUS_EFT_H 4 | 5 | #include <fpplus/common.h> 6 | 7 | /** 8 | * @defgroup EFT Error-free transforms 9 | */ 10 | 11 | /** 12 | * @ingroup EFT 13 | * @brief Error-free addition. 14 | * @details Computes @a s and @p e such that 15 | * - s = a + b rounded to the nearest double 16 | * - a + b = s + e exactly 17 | * 18 | * The algorithm is due to @cite Knuth1997. Implementation follows @cite Shewchuk1997, Theorem 7. 19 | * 20 | * @par Computational complexity 21 | * 22 | * 23 | * 24 | * 25 | *
Operation | Count (default ISA) | Count (with ADD3)
FP ADD    | 6                   | 1
FP ADD3   | -                   | 1
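*
* Example (an illustrative sketch): 1e16 + 1 is not representable in double precision,
* so the sum rounds to 1e16 and the roundoff is recovered in e:
* @code
* double e;
* const double s = efadd(1e16, 1.0, &e); // s == 1e16, e == 1.0
* @endcode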
26 | * 27 | * @param[in] a - addend, the first floating-point number to be added. 28 | * @param[in] b - augend, the second floating-point number to be added. 29 | * @param[out] e - the roundoff error in floating-point addition. 30 | * @return The sum @a s of @p a and @p b rounded to the nearest double-precision number (result of normal floating-point addition). 31 | * 32 | * @post @f$ s = \circ(a + b) @f$ 33 | * @post @f$ s + e = a + b @f$ 34 | */ 35 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 36 | double efadd( 37 | double a, 38 | double b, 39 | double FPPLUS_NONNULL_POINTER(e)) 40 | { 41 | #if defined(__CUDA_ARCH__) 42 | /* CUDA-specific version */ 43 | const double sum = __dadd_rn(a, b); 44 | #if FPPLUS_USE_FPADDRE == 0 45 | const double b_virtual = __dsub_rn(sum, a); 46 | const double a_virtual = __dsub_rn(sum, b_virtual); 47 | const double b_roundoff = __dsub_rn(b, b_virtual); 48 | const double a_roundoff = __dsub_rn(a, a_virtual); 49 | *e = __dadd_rn(a_roundoff, b_roundoff); 50 | #else 51 | *e = addre(a, b); 52 | #endif 53 | /* End of CUDA-specific version */ 54 | #else 55 | /* Generic version */ 56 | const double sum = a + b; 57 | #if FPPLUS_USE_FPADDRE == 0 58 | const double b_virtual = sum - a; 59 | const double a_virtual = sum - b_virtual; 60 | const double b_roundoff = b - b_virtual; 61 | const double a_roundoff = a - a_virtual; 62 | *e = a_roundoff + b_roundoff; 63 | #else 64 | *e = addre(a, b); 65 | #endif 66 | /* End of generic version */ 67 | #endif 68 | return sum; 69 | } 70 | 71 | /** 72 | * @ingroup EFT 73 | * @brief Fast error-free addition of values ordered in magnitude. 74 | * @details Computes @a s and @p e such that 75 | * - s = a + b rounded to the nearest double 76 | * - a + b = s + e exactly 77 | * 78 | * The algorithm is due to @cite Dekker1971. Implementation follows @cite Shewchuk1997, Theorem 6. 79 | * 80 | * @par Computational complexity 81 | * 82 | * 83 | * 84 | * 85 | *
Operation | Count (default ISA) | Count (with ADD3)
FP ADD    | 3                   | 1
FP ADD3   | -                   | 1
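*
* Example (an illustrative sketch; note that the operand larger in magnitude comes first):
* @code
* double e;
* const double s = efaddord(1.0, 0x1.0p-60, &e); // s == 1.0, e == 0x1.0p-60
* @endcode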
86 | * 87 | * @param[in] a - addend, the first floating-point number to be added. Must be not smaller in magnitude than @p b. 88 | * @param[in] b - augend, the second floating-point number to be added. Must be not larger in magnitude than @p a. 89 | * @param[out] e - the roundoff error in floating-point addition. 90 | * @return The sum @a s of @p a and @p b rounded to the nearest double-precision number (result of normal floating-point addition). 91 | * 92 | * @pre @f$ |a| >= |b| @f$ 93 | * @post @f$ s = \circ(a + b) @f$ 94 | * @post @f$ s + e = a + b @f$ 95 | */ 96 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 97 | double efaddord( 98 | double a, 99 | double b, 100 | double FPPLUS_NONNULL_POINTER(e)) 101 | { 102 | #if defined(__CUDA_ARCH__) 103 | /* CUDA-specific version */ 104 | const double sum = __dadd_rn(a, b); 105 | #if FPPLUS_USE_FPADDRE == 0 106 | const double b_virtual = __dsub_rn(sum, a); 107 | *e = __dsub_rn(b, b_virtual); 108 | #else 109 | *e = addre(a, b); 110 | #endif 111 | /* End of CUDA-specific version */ 112 | #else 113 | /* Generic version */ 114 | const double sum = a + b; 115 | #if FPPLUS_USE_FPADDRE == 0 116 | const double b_virtual = sum - a; 117 | *e = b - b_virtual; 118 | #else 119 | *e = addre(a, b); 120 | #endif 121 | /* End of generic version */ 122 | #endif 123 | return sum; 124 | } 125 | 126 | /** 127 | * @ingroup EFT 128 | * @brief Error-free multiplication. 129 | * @details Computes @a p and @p e such that 130 | * - p = a * b rounded to the nearest double 131 | * - a * b = p + e exactly 132 | * 133 | * The implementation follows @cite QD2000, Algorithm 7. 134 | * 135 | * @par Computational complexity 136 | * 137 | * 138 | * 139 | * 140 | *
 * <table>
 * <tr><th>Operation</th><th>Count</th></tr>
 * <tr><td>FP MUL</td><td>1</td></tr>
 * <tr><td>FP FMA</td><td>1</td></tr>
 * </table>
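 * @par Example
 * An illustrative usage sketch added for this write-up (not from the original
 * header):
 * @code
 * double err;
 * const double prod = efmul(0.1, 0.1, &err);
 * // prod is the exact product of the two double operands rounded to the
 * // nearest double; err is the (here nonzero) roundoff, and prod + err
 * // represents the product of the two doubles without error.
 * @endcode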
141 | * 142 | * @param[in] a - multiplicand, the first floating-point number to be multiplied. 143 | * @param[in] b - multiplier, the second floating-point number to be multiplied. 144 | * @param[out] e - the roundoff error in floating-point multiplication. 145 | * @return The product @a p of @p a and @p b rounded to the nearest double-precision number (result of normal floating-point multiplication). 146 | * 147 | * @post @f$ p = \circ(a \times b) @f$ 148 | * @post @f$ p + e = a \times b @f$ 149 | */ 150 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 151 | double efmul( 152 | double a, 153 | double b, 154 | double FPPLUS_NONNULL_POINTER(e)) 155 | { 156 | #if defined(__CUDA_ARCH__) 157 | /* CUDA-specific version */ 158 | const double product = __dmul_rn(a, b); 159 | *e = __fma_rn(a, b, -product); 160 | /* End of CUDA-specific version */ 161 | #else 162 | /* Generic version */ 163 | const double product = a * b; 164 | #if defined(__GNUC__) 165 | *e = __builtin_fma(a, b, -product); 166 | #else 167 | *e = fma(a, b, -product); 168 | #endif 169 | /* End of generic version */ 170 | #endif 171 | return product; 172 | } 173 | 174 | /** 175 | * @ingroup EFT 176 | * @brief Error-free fused multiply-add. 177 | * @details Computes @a m, @p e_high and @p e_low such that 178 | * - m = a * b + c rounded to the nearest double 179 | * - a * b + c = m + e_high + e_low exactly 180 | * - |e_high + e_low| <= 0.5 ulp(m) 181 | * 182 | * The implementation follows @cite BoldoMuller2005, Property 11 with the second enhancement in Section 5.4. 183 | * 184 | * @note The mantissa bits in e_high and e_low may overlap. If you want the normalized error, i.e. |e_low| <= 0.5 ulp(e_high), add 185 | * e_high = efaddord(e_high, e_low, &e_low) 186 | * after a call to effma. 187 | * 188 | * @par Computational complexity 189 | * 190 | * 191 | * 192 | * 193 | * 194 | *
 * <table>
 * <tr><th>Operation</th><th>Count</th></tr>
 * <tr><td>FP ADD</td><td>13</td></tr>
 * <tr><td>FP MUL</td><td>1</td></tr>
 * <tr><td>FP FMA</td><td>2</td></tr>
 * </table>
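 * @par Example
 * An illustrative usage sketch added for this write-up (not from the original
 * header):
 * @code
 * double e_hi, e_lo;
 * const double m = effma(0.1, 10.0, -1.0, &e_hi, &e_lo);
 * // m is fma(0.1, 10.0, -1.0), and m + e_hi + e_lo equals the value
 * // 0.1 * 10.0 - 1.0 exactly (in double operands), exposing the
 * // representation error of the double constant 0.1.
 * @endcode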
195 | * 196 | * @param[in] a - multiplicand, the first floating-point number to be multiplied. 197 | * @param[in] b - multiplier, the second floating-point number to be multiplied. 198 | * @param[in] c - augend, the floating-point number to be added to the intermediate product. 199 | * @param[out] e_high - the high part of the roundoff error in floating-point fused multiply-add operation. 200 | * @param[out] e_low - the low part of the roundoff error in floating-point fused multiply-add operation. 201 | * @return The result @a m of @p a * @p b + @p c rounded to the nearest double-precision number (result of normal fused multiply-add). 202 | * 203 | * @post @f$ m = \circ(a \times b + c) @f$ 204 | * @post @f$ m + e_{high} + e_{low} = a \times b + c @f$ 205 | * @post @f$ | e_{high} + e_{low} | \leq \frac{1}{2} \mathrm{ulp}(m) @f$ 206 | */ 207 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 208 | double effma( 209 | double a, 210 | double b, 211 | double c, 212 | double FPPLUS_NONNULL_POINTER(e_high), 213 | double FPPLUS_NONNULL_POINTER(e_low)) 214 | { 215 | #if defined(__CUDA_ARCH__) 216 | /* CUDA-specific version */ 217 | const double mac = __fma_rn(a, b, c); 218 | double u2; 219 | const double u1 = efmul(a, b, &u2); 220 | const double alpha1 = efadd(c, u2, e_low); 221 | double beta2; 222 | const double beta1 = efadd(u1, alpha1, &beta2); 223 | *e_high = __dadd_rn(__dsub_rn(beta1, mac), beta2); 224 | /* End of CUDA-specific version */ 225 | #else 226 | /* Generic version */ 227 | #if defined(__GNUC__) 228 | const double mac = __builtin_fma(a, b, c); 229 | #else 230 | const double mac = fma(a, b, c); 231 | #endif 232 | double u2; 233 | const double u1 = efmul(a, b, &u2); 234 | const double alpha1 = efadd(c, u2, e_low); 235 | double beta2; 236 | const double beta1 = efadd(u1, alpha1, &beta2); 237 | *e_high = (beta1 - mac) + beta2; 238 | /* End of generic version */ 239 | #endif 240 | return mac; 241 | } 242 | 243 | #if defined(__AVX__) && (defined(__FMA__) || defined(__FMA4__) || defined(__AVX2__)) 244 | 245 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 246 | __m128d _mm_efadd_sd( 247 | __m128d a, 248 | __m128d b, 249 | __m128d FPPLUS_NONNULL_POINTER(e)) 250 | { 251 | const __m128d sum = _mm_add_sd(a, b); 252 | #if FPPLUS_USE_FPADDRE == 0 253 | const __m128d b_virtual = _mm_sub_sd(sum, a); 254 | const __m128d a_virtual = _mm_sub_sd(sum, b_virtual); 255 | const __m128d b_roundoff = _mm_sub_sd(b, b_virtual); 256 | const __m128d a_roundoff = _mm_sub_sd(a, a_virtual); 257 | *e = _mm_add_sd(a_roundoff, b_roundoff); 258 | #else 259 | *e = _mm_addre_sd(a, b); 260 | #endif 261 | return sum; 262 | } 263 | 264 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 265 | __m128d _mm_efadd_pd( 266 | __m128d a, 267 | __m128d b, 268 | __m128d FPPLUS_NONNULL_POINTER(e)) 269 | { 270 | const __m128d sum = _mm_add_pd(a, b); 271 | #if FPPLUS_USE_FPADDRE == 0 272 | const __m128d b_virtual = _mm_sub_pd(sum, a); 273 | const __m128d a_virtual = _mm_sub_pd(sum, b_virtual); 274 | const __m128d b_roundoff = _mm_sub_pd(b, b_virtual); 275 | const __m128d a_roundoff = _mm_sub_pd(a, a_virtual); 276 | *e = _mm_add_pd(a_roundoff, b_roundoff); 277 | #else 278 | *e = _mm_addre_pd(a, b); 279 | #endif 280 | return sum; 281 | } 282 | 283 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 284 | __m128d _mm_efaddord_sd( 285 | __m128d a, 286 | __m128d b, 287 | __m128d FPPLUS_NONNULL_POINTER(e)) 288 | { 289 | const __m128d sum = _mm_add_sd(a, b); 290 | #if FPPLUS_USE_FPADDRE == 0 291 | const __m128d b_virtual
= _mm_sub_sd(sum, a); 292 | *e = _mm_sub_sd(b, b_virtual); 293 | #else 294 | *e = _mm_addre_sd(a, b); 295 | #endif 296 | return sum; 297 | } 298 | 299 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 300 | __m128d _mm_efaddord_pd( 301 | __m128d a, 302 | __m128d b, 303 | __m128d FPPLUS_NONNULL_POINTER(e)) 304 | { 305 | const __m128d sum = _mm_add_pd(a, b); 306 | #if FPPLUS_USE_FPADDRE == 0 307 | const __m128d b_virtual = _mm_sub_pd(sum, a); 308 | *e = _mm_sub_pd(b, b_virtual); 309 | #else 310 | *e = _mm_addre_pd(a, b); 311 | #endif 312 | return sum; 313 | } 314 | 315 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 316 | __m128d _mm_efmul_sd( 317 | __m128d a, 318 | __m128d b, 319 | __m128d FPPLUS_NONNULL_POINTER(e)) 320 | { 321 | const __m128d product = _mm_mul_sd(a, b); 322 | #if defined(__FMA__) || defined(__AVX2__) 323 | *e = _mm_fmsub_sd(a, b, product); 324 | #else 325 | *e = _mm_msub_sd(a, b, product); 326 | #endif 327 | return product; 328 | } 329 | 330 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 331 | __m128d _mm_efmul_pd( 332 | __m128d a, 333 | __m128d b, 334 | __m128d FPPLUS_NONNULL_POINTER(e)) 335 | { 336 | const __m128d product = _mm_mul_pd(a, b); 337 | #if defined(__FMA__) || defined(__AVX2__) 338 | *e = _mm_fmsub_pd(a, b, product); 339 | #else 340 | *e = _mm_msub_pd(a, b, product); 341 | #endif 342 | return product; 343 | } 344 | 345 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 346 | __m128d _mm_effma_pd( 347 | __m128d a, 348 | __m128d b, 349 | __m128d c, 350 | __m128d FPPLUS_NONNULL_POINTER(e_high), 351 | __m128d FPPLUS_NONNULL_POINTER(e_low)) 352 | { 353 | #if defined(__FMA__) || defined(__AVX2__) 354 | const __m128d mac = _mm_fmadd_pd(a, b, c); 355 | #else 356 | const __m128d mac = _mm_macc_pd(a, b, c); 357 | #endif 358 | __m128d u2; 359 | const __m128d u1 = _mm_efmul_pd(a, b, &u2); 360 | const __m128d alpha1 = _mm_efadd_pd(c, u2, e_low); 361 | __m128d beta2; 362 | const __m128d beta1 = _mm_efadd_pd(u1, alpha1, &beta2); 363 | *e_high = _mm_add_pd(_mm_sub_pd(beta1, mac), beta2); 364 | return mac; 365 | } 366 | 367 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 368 | __m256d _mm256_efadd_pd( 369 | __m256d a, 370 | __m256d b, 371 | __m256d FPPLUS_NONNULL_POINTER(e)) 372 | { 373 | const __m256d sum = _mm256_add_pd(a, b); 374 | #if FPPLUS_USE_FPADDRE == 0 375 | const __m256d b_virtual = _mm256_sub_pd(sum, a); 376 | const __m256d a_virtual = _mm256_sub_pd(sum, b_virtual); 377 | const __m256d b_roundoff = _mm256_sub_pd(b, b_virtual); 378 | const __m256d a_roundoff = _mm256_sub_pd(a, a_virtual); 379 | *e = _mm256_add_pd(a_roundoff, b_roundoff); 380 | #else 381 | *e = _mm256_addre_pd(a, b); 382 | #endif 383 | return sum; 384 | } 385 | 386 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 387 | __m256d _mm256_efaddord_pd( 388 | __m256d a, 389 | __m256d b, 390 | __m256d FPPLUS_NONNULL_POINTER(e)) 391 | { 392 | const __m256d sum = _mm256_add_pd(a, b); 393 | #if FPPLUS_USE_FPADDRE == 0 394 | const __m256d b_virtual = _mm256_sub_pd(sum, a); 395 | *e = _mm256_sub_pd(b, b_virtual); 396 | #else 397 | *e = _mm256_addre_pd(a, b); 398 | #endif 399 | return sum; 400 | } 401 | 402 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 403 | __m256d _mm256_efmul_pd( 404 | __m256d a, 405 | __m256d b, 406 | __m256d FPPLUS_NONNULL_POINTER(e)) 407 | { 408 | const __m256d product = _mm256_mul_pd(a, b); 409 | #if defined(__FMA__) || defined(__AVX2__) 410 | *e = _mm256_fmsub_pd(a, b, product); 411 | #else 412 | *e = _mm256_msub_pd(a, b, product); 413 | 
#endif 414 | return product; 415 | } 416 | 417 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 418 | __m256d _mm256_effma_pd( 419 | __m256d a, 420 | __m256d b, 421 | __m256d c, 422 | __m256d FPPLUS_NONNULL_POINTER(e_high), 423 | __m256d FPPLUS_NONNULL_POINTER(e_low)) 424 | { 425 | #if defined(__FMA__) || defined(__AVX2__) 426 | const __m256d mac = _mm256_fmadd_pd(a, b, c); 427 | #else 428 | const __m256d mac = _mm256_macc_pd(a, b, c); 429 | #endif 430 | __m256d u2; 431 | const __m256d u1 = _mm256_efmul_pd(a, b, &u2); 432 | const __m256d alpha1 = _mm256_efadd_pd(c, u2, e_low); 433 | __m256d beta2; 434 | const __m256d beta1 = _mm256_efadd_pd(u1, alpha1, &beta2); 435 | *e_high = _mm256_add_pd(_mm256_sub_pd(beta1, mac), beta2); 436 | return mac; 437 | } 438 | 439 | #endif /* AVX */ 440 | 441 | #if defined(__AVX512F__) || defined(__KNC__) 442 | 443 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 444 | __m512d _mm512_efadd_pd( 445 | __m512d a, 446 | __m512d b, 447 | __m512d FPPLUS_NONNULL_POINTER(e)) 448 | { 449 | const __m512d sum = _mm512_add_round_pd(a, b, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); 450 | #if FPPLUS_USE_FPADDRE == 0 451 | const __m512d b_virtual = _mm512_sub_round_pd(sum, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 452 | const __m512d a_virtual = _mm512_sub_round_pd(sum, b_virtual, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 453 | const __m512d b_roundoff = _mm512_sub_round_pd(b, b_virtual, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 454 | const __m512d a_roundoff = _mm512_sub_round_pd(a, a_virtual, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 455 | *e = _mm512_add_round_pd(a_roundoff, b_roundoff, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 456 | #else 457 | *e = _mm512_addre_pd(a, b); 458 | #endif 459 | return sum; 460 | } 461 | 462 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 463 | __m512d _mm512_efaddord_pd( 464 | __m512d a, 465 | __m512d b, 466 | __m512d FPPLUS_NONNULL_POINTER(e)) 467 | { 468 | const __m512d sum = _mm512_add_round_pd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 469 | #if FPPLUS_USE_FPADDRE == 0 470 | const __m512d b_virtual = _mm512_sub_round_pd(sum, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 471 | *e = _mm512_sub_round_pd(b, b_virtual, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 472 | #else 473 | *e = _mm512_addre_pd(a, b); 474 | #endif 475 | return sum; 476 | } 477 | 478 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 479 | __m512d _mm512_efmul_pd( 480 | __m512d a, 481 | __m512d b, 482 | __m512d FPPLUS_NONNULL_POINTER(e)) 483 | { 484 | const __m512d product = _mm512_mul_round_pd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 485 | *e = _mm512_fmsub_round_pd(a, b, product, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 486 | return product; 487 | } 488 | 489 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 490 | __m512d _mm512_effma_pd( 491 | __m512d a, 492 | __m512d b, 493 | __m512d c, 494 | __m512d FPPLUS_NONNULL_POINTER(e_high), 495 | __m512d FPPLUS_NONNULL_POINTER(e_low)) 496 | { 497 | const __m512d mac = _mm512_fmadd_round_pd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 498 | __m512d u2; 499 | const __m512d u1 = _mm512_efmul_pd(a, b, &u2); 500 | const __m512d alpha1 = _mm512_efadd_pd(c, u2, e_low); 501 | __m512d beta2; 502 | const __m512d beta1 = _mm512_efadd_pd(u1, alpha1, &beta2); 503 | *e_high = _mm512_add_round_pd( 504 | _mm512_sub_round_pd(beta1, mac, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 505 | beta2, 506 | 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 507 | return mac; 508 | } 509 | 510 | #endif /* Intel KNC or AVX-512 */ 511 | 512 | #if defined(__bgq__) 513 | 514 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 515 | vector4double vec_efadd( 516 | vector4double a, 517 | vector4double b, 518 | vector4double FPPLUS_NONNULL_POINTER(e)) 519 | { 520 | const vector4double sum = vec_add(a, b); 521 | #if FPPLUS_USE_FPADDRE == 0 522 | const vector4double b_virtual = vec_sub(sum, a); 523 | const vector4double a_virtual = vec_sub(sum, b_virtual); 524 | const vector4double b_roundoff = vec_sub(b, b_virtual); 525 | const vector4double a_roundoff = vec_sub(a, a_virtual); 526 | *e = vec_add(a_roundoff, b_roundoff); 527 | #else 528 | *e = vec_addre(a, b); 529 | #endif 530 | return sum; 531 | } 532 | 533 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 534 | vector4double vec_efaddord( 535 | vector4double a, 536 | vector4double b, 537 | vector4double FPPLUS_NONNULL_POINTER(e)) 538 | { 539 | const vector4double sum = vec_add(a, b); 540 | #if FPPLUS_USE_FPADDRE == 0 541 | const vector4double b_virtual = vec_sub(sum, a); 542 | *e = vec_sub(b, b_virtual); 543 | #else 544 | *e = vec_addre(a, b); 545 | #endif 546 | return sum; 547 | } 548 | 549 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 550 | vector4double vec_efmul( 551 | vector4double a, 552 | vector4double b, 553 | vector4double FPPLUS_NONNULL_POINTER(e)) 554 | { 555 | const vector4double product = vec_mul(a, b); 556 | *e = vec_msub(a, b, product); 557 | return product; 558 | } 559 | 560 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 561 | vector4double vec_effma( 562 | vector4double a, 563 | vector4double b, 564 | vector4double c, 565 | vector4double FPPLUS_NONNULL_POINTER(e_high), 566 | vector4double FPPLUS_NONNULL_POINTER(e_low)) 567 | { 568 | const vector4double mac = vec_madd(a, b, c); 569 | vector4double u2; 570 | const vector4double u1 = vec_efmul(a, b, &u2); 571 | const vector4double alpha1 = vec_efadd(c, u2, e_low); 572 | vector4double beta2; 573 | const vector4double beta1 = vec_efadd(u1, alpha1, &beta2); 574 | *e_high = vec_add(vec_sub(beta1, mac), beta2); 575 | return mac; 576 | } 577 | 578 | #endif /* Blue Gene/Q */ 579 | 580 | #if defined(__VSX__) 581 | 582 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 583 | __vector double vec_efadd( 584 | __vector double a, 585 | __vector double b, 586 | __vector double FPPLUS_NONNULL_POINTER(e)) 587 | { 588 | const __vector double sum = vec_add(a, b); 589 | #ifndef FPPLUS_EMULATE_FPADDRE 590 | const __vector double b_virtual = vec_sub(sum, a); 591 | const __vector double a_virtual = vec_sub(sum, b_virtual); 592 | const __vector double b_roundoff = vec_sub(b, b_virtual); 593 | const __vector double a_roundoff = vec_sub(a, a_virtual); 594 | *e = vec_add(a_roundoff, b_roundoff); 595 | #else 596 | *e = vec_addre(a, b); 597 | #endif 598 | return sum; 599 | } 600 | 601 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 602 | __vector double vec_efaddord( 603 | __vector double a, 604 | __vector double b, 605 | __vector double FPPLUS_NONNULL_POINTER(e)) 606 | { 607 | const __vector double sum = vec_add(a, b); 608 | #ifndef FPPLUS_EMULATE_FPADDRE 609 | const __vector double b_virtual = vec_sub(sum, a); 610 | *e = vec_sub(b, b_virtual); 611 | #else 612 | *e = vec_addre(a, b); 613 | #endif 614 | return sum; 615 | } 616 | 617 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 618 | __vector double vec_efmul( 619 | __vector double a, 620 | __vector double b, 621 | 
__vector double FPPLUS_NONNULL_POINTER(e)) 622 | { 623 | const __vector double product = vec_mul(a, b); 624 | *e = vec_msub(a, b, product); 625 | return product; 626 | } 627 | 628 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 629 | __vector double vec_effma( 630 | __vector double a, 631 | __vector double b, 632 | __vector double c, 633 | __vector double FPPLUS_NONNULL_POINTER(e_high), 634 | __vector double FPPLUS_NONNULL_POINTER(e_low)) 635 | { 636 | const __vector double mac = vec_madd(a, b, c); 637 | __vector double u2; 638 | const __vector double u1 = vec_efmul(a, b, &u2); 639 | const __vector double alpha1 = vec_efadd(c, u2, e_low); 640 | __vector double beta2; 641 | const __vector double beta1 = vec_efadd(u1, alpha1, &beta2); 642 | *e_high = vec_add(vec_sub(beta1, mac), beta2); 643 | return mac; 644 | } 645 | 646 | #endif /* IBM VSX */ 647 | 648 | #if defined(__ARM_ARCH_8A__) 649 | 650 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 651 | float64x1_t vefadd_f64( 652 | float64x1_t a, 653 | float64x1_t b, 654 | float64x1_t FPPLUS_NONNULL_POINTER(e)) 655 | { 656 | const float64x1_t sum = vadd_f64(a, b); 657 | #ifndef FPPLUS_EMULATE_FPADDRE 658 | const float64x1_t b_virtual = vsub_f64(sum, a); 659 | const float64x1_t a_virtual = vsub_f64(sum, b_virtual); 660 | const float64x1_t b_roundoff = vsub_f64(b, b_virtual); 661 | const float64x1_t a_roundoff = vsub_f64(a, a_virtual); 662 | *e = vadd_f64(a_roundoff, b_roundoff); 663 | #else 664 | *e = vaddre_f64(a, b); 665 | #endif 666 | return sum; 667 | } 668 | 669 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 670 | float64x1_t vefaddord_f64( 671 | float64x1_t a, 672 | float64x1_t b, 673 | float64x1_t FPPLUS_NONNULL_POINTER(e)) 674 | { 675 | const float64x1_t sum = vadd_f64(a, b); 676 | #ifndef FPPLUS_EMULATE_FPADDRE 677 | const float64x1_t b_virtual = vsub_f64(sum, a); 678 | *e = vsub_f64(b, b_virtual); 679 | #else 680 | *e = vaddre_f64(a, b); 681 | #endif 682 | return sum; 683 | } 684 | 685 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 686 | float64x1_t vefmul_f64( 687 | float64x1_t a, 688 | float64x1_t b, 689 | float64x1_t FPPLUS_NONNULL_POINTER(e)) 690 | { 691 | const float64x1_t product = vmul_f64(a, b); 692 | *e = vfma_f64(vneg_f64(product), a, b); 693 | return product; 694 | } 695 | 696 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 697 | float64x1_t veffma_f64( 698 | float64x1_t a, 699 | float64x1_t b, 700 | float64x1_t c, 701 | float64x1_t FPPLUS_NONNULL_POINTER(e_high), 702 | float64x1_t FPPLUS_NONNULL_POINTER(e_low)) 703 | { 704 | const float64x1_t mac = vfma_f64(c, a, b); 705 | float64x1_t u2; 706 | const float64x1_t u1 = vefmul_f64(a, b, &u2); 707 | const float64x1_t alpha1 = vefadd_f64(c, u2, e_low); 708 | float64x1_t beta2; 709 | const float64x1_t beta1 = vefadd_f64(u1, alpha1, &beta2); 710 | *e_high = vadd_f64(vsub_f64(beta1, mac), beta2); 711 | return mac; 712 | } 713 | 714 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 715 | float64x2_t vefaddq_f64( 716 | float64x2_t a, 717 | float64x2_t b, 718 | float64x2_t FPPLUS_NONNULL_POINTER(e)) 719 | { 720 | const float64x2_t sum = vaddq_f64(a, b); 721 | #ifndef FPPLUS_EMULATE_FPADDRE 722 | const float64x2_t b_virtual = vsubq_f64(sum, a); 723 | const float64x2_t a_virtual = vsubq_f64(sum, b_virtual); 724 | const float64x2_t b_roundoff = vsubq_f64(b, b_virtual); 725 | const float64x2_t a_roundoff = vsubq_f64(a, a_virtual); 726 | *e = vaddq_f64(a_roundoff, b_roundoff); 727 | #else 728 | *e = vaddreq_f64(a, b); 729 | #endif 730 | return
sum; 731 | } 732 | 733 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 734 | float64x2_t vefaddordq_f64( 735 | float64x2_t a, 736 | float64x2_t b, 737 | float64x2_t FPPLUS_NONNULL_POINTER(e)) 738 | { 739 | const float64x2_t sum = vaddq_f64(a, b); 740 | #ifndef FPPLUS_EMULATE_FPADDRE 741 | const float64x2_t b_virtual = vsubq_f64(sum, a); 742 | *e = vsubq_f64(b, b_virtual); 743 | #else 744 | *e = vaddreq_f64(a, b); 745 | #endif 746 | return sum; 747 | } 748 | 749 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 750 | float64x2_t vefmulq_f64( 751 | float64x2_t a, 752 | float64x2_t b, 753 | float64x2_t FPPLUS_NONNULL_POINTER(e)) 754 | { 755 | const float64x2_t product = vmulq_f64(a, b); 756 | *e = vfmaq_f64(vnegq_f64(product), a, b); 757 | return product; 758 | } 759 | 760 | FPPLUS_STATIC_INLINE FPPLUS_NONNULL_POINTER_ARGUMENTS 761 | float64x2_t veffmaq_f64( 762 | float64x2_t a, 763 | float64x2_t b, 764 | float64x2_t c, 765 | float64x2_t FPPLUS_NONNULL_POINTER(e_high), 766 | float64x2_t FPPLUS_NONNULL_POINTER(e_low)) 767 | { 768 | const float64x2_t mac = vfmaq_f64(c, a, b); 769 | float64x2_t u2; 770 | const float64x2_t u1 = vefmulq_f64(a, b, &u2); 771 | const float64x2_t alpha1 = vefaddq_f64(c, u2, e_low); 772 | float64x2_t beta2; 773 | const float64x2_t beta1 = vefaddq_f64(u1, alpha1, &beta2); 774 | *e_high = vaddq_f64(vsubq_f64(beta1, mac), beta2); 775 | return mac; 776 | } 777 | 778 | #endif /* ARMv8-A */ 779 | 780 | #endif /* FPPLUS_EFT_H */ 781 | -------------------------------------------------------------------------------- /include/fpplus/fpaddre.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FPPLUS_FPADDRE_H 3 | #define FPPLUS_FPADDRE_H 4 | 5 | #include 6 | 7 | FPPLUS_STATIC_INLINE double addre(double a, double b) { 8 | #if defined(FPPLUS_UARCH_STEAMROLLER) 9 | return __builtin_fma(a, a, b); 10 | #else 11 | return a < b ?
a : b; 12 | #endif 13 | } 14 | 15 | #if defined(__SSE2__) 16 | FPPLUS_STATIC_INLINE __m128d _mm_addre_sd(__m128d a, __m128d b) { 17 | #if defined(FPPLUS_UARCH_STEAMROLLER) 18 | return _mm_fmadd_sd(a, a, b); 19 | #else 20 | return _mm_min_sd(a, b); 21 | #endif 22 | } 23 | 24 | FPPLUS_STATIC_INLINE __m128d _mm_addre_pd(__m128d a, __m128d b) { 25 | #if defined(FPPLUS_UARCH_STEAMROLLER) 26 | return _mm_fmadd_pd(a, a, b); 27 | #else 28 | return _mm_min_pd(a, b); 29 | #endif 30 | } 31 | #endif /* SSE2 */ 32 | 33 | #if defined(__AVX__) 34 | FPPLUS_STATIC_INLINE __m256d _mm256_addre_pd(__m256d a, __m256d b) { 35 | #if defined(FPPLUS_UARCH_STEAMROLLER) 36 | return _mm256_fmadd_pd(a, a, b); 37 | #else 38 | return _mm256_min_pd(a, b); 39 | #endif 40 | } 41 | #endif /* AVX */ 42 | 43 | #if defined(__AVX512F__) || defined(__KNC__) 44 | FPPLUS_STATIC_INLINE __m512d _mm512_addre_pd(__m512d a, __m512d b) { 45 | return _mm512_min_pd(a, b); 46 | } 47 | #endif /* AVX-512 or MIC */ 48 | 49 | #if defined(__bgq__) 50 | FPPLUS_STATIC_INLINE vector4double vec_addre(vector4double a, vector4double b) { 51 | return vec_min(a, b); 52 | } 53 | #endif /* Blue Gene/Q */ 54 | 55 | #if defined(__VSX__) 56 | FPPLUS_STATIC_INLINE __vector double vec_addre(__vector double a, __vector double b) { 57 | return vec_min(a, b); 58 | } 59 | #endif /* IBM VSX */ 60 | 61 | #if defined(__ARM_ARCH_8A__) 62 | FPPLUS_STATIC_INLINE float64x1_t vaddre_f64(float64x1_t a, float64x1_t b) { 63 | return vmin_f64(a, b); 64 | } 65 | 66 | FPPLUS_STATIC_INLINE float64x2_t vaddreq_f64(float64x2_t a, float64x2_t b) { 67 | return vminq_f64(a, b); 68 | } 69 | #endif /* ARMv8-A */ 70 | 71 | #endif /* FPPLUS_FPADDRE_H */ 72 | -------------------------------------------------------------------------------- /include/fpplus/polevl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef FPPLUS_POLEVL_H 3 | #define FPPLUS_POLEVL_H 4 | 5 | #include 6 | 7 | inline static double muladd_horner15(double x, double c0, double c1, double c2, double c3, double c4, double c5, double c6, double c7, double c8, double c9, double c10, double c11, double c12, double c13, double c14, double c15) { 8 | double y = c15; 9 | y = y * x + c14; 10 | y = y * x + c13; 11 | y = y * x + c12; 12 | y = y * x + c11; 13 | y = y * x + c10; 14 | y = y * x + c9; 15 | y = y * x + c8; 16 | y = y * x + c7; 17 | y = y * x + c6; 18 | y = y * x + c5; 19 | y = y * x + c4; 20 | y = y * x + c3; 21 | y = y * x + c2; 22 | y = y * x + c1; 23 | y = y * x + c0; 24 | return y; 25 | } 26 | 27 | inline static double fma_horner15(double x, double c0, double c1, double c2, double c3, double c4, double c5, double c6, double c7, double c8, double c9, double c10, double c11, double c12, double c13, double c14, double c15) { 28 | double y = c15; 29 | y = __builtin_fma(y, x, c14); 30 | y = __builtin_fma(y, x, c13); 31 | y = __builtin_fma(y, x, c12); 32 | y = __builtin_fma(y, x, c11); 33 | y = __builtin_fma(y, x, c10); 34 | y = __builtin_fma(y, x, c9); 35 | y = __builtin_fma(y, x, c8); 36 | y = __builtin_fma(y, x, c7); 37 | y = __builtin_fma(y, x, c6); 38 | y = __builtin_fma(y, x, c5); 39 | y = __builtin_fma(y, x, c4); 40 | y = __builtin_fma(y, x, c3); 41 | y = __builtin_fma(y, x, c2); 42 | y = __builtin_fma(y, x, c1); 43 | y = __builtin_fma(y, x, c0); 44 | return y; 45 | } 46 | 47 | inline static double compensated_horner15(double x, double c0, double c1, double c2, double c3, double c4, double c5, double c6, double c7, double c8, double c9, double c10, double
c11, double c12, double c13, double c14, double c15) { 48 | double addc, mulc; 49 | 50 | double y = efadd(efmul(c15, x, &mulc), c14, &addc); 51 | double yc = addc + mulc; 52 | 53 | y = efadd(efmul(y, x, &mulc), c13, &addc); 54 | yc = __builtin_fma(yc, x, addc + mulc); 55 | 56 | y = efadd(efmul(y, x, &mulc), c12, &addc); 57 | yc = __builtin_fma(yc, x, addc + mulc); 58 | 59 | y = efadd(efmul(y, x, &mulc), c11, &addc); 60 | yc = __builtin_fma(yc, x, addc + mulc); 61 | 62 | y = efadd(efmul(y, x, &mulc), c10, &addc); 63 | yc = __builtin_fma(yc, x, addc + mulc); 64 | 65 | y = efadd(efmul(y, x, &mulc), c9, &addc); 66 | yc = __builtin_fma(yc, x, addc + mulc); 67 | 68 | y = efadd(efmul(y, x, &mulc), c8, &addc); 69 | yc = __builtin_fma(yc, x, addc + mulc); 70 | 71 | y = efadd(efmul(y, x, &mulc), c7, &addc); 72 | yc = __builtin_fma(yc, x, addc + mulc); 73 | 74 | y = efadd(efmul(y, x, &mulc), c6, &addc); 75 | yc = __builtin_fma(yc, x, addc + mulc); 76 | 77 | y = efadd(efmul(y, x, &mulc), c5, &addc); 78 | yc = __builtin_fma(yc, x, addc + mulc); 79 | 80 | y = efadd(efmul(y, x, &mulc), c4, &addc); 81 | yc = __builtin_fma(yc, x, addc + mulc); 82 | 83 | y = efadd(efmul(y, x, &mulc), c3, &addc); 84 | yc = __builtin_fma(yc, x, addc + mulc); 85 | 86 | y = efadd(efmul(y, x, &mulc), c2, &addc); 87 | yc = __builtin_fma(yc, x, addc + mulc); 88 | 89 | y = efadd(efmul(y, x, &mulc), c1, &addc); 90 | yc = __builtin_fma(yc, x, addc + mulc); 91 | 92 | y = efadd(efmul(y, x, &mulc), c0, &addc); 93 | yc = __builtin_fma(yc, x, addc + mulc); 94 | 95 | return y + yc; 96 | } 97 | 98 | #ifdef __MIC__ 99 | 100 | inline static __m512d _mm512_muladd_horner15_pd(__m512d x, double c0, double c1, double c2, double c3, double c4, double c5, double c6, double c7, double c8, double c9, double c10, double c11, double c12, double c13, double c14, double c15) { 101 | __m512d y = _mm512_set1_pd(c15); 102 | y = _mm512_add_round_pd( 103 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 104 | _mm512_set1_pd(c14), 105 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 106 | y = _mm512_add_round_pd( 107 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 108 | _mm512_set1_pd(c13), 109 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 110 | y = _mm512_add_round_pd( 111 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 112 | _mm512_set1_pd(c12), 113 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 114 | y = _mm512_add_round_pd( 115 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 116 | _mm512_set1_pd(c11), 117 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 118 | y = _mm512_add_round_pd( 119 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 120 | _mm512_set1_pd(c10), 121 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 122 | y = _mm512_add_round_pd( 123 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 124 | _mm512_set1_pd(c9), 125 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 126 | y = _mm512_add_round_pd( 127 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 128 | _mm512_set1_pd(c8), 129 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 130 | y = _mm512_add_round_pd( 131 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 132 | _mm512_set1_pd(c7), 133 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 134 | y = _mm512_add_round_pd( 135 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 136 | _mm512_set1_pd(c6), 
137 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 138 | y = _mm512_add_round_pd( 139 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 140 | _mm512_set1_pd(c5), 141 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 142 | y = _mm512_add_round_pd( 143 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 144 | _mm512_set1_pd(c4), 145 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 146 | y = _mm512_add_round_pd( 147 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 148 | _mm512_set1_pd(c3), 149 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 150 | y = _mm512_add_round_pd( 151 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 152 | _mm512_set1_pd(c2), 153 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 154 | y = _mm512_add_round_pd( 155 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 156 | _mm512_set1_pd(c1), 157 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 158 | y = _mm512_add_round_pd( 159 | _mm512_mul_round_pd(y, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), 160 | _mm512_set1_pd(c0), 161 | _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 162 | return y; 163 | } 164 | 165 | inline static __m512d _mm512_fma_horner15_pd(__m512d x, double c0, double c1, double c2, double c3, double c4, double c5, double c6, double c7, double c8, double c9, double c10, double c11, double c12, double c13, double c14, double c15) { 166 | __m512d y = _mm512_set1_pd(c15); 167 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c14), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 168 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c13), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 169 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c12), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 170 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c11), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 171 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c10), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 172 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c9), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 173 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c8), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 174 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c7), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 175 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c6), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 176 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c5), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 177 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c4), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 178 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c3), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 179 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c2), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 180 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c1), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 181 | y = _mm512_fmadd_round_pd(y, x, _mm512_set1_pd(c0), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 182 | return y; 183 | } 184 | 185 | inline static __m512d _mm512_comp_horner15_pd(__m512d x, double c0, double c1, double c2, double c3, double c4, double c5, double c6, double c7, double c8, double c9, double c10, double c11, double c12, double c13, double c14, double c15) { 186 | __m512d addc, mulc; 187 | 188 | __m512d y = _mm512_efadd_pd(_mm512_efmul_pd(_mm512_set1_pd(c15), x, &mulc), _mm512_set1_pd(c14), &addc); 189 | __m512d yc = 
_mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 190 | 191 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c13), &addc); 192 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 193 | 194 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c12), &addc); 195 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 196 | 197 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c11), &addc); 198 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 199 | 200 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c10), &addc); 201 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 202 | 203 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c9), &addc); 204 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 205 | 206 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c8), &addc); 207 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 208 | 209 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c7), &addc); 210 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 211 | 212 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c6), &addc); 213 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 214 | 215 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c5), &addc); 216 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 217 | 218 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c4), &addc); 219 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 220 | 221 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c3), &addc); 222 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 223 | 224 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c2), &addc); 225 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 226 | 227 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c1), &addc); 228 | yc = _mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 229 | 230 | y = _mm512_efadd_pd(_mm512_efmul_pd(y, x, &mulc), _mm512_set1_pd(c0), &addc); 231 | yc = 
_mm512_fmadd_round_pd(yc, x, _mm512_add_round_pd(addc, mulc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 232 | 233 | return _mm512_add_round_pd(y, yc, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 234 | } 235 | 236 | #endif 237 | 238 | #endif /* FPPLUS_POLEVL_H */ 239 | -------------------------------------------------------------------------------- /references.bib: -------------------------------------------------------------------------------- 1 | @article{Shewchuk1997, 2 | title={Adaptive precision floating-point arithmetic and fast robust geometric predicates}, 3 | author={Shewchuk, Jonathan Richard}, 4 | journal={Discrete \& Computational Geometry}, 5 | volume={18}, 6 | number={3}, 7 | pages={305--363}, 8 | year={1997}, 9 | publisher={Springer} 10 | } 11 | 12 | @article{Dekker1971, 13 | title={A floating-point technique for extending the available precision}, 14 | author={Dekker, Theodorus Jozef}, 15 | journal={Numerische Mathematik}, 16 | volume={18}, 17 | number={3}, 18 | pages={224--242}, 19 | year={1971}, 20 | publisher={Springer} 21 | } 22 | 23 | @book{Knuth1997, 24 | author = {Knuth, Donald E.}, 25 | title = {Seminumerical Algorithms}, 26 | series = {The Art of Computer Programming}, 27 | volume = 2, 28 | year = {1997}, 29 | isbn = {0-201-89684-2}, 30 | publisher = {Addison-Wesley}, 31 | address = {Boston, MA, USA}, 32 | } 33 | 34 | @inproceedings{BoldoMuller2005, 35 | title={Some functions computable with a fused-mac}, 36 | author={Boldo, Sylvie and Muller, Jean-Michel}, 37 | booktitle={17th IEEE Symposium on Computer Arithmetic (ARITH-17)}, 38 | pages={52--58}, 39 | year={2005}, 40 | organization={IEEE} 41 | } 42 | 43 | @inproceedings{QD2000, 44 | title={Quad-double arithmetic: Algorithms, implementation, and application}, 45 | author={Hida, Yozo and Li, Xiaoye S and Bailey, David H}, 46 | booktitle={15th IEEE Symposium on Computer Arithmetic}, 47 | pages={155--162}, 48 | year={2000}, 49 | organization={Citeseer} 50 | } 51 | 52 | @book{FPHandbook2009, 53 | title={Handbook of floating-point arithmetic}, 54 | author={Muller, Jean-Michel and Brisebarre, Nicolas and De Dinechin, Florent and Jeannerod, Claude-Pierre and Lefevre, Vincent and Melquiond, Guillaume and Revol, Nathalie and Stehl{\'e}, Damien and Torres, Serge}, 55 | year={2009}, 56 | publisher={Springer Science \& Business Media} 57 | } 58 | -------------------------------------------------------------------------------- /src/code.py: -------------------------------------------------------------------------------- 1 | # This file is part of Peach-Py package and is licensed under the Simplified BSD license. 2 | # See license.rst for the full text of the license.
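# Illustrative usage sketch, added for this write-up rather than taken from
# the original file: CodeWriter collects generated source lines while it is
# the active writer, and CodeBlock raises the indentation level inside a
# `with` scope.
#
#     with CodeWriter() as writer:
#         writer.line("void f(void) {")
#         with CodeBlock():
#             writer.line("return;")
#         writer.line("}")
#     print(str(writer))  # prints the function with an indented body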
3 | 4 | active_writer = None 5 | 6 | 7 | class CodeWriter: 8 | def __init__(self): 9 | self.lines = list() 10 | self.indent = 0 11 | self.previous_writer = None 12 | 13 | def __enter__(self): 14 | global active_writer 15 | self.previous_writer = active_writer 16 | active_writer = self 17 | return self 18 | 19 | def __exit__(self, exc_type, exc_value, traceback): 20 | global active_writer 21 | active_writer = self.previous_writer 22 | self.previous_writer = None 23 | 24 | def line(self, line="", indent=0): 25 | if line != "": 26 | self.lines.append(" "*(self.indent+int(indent)) + str(line)) 27 | else: 28 | self.lines.append(line) 29 | 30 | def indent_line(self, line=""): 31 | self.line(line, indent=1) 32 | 33 | def __str__(self): 34 | return "\n".join(self.lines) 35 | 36 | 37 | class CodeBlock: 38 | def __init__(self, indent=True): 39 | self.indent = bool(indent) 40 | 41 | def __enter__(self): 42 | global active_writer 43 | active_writer.indent += int(self.indent) 44 | return self 45 | 46 | def __exit__(self, exc_type, exc_value, traceback): 47 | global active_writer 48 | active_writer.indent -= int(self.indent) 49 | -------------------------------------------------------------------------------- /src/ddgemm/benchmark.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | 13 | static void benchmark( 14 | ddgemm_function ddgemm, 15 | size_t mr, size_t nr, 16 | size_t iterations, 17 | size_t block_size, 18 | const double a[restrict], 19 | const doubledouble b[restrict], 20 | doubledouble c[restrict]) 21 | { 22 | double iteration_times[iterations]; 23 | const size_t kc = block_size / ((nr + mr) * sizeof(doubledouble)); 24 | for (size_t iteration = 0; iteration < iterations; iteration++) { 25 | const double start_time = high_precision_time(); 26 | 27 | ddgemm(kc, a, b, c); 28 | 29 | iteration_times[iteration] = high_precision_time() - start_time; 30 | } 31 | const double median_time_ns = median_double(iteration_times, iterations); 32 | const double gflops = 2.0 * nr * mr * kc / median_time_ns; 33 | printf("%zu\t" "%zu\t" "%zu\t" "%zu\t" "%.1lf\n", block_size, mr, nr, kc, gflops * 1000.0); 34 | } 35 | 36 | int main(int argc, char *argv[]) { 37 | const struct benchmark_options options = parse_options(argc, argv); 38 | 39 | void* a_array = valloc(options.block_size); 40 | void* b_array = valloc(options.block_size); 41 | void* c_array = valloc(DDGEMM_MR_MAX * DDGEMM_NR_MAX * sizeof(doubledouble)); 42 | for (double* double_array = a_array; double_array != a_array + options.block_size; double_array++) { 43 | *double_array = M_PI; 44 | } 45 | for (double* double_array = b_array; double_array != b_array + options.block_size; double_array++) { 46 | *double_array = M_E; 47 | } 48 | memset(c_array, 0, DDGEMM_MR_MAX * DDGEMM_NR_MAX * sizeof(doubledouble)); 49 | 50 | for (size_t mr = DDGEMM_MR_MIN; mr <= DDGEMM_MR_MAX; mr += DDGEMM_MR_STEP) { 51 | for (size_t nr = DDGEMM_NR_MIN; nr <= DDGEMM_NR_MAX; nr += 1) { 52 | ddgemm_function ddgemm = select_ddgemm_kernel(mr, nr); 53 | benchmark(ddgemm, mr, nr, options.iterations, options.block_size, a_array, b_array, c_array); 54 | } 55 | } 56 | 57 | free(a_array); 58 | free(b_array); 59 | free(c_array); 60 | } 61 | -------------------------------------------------------------------------------- /src/ddgemm/common.h: -------------------------------------------------------------------------------- 1 | #pragma
once 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #include 8 | #include 9 | 10 | 11 | struct benchmark_options { 12 | size_t iterations; 13 | size_t block_size; 14 | }; 15 | 16 | struct benchmark_options parse_options(int argc, char** argv); 17 | 18 | #ifdef __cplusplus 19 | } /* extern "C" */ 20 | #endif 21 | -------------------------------------------------------------------------------- /src/ddgemm/ddgemm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import division 3 | 4 | import sys 5 | import os 6 | import argparse 7 | 8 | root_dir = os.path.dirname(os.path.abspath(__file__)) 9 | sys.path.insert(0, os.path.join(root_dir, "..")) 10 | 11 | 12 | parser = argparse.ArgumentParser(description="DDGEMM kernel generator") 13 | parser.add_argument("--nr-min", dest="nr_min", required=True, type=int, 14 | help="Minimum register tiling of N dimension") 15 | parser.add_argument("--nr-max", dest="nr_max", required=True, type=int, 16 | help="Maximum register tiling of N dimension") 17 | parser.add_argument("--mr-min", dest="mr_min", required=True, type=int, 18 | help="Minimum register tiling of M dimension") 19 | parser.add_argument("--mr-max", dest="mr_max", required=True, type=int, 20 | help="Maximum register tiling of M dimension") 21 | parser.add_argument("--simd", dest="simd", required=True, 22 | choices=("scalar", "sse", "avx", "mic", "armv8", "vsx", "qpx"), 23 | help="SIMD intrinsics") 24 | parser.add_argument("--implementation", dest="implementation", required=True, 25 | help="Output file name for C implementation") 26 | parser.add_argument("--header", dest="header", required=True, 27 | help="Output file name for C/C++ header") 28 | parser.add_argument("--unittest", dest="unittest", required=True, 29 | help="Output file name for C++ unit test") 30 | 31 | 32 | def main(): 33 | options = parser.parse_args() 34 | 35 | from code import CodeWriter, CodeBlock 36 | from simd import SimdOperations 37 | with CodeWriter() as impl: 38 | impl.line("#include ") 39 | impl.line("#include ") 40 | impl.line() 41 | 42 | simd = SimdOperations(options.simd) 43 | for mr in range(options.mr_min, options.mr_max + 1, simd.width): 44 | for nr in range(options.nr_min, options.nr_max + 1): 45 | impl.line("""\ 46 | void ddgemm{mr}x{nr}(size_t k, 47 | const double a[restrict static 2*k*{mr}], 48 | const doubledouble b[restrict static k*{nr}], 49 | doubledouble c[restrict static {mr}*{nr}]) 50 | {{""".format(mr=mr, nr=nr)) 51 | with CodeBlock(): 52 | for m in range(mr // simd.width): 53 | impl.line("{ddvec} {vars};". 
54 | format(ddvec=simd.ddvec, 55 | vars=", ".join("va{m}b{n} = {ddzero}()".format(m=m, n=n, ddzero=simd.ddzero) for n in range(nr)))) 56 | 57 | impl.line("do {") 58 | with CodeBlock(): 59 | for m in range(mr // simd.width): 60 | impl.line("const {ddvec} va{m} = {{ {dload}(a + {index_hi}), {dload}(a + {index_lo}) }};" 61 | .format(ddvec = simd.ddvec, m=m, dload=simd._dload, 62 | index_hi=(2*m)*simd.width, index_lo=(2*m+1)*simd.width)) 63 | impl.line() 64 | 65 | for n in range(nr): 66 | impl.line("{ddvec} vb{n} = {ddbroadcast}(b+{n});".format(ddvec=simd.ddvec, ddbroadcast=simd.ddbroadcast, n=n)) 67 | for m in range(mr // simd.width): 68 | vanbm = "va{m}b{n}".format(m=m, n=n) 69 | impl.line(vanbm + " = " + simd.ddadd(vanbm, simd.ddmul("va" + str(m), "vb" + str(n))) + ";") 70 | impl.line() 71 | 72 | impl.line("a += 2*{mr};".format(mr=mr)) 73 | impl.line("b += {nr};".format(nr=nr)) 74 | impl.line("} while (--k);") 75 | impl.line() 76 | 77 | for m in range(mr // simd.width): 78 | for n in range(nr): 79 | impl.line("{ddvec} vc{m}{n} = {ddloaddeinterleave}(&c[{n}*{mr}+{m}*{simd_width}]);".format( 80 | ddvec=simd.ddvec, m=m, n=n, mr=mr, simd_width=simd.width, ddloaddeinterleave=simd.ddloaddeinterleave)) 81 | impl.line() 82 | 83 | for m in range(mr // simd.width): 84 | for n in range(nr): 85 | impl.line("vc{m}{n} = {ddadd}(vc{m}{n}, va{m}b{n});".format(m=m, n=n, ddadd=simd._ddadd)) 86 | impl.line() 87 | 88 | for m in range(mr // simd.width): 89 | for n in range(nr): 90 | impl.line("{ddinterleavestore}(&c[{n}*{mr}+{m}*{simd_width}], vc{m}{n});".format( 91 | m=m, n=n, mr=mr, simd_width=simd.width, ddinterleavestore=simd.ddinterleavestore)) 92 | impl.line("}") 93 | impl.line() 94 | 95 | with CodeWriter() as header: 96 | header.line("""\ 97 | #pragma once 98 | 99 | #include 100 | 101 | #ifdef __cplusplus 102 | extern "C" {{ 103 | #endif 104 | 105 | #define DDGEMM_MR_MIN {mr_min} 106 | #define DDGEMM_MR_MAX {mr_max} 107 | #define DDGEMM_MR_STEP {mr_step} 108 | #define DDGEMM_NR_MIN {nr_min} 109 | #define DDGEMM_NR_MAX {nr_max} 110 | 111 | typedef void (*ddgemm_function)(size_t, const double*, const doubledouble*, doubledouble*); 112 | 113 | """.format(mr_min=options.mr_min, mr_max=options.mr_max, mr_step=simd.width, nr_min=options.nr_min, nr_max=options.nr_max)) 114 | for mr in range(options.mr_min, options.mr_max + 1, simd.width): 115 | for nr in range(options.nr_min, options.nr_max + 1): 116 | header.line("void ddgemm{mr}x{nr}(size_t k, const double a[], const doubledouble b[], doubledouble c[]);" 117 | .format(mr=mr, nr=nr)) 118 | 119 | header.line() 120 | header.line() 121 | 122 | header.line("static inline ddgemm_function select_ddgemm_kernel(size_t mr, size_t nr) {") 123 | with CodeBlock(): 124 | header.line("switch (mr) {") 125 | with CodeBlock(): 126 | for mr in range(options.mr_min, options.mr_max + 1, simd.width): 127 | header.line("case {mr}:".format(mr=mr)) 128 | with CodeBlock(): 129 | header.line("switch (nr) {") 130 | with CodeBlock(): 131 | for nr in range(options.nr_min, options.nr_max + 1): 132 | header.line("case {nr}:".format(nr=nr)) 133 | header.indent_line("return ddgemm{mr}x{nr};".format(mr=mr, nr=nr)) 134 | header.line("default:") 135 | header.indent_line("return NULL;") 136 | header.line("}") 137 | header.line("default:") 138 | header.indent_line("return NULL;") 139 | header.line("}") 140 | header.line("}") 141 | 142 | header.line(""" 143 | 144 | #ifdef __cplusplus 145 | } /* extern "C" */ 146 | #endif 147 | """) 148 | 149 | 150 | with CodeWriter() as unittest: 151 | 
unittest.line("""\ 152 | #include 153 | #include 154 | 155 | #include 156 | 157 | #include 158 | 159 | #include "ddgemm-tester.h" 160 | 161 | """) 162 | for mr in range(options.mr_min, options.mr_max + 1, simd.width): 163 | for nr in range(options.nr_min, options.nr_max + 1): 164 | unittest.line("""\ 165 | TEST(ddgemm, ukernel{mr}x{nr}) {{ 166 | DDGEMMTester<{mr}, {nr}, {simd_width}, ddgemm{mr}x{nr}>().test(); 167 | }} 168 | """.format(mr=mr, nr=nr, simd_width=simd.width)) 169 | 170 | unittest.line("""\ 171 | int main(int argc, char* argv[]) { 172 | testing::InitGoogleTest(&argc, argv); 173 | return RUN_ALL_TESTS(); 174 | } 175 | """) 176 | 177 | with open(options.implementation, "w") as impl_file: 178 | impl_file.write(str(impl)) 179 | 180 | with open(options.header, "w") as header_file: 181 | header_file.write(str(header)) 182 | 183 | with open(options.unittest, "w") as unittest_file: 184 | unittest_file.write(str(unittest)) 185 | 186 | 187 | if __name__ == "__main__": 188 | sys.exit(main()) 189 | -------------------------------------------------------------------------------- /src/ddgemm/options.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | 8 | static void print_options_help(const char* program_name) { 9 | printf( 10 | "%s -b block-size [-i iterations]\n" 11 | "Required parameters:\n" 12 | " -b --block-size The size of block processed in micro-kernel (usually L1 cache size)\n" 13 | "Optional parameters:\n" 14 | " -i --iterations The number of benchmark iterations (default: 1000)\n", 15 | program_name); 16 | } 17 | 18 | struct benchmark_options parse_options(int argc, char** argv) { 19 | struct benchmark_options options = { 20 | .iterations = 1000, 21 | .block_size = 0, 22 | }; 23 | for (int argi = 1; argi < argc; argi += 1) { 24 | if ((strcmp(argv[argi], "--block-size") == 0) || (strcmp(argv[argi], "-b") == 0)) { 25 | if (argi + 1 == argc) { 26 | fprintf(stderr, "Error: expected block size value\n"); 27 | exit(EXIT_FAILURE); 28 | } 29 | if (sscanf(argv[argi + 1], "%zu", &options.block_size) != 1) { 30 | fprintf(stderr, "Error: can not parse %s as an unsigned integer\n", argv[argi + 1]); 31 | exit(EXIT_FAILURE); 32 | } 33 | if (options.block_size == 0) { 34 | fprintf(stderr, "Error: invalid value %s for the block size: positive value expected\n", argv[argi + 1]); 35 | exit(EXIT_FAILURE); 36 | } 37 | argi += 1; 38 | } else if ((strcmp(argv[argi], "--iterations") == 0) || (strcmp(argv[argi], "-i") == 0)) { 39 | if (argi + 1 == argc) { 40 | fprintf(stderr, "Error: expected iterations value\n"); 41 | exit(EXIT_FAILURE); 42 | } 43 | if (sscanf(argv[argi + 1], "%zu", &options.iterations) != 1) { 44 | fprintf(stderr, "Error: can not parse %s as an unsigned integer\n", argv[argi + 1]); 45 | exit(EXIT_FAILURE); 46 | } 47 | if (options.iterations == 0) { 48 | fprintf(stderr, "Error: invalid value %s for the number of iterations: positive value expected\n", argv[argi + 1]); 49 | exit(EXIT_FAILURE); 50 | } 51 | argi += 1; 52 | } else if ((strcmp(argv[argi], "--help") == 0) || (strcmp(argv[argi], "-h") == 0)) { 53 | print_options_help(argv[0]); 54 | exit(EXIT_SUCCESS); 55 | } else { 56 | fprintf(stderr, "Error: unknown argument '%s'\n", argv[argi]); 57 | print_options_help(argv[0]); 58 | exit(EXIT_FAILURE); 59 | } 60 | } 61 | if (options.block_size == 0) { 62 | fprintf(stderr, "Error: the block size is not specified\n"); 63 | print_options_help(argv[0]); 64 | exit(EXIT_FAILURE); 65 | } 66 | return 
options; 67 | } 68 | -------------------------------------------------------------------------------- /src/dot/benchmark.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | 13 | static void benchmark_dot_product( 14 | dot_product_function dot, 15 | const char* name, 16 | size_t unroll_factor, 17 | size_t iterations, 18 | size_t elements, const double a[restrict static elements], const double b[restrict static elements]) 19 | { 20 | uint64_t iteration_ticks[iterations]; 21 | for (size_t iteration = 0; iteration < iterations; iteration++) { 22 | const uint64_t start_ticks = cpu_ticks(); 23 | 24 | dot(elements, a, b); 25 | 26 | iteration_ticks[iteration] = cpu_ticks() - start_ticks; 27 | } 28 | const uint64_t median_ticks = median_uint64(iteration_ticks, iterations); 29 | printf("double\t" "%s\t" "%zu\t" "%10zu\t" "%.2lf\n", 30 | name, unroll_factor, elements, ((double) median_ticks) / ((double) elements)); 31 | } 32 | 33 | static void benchmark_compensated_dot_product( 34 | compensated_dot_product_function dot, 35 | const char* name, 36 | size_t unroll_factor, 37 | size_t iterations, 38 | size_t elements, const double a[restrict static elements], const double b[restrict static elements]) 39 | { 40 | uint64_t iteration_ticks[iterations]; 41 | for (size_t iteration = 0; iteration < iterations; iteration++) { 42 | const uint64_t start_ticks = cpu_ticks(); 43 | 44 | dot(elements, a, b); 45 | 46 | iteration_ticks[iteration] = cpu_ticks() - start_ticks; 47 | } 48 | const uint64_t median_ticks = median_uint64(iteration_ticks, iterations); 49 | printf("compensated\t" "%s\t" "%zu\t" "%10zu\t" "%.2lf\n", 50 | name, unroll_factor, elements, ((double) median_ticks) / ((double) elements)); 51 | } 52 | 53 | int main(int argc, char *argv[]) { 54 | const struct benchmark_options options = parse_options(argc, argv); 55 | 56 | void* a_array = valloc(options.array_size); 57 | void* b_array = valloc(options.array_size); 58 | for (double* double_array = a_array; double_array != a_array + options.array_size; double_array++) { 59 | *double_array = M_PI; 60 | } 61 | for (double* double_array = b_array; double_array != b_array + options.array_size; double_array++) { 62 | *double_array = M_E; 63 | } 64 | const size_t array_elements = options.array_size / sizeof(double); 65 | 66 | benchmark_dot_product(dot_product_muladd_unroll1, "mul+add", 1, 67 | options.iterations, array_elements, a_array, b_array); 68 | benchmark_dot_product(dot_product_muladd_unroll2, "mul+add", 2, 69 | options.iterations, array_elements, a_array, b_array); 70 | benchmark_dot_product(dot_product_muladd_unroll3, "mul+add", 3, 71 | options.iterations, array_elements, a_array, b_array); 72 | benchmark_dot_product(dot_product_muladd_unroll4, "mul+add", 4, 73 | options.iterations, array_elements, a_array, b_array); 74 | benchmark_dot_product(dot_product_muladd_unroll5, "mul+add", 5, 75 | options.iterations, array_elements, a_array, b_array); 76 | benchmark_dot_product(dot_product_muladd_unroll6, "mul+add", 6, 77 | options.iterations, array_elements, a_array, b_array); 78 | benchmark_dot_product(dot_product_muladd_unroll7, "mul+add", 7, 79 | options.iterations, array_elements, a_array, b_array); 80 | benchmark_dot_product(dot_product_muladd_unroll8, "mul+add", 8, 81 | options.iterations, array_elements, a_array, b_array); 82 | 83 | benchmark_dot_product(dot_product_fma_unroll1, "fma", 1, 84 
| options.iterations, array_elements, a_array, b_array); 85 | benchmark_dot_product(dot_product_fma_unroll2, "fma", 2, 86 | options.iterations, array_elements, a_array, b_array); 87 | benchmark_dot_product(dot_product_fma_unroll3, "fma", 3, 88 | options.iterations, array_elements, a_array, b_array); 89 | benchmark_dot_product(dot_product_fma_unroll4, "fma", 4, 90 | options.iterations, array_elements, a_array, b_array); 91 | benchmark_dot_product(dot_product_fma_unroll5, "fma", 5, 92 | options.iterations, array_elements, a_array, b_array); 93 | benchmark_dot_product(dot_product_fma_unroll6, "fma", 6, 94 | options.iterations, array_elements, a_array, b_array); 95 | benchmark_dot_product(dot_product_fma_unroll7, "fma", 7, 96 | options.iterations, array_elements, a_array, b_array); 97 | benchmark_dot_product(dot_product_fma_unroll8, "fma", 8, 98 | options.iterations, array_elements, a_array, b_array); 99 | 100 | benchmark_compensated_dot_product(compensated_dot_product_efmuladd_unroll1, "efmul+efadd", 1, 101 | options.iterations, array_elements, a_array, b_array); 102 | benchmark_compensated_dot_product(compensated_dot_product_efmuladd_unroll2, "efmul+efadd", 2, 103 | options.iterations, array_elements, a_array, b_array); 104 | benchmark_compensated_dot_product(compensated_dot_product_efmuladd_unroll3, "efmul+efadd", 3, 105 | options.iterations, array_elements, a_array, b_array); 106 | benchmark_compensated_dot_product(compensated_dot_product_efmuladd_unroll4, "efmul+efadd", 4, 107 | options.iterations, array_elements, a_array, b_array); 108 | benchmark_compensated_dot_product(compensated_dot_product_efmuladd_unroll5, "efmul+efadd", 5, 109 | options.iterations, array_elements, a_array, b_array); 110 | benchmark_compensated_dot_product(compensated_dot_product_efmuladd_unroll6, "efmul+efadd", 6, 111 | options.iterations, array_elements, a_array, b_array); 112 | benchmark_compensated_dot_product(compensated_dot_product_efmuladd_unroll7, "efmul+efadd", 7, 113 | options.iterations, array_elements, a_array, b_array); 114 | benchmark_compensated_dot_product(compensated_dot_product_efmuladd_unroll8, "efmul+efadd", 8, 115 | options.iterations, array_elements, a_array, b_array); 116 | 117 | free(a_array); 118 | free(b_array); 119 | } 120 | -------------------------------------------------------------------------------- /src/dot/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #include 8 | #include 9 | 10 | 11 | struct benchmark_options { 12 | size_t iterations; 13 | size_t array_size; 14 | }; 15 | 16 | struct benchmark_options parse_options(int argc, char** argv); 17 | 18 | #ifdef __cplusplus 19 | } /* extern "C" */ 20 | #endif 21 | -------------------------------------------------------------------------------- /src/dot/dot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | import argparse 6 | 7 | root_dir = os.path.dirname(os.path.abspath(__file__)) 8 | sys.path.insert(0, os.path.join(root_dir, "..")) 9 | 10 | from code import CodeWriter, CodeBlock 11 | 12 | 13 | parser = argparse.ArgumentParser(description="Dot product kernel generator") 14 | parser.add_argument("--unroll-min", dest="unroll_min", required=True, type=int, 15 | help="Minimum unroll factor") 16 | parser.add_argument("--unroll-max", dest="unroll_max", required=True, type=int, 17 | help="Maximum unroll factor") 18 | 
parser.add_argument("--simd", dest="simd", choices=("scalar", "sse", "avx", "mic", "armv8", "vsx", "qpx"), 19 | help="SIMD intrinsics") 20 | parser.add_argument("--implementation", dest="implementation", required=True, 21 | help="Output file name for C implementation") 22 | parser.add_argument("--header", dest="header", required=True, 23 | help="Output file name for C/C++ header") 24 | parser.add_argument("--unittest", dest="unittest", required=True, 25 | help="Output file name for C++ unit test") 26 | 27 | 28 | def generate_dot_product(code, simd, unroll_factor, fma): 29 | code.line("""\ 30 | double dot_product_{fma_or_mac}_unroll{unroll_factor}( 31 | size_t n, 32 | const double a[restrict static n], 33 | const double b[restrict static n]) 34 | {{""".format(unroll_factor=unroll_factor, fma_or_mac="fma" if fma else "muladd")) 35 | with CodeBlock(): 36 | for i in range(unroll_factor): 37 | code.line("{dvec} vsum{i} = {dzero};" 38 | .format(dvec=simd.dvec, dzero=simd.dzero(), i=i)) 39 | code.line("for (; n >= {elements_per_loop}; n -= {elements_per_loop}) {{" 40 | .format(elements_per_loop=simd.width * unroll_factor)) 41 | 42 | with CodeBlock() as vector_loop: 43 | for i in range(unroll_factor): 44 | code.line("const {dvec} va{i} = {dload}(a+{offset});" 45 | .format(dvec=simd.dvec, dload=simd._dload, i=i, offset=i*simd.width)) 46 | for i in range(unroll_factor): 47 | code.line("const {dvec} vb{i} = {dload}(b+{offset});" 48 | .format(dvec=simd.dvec, dload=simd._dload, i=i, offset=i*simd.width)) 49 | for i in range(unroll_factor): 50 | if fma: 51 | code.line("vsum{i} = {dfma}(va{i}, vb{i}, vsum{i});" 52 | .format(dvec=simd.dvec, dfma=simd._dfma, i=i)) 53 | else: 54 | code.line("vsum{i} = {dadd}(vsum{i}, {dmul}(va{i}, vb{i}));" 55 | .format(dvec=simd.dvec, dadd=simd._dadd, dmul=simd._dmul, i=i)) 56 | 57 | code.line("a += {elements_per_loop};".format(elements_per_loop=simd.width * unroll_factor)) 58 | code.line("b += {elements_per_loop};".format(elements_per_loop=simd.width * unroll_factor)) 59 | code.line("}") 60 | 61 | # Reduction of multiple SIMD vectors into a single SIMD vector 62 | reduction_offset = 1 63 | while reduction_offset <= unroll_factor: 64 | for i in range(0, unroll_factor - reduction_offset, 2 * reduction_offset): 65 | code.line("vsum{i} = {dadd}(vsum{i}, vsum{next_i});" 66 | .format(dadd=simd._dadd, i=i, next_i=i + reduction_offset)) 67 | reduction_offset *= 2 68 | 69 | # Reduction of a SIMD vector into a scalar 70 | assert simd.name in ["avx", "mic"] 71 | if simd.name == "avx": 72 | code.line("double sum = _mm256_reduce_add_pd(vsum0);") 73 | elif simd.name == "mic": 74 | code.line("double sum = _mm512_reduce_add_pd(vsum0);") 75 | 76 | code.line("while (n--) {") 77 | with CodeBlock() as scalar_loop: 78 | if fma: 79 | code.line("#if defined(__GNUC__)") 80 | code.indent_line("sum = __builtin_fma(*a++, *b++, sum);") 81 | code.line("#else") 82 | code.indent_line("sum = fma(*a++, *b++, sum);") 83 | code.line("#endif") 84 | else: 85 | code.line("sum += (*a++) * (*b++);") 86 | code.line("}") 87 | code.line("return sum;"); 88 | 89 | code.line("}") 90 | code.line() 91 | 92 | 93 | def generate_compensated_dot_product(code, simd, unroll_factor): 94 | code.line(""" 95 | doubledouble compensated_dot_product_efmuladd_unroll{unroll_factor}( 96 | size_t n, 97 | const double a[restrict static n], 98 | const double b[restrict static n]) 99 | {{""".format(unroll_factor=unroll_factor)) 100 | with CodeBlock(): 101 | for i in range(unroll_factor): 102 | code.line("{ddvec} vsum{i} = 
{ddzero}();".format(ddvec=simd.ddvec, ddzero=simd.ddzero, i=i)) 103 | code.line("for (; n>= {elements_per_loop}; n -= {elements_per_loop}) {{" 104 | .format(elements_per_loop=simd.width * unroll_factor)) 105 | with CodeBlock(): 106 | for index in range(unroll_factor): 107 | code.line("const {dvec} va{index} = {dload}(a+{offset});" 108 | .format(dvec=simd.dvec, dload=simd._dload, index=index, offset=index*simd.width)) 109 | for index in range(unroll_factor): 110 | code.line("const {dvec} vb{index} = {dload}(b+{offset});" 111 | .format(dvec=simd.dvec, dload=simd._dload, index=index, offset=index*simd.width)) 112 | for index in range(unroll_factor): 113 | code.line("{dvec} vproduct{index}_error, vsum{index}_error;" 114 | .format(dvec=simd.dvec, index=index)) 115 | for index in range(unroll_factor): 116 | code.line("const {dvec} vproduct{index} = {defmul}(va{index}, vb{index}, &vproduct{index}_error);" 117 | .format(dvec=simd.dvec, defmul=simd._defmul, index=index)) 118 | for index in range(unroll_factor): 119 | code.line("vsum{index}.hi = {defadd}(vsum{index}.hi, vproduct{index}, &vsum{index}_error);" 120 | .format(defadd=simd._defadd, index=index)) 121 | for index in range(unroll_factor): 122 | code.line("vsum{index}.lo = {dadd}(vsum{index}.lo, {dadd}(vsum{index}_error, vproduct{index}_error));" 123 | .format(dadd=simd._dadd, index=index)) 124 | code.line("a += {elements_per_loop};".format(elements_per_loop=simd.width * unroll_factor)) 125 | code.line("b += {elements_per_loop};".format(elements_per_loop=simd.width * unroll_factor)) 126 | code.line("}") 127 | 128 | # Reduction of multiple SIMD vectors into a single SIMD vector 129 | reduction_offset = 1 130 | while reduction_offset <= unroll_factor: 131 | for i in range(0, unroll_factor - reduction_offset, 2 * reduction_offset): 132 | code.line("vsum{i} = {ddadd}(vsum{i}, vsum{next_i});" 133 | .format(ddadd=simd._ddadd, i=i, next_i=i + reduction_offset)) 134 | reduction_offset *= 2 135 | 136 | # Reduction of a SIMD vector into a scalar 137 | assert simd.name in ["avx", "mic"] 138 | if simd.name == "avx": 139 | code.line("doubledouble sum = _mm256_reduce_add_pdd(vsum0);") 140 | elif simd.name == "mic": 141 | code.line("doubledouble sum = _mm512_reduce_add_pdd(vsum0);") 142 | code.line("while (n--) {") 143 | with CodeBlock(): 144 | code.line("double product_error, sum_error;") 145 | code.line("const double product = efmul(*a++, *b++, &product_error);") 146 | code.line("sum.hi = efadd(sum.hi, product, &sum_error);") 147 | code.line("sum.lo += (sum_error + product_error);") 148 | code.line("}") 149 | code.line("/* Normalize */") 150 | code.line("sum.hi = efaddord(sum.hi, sum.lo, &sum.lo);") 151 | code.line("return sum;"); 152 | 153 | code.line("}") 154 | code.line() 155 | 156 | 157 | def generate_dot_product_declaration(header, unroll_factor, implementation): 158 | header.line({ 159 | "mac": "double dot_product_muladd_unroll{unroll_factor}(size_t n, const double a[], const double b[]);", 160 | "fma": "double dot_product_fma_unroll{unroll_factor}(size_t n, const double a[], const double b[]);", 161 | "compensated": "doubledouble compensated_dot_product_efmuladd_unroll{unroll_factor}(size_t n, const double a[], const double b[]);" 162 | }[implementation].format(unroll_factor=unroll_factor)) 163 | 164 | 165 | def generate_dot_product_unittest(unittest, unroll_factor, implementation): 166 | unittest.line("""\ 167 | TEST({operation}, {implementation}_unroll{unroll_factor}) {{ 168 | DotTester() 169 | 
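
The generated code above leans on FPplus's error-free transforms (`efmul`, `efadd`, `efaddord`). As a self-contained illustration of the accumulation pattern, the following sketch writes the transforms out locally (Knuth's 2Sum and an FMA-based 2Prod) instead of calling the library; it assumes round-to-nearest double arithmetic and hardware FMA.

```c
/*
 * Illustrative sketch of the compensated accumulation in the scalar
 * tail above. two_sum/two_prod are local stand-ins for the library's
 * efadd/efmul; dd_t mirrors the doubledouble hi/lo pair.
 */
#include <math.h>
#include <stddef.h>

typedef struct { double hi, lo; } dd_t;

static double two_sum(double a, double b, double *e) {
    const double s  = a + b;
    const double bv = s - a;            /* "virtual" b */
    *e = (a - (s - bv)) + (b - bv);     /* exact roundoff of a + b */
    return s;
}

static double two_prod(double a, double b, double *e) {
    const double p = a * b;
    *e = fma(a, b, -p);                 /* exact roundoff of a * b */
    return p;
}

static dd_t compensated_dot_sketch(size_t n, const double *a, const double *b) {
    dd_t sum = { 0.0, 0.0 };
    while (n--) {
        double perr, serr;
        const double p = two_prod(*a++, *b++, &perr);
        sum.hi = two_sum(sum.hi, p, &serr);
        sum.lo += serr + perr;          /* fold both roundoffs into the tail */
    }
    return sum;                         /* normalize with a fast two-sum if needed */
}
```
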
.test{test_method}({function}_unroll{unroll_factor}); 170 | }} 171 | """.format( 172 | operation="compensated_dot_product" if implementation == "compensated" else "dot_product", 173 | implementation=implementation, 174 | unroll_factor=unroll_factor, 175 | test_method="CompensatedDotProduct" if implementation == "compensated" else "DotProduct", 176 | function={ 177 | "mac": "dot_product_muladd", 178 | "fma": "dot_product_fma", 179 | "compensated": "compensated_dot_product_efmuladd" 180 | }[implementation])) 181 | 182 | 183 | def main(): 184 | options = parser.parse_args() 185 | 186 | from simd import SimdOperations 187 | with CodeWriter() as implementation: 188 | implementation.line("""\ 189 | #include 190 | 191 | #include 192 | """) 193 | 194 | simd = SimdOperations(options.simd) 195 | if simd.name == "avx": 196 | implementation.line(""" 197 | FPPLUS_STATIC_INLINE double _mm_reduce_add_pd(const __m128d x) { 198 | const __m128d x_hi = _mm_unpackhi_pd(x, x); 199 | const __m128d sum = _mm_add_sd(x, x_hi); 200 | return _mm_cvtsd_f64(sum); 201 | } 202 | 203 | FPPLUS_STATIC_INLINE double _mm256_reduce_add_pd(const __m256d x) { 204 | const __m128d x_lo = _mm256_castpd256_pd128(x); 205 | const __m128d x_hi = _mm256_extractf128_pd(x, 1); 206 | return _mm_reduce_add_pd(_mm_add_pd(x_lo, x_hi)); 207 | } 208 | """) 209 | 210 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 211 | generate_dot_product(implementation, simd, unroll_factor, fma=False) 212 | 213 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 214 | generate_dot_product(implementation, simd, unroll_factor, fma=True) 215 | 216 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 217 | generate_compensated_dot_product(implementation, simd, unroll_factor) 218 | 219 | with CodeWriter() as header: 220 | header.line("""\ 221 | #pragma once 222 | 223 | #ifdef __cplusplus 224 | extern "C" { 225 | #endif 226 | 227 | #include 228 | 229 | #include 230 | 231 | typedef double (*dot_product_function)(size_t, const double*, const double*); 232 | typedef doubledouble (*compensated_dot_product_function)(size_t, const double*, const double*); 233 | """) 234 | 235 | header.line("/* Dot product based on multiplication and addition (with intermediate rounding) */") 236 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 237 | generate_dot_product_declaration(header, unroll_factor, "mac") 238 | header.line() 239 | 240 | header.line("/* Dot product based on fused multiply-add */") 241 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 242 | generate_dot_product_declaration(header, unroll_factor, "fma") 243 | header.line() 244 | 245 | header.line("/* compensated dot product based on error-free multiplication and error-free addition */") 246 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 247 | generate_dot_product_declaration(header, unroll_factor, "compensated") 248 | 249 | header.line(""" 250 | #ifdef __cplusplus 251 | } /* extern "C" */ 252 | #endif""") 253 | 254 | with CodeWriter() as unittest: 255 | unittest.line("""\ 256 | #include 257 | #include 258 | 259 | #include 260 | 261 | #include 262 | 263 | #include "dot-tester.h" 264 | 265 | """) 266 | 267 | unittest.line("/* Dot product based on multiplication and addition (with intermediate rounding) */") 268 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 269 | generate_dot_product_unittest(unittest, unroll_factor, "mac") 270 | unittest.line() 271 
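
For `--simd avx` the writer above emits roughly the following shape (shown for unroll factor 2, FMA variant). This is reconstructed from the template strings, not copied from generated output; it assumes 32-byte-aligned inputs (as provided by `valloc` in the benchmark) and a compiler with AVX and FMA enabled.

```c
/* Rough shape of dot_product_fma_unroll2 for --simd avx. The horizontal
 * reduction matches the _mm256_reduce_add_pd helper the generator also
 * emits (see main() above). */
#include <immintrin.h>
#include <math.h>
#include <stddef.h>

double dot_product_fma_unroll2_sketch(size_t n, const double *a, const double *b) {
    __m256d vsum0 = _mm256_setzero_pd();
    __m256d vsum1 = _mm256_setzero_pd();
    for (; n >= 8; n -= 8) {                 /* 2 vectors x 4 doubles */
        vsum0 = _mm256_fmadd_pd(_mm256_load_pd(a + 0), _mm256_load_pd(b + 0), vsum0);
        vsum1 = _mm256_fmadd_pd(_mm256_load_pd(a + 4), _mm256_load_pd(b + 4), vsum1);
        a += 8; b += 8;
    }
    vsum0 = _mm256_add_pd(vsum0, vsum1);     /* reduction of the accumulators */
    const __m128d lo = _mm256_castpd256_pd128(vsum0);
    const __m128d hi = _mm256_extractf128_pd(vsum0, 1);
    const __m128d s2 = _mm_add_pd(lo, hi);
    double sum = _mm_cvtsd_f64(_mm_add_sd(s2, _mm_unpackhi_pd(s2, s2)));
    while (n--) sum = fma(*a++, *b++, sum);  /* scalar tail */
    return sum;
}
```
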
| 272 | unittest.line("/* Dot product based on fused multiply-add */") 273 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 274 | generate_dot_product_unittest(unittest, unroll_factor, "fma") 275 | unittest.line() 276 | 277 | unittest.line("/* compensated dot product based on error-free multiplication and error-free addition */") 278 | for unroll_factor in range(options.unroll_min, options.unroll_max + 1): 279 | generate_dot_product_unittest(unittest, unroll_factor, "compensated") 280 | 281 | unittest.line("""\ 282 | int main(int argc, char* argv[]) { 283 | testing::InitGoogleTest(&argc, argv); 284 | return RUN_ALL_TESTS(); 285 | } 286 | """) 287 | 288 | 289 | with open(options.implementation, "w") as implementation_file: 290 | implementation_file.write(str(implementation)) 291 | 292 | with open(options.header, "w") as header_file: 293 | header_file.write(str(header)) 294 | 295 | with open(options.unittest, "w") as unittest_file: 296 | unittest_file.write(str(unittest)) 297 | 298 | 299 | if __name__ == "__main__": 300 | sys.exit(main()) 301 | -------------------------------------------------------------------------------- /src/dot/options.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | 8 | static void print_options_help(const char* program_name) { 9 | printf( 10 | "%s -s array-size [-i iterations]\n" 11 | "Required parameters:\n" 12 | " -s --array-size The size of array, in bytes, processed in micro-kernel (usually half or level-n cache size)\n" 13 | "Optional parameters:\n" 14 | " -i --iterations The number of benchmark iterations (default: 1000)\n", 15 | program_name); 16 | } 17 | 18 | struct benchmark_options parse_options(int argc, char** argv) { 19 | struct benchmark_options options = { 20 | .iterations = 1000, 21 | .array_size = 0, 22 | }; 23 | for (int argi = 1; argi < argc; argi += 1) { 24 | if ((strcmp(argv[argi], "--array-size") == 0) || (strcmp(argv[argi], "-s") == 0)) { 25 | if (argi + 1 == argc) { 26 | fprintf(stderr, "Error: expected array size value\n"); 27 | exit(EXIT_FAILURE); 28 | } 29 | if (sscanf(argv[argi + 1], "%zu", &options.array_size) != 1) { 30 | fprintf(stderr, "Error: can not parse %s as an unsigned integer\n", argv[argi + 1]); 31 | exit(EXIT_FAILURE); 32 | } 33 | if (options.array_size == 0) { 34 | fprintf(stderr, "Error: invalid value %s for the array size: positive value expected\n", argv[argi + 1]); 35 | exit(EXIT_FAILURE); 36 | } 37 | argi += 1; 38 | } else if ((strcmp(argv[argi], "--iterations") == 0) || (strcmp(argv[argi], "-i") == 0)) { 39 | if (argi + 1 == argc) { 40 | fprintf(stderr, "Error: expected iterations value\n"); 41 | exit(EXIT_FAILURE); 42 | } 43 | if (sscanf(argv[argi + 1], "%zu", &options.iterations) != 1) { 44 | fprintf(stderr, "Error: can not parse %s as an unsigned integer\n", argv[argi + 1]); 45 | exit(EXIT_FAILURE); 46 | } 47 | if (options.iterations == 0) { 48 | fprintf(stderr, "Error: invalid value %s for the number of iterations: positive value expected\n", argv[argi + 1]); 49 | exit(EXIT_FAILURE); 50 | } 51 | argi += 1; 52 | } else if ((strcmp(argv[argi], "--help") == 0) || (strcmp(argv[argi], "-h") == 0)) { 53 | print_options_help(argv[0]); 54 | exit(EXIT_SUCCESS); 55 | } else { 56 | fprintf(stderr, "Error: unknown argument '%s'\n", argv[argi]); 57 | print_options_help(argv[0]); 58 | exit(EXIT_FAILURE); 59 | } 60 | } 61 | if (options.array_size == 0) { 62 | fprintf(stderr, "Error: the block size is not 
specified\n"); 63 | print_options_help(argv[0]); 64 | exit(EXIT_FAILURE); 65 | } 66 | return options; 67 | } 68 | -------------------------------------------------------------------------------- /src/low-level/benchmark.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #ifndef __KNC__ 13 | #define TUPLE doubledouble 14 | #define ELEMENTS_PER_TUPLE 1 15 | #else 16 | #define TUPLE __m512dd 17 | #define ELEMENTS_PER_TUPLE (sizeof(__m512dd) / sizeof(doubledouble)) 18 | #endif 19 | 20 | static void warmup(size_t elements, const doubledouble array[restrict static elements / ELEMENTS_PER_TUPLE]) { 21 | double start_ns = high_precision_time(); 22 | const size_t warmup_iterations = 1024; 23 | for (size_t iteration = 0; iteration < warmup_iterations; iteration++) { 24 | vsum(elements / ELEMENTS_PER_TUPLE, (const TUPLE*) array); 25 | /* Stop if warmup for over 1 second */ 26 | if (high_precision_time() - start_ns >= 1.0e+9) 27 | break; 28 | } 29 | } 30 | 31 | #ifdef FPPLUS_HAVE_FLOAT128 32 | static void benchmark_quad( 33 | benchmark_quad_function function, const char* operation_name, 34 | size_t iterations, size_t elements, __float128 array[restrict static elements]) 35 | { 36 | uint64_t min_ticks = UINT64_MAX; 37 | for (size_t iteration = 0; iteration < iterations; iteration++) { 38 | const uint64_t start_ticks = cpu_ticks(); 39 | function(elements, array); 40 | const uint64_t total_ticks = cpu_ticks() - start_ticks; 41 | if (total_ticks < min_ticks) 42 | min_ticks = total_ticks; 43 | } 44 | printf("%s\t" "%10zu\t" "%.2lf\n", operation_name, elements, ((double) min_ticks) / ((double) elements)); 45 | } 46 | #endif 47 | 48 | static void benchmark_doubledouble( 49 | benchmark_doubledouble_function function, const char* operation_name, 50 | size_t iterations, size_t elements, 51 | TUPLE array[restrict static elements / ELEMENTS_PER_TUPLE]) 52 | { 53 | uint64_t min_ticks = UINT64_MAX; 54 | for (size_t iteration = 0; iteration < iterations; iteration++) { 55 | const uint64_t start_ticks = cpu_ticks(); 56 | function(elements / ELEMENTS_PER_TUPLE, (TUPLE*) array); 57 | const uint64_t total_ticks = cpu_ticks() - start_ticks; 58 | if (total_ticks < min_ticks) 59 | min_ticks = total_ticks; 60 | } 61 | printf("%s\t" "%10zu\t" "%.2lf\n", operation_name, elements, ((double) min_ticks) / ((double) (elements / ELEMENTS_PER_TUPLE))); 62 | } 63 | 64 | static void benchmark_polevl( 65 | benchmark_polevl_function function, const char* operation_name, size_t iterations, size_t repeats) 66 | { 67 | uint64_t min_ticks = UINT64_MAX; 68 | for (size_t iteration = 0; iteration < iterations; iteration++) { 69 | const uint64_t start_ticks = cpu_ticks(); 70 | #ifndef __KNC__ 71 | function(M_PI, repeats); 72 | #else 73 | function(_mm512_set1_pd(M_PI), repeats); 74 | #endif 75 | const uint64_t total_ticks = cpu_ticks() - start_ticks; 76 | if (total_ticks < min_ticks) 77 | min_ticks = total_ticks; 78 | } 79 | printf("%s\t" "%10zu\t" "%.2lf\n", operation_name, repeats, ((double) min_ticks) / ((double) repeats)); 80 | } 81 | 82 | int main(int argc, char *argv[]) { 83 | const struct benchmark_options options = parse_options(argc, argv); 84 | 85 | const size_t array_size = options.repeats * 16; 86 | void* v_array = NULL; 87 | switch (options.type) { 88 | case benchmark_type_doubledouble_latency: 89 | case benchmark_type_doubledouble_throughput: 90 | #ifdef 
FPPLUS_HAVE_FLOAT128 91 | case benchmark_type_quad_latency: 92 | #endif 93 | v_array = valloc(array_size); 94 | for (doubledouble* dd_array = v_array; dd_array != v_array + array_size; dd_array++) { 95 | *dd_array = (doubledouble) { 1.0, 0.0 }; 96 | } 97 | warmup(options.repeats, v_array); 98 | break; 99 | case benchmark_type_polevl_latency: 100 | break; 101 | case benchmark_type_none: 102 | __builtin_unreachable(); 103 | } 104 | 105 | switch (options.type) { 106 | case benchmark_type_doubledouble_latency: 107 | benchmark_doubledouble((benchmark_doubledouble_function) vsum, 108 | "DDADD\tLatency", options.iterations, options.repeats, v_array); 109 | benchmark_doubledouble((benchmark_doubledouble_function) vprod, 110 | "DDMUL\tLatency", options.iterations, options.repeats, v_array); 111 | break; 112 | case benchmark_type_doubledouble_throughput: 113 | benchmark_doubledouble(vaddc_helper, 114 | "DDADD\tThroughput", options.iterations, options.repeats, v_array); 115 | benchmark_doubledouble(vmulc_helper, 116 | "DDMUL\tThroughput", options.iterations, options.repeats, v_array); 117 | break; 118 | #ifdef FPPLUS_HAVE_FLOAT128 119 | case benchmark_type_quad_latency: 120 | for (__float128* q_array = v_array; q_array != v_array + array_size; q_array++) { 121 | *q_array = 1.235412347891354098213343278Q; 122 | } 123 | benchmark_quad(qsum, 124 | "QADD\tLatency", options.iterations, options.repeats, v_array); 125 | benchmark_quad(qprod, 126 | "QMUL\tLatency", options.iterations, options.repeats, v_array); 127 | break; 128 | #endif 129 | case benchmark_type_polevl_latency: 130 | benchmark_polevl(benchmark_compensated_horner15, "HORNER/COMP\tLatency", options.iterations, options.repeats); 131 | benchmark_polevl(benchmark_fma_horner15, "HORNER/FMA\tLatency", options.iterations, options.repeats); 132 | benchmark_polevl(benchmark_muladd_horner15, "HORNER/MAC\tLatency", options.iterations, options.repeats); 133 | break; 134 | case benchmark_type_none: 135 | __builtin_unreachable(); 136 | } 137 | free(v_array); 138 | } 139 | -------------------------------------------------------------------------------- /src/low-level/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #include 8 | #include 9 | #include 10 | #ifdef FPPLUS_HAVE_FLOAT128 11 | #include 12 | #endif 13 | 14 | 15 | enum benchmark_type { 16 | benchmark_type_none = 0, 17 | benchmark_type_doubledouble_latency, 18 | benchmark_type_doubledouble_throughput, 19 | #ifdef FPPLUS_HAVE_FLOAT128 20 | benchmark_type_quad_latency, 21 | #endif 22 | benchmark_type_polevl_latency, 23 | }; 24 | 25 | struct benchmark_options { 26 | enum benchmark_type type; 27 | size_t iterations; 28 | size_t repeats; 29 | }; 30 | 31 | struct benchmark_options parse_options(int argc, char** argv); 32 | 33 | 34 | /* Benchmarks of double-double precision operations */ 35 | #ifndef __KNC__ 36 | typedef doubledouble (*benchmark_doubledouble_function)(size_t, doubledouble*restrict); 37 | 38 | doubledouble vsum(size_t array_elements, const doubledouble array[restrict static array_elements]); 39 | doubledouble vprod(size_t array_elements, const doubledouble array[restrict static array_elements]); 40 | void vaddc(size_t augend_elements, doubledouble augend[restrict static augend_elements], const doubledouble addend); 41 | void vmulc(size_t multiplicand_elements, doubledouble multiplicand[restrict static multiplicand_elements], const doubledouble multiplier); 42 | 43 | 
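
As background for the helper wrappers and the kernels that follow: a dependent chain of operations exposes instruction *latency*, while independent updates expose *throughput*. The toy sketch below shows the distinction in plain double arithmetic; it is a hypothetical illustration, not part of FPplus.

```c
/* Toy illustration of the latency/throughput distinction exploited by
 * the double-double kernels below. */
#include <stddef.h>

double add_latency_chain(size_t n, const double *x) {
    double s = 0.0;
    for (size_t i = 0; i < n; i++)
        s += x[i];            /* each add waits for the previous one */
    return s;
}

void add_throughput(size_t n, double *x, double c) {
    for (size_t i = 0; i < n; i++)
        x[i] += c;            /* iterations are independent -> ILP */
}
```
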
inline static doubledouble vaddc_helper(size_t array_elements, doubledouble array[restrict static array_elements]) { 44 | vaddc(array_elements, array, (doubledouble) { M_E, M_PI }); 45 | return (doubledouble) { 0.0, 0.0 }; 46 | } 47 | 48 | inline static doubledouble vmulc_helper(size_t array_elements, doubledouble array[restrict static array_elements]) { 49 | vmulc(array_elements, array, (doubledouble) { M_E, M_PI }); 50 | return (doubledouble) { 0.0, 0.0 }; 51 | } 52 | #else 53 | typedef __m512dd (*benchmark_doubledouble_function)(size_t, __m512dd*restrict); 54 | 55 | __m512dd vsum(size_t array_elements, const __m512dd array[restrict static array_elements]); 56 | __m512dd vprod(size_t array_elements, const __m512dd array[restrict static array_elements]); 57 | void vaddc(size_t augend_elements, __m512dd augend[restrict static augend_elements], const __m512dd addend); 58 | void vmulc(size_t multiplicand_elements, __m512dd multiplicand[restrict static multiplicand_elements], const __m512dd multiplier); 59 | 60 | inline static __m512dd vaddc_helper(size_t array_elements, __m512dd array[restrict static array_elements]) { 61 | vaddc(array_elements, array, (__m512dd) { _mm512_set1_pd(M_E), _mm512_set1_pd(M_PI) }); 62 | return _mm512_setzero_pdd(); 63 | } 64 | 65 | inline static __m512dd vmulc_helper(size_t array_elements, __m512dd array[restrict static array_elements]) { 66 | vmulc(array_elements, array, (__m512dd) { _mm512_set1_pd(M_E), _mm512_set1_pd(M_PI) }); 67 | return _mm512_setzero_pdd(); 68 | } 69 | #endif 70 | 71 | #ifdef FPPLUS_HAVE_FLOAT128 72 | /* Benchmarks of quad-precision operations */ 73 | typedef __float128 (*benchmark_quad_function)(size_t, const __float128*restrict); 74 | 75 | __float128 qsum(size_t array_elements, const __float128 array[restrict static array_elements]); 76 | __float128 qprod(size_t array_elements, const __float128 array[restrict static array_elements]); 77 | #endif 78 | 79 | 80 | /* Benchmarks of polynomial evaluation latency */ 81 | #ifndef __KNC__ 82 | typedef double (*benchmark_polevl_function)(double, size_t); 83 | 84 | double benchmark_compensated_horner15(double x, size_t iterations); 85 | double benchmark_muladd_horner15(double x, size_t iterations); 86 | double benchmark_fma_horner15(double x, size_t iterations); 87 | #else 88 | typedef __m512d (*benchmark_polevl_function)(__m512d, size_t); 89 | 90 | __m512d benchmark_compensated_horner15(__m512d x, size_t iterations); 91 | __m512d benchmark_muladd_horner15(__m512d x, size_t iterations); 92 | __m512d benchmark_fma_horner15(__m512d x, size_t iterations); 93 | #endif 94 | 95 | #ifdef __cplusplus 96 | } /* extern "C" */ 97 | #endif 98 | -------------------------------------------------------------------------------- /src/low-level/doubledouble.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | /* 6 | * Benchmarks of double-double precision operations 7 | * Note: on Xeon Phi scalar operations require setting up mask register, which may affect performance, 8 | * so we have special versions for Xeon Phi which operate on whole SIMD vectors. 
9 | */ 10 | 11 | /* Chained sum of array elements - benchmark for addition latency */ 12 | #ifndef __KNC__ 13 | doubledouble vsum(size_t array_elements, const doubledouble array[restrict static array_elements]) { 14 | doubledouble sum = { 0.0, 0.0 }; 15 | do { 16 | sum = ddadd(sum, *array++); 17 | } while (--array_elements); 18 | return sum; 19 | } 20 | #else 21 | __m512dd vsum(size_t array_elements, const __m512dd array[restrict static array_elements]) { 22 | __m512dd sum = _mm512_setzero_pdd(); 23 | do { 24 | sum = _mm512_add_pdd(sum, *array++); 25 | } while (--array_elements); 26 | return sum; 27 | } 28 | #endif 29 | 30 | /* Chained product of array elements - benchmark for multiplication latency */ 31 | #ifndef __KNC__ 32 | doubledouble vprod(size_t array_elements, const doubledouble array[restrict static array_elements]) { 33 | doubledouble prod = { 1.0, 0.0 }; 34 | do { 35 | prod = ddmul(prod, *array++); 36 | } while (--array_elements); 37 | return prod; 38 | } 39 | #else 40 | __m512dd vprod(size_t array_elements, const __m512dd array[restrict static array_elements]) { 41 | __m512dd prod = _mm512_setzero_pdd(); 42 | do { 43 | prod = _mm512_mul_pdd(prod, *array++); 44 | } while (--array_elements); 45 | return prod; 46 | } 47 | #endif 48 | 49 | /* Addition of a constant to an array - benchmark for addition throughput */ 50 | #ifndef __KNC__ 51 | void vaddc(size_t augend_elements, doubledouble augend[restrict static augend_elements], const doubledouble addend) { 52 | for (size_t i = 0; i < augend_elements; i++) { 53 | augend[i] = ddadd(augend[i], addend); 54 | } 55 | } 56 | #else 57 | void vaddc(size_t augend_elements, __m512dd augend[restrict static augend_elements], const __m512dd addend) { 58 | /* Xeon Phi is in-order, so it needs explicitly unrolled loop to extract ILP */ 59 | for (size_t i = 0; i < augend_elements; i += 2) { 60 | augend[i] = _mm512_add_pdd(augend[i], addend); 61 | augend[i+1] = _mm512_add_pdd(augend[i+1], addend); 62 | } 63 | } 64 | #endif 65 | 66 | /* Multiplication of an array by a constant - benchmark for multiplication throughput */ 67 | #ifndef __KNC__ 68 | void vmulc(size_t multiplicand_elements, doubledouble multiplicand[restrict static multiplicand_elements], const doubledouble multiplier) { 69 | for (size_t i = 0; i < multiplicand_elements; i++) { 70 | multiplicand[i] = ddmul(multiplicand[i], multiplier); 71 | } 72 | } 73 | #else 74 | void vmulc(size_t multiplicand_elements, __m512dd multiplicand[restrict static multiplicand_elements], const __m512dd multiplier) { 75 | /* Xeon Phi is in-order, so it needs explicitly unrolled loop to extract ILP */ 76 | for (size_t i = 0; i < multiplicand_elements; i += 2) { 77 | multiplicand[i] = _mm512_mul_pdd(multiplicand[i], multiplier); 78 | multiplicand[i+1] = _mm512_mul_pdd(multiplicand[i+1], multiplier); 79 | } 80 | } 81 | #endif 82 | -------------------------------------------------------------------------------- /src/low-level/options.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | 8 | static void print_options_help(const char* program_name) { 9 | printf( 10 | "%s -t type [-i iterations] [-r repeats]\n" 11 | "Required parameters:\n" 12 | " -t --type The type of benchmark:\n" 13 | " doubledouble-latency\n" 14 | " doubledouble-throughput\n" 15 | #ifdef FPPLUS_HAVE_FLOAT128 16 | " quad-latency\n" 17 | #endif 18 | " polevl-latency\n" 19 | "Optional parameters:\n" 20 | " -i --iterations The number of benchmark iterations 
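
For context on the `ddadd`/`ddmul` operations timed by `vsum` and `vaddc` above, here is a sketch of one common double-double addition variant (cf. Dekker and Bailey's QD library). FPplus provides several variants (e.g. `ddadd`, `ddadd_fast`), which may differ from this one; `two_sum`/`fast_two_sum` are written out locally.

```c
/* One common double-double addition variant, as an illustrative sketch. */
typedef struct { double hi, lo; } dd_t;

static double two_sum(double a, double b, double *e) {
    const double s  = a + b;
    const double bv = s - a;
    *e = (a - (s - bv)) + (b - bv);     /* exact roundoff of a + b */
    return s;
}

/* valid when |a| >= |b|; this is the efaddord pattern used elsewhere */
static double fast_two_sum(double a, double b, double *e) {
    const double s = a + b;
    *e = b - (s - a);
    return s;
}

static dd_t dd_add_sketch(dd_t a, dd_t b) {
    double e;
    const double s = two_sum(a.hi, b.hi, &e);   /* exact high-part sum */
    const double t = e + (a.lo + b.lo);         /* fold the tails */
    dd_t r;
    r.hi = fast_two_sum(s, t, &r.lo);           /* renormalize */
    return r;
}
```
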
(default: 1000)\n" 21 | " -r --repeats The number of repeats within the benchmark iteration (default: 1024)\n", 22 | program_name); 23 | } 24 | 25 | struct benchmark_options parse_options(int argc, char** argv) { 26 | struct benchmark_options options = { 27 | .type = benchmark_type_none, 28 | .iterations = 1000, 29 | .repeats = 1024, 30 | }; 31 | for (int argi = 1; argi < argc; argi += 1) { 32 | if ((strcmp(argv[argi], "--type") == 0) || (strcmp(argv[argi], "-t") == 0)) { 33 | if (argi + 1 == argc) { 34 | fprintf(stderr, "Error: expected benchmark type\n"); 35 | exit(EXIT_FAILURE); 36 | } 37 | if (strcmp(argv[argi + 1], "doubledouble-latency") == 0) { 38 | options.type = benchmark_type_doubledouble_latency; 39 | } else if (strcmp(argv[argi + 1], "doubledouble-throughput") == 0) { 40 | options.type = benchmark_type_doubledouble_throughput; 41 | #ifdef FPPLUS_HAVE_FLOAT128 42 | } else if (strcmp(argv[argi + 1], "quad-latency") == 0) { 43 | options.type = benchmark_type_quad_latency; 44 | #endif 45 | } else if (strcmp(argv[argi + 1], "polevl-latency") == 0) { 46 | options.type = benchmark_type_polevl_latency; 47 | } else { 48 | fprintf(stderr, "Error: invalid benchmark type %s\n", argv[argi + 1]); 49 | exit(EXIT_FAILURE); 50 | } 51 | argi += 1; 52 | } else if ((strcmp(argv[argi], "--repeats") == 0) || (strcmp(argv[argi], "-r") == 0)) { 53 | if (argi + 1 == argc) { 54 | fprintf(stderr, "Error: expected repeats value\n"); 55 | exit(EXIT_FAILURE); 56 | } 57 | if (sscanf(argv[argi + 1], "%zu", &options.repeats) != 1) { 58 | fprintf(stderr, "Error: can not parse %s as an unsigned integer\n", argv[argi + 1]); 59 | exit(EXIT_FAILURE); 60 | } 61 | if (options.repeats == 0) { 62 | fprintf(stderr, "Error: invalid value %s for the array repeats: positive value expected\n", argv[argi + 1]); 63 | exit(EXIT_FAILURE); 64 | } 65 | argi += 1; 66 | } else if ((strcmp(argv[argi], "--iterations") == 0) || (strcmp(argv[argi], "-i") == 0)) { 67 | if (argi + 1 == argc) { 68 | fprintf(stderr, "Error: expected iterations value\n"); 69 | exit(EXIT_FAILURE); 70 | } 71 | if (sscanf(argv[argi + 1], "%zu", &options.iterations) != 1) { 72 | fprintf(stderr, "Error: can not parse %s as an unsigned integer\n", argv[argi + 1]); 73 | exit(EXIT_FAILURE); 74 | } 75 | if (options.iterations == 0) { 76 | fprintf(stderr, "Error: invalid value %s for the number of iterations: positive value expected\n", argv[argi + 1]); 77 | exit(EXIT_FAILURE); 78 | } 79 | argi += 1; 80 | } else if ((strcmp(argv[argi], "--help") == 0) || (strcmp(argv[argi], "-h") == 0)) { 81 | print_options_help(argv[0]); 82 | exit(EXIT_SUCCESS); 83 | } else { 84 | fprintf(stderr, "Error: unknown argument '%s'\n", argv[argi]); 85 | print_options_help(argv[0]); 86 | exit(EXIT_FAILURE); 87 | } 88 | } 89 | if (options.type == 0) { 90 | fprintf(stderr, "Error: the benchmark type is not specified\n"); 91 | print_options_help(argv[0]); 92 | exit(EXIT_FAILURE); 93 | } 94 | return options; 95 | } 96 | -------------------------------------------------------------------------------- /src/low-level/polevl.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | /* Benchmarks for latency of polinomial evalution with Horner scheme */ 6 | 7 | /* Polynomial evaluation with compensated Horner scheme */ 8 | #ifndef __KNC__ 9 | double benchmark_compensated_horner15(double x, size_t iterations) { 10 | #else 11 | __m512d benchmark_compensated_horner15(__m512d x, size_t iterations) { 12 | #endif 13 | const double c0 = 
0x1.78f187ab028a6p-1; 14 | const double c1 = 0x1.3f5db1c895000p-11; 15 | const double c2 = 0x1.7a26b65c2b4f0p-3; 16 | const double c3 = 0x1.bf60f17a47170p-3; 17 | const double c4 = 0x1.9aab2397bc0cdp-1; 18 | const double c5 = 0x1.e29e3de72e176p-2; 19 | const double c6 = 0x1.ecbb9a473c660p-5; 20 | const double c7 = 0x1.697d6c1218d5ep-1; 21 | const double c8 = 0x1.dd0cb5bd6c8c6p-2; 22 | const double c9 = 0x1.335b4defa4ac0p-7; 23 | const double c10 = 0x1.7bb63e1392fe5p-1; 24 | const double c11 = 0x1.03785a13a5632p-1; 25 | const double c12 = 0x1.ab7eb01482804p-2; 26 | const double c13 = 0x1.f867317158ce0p-3; 27 | const double c14 = 0x1.2fb1b3016c6e2p-2; 28 | const double c15 = 0x1.cda91c1ea93d0p-3; 29 | do { 30 | #ifndef __KNC__ 31 | x = complensated_horner15(x, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15); 32 | #else 33 | x = _mm512_comp_horner15_pd(x, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15); 34 | #endif 35 | } while (--iterations); 36 | return x; 37 | } 38 | 39 | /* Polynomial evaluation with Horner scheme with multiplication and addition involving intermediate rounding */ 40 | #ifndef __KNC__ 41 | double benchmark_muladd_horner15(double x, size_t iterations) { 42 | #else 43 | __m512d benchmark_muladd_horner15(__m512d x, size_t iterations) { 44 | #endif 45 | const double c0 = 0x1.78f187ab028a6p-1; 46 | const double c1 = 0x1.3f5db1c895000p-11; 47 | const double c2 = 0x1.7a26b65c2b4f0p-3; 48 | const double c3 = 0x1.bf60f17a47170p-3; 49 | const double c4 = 0x1.9aab2397bc0cdp-1; 50 | const double c5 = 0x1.e29e3de72e176p-2; 51 | const double c6 = 0x1.ecbb9a473c660p-5; 52 | const double c7 = 0x1.697d6c1218d5ep-1; 53 | const double c8 = 0x1.dd0cb5bd6c8c6p-2; 54 | const double c9 = 0x1.335b4defa4ac0p-7; 55 | const double c10 = 0x1.7bb63e1392fe5p-1; 56 | const double c11 = 0x1.03785a13a5632p-1; 57 | const double c12 = 0x1.ab7eb01482804p-2; 58 | const double c13 = 0x1.f867317158ce0p-3; 59 | const double c14 = 0x1.2fb1b3016c6e2p-2; 60 | const double c15 = 0x1.cda91c1ea93d0p-3; 61 | do { 62 | #ifndef __KNC__ 63 | x = muladd_horner15(x, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15); 64 | #else 65 | x = _mm512_muladd_horner15_pd(x, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15); 66 | #endif 67 | } while (--iterations); 68 | return x; 69 | } 70 | 71 | /* Polynomial evaluation with Horner scheme with fused multiply-add */ 72 | #ifndef __KNC__ 73 | double benchmark_fma_horner15(double x, size_t iterations) { 74 | #else 75 | __m512d benchmark_fma_horner15(__m512d x, size_t iterations) { 76 | #endif 77 | const double c0 = 0x1.78f187ab028a6p-1; 78 | const double c1 = 0x1.3f5db1c895000p-11; 79 | const double c2 = 0x1.7a26b65c2b4f0p-3; 80 | const double c3 = 0x1.bf60f17a47170p-3; 81 | const double c4 = 0x1.9aab2397bc0cdp-1; 82 | const double c5 = 0x1.e29e3de72e176p-2; 83 | const double c6 = 0x1.ecbb9a473c660p-5; 84 | const double c7 = 0x1.697d6c1218d5ep-1; 85 | const double c8 = 0x1.dd0cb5bd6c8c6p-2; 86 | const double c9 = 0x1.335b4defa4ac0p-7; 87 | const double c10 = 0x1.7bb63e1392fe5p-1; 88 | const double c11 = 0x1.03785a13a5632p-1; 89 | const double c12 = 0x1.ab7eb01482804p-2; 90 | const double c13 = 0x1.f867317158ce0p-3; 91 | const double c14 = 0x1.2fb1b3016c6e2p-2; 92 | const double c15 = 0x1.cda91c1ea93d0p-3; 93 | do { 94 | #ifndef __KNC__ 95 | x = fma_horner15(x, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15); 96 | #else 97 | x = _mm512_fma_horner15_pd(x, c0, c1, c2, c3, c4, c5, c6, 
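
The compensated Horner scheme benchmarked here (cf. Graillat, Langlois and Louvet) carries the Horner recurrence together with a running error term evaluated by the same recurrence. The sketch below shows the textbook loop form with local error-free transforms standing in for the library's; the FPplus macros implement the same idea in unrolled form for degree 15, so this is an illustration rather than the library's exact code.

```c
/* Compensated Horner evaluation of sum_{i=0..degree} c[i] * x^i,
 * as a sketch. Assumes hardware FMA and round-to-nearest doubles. */
#include <math.h>
#include <stddef.h>

static double two_sum(double a, double b, double *e) {
    const double s  = a + b;
    const double bv = s - a;
    *e = (a - (s - bv)) + (b - bv);
    return s;
}

static double two_prod(double a, double b, double *e) {
    const double p = a * b;
    *e = fma(a, b, -p);
    return p;
}

static double compensated_horner(double x, const double *c, size_t degree) {
    double s   = c[degree];
    double err = 0.0;                       /* running error polynomial */
    for (size_t i = degree; i-- > 0; ) {
        double pi, sigma;
        const double p = two_prod(s, x, &pi);
        s = two_sum(p, c[i], &sigma);
        err = fma(err, x, pi + sigma);      /* err = err*x + roundoffs */
    }
    return s + err;                         /* compensated result */
}
```
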
c7, c8, c9, c10, c11, c12, c13, c14, c15); 98 | #endif 99 | } while (--iterations); 100 | return x; 101 | } 102 | -------------------------------------------------------------------------------- /src/low-level/quad.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | * Benchmarks of quad-precision operations 5 | */ 6 | 7 | 8 | /* Chained sum of array elements - benchmark for addition latency */ 9 | __float128 qsum(size_t array_elements, const __float128 array[restrict static array_elements]) { 10 | __float128 sum = 0.0Q; 11 | do { 12 | sum += (*array++); 13 | } while (--array_elements); 14 | return sum; 15 | } 16 | 17 | /* Chained product of array elements - benchmark for multiplication latency */ 18 | __float128 qprod(size_t array_elements, const __float128 array[restrict static array_elements]) { 19 | __float128 prod = 1.0Q; 20 | do { 21 | prod *= (*array++); 22 | } while (--array_elements); 23 | return prod; 24 | } 25 | -------------------------------------------------------------------------------- /src/simd.py: -------------------------------------------------------------------------------- 1 | class SimdOperations: 2 | def __init__(self, simd): 3 | assert simd in ["avx", "mic"] 4 | self.name = simd 5 | self.width = {"avx": 4, "mic": 8}[simd] 6 | self.regs = {"avx": 16, "mic": 32}[simd] 7 | self.dvec = {"avx": "__m256d", "mic": "__m512d"}[simd] 8 | self.ddvec = {"avx": "__m256dd", "mic": "__m512dd"}[simd] 9 | self._dzero = {"avx": "_mm256_setzero_pd", "mic": "_mm512_setzero_pd"}[simd] 10 | self._dload = {"avx": "_mm256_load_pd", "mic": "_mm512_load_pd"}[simd] 11 | self._dadd = {"avx": "_mm256_add_pd", "mic": "_mm512_add_pd"}[simd] 12 | self._dmul = {"avx": "_mm256_mul_pd", "mic": "_mm512_mul_pd"}[simd] 13 | self._dfma = {"avx": "_mm256_fmadd_pd", "mic": "_mm512_fmadd_pd"}[simd] 14 | self.ddzero = {"avx": "_mm256_setzero_pdd", "mic": "_mm512_setzero_pdd"}[simd] 15 | self._ddadd = {"avx": "_mm256_add_pdd", "mic": "_mm512_add_pdd"}[simd] 16 | self._ddmul = {"avx": "_mm256_mul_pdd", "mic": "_mm512_mul_pdd"}[simd] 17 | self._defadd = {"avx": "_mm256_efadd_pd", "mic": "_mm512_efadd_pd"}[simd] 18 | self._defmul = {"avx": "_mm256_efmul_pd", "mic": "_mm512_efmul_pd"}[simd] 19 | self.ddloaddeinterleave = {"avx": "_mm256_loaddeinterleave_pdd", "mic": "_mm512_loaddeinterleave_pdd"}[simd] 20 | self.ddloadudeinterleave = {"avx": "_mm256_loadudeinterleave_pdd", "mic": "_mm512_loadudeinterleave_pdd"}[simd] 21 | self.ddinterleavestore = {"avx": "_mm256_interleavestore_pdd", "mic": "_mm512_interleavestore_pdd"}[simd] 22 | self.ddinterleavestoreu = {"avx": "_mm256_interleavestoreu_pdd", "mic": "_mm512_interleavestoreu_pdd"}[simd] 23 | self.ddbroadcast = {"avx": "_mm256_broadcast_sdd", "mic": "_mm512_broadcast_sdd"}[simd] 24 | 25 | 26 | def dzero(self): 27 | return self._dzero + "()" 28 | 29 | def dload(self, addr): 30 | return self._dload + "(" + str(addr) + ")" 31 | 32 | def ddadd(self, a, b): 33 | return self._ddadd + "(" + str(a) + ", " + str(b) + ")" 34 | 35 | def ddmul(self, a, b): 36 | return self._ddmul + "(" + str(a) + ", " + str(b) + ")" 37 | -------------------------------------------------------------------------------- /src/utils.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | static int compare_double(const void *a_ptr, const void *b_ptr) { 6 | const double a = *((const double*) a_ptr); 7 | const double b = *((const double*) b_ptr); 8 | return (a > b) - (a < 
b); 9 | } 10 | 11 | static int compare_uint64(const void *a_ptr, const void *b_ptr) { 12 | const uint64_t a = *((const uint64_t*) a_ptr); 13 | const uint64_t b = *((const uint64_t*) b_ptr); 14 | return (a > b) - (a < b); 15 | } 16 | 17 | double median_double(double array[], size_t length) { 18 | qsort(array, length, sizeof(double), &compare_double); 19 | if (length % 2 == 0) { 20 | const double median_lo = array[length / 2 - 1]; 21 | const double median_hi = array[length / 2]; 22 | return 0.5 * (median_lo + median_hi); 23 | } else { 24 | return array[length / 2]; 25 | } 26 | } 27 | 28 | uint64_t median_uint64(uint64_t array[], size_t length) { 29 | qsort(array, length, sizeof(uint64_t), &compare_uint64); 30 | if (length % 2 == 0) { 31 | const uint64_t median_lo = array[length / 2 - 1]; 32 | const uint64_t median_hi = array[length / 2]; 33 | return (median_lo + median_hi + 1) / 2; 34 | } else { 35 | return array[length / 2]; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #if defined(__APPLE__) && defined(__MACH__) 6 | #include 7 | #include 8 | #endif 9 | 10 | #if defined(__linux__) 11 | #include 12 | #endif 13 | 14 | #include 15 | 16 | /** 17 | * @brief Returns time in nanoseconds 18 | */ 19 | FPPLUS_STATIC_INLINE double high_precision_time() { 20 | #if defined(__APPLE__) && defined(__MACH__) 21 | uint64_t ticks = mach_absolute_time(); 22 | mach_timebase_info_data_t timebase_info; 23 | mach_timebase_info(&timebase_info); 24 | return ((double) ticks) * (((double) timebase_info.numer) / ((double) timebase_info.denom)); 25 | #elif defined(__linux__) 26 | struct timespec timespec; 27 | clock_gettime(CLOCK_MONOTONIC_RAW, ×pec); 28 | return (double) (timespec.tv_sec * 1000000000ll + timespec.tv_nsec); 29 | #else 30 | #error Not implemented 31 | #endif 32 | } 33 | 34 | /** 35 | * @brief Returns the CPU timestamp counter value 36 | */ 37 | FPPLUS_STATIC_INLINE uint64_t cpu_ticks() { 38 | register uint32_t counter_lo, counter_hi; 39 | #ifndef __KNC__ 40 | asm volatile ( 41 | "RDTSCP;" 42 | : "=a" (counter_lo), "=d" (counter_hi) 43 | : 44 | : "rcx" 45 | ); 46 | #else 47 | asm volatile ( 48 | "RDTSC;" 49 | : "=a" (counter_lo), "=d" (counter_hi) 50 | : 51 | : "rcx" 52 | ); 53 | #endif 54 | return (((uint64_t) counter_hi) << 32) | counter_lo; 55 | } 56 | 57 | double median_double(double array[], size_t length); 58 | uint64_t median_uint64(uint64_t array[], size_t length); 59 | -------------------------------------------------------------------------------- /test/ddgemm-tester.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | 23 | 24 | template 25 | class DDGEMMTester { 26 | public: 27 | DDGEMMTester() : 28 | errorLimit_(1.0e-30) 29 | { 30 | } 31 | 32 | DDGEMMTester(const DDGEMMTester&) = delete; 33 | 34 | DDGEMMTester& operator=(const DDGEMMTester&) = delete; 35 | 36 | DDGEMMTester& errorLimit(double errorLimit) { 37 | this->errorLimit_ = errorLimit; 38 | return *this; 39 | } 40 | 41 | double errorLimit() const { 42 | return this->errorLimit_; 43 | } 44 | 45 | void test(size_t kc = 1024) const { 46 | const uint_fast32_t seed = 
std::chrono::system_clock::now().time_since_epoch().count(); 47 | auto rng = std::bind(std::uniform_real_distribution(), std::mt19937(seed)); 48 | 49 | mpfr_t mp_a, mp_b, mp_error, mp_acc[mrT][nrT]; 50 | for (size_t m = 0; m < mrT; m++) { 51 | for (size_t n = 0; n < nrT; n++) { 52 | mpfr_init2(mp_acc[m][n], DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 53 | mpfr_set_zero(mp_acc[m][n], 0); 54 | } 55 | } 56 | 57 | double* array_a = (double*) valloc(mrT * kc * sizeof(doubledouble)); 58 | doubledouble* array_b = (doubledouble*) valloc(nrT * kc * sizeof(doubledouble)); 59 | doubledouble* array_c = (doubledouble*) valloc(mrT * nrT * sizeof(doubledouble)); 60 | memset(array_c, 0, mrT * nrT * sizeof(doubledouble)); 61 | 62 | mpfr_init2(mp_a, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 63 | mpfr_init2(mp_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 64 | for (size_t k = 0; k < kc; k++) { 65 | for (size_t m = 0; m < mrT; m++) { 66 | doubledouble a = { rng(), DBL_EPSILON * rng() }; 67 | a.hi = efaddord(a.hi, a.lo, &a.lo); 68 | 69 | array_a[2 * k * mrT + (m / simdWidthT) * (2 * simdWidthT) + m % simdWidthT] = a.hi; 70 | array_a[2 * k * mrT + (m / simdWidthT) * (2 * simdWidthT) + simdWidthT + m % simdWidthT] = a.lo; 71 | } 72 | for (size_t n = 0; n < nrT; n++) { 73 | doubledouble b = { rng(), DBL_EPSILON * rng() }; 74 | b.hi = efaddord(b.hi, b.lo, &b.lo); 75 | 76 | array_b[k * nrT + n] = b; 77 | } 78 | for (size_t m = 0; m < mrT; m++) { 79 | for (size_t n = 0; n < nrT; n++) { 80 | mpfr_set_d(mp_a, array_a[2 * k * mrT + (m / simdWidthT) * (2 * simdWidthT) + m % simdWidthT], MPFR_RNDN); 81 | mpfr_add_d(mp_a, mp_a, array_a[2 * k * mrT + (m / simdWidthT) * (2 * simdWidthT) + simdWidthT + m % simdWidthT], MPFR_RNDN); 82 | 83 | mpfr_set_d(mp_b, array_b[k * nrT + n].hi, MPFR_RNDN); 84 | mpfr_add_d(mp_b, mp_b, array_b[k * nrT + n].lo, MPFR_RNDN); 85 | 86 | mpfr_fma(mp_acc[m][n], mp_a, mp_b, mp_acc[m][n], MPFR_RNDN); 87 | } 88 | } 89 | } 90 | mpfr_clear(mp_a); 91 | mpfr_clear(mp_b); 92 | 93 | FunctionT(kc, array_a, array_b, array_c); 94 | 95 | mpfr_init2(mp_error, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 96 | for (size_t m = 0; m < mrT; m++) { 97 | for (size_t n = 0; n < nrT; n++) { 98 | mpfr_sub_d(mp_error, mp_acc[m][n], array_c[n * mrT + m].hi, MPFR_RNDN); 99 | mpfr_sub_d(mp_error, mp_error, array_c[n * mrT + m].lo, MPFR_RNDN); 100 | mpfr_div(mp_error, mp_error, mp_acc[m][n], MPFR_RNDN); 101 | mpfr_clear(mp_acc[m][n]); 102 | 103 | const double error = mpfr_get_d(mp_error, MPFR_RNDN); 104 | EXPECT_LT(error, errorLimit()) << 105 | "C[" << m << "][" << n << "] error is " << error; 106 | } 107 | } 108 | mpfr_clear(mp_error); 109 | 110 | free(array_a); 111 | free(array_b); 112 | free(array_c); 113 | } 114 | 115 | private: 116 | double errorLimit_; 117 | }; 118 | -------------------------------------------------------------------------------- /test/dot-tester.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | 23 | 24 | class DotTester { 25 | public: 26 | DotTester() : 27 | arrayElements_(1027), 28 | a(nullptr), 29 | b(nullptr) 30 | { 31 | mpfr_init2(mp_tmp, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 32 | mpfr_init2(mp_sum, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 33 | 34 | this->resize(); 35 | } 36 | 37 | DotTester(const DotTester&) = delete; 38 | 
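
The `DDGEMMTester` indexing above packs the A panel so that, for each `k`, rows are grouped into SIMD registers of width `simdWidthT`, with each group storing its hi words contiguously followed by its lo words, so a vector load fetches all-hi or all-lo lanes. The following hypothetical helper restates that offset computation for clarity; it is not part of the tester.

```c
/* Restates the packed-A layout used by DDGEMMTester above. */
#include <stddef.h>

static size_t packed_a_offset(size_t k, size_t m, size_t mr, size_t w,
                              int lo_word /* 0 = hi word, 1 = lo word */) {
    return 2 * k * mr                 /* k-th slice: mr hi + mr lo words */
         + (m / w) * (2 * w)          /* register group within the slice */
         + (lo_word ? w : 0)          /* hi block first, then lo block   */
         + (m % w);                   /* lane within the register        */
}
```
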
39 | DotTester& operator=(const DotTester&) = delete; 40 | 41 | ~DotTester() { 42 | mpfr_clear(this->mp_tmp); 43 | mpfr_clear(this->mp_sum); 44 | free(this->a); 45 | free(this->b); 46 | } 47 | 48 | DotTester& arrayElements(size_t arrayElements) { 49 | this->arrayElements_ = arrayElements; 50 | this->resize(); 51 | return *this; 52 | } 53 | 54 | size_t arrayElements() const { 55 | return this->arrayElements_; 56 | } 57 | 58 | void testDotProduct(dot_product_function dotProduct, double errorLimit = 5.0 * DBL_EPSILON) { 59 | this->regenerateArrays(); 60 | this->recomputeReference(); 61 | 62 | double sum = dotProduct(arrayElements(), this->a, this->b); 63 | mpfr_sub_d(mp_tmp, mp_sum, sum, MPFR_RNDN); 64 | mpfr_div(mp_tmp, mp_tmp, mp_sum, MPFR_RNDN); 65 | 66 | const double relativeError = fabs(mpfr_get_d(mp_tmp, MPFR_RNDN)); 67 | ASSERT_LT(relativeError, errorLimit); 68 | } 69 | 70 | void testCompensatedDotProduct( 71 | compensated_dot_product_function compensatedDotProduct, 72 | double errorLimit = 10.0 * DBL_EPSILON * DBL_EPSILON) 73 | { 74 | this->regenerateArrays(); 75 | this->recomputeReference(); 76 | 77 | doubledouble sum = compensatedDotProduct(arrayElements(), this->a, this->b); 78 | mpfr_sub_d(mp_tmp, mp_sum, sum.hi, MPFR_RNDN); 79 | mpfr_sub_d(mp_tmp, mp_tmp, sum.lo, MPFR_RNDN); 80 | mpfr_div(mp_tmp, mp_tmp, mp_sum, MPFR_RNDN); 81 | 82 | const double relativeError = fabs(mpfr_get_d(mp_tmp, MPFR_RNDN)); 83 | ASSERT_LT(relativeError, errorLimit); 84 | } 85 | 86 | private: 87 | /** 88 | * @brief Rellocates @b a and @b b arrays according to arrayElements() value. 89 | */ 90 | void resize() { 91 | free(this->a); 92 | free(this->b); 93 | this->a = static_cast(valloc(arrayElements() * sizeof(double))); 94 | this->b = static_cast(valloc(arrayElements() * sizeof(double))); 95 | } 96 | 97 | /** 98 | * @brief (Re-)initializes @b a and @b b arrays with random numbers 99 | */ 100 | void regenerateArrays() { 101 | const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 102 | auto rng = std::bind(std::uniform_real_distribution(-1.0, 1.0), std::mt19937(seed)); 103 | std::generate(this->a, this->a + arrayElements(), rng); 104 | std::generate(this->b, this->b + arrayElements(), rng); 105 | } 106 | 107 | /** 108 | * @brief Recomputes the high-precision value of dot product in @b mp_sum. 
109 | */ 110 | void recomputeReference() { 111 | mpfr_set_zero(mp_sum, 0); 112 | for (size_t i = 0; i < arrayElements(); i++) { 113 | mpfr_set_d(mp_tmp, this->a[i], MPFR_RNDN); 114 | mpfr_mul_d(mp_tmp, mp_tmp, this->b[i], MPFR_RNDN); 115 | mpfr_add(mp_sum, mp_sum, mp_tmp, MPFR_RNDN); 116 | } 117 | } 118 | 119 | size_t arrayElements_; 120 | mutable double* a; 121 | mutable double* b; 122 | mutable mpfr_t mp_tmp; 123 | mutable mpfr_t mp_sum; 124 | }; 125 | -------------------------------------------------------------------------------- /test/double-double.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | 17 | #include 18 | 19 | /* Check that the high double is the sum of addends rounded to closest double-precision number */ 20 | TEST(ddaddl, high_double) { 21 | const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 22 | auto rng = std::bind(std::uniform_real_distribution(), std::mt19937(seed)); 23 | for (size_t iteration = 0; iteration < 1000; iteration++) { 24 | const double a = rng(); 25 | const double b = rng(); 26 | const doubledouble sum = ddaddl(a, b); 27 | EXPECT_EQ(sum.hi, a + b) << "a = " << a << " b = " << b; 28 | } 29 | } 30 | 31 | /* Check that the result is at least as accuate as double-precision addition when addends have the same sign */ 32 | TEST(ddaddl, same_sign_accuracy) { 33 | const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 34 | auto rng = std::bind(std::uniform_real_distribution(), std::mt19937(seed)); 35 | mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double; 36 | mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 37 | mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 38 | mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 39 | mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 40 | for (size_t iteration = 0; iteration < 1000; iteration++) { 41 | const double a = rng(); 42 | const double b = rng(); 43 | const doubledouble sum = ddaddl(a, b); 44 | 45 | mpfr_set_d(mp_sum_a_b, a, MPFR_RNDN); 46 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b, MPFR_RNDN); 47 | 48 | mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN); 49 | mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN); 50 | 51 | mpfr_sub(mp_error_doubledouble, mp_sum_a_b, mp_sum_hi_lo, MPFR_RNDN); 52 | mpfr_sub_d(mp_error_double, mp_sum_a_b, a + b, MPFR_RNDN); 53 | 54 | EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) << "a = " << a << " b = " << b; 55 | } 56 | mpfr_clear(mp_sum_a_b); 57 | mpfr_clear(mp_sum_hi_lo); 58 | mpfr_clear(mp_error_doubledouble); 59 | mpfr_clear(mp_error_double); 60 | } 61 | 62 | /* Check that the result is at least as accuate as double-precision addition when addends have opposite signs */ 63 | TEST(ddaddl, opposite_sign_accuracy) { 64 | const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 65 | auto rng = std::bind(std::uniform_real_distribution(), std::mt19937(seed)); 66 | mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double; 67 | mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 68 | mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 69 | mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 70 | 
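
These accuracy tests all follow one pattern: compute the reference value in MPFR at a precision wide enough (`DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP` bits) to represent any sum of doubles exactly, then compare errors against it. The standalone sketch below applies the same pattern to verify that a locally defined `two_sum` captures the roundoff of `a + b` exactly; compile with `-lmpfr -lgmp`.

```c
/* Standalone sketch of the MPFR reference pattern used by these tests. */
#include <float.h>
#include <mpfr.h>
#include <stdio.h>

static double two_sum(double a, double b, double *e) {
    const double s  = a + b;
    const double bv = s - a;
    *e = (a - (s - bv)) + (b - bv);
    return s;
}

int main(void) {
    /* wide enough to hold any finite sum of doubles exactly, as above */
    const mpfr_prec_t prec = DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP;
    mpfr_t ref;
    mpfr_init2(ref, prec);

    const double a = 1.0, b = 0x1p-60;   /* b is lost in plain addition */
    double err;
    const double s = two_sum(a, b, &err);

    mpfr_set_d(ref, a, MPFR_RNDN);
    mpfr_add_d(ref, ref, b, MPFR_RNDN);      /* exact a + b */
    mpfr_sub_d(ref, ref, s, MPFR_RNDN);      /* minus rounded sum */
    mpfr_sub_d(ref, ref, err, MPFR_RNDN);    /* minus captured roundoff */

    printf("residual is zero: %d\n", mpfr_zero_p(ref) != 0);
    mpfr_clear(ref);
    return 0;
}
```
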
mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 71 | for (size_t iteration = 0; iteration < 1000; iteration++) { 72 | const double a = rng(); 73 | const double b = -rng(); 74 | const doubledouble sum = ddaddl(a, b); 75 | 76 | mpfr_set_d(mp_sum_a_b, a, MPFR_RNDN); 77 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b, MPFR_RNDN); 78 | 79 | mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN); 80 | mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN); 81 | 82 | mpfr_sub(mp_error_doubledouble, mp_sum_a_b, mp_sum_hi_lo, MPFR_RNDN); 83 | mpfr_sub_d(mp_error_double, mp_sum_a_b, a + b, MPFR_RNDN); 84 | 85 | EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) << "a = " << a << " b = " << b; 86 | } 87 | mpfr_clear(mp_sum_a_b); 88 | mpfr_clear(mp_sum_hi_lo); 89 | mpfr_clear(mp_error_doubledouble); 90 | mpfr_clear(mp_error_double); 91 | } 92 | 93 | /* Check that the result is at least as accuate as double-precision addition when addends have the same sign */ 94 | TEST(ddaddw, same_sign_accuracy) { 95 | const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 96 | auto rng = std::bind(std::uniform_real_distribution(), std::mt19937(seed)); 97 | mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double; 98 | mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 99 | mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 100 | mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 101 | mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 102 | for (size_t iteration = 0; iteration < 1000; iteration++) { 103 | /* Generate random normalized double-double number */ 104 | doubledouble a = { rng(), rng() * DBL_EPSILON }; 105 | a.hi = efaddord(a.hi, a.lo, &a.lo); 106 | 107 | const double b = rng(); 108 | const doubledouble sum = ddaddw(a, b); 109 | 110 | mpfr_set_d(mp_sum_a_b, a.hi, MPFR_RNDN); 111 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, a.lo, MPFR_RNDN); 112 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b, MPFR_RNDN); 113 | 114 | mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN); 115 | mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN); 116 | 117 | mpfr_sub(mp_error_doubledouble, mp_sum_a_b, mp_sum_hi_lo, MPFR_RNDN); 118 | mpfr_sub_d(mp_error_double, mp_sum_a_b, (a.hi + b) + a.lo, MPFR_RNDN); 119 | 120 | EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) << "a = " << a.hi << " + " << a.lo << " b = " << b; 121 | } 122 | mpfr_clear(mp_sum_a_b); 123 | mpfr_clear(mp_sum_hi_lo); 124 | mpfr_clear(mp_error_doubledouble); 125 | mpfr_clear(mp_error_double); 126 | } 127 | 128 | /* Check that the result is at least as accuate as double-precision addition when addends have opposite signs */ 129 | TEST(ddaddw, opposite_sign_accuracy) { 130 | const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 131 | auto rng = std::bind(std::uniform_real_distribution(), std::mt19937(seed)); 132 | mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double; 133 | mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 134 | mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 135 | mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 136 | mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 137 | for (size_t iteration = 0; iteration < 1000; iteration++) { 138 | /* Generate random normalized double-double number */ 139 | doubledouble a = { rng(), rng() * DBL_EPSILON }; 140 | a.hi = 
efaddord(a.hi, a.lo, &a.lo); 141 | 142 | const double b = -rng(); 143 | const doubledouble sum = ddaddw(a, b); 144 | 145 | mpfr_set_d(mp_sum_a_b, a.hi, MPFR_RNDN); 146 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, a.lo, MPFR_RNDN); 147 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b, MPFR_RNDN); 148 | 149 | mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN); 150 | mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN); 151 | 152 | mpfr_sub(mp_error_doubledouble, mp_sum_a_b, mp_sum_hi_lo, MPFR_RNDN); 153 | mpfr_sub_d(mp_error_double, mp_sum_a_b, (a.hi + b) + a.lo, MPFR_RNDN); 154 | 155 | EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) << "a = " << a.hi << " + " << a.lo << " b = " << b; 156 | } 157 | mpfr_clear(mp_sum_a_b); 158 | mpfr_clear(mp_sum_hi_lo); 159 | mpfr_clear(mp_error_doubledouble); 160 | mpfr_clear(mp_error_double); 161 | } 162 | 163 | /* Check that the result is at least as accuate as double-precision addition when addends have the same sign */ 164 | TEST(ddadd, same_sign_accuracy) { 165 | const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 166 | auto rng = std::bind(std::uniform_real_distribution(), std::mt19937(seed)); 167 | mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double; 168 | mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 169 | mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 170 | mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 171 | mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 172 | for (size_t iteration = 0; iteration < 1000; iteration++) { 173 | /* Generate random normalized double-double numbers */ 174 | doubledouble a = { rng(), rng() * DBL_EPSILON }; 175 | doubledouble b = { rng(), rng() * DBL_EPSILON }; 176 | a.hi = efaddord(a.hi, a.lo, &a.lo); 177 | b.hi = efaddord(b.hi, b.lo, &b.lo); 178 | 179 | const doubledouble sum = ddadd(a, b); 180 | 181 | mpfr_set_d(mp_sum_a_b, a.hi, MPFR_RNDN); 182 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, a.lo, MPFR_RNDN); 183 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b.hi, MPFR_RNDN); 184 | mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b.lo, MPFR_RNDN); 185 | 186 | mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN); 187 | mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN); 188 | 189 | mpfr_sub(mp_error_doubledouble, mp_sum_a_b, mp_sum_hi_lo, MPFR_RNDN); 190 | mpfr_sub_d(mp_error_double, mp_sum_a_b, (a.hi + b.hi) + (a.lo + b.lo), MPFR_RNDN); 191 | 192 | EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) << 193 | "a = " << a.hi << " + " << a.lo << " b = " << b.hi << " + " << b.lo; 194 | } 195 | mpfr_clear(mp_sum_a_b); 196 | mpfr_clear(mp_sum_hi_lo); 197 | mpfr_clear(mp_error_doubledouble); 198 | mpfr_clear(mp_error_double); 199 | } 200 | 201 | /* Check that the result is at least as accuate as double-precision addition when addends have opposite signs */ 202 | TEST(ddadd, opposite_sign_accuracy) { 203 | const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 204 | auto rng = std::bind(std::uniform_real_distribution(), std::mt19937(seed)); 205 | mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double; 206 | mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 207 | mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 208 | mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 209 | mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP); 210 | for (size_t iteration = 0; iteration < 1000; 
/* Check that the result is at least as accurate as double-precision addition when the addends have opposite signs */
TEST(ddadd, opposite_sign_accuracy) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double;
	mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		/* Generate random normalized double-double numbers of opposite signs */
		doubledouble a = { +rng(), +rng() * DBL_EPSILON };
		doubledouble b = { -rng(), -rng() * DBL_EPSILON };
		a.hi = efaddord(a.hi, a.lo, &a.lo);
		b.hi = efaddord(b.hi, b.lo, &b.lo);

		const doubledouble sum = ddadd(a, b);

		mpfr_set_d(mp_sum_a_b, a.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, a.lo, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b.lo, MPFR_RNDN);

		mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN);

		mpfr_sub(mp_error_doubledouble, mp_sum_a_b, mp_sum_hi_lo, MPFR_RNDN);
		mpfr_sub_d(mp_error_double, mp_sum_a_b, (a.hi + b.hi) + (a.lo + b.lo), MPFR_RNDN);

		EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) <<
			"a = " << a.hi << " + " << a.lo << " b = " << b.hi << " + " << b.lo;
	}
	mpfr_clear(mp_sum_a_b);
	mpfr_clear(mp_sum_hi_lo);
	mpfr_clear(mp_error_doubledouble);
	mpfr_clear(mp_error_double);
}

/* Check that the result is at least as accurate as double-precision addition when the addends have the same sign */
TEST(ddadd_fast, same_sign_accuracy) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double;
	mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		/* Generate random normalized double-double numbers */
		doubledouble a = { rng(), rng() * DBL_EPSILON };
		doubledouble b = { rng(), rng() * DBL_EPSILON };
		a.hi = efaddord(a.hi, a.lo, &a.lo);
		b.hi = efaddord(b.hi, b.lo, &b.lo);

		const doubledouble sum = ddadd_fast(a, b);

		mpfr_set_d(mp_sum_a_b, a.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, a.lo, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b.lo, MPFR_RNDN);

		mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN);

		mpfr_sub(mp_error_doubledouble, mp_sum_a_b, mp_sum_hi_lo, MPFR_RNDN);
		mpfr_sub_d(mp_error_double, mp_sum_a_b, (a.hi + b.hi) + (a.lo + b.lo), MPFR_RNDN);

		EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) <<
			"a = " << a.hi << " + " << a.lo << " b = " << b.hi << " + " << b.lo;
	}
	mpfr_clear(mp_sum_a_b);
	mpfr_clear(mp_sum_hi_lo);
	mpfr_clear(mp_error_doubledouble);
	mpfr_clear(mp_error_double);
}
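/*
 * Reference model for the ddadd_fast tests (a sketch, under the assumption
 * that ddadd_fast is the "sloppy" variant: both low words are folded into a
 * single compensation term and the second renormalization is skipped). This
 * saves several flops over the accurate variant at the cost of a weaker
 * error bound when the addends have opposite signs, which is why both sign
 * combinations are tested below. For documentation only; not used by the tests.
 */
static inline doubledouble ref_ddadd_fast(doubledouble a, doubledouble b) {
	double e;
	const double s = ref_two_sum(a.hi, b.hi, &e);
	doubledouble sum;
	sum.hi = ref_fast_two_sum(s, e + (a.lo + b.lo), &sum.lo);
	return sum;
}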
/* Check that the result is at least as accurate as double-precision addition when the addends have opposite signs */
TEST(ddadd_fast, opposite_sign_accuracy) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	mpfr_t mp_sum_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double;
	mpfr_init2(mp_sum_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		/* Generate random normalized double-double numbers of opposite signs */
		doubledouble a = { +rng(), +rng() * DBL_EPSILON };
		doubledouble b = { -rng(), -rng() * DBL_EPSILON };
		a.hi = efaddord(a.hi, a.lo, &a.lo);
		b.hi = efaddord(b.hi, b.lo, &b.lo);

		const doubledouble sum = ddadd_fast(a, b);

		mpfr_set_d(mp_sum_a_b, a.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, a.lo, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_a_b, mp_sum_a_b, b.lo, MPFR_RNDN);

		mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN);

		mpfr_sub(mp_error_doubledouble, mp_sum_a_b, mp_sum_hi_lo, MPFR_RNDN);
		mpfr_sub_d(mp_error_double, mp_sum_a_b, (a.hi + b.hi) + (a.lo + b.lo), MPFR_RNDN);

		EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) <<
			"a = " << a.hi << " + " << a.lo << " b = " << b.hi << " + " << b.lo;
	}
	mpfr_clear(mp_sum_a_b);
	mpfr_clear(mp_sum_hi_lo);
	mpfr_clear(mp_error_doubledouble);
	mpfr_clear(mp_error_double);
}

/* Check that the high double is the product of the factors rounded to the closest double-precision number */
TEST(ddmull, high_double) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		const doubledouble prod = ddmull(a, b);
		EXPECT_EQ(prod.hi, a * b) << "a = " << a << " b = " << b;
	}
}
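/*
 * Reference model for ddmull (a sketch): the exact product of two doubles
 * represented as a double-double, via the FMA-based error-free product
 * (the library requires hardware FMA). The high word is the correctly
 * rounded product by construction, which is exactly what the high_double
 * test above checks. For documentation only; not used by the tests.
 */
static inline doubledouble ref_ddmull(double a, double b) {
	doubledouble prod;
	prod.hi = a * b;
	prod.lo = fma(a, b, -prod.hi);  /* Exact rounding error of a * b */
	return prod;
}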
/* Check that the result is at least as accurate as double-precision multiplication */
TEST(ddmull, accuracy) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	mpfr_t mp_prod_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double;
	mpfr_init2(mp_prod_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		const doubledouble sum = ddmull(a, b);

		mpfr_set_d(mp_prod_a_b, a, MPFR_RNDN);
		mpfr_mul_d(mp_prod_a_b, mp_prod_a_b, b, MPFR_RNDN);

		mpfr_set_d(mp_sum_hi_lo, sum.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, sum.lo, MPFR_RNDN);

		mpfr_sub(mp_error_doubledouble, mp_prod_a_b, mp_sum_hi_lo, MPFR_RNDN);
		mpfr_sub_d(mp_error_double, mp_prod_a_b, a * b, MPFR_RNDN);

		EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) << "a = " << a << " b = " << b;
	}
	mpfr_clear(mp_prod_a_b);
	mpfr_clear(mp_sum_hi_lo);
	mpfr_clear(mp_error_doubledouble);
	mpfr_clear(mp_error_double);
}

/* Check that the result is at least as accurate as double-precision multiplication */
TEST(ddmul, accuracy) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	mpfr_t mp_a, mp_b, mp_prod_a_b, mp_sum_hi_lo, mp_error_doubledouble, mp_error_double;
	mpfr_init2(mp_a, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_prod_a_b, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_sum_hi_lo, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_doubledouble, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(mp_error_double, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		/* Generate random normalized double-double numbers */
		doubledouble a = { rng(), rng() * DBL_EPSILON };
		doubledouble b = { rng(), rng() * DBL_EPSILON };
		a.hi = efaddord(a.hi, a.lo, &a.lo);
		b.hi = efaddord(b.hi, b.lo, &b.lo);

		const doubledouble prod = ddmul(a, b);

		mpfr_set_d(mp_a, a.hi, MPFR_RNDN);
		mpfr_add_d(mp_a, mp_a, a.lo, MPFR_RNDN);

		mpfr_set_d(mp_b, b.hi, MPFR_RNDN);
		mpfr_add_d(mp_b, mp_b, b.lo, MPFR_RNDN);

		mpfr_mul(mp_prod_a_b, mp_a, mp_b, MPFR_RNDN);

		mpfr_set_d(mp_sum_hi_lo, prod.hi, MPFR_RNDN);
		mpfr_add_d(mp_sum_hi_lo, mp_sum_hi_lo, prod.lo, MPFR_RNDN);

		mpfr_sub(mp_error_doubledouble, mp_prod_a_b, mp_sum_hi_lo, MPFR_RNDN);
		mpfr_sub_d(mp_error_double, mp_prod_a_b, a.hi * b.hi + ((a.lo * b.hi + a.hi * b.lo) + a.lo * b.lo), MPFR_RNDN);

		EXPECT_LE(mpfr_cmpabs(mp_error_doubledouble, mp_error_double), 0) <<
			"a = " << a.hi << " + " << a.lo << " b = " << b.hi << " + " << b.lo;
	}
	mpfr_clear(mp_a);
	mpfr_clear(mp_b);
	mpfr_clear(mp_prod_a_b);
	mpfr_clear(mp_sum_hi_lo);
	mpfr_clear(mp_error_doubledouble);
	mpfr_clear(mp_error_double);
}

int main(int ac, char* av[]) {
	testing::InitGoogleTest(&ac, av);
	return RUN_ALL_TESTS();
}
--------------------------------------------------------------------------------
/test/error-free-transform.cpp:
--------------------------------------------------------------------------------
#include <float.h>
#include <math.h>

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>

#include <mpfr.h>

#include <fpplus/eft.h>

#include <gtest/gtest.h>

inline double ulp(double x) {
	x = fabs(x);
	return nextafter(x, std::numeric_limits<double>::infinity()) - x;
}
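/*
 * The efadd tests below check the defining properties of an error-free
 * addition: the high word is the correctly rounded sum, the low word is at
 * most half an ULP of it, and the two words together reproduce a + b
 * exactly. A sketch of the classic branch-free algorithm (Knuth's 2Sum),
 * which an efadd implementation is expected to match; the ref_two_sum name
 * is introduced here for reference only and is not used by the tests.
 */
static inline double ref_two_sum(double a, double b, double* e) {
	const double sum = a + b;
	const double bv = sum - a;   /* Part of b that made it into sum */
	const double av = sum - bv;  /* Part of a that made it into sum */
	*e = (a - av) + (b - bv);    /* Exact rounding error of a + b */
	return sum;
}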
/* Check that the high double is the sum of the addends rounded to the closest double-precision number */
TEST(efadd, high_double) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		double e;
		const double sum = efadd(a, b, &e);
		EXPECT_EQ(sum, a + b) << "a = " << a << " b = " << b;
	}
}

/* Check that the low double is not greater than half an ULP of the high double */
TEST(efadd, low_double) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		double e;
		const double sum = efadd(a, b, &e);
		EXPECT_LE(fabs(e), 0.5 * ulp(sum)) << "a = " << a << " b = " << b;
	}
}

/* Check that the sum of the outputs equals the sum of the inputs when the inputs have the same sign */
TEST(efadd, same_sign_error_free) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));

	mpfr_t sum_ab, sum_se;
	mpfr_init2(sum_ab, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(sum_se, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		double e;
		const double s = efadd(a, b, &e);

		mpfr_set_d(sum_ab, a, MPFR_RNDN);
		mpfr_add_d(sum_ab, sum_ab, b, MPFR_RNDN);

		mpfr_set_d(sum_se, s, MPFR_RNDN);
		mpfr_add_d(sum_se, sum_se, e, MPFR_RNDN);

		EXPECT_TRUE(mpfr_equal_p(sum_ab, sum_se)) << "a = " << a << " b = " << b;
	}
	mpfr_clear(sum_ab);
	mpfr_clear(sum_se);
}

/* Check that the sum of the outputs equals the sum of the inputs when the inputs have opposite signs */
TEST(efadd, opposite_sign_error_free) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));

	mpfr_t sum_ab, sum_se;
	mpfr_init2(sum_ab, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(sum_se, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = -rng();
		double e;
		const double s = efadd(a, b, &e);

		mpfr_set_d(sum_ab, a, MPFR_RNDN);
		mpfr_add_d(sum_ab, sum_ab, b, MPFR_RNDN);

		mpfr_set_d(sum_se, s, MPFR_RNDN);
		mpfr_add_d(sum_se, sum_se, e, MPFR_RNDN);

		EXPECT_TRUE(mpfr_equal_p(sum_ab, sum_se)) << "a = " << a << " b = " << b;
	}
	mpfr_clear(sum_ab);
	mpfr_clear(sum_se);
}
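/*
 * The efaddord tests mirror the efadd tests, but for the ordered variant:
 * efaddord may assume |a| >= |b| (hence the fmax/fmin pairing below), which
 * admits Dekker's cheaper Fast2Sum. A sketch for reference only; the
 * ref_fast_two_sum name is introduced here and is not used by the tests.
 */
static inline double ref_fast_two_sum(double a, double b, double* e) {
	/* Valid only when |a| >= |b| */
	const double sum = a + b;
	*e = b - (sum - a);  /* Exact rounding error of a + b */
	return sum;
}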
/* Check that the high double is the sum of the addends rounded to the closest double-precision number */
TEST(efaddord, high_double) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double x = rng();
		const double y = rng();
		const double a = fmax(x, y);
		const double b = fmin(x, y);
		double e;
		const double sum = efaddord(a, b, &e);
		EXPECT_EQ(sum, a + b) << "a = " << a << " b = " << b;
	}
}

/* Check that the low double is not greater than half an ULP of the high double */
TEST(efaddord, low_double) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double x = rng();
		const double y = rng();
		const double a = fmax(x, y);
		const double b = fmin(x, y);
		double e;
		const double sum = efaddord(a, b, &e);
		EXPECT_LE(fabs(e), 0.5 * ulp(sum)) << "a = " << a << " b = " << b;
	}
}

/* Check that the sum of the outputs equals the sum of the inputs when the inputs have the same sign */
TEST(efaddord, same_sign_error_free) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));

	mpfr_t sum_ab, sum_se;
	mpfr_init2(sum_ab, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(sum_se, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double x = rng();
		const double y = rng();
		const double a = fmax(x, y);
		const double b = fmin(x, y);
		double e;
		const double s = efaddord(a, b, &e);

		mpfr_set_d(sum_ab, a, MPFR_RNDN);
		mpfr_add_d(sum_ab, sum_ab, b, MPFR_RNDN);

		mpfr_set_d(sum_se, s, MPFR_RNDN);
		mpfr_add_d(sum_se, sum_se, e, MPFR_RNDN);

		EXPECT_TRUE(mpfr_equal_p(sum_ab, sum_se)) << "a = " << a << " b = " << b;
	}
	mpfr_clear(sum_ab);
	mpfr_clear(sum_se);
}

/* Check that the sum of the outputs equals the sum of the inputs when the inputs have opposite signs */
TEST(efaddord, opposite_sign_error_free) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));

	mpfr_t sum_ab, sum_se;
	mpfr_init2(sum_ab, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(sum_se, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double x = rng();
		const double y = rng();
		const double a = fmax(x, y);
		const double b = -fmin(x, y);
		double e;
		const double s = efaddord(a, b, &e);

		mpfr_set_d(sum_ab, a, MPFR_RNDN);
		mpfr_add_d(sum_ab, sum_ab, b, MPFR_RNDN);

		mpfr_set_d(sum_se, s, MPFR_RNDN);
		mpfr_add_d(sum_se, sum_se, e, MPFR_RNDN);

		EXPECT_TRUE(mpfr_equal_p(sum_ab, sum_se)) << "a = " << a << " b = " << b;
	}
	mpfr_clear(sum_ab);
	mpfr_clear(sum_se);
}
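/*
 * The efmul tests check the error-free product: with hardware FMA, the
 * rounding error of a * b is recovered in a single instruction. A sketch of
 * the FMA-based algorithm an efmul implementation is expected to match; the
 * ref_two_prod name is introduced here for reference only and is not used
 * by the tests.
 */
static inline double ref_two_prod(double a, double b, double* e) {
	const double prod = a * b;
	*e = fma(a, b, -prod);  /* Exact rounding error of a * b */
	return prod;
}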
/* Check that the high double is the product of the factors rounded to the closest double-precision number */
TEST(efmul, high_double) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		double e;
		const double product = efmul(a, b, &e);
		EXPECT_EQ(product, a * b) << "a = " << a << " b = " << b;
	}
}

/* Check that the low double is not greater than half an ULP of the high double */
TEST(efmul, low_double) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		double e;
		const double prod = efmul(a, b, &e);
		EXPECT_LE(fabs(e), 0.5 * ulp(prod)) << "a = " << a << " b = " << b;
	}
}

/* Check that the sum of the outputs equals the product of the inputs */
TEST(efmul, error_free) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));

	mpfr_t prod_ab, sum_se;
	mpfr_init2(prod_ab, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(sum_se, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		double e;
		const double s = efmul(a, b, &e);

		mpfr_set_d(prod_ab, a, MPFR_RNDN);
		mpfr_mul_d(prod_ab, prod_ab, b, MPFR_RNDN);

		mpfr_set_d(sum_se, s, MPFR_RNDN);
		mpfr_add_d(sum_se, sum_se, e, MPFR_RNDN);

		EXPECT_TRUE(mpfr_equal_p(prod_ab, sum_se)) << "a = " << a << " b = " << b;
	}
	mpfr_clear(prod_ab);
	mpfr_clear(sum_se);
}
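/*
 * The effma tests exercise the error-free transform of fused multiply-add.
 * Unlike addition and multiplication, the residual of fma(a, b, c) does not
 * in general fit in one double, so effma returns a triple that satisfies
 *
 *   a * b + c == acc + e_hi + e_lo   (exactly),
 *
 * where acc is the correctly rounded result and |e_lo| <= |e_hi|. These are
 * the properties of the three-term FMA transform due to Boldo and Muller;
 * each of them is checked against MPFR below.
 */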
/* Check that the high double is the FMA of the inputs rounded to the closest double-precision number */
TEST(effma, high_double) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		const double c = rng();
		double e_hi, e_lo;
		const double acc = effma(a, b, c, &e_hi, &e_lo);
		EXPECT_EQ(acc, fma(a, b, c)) << "a = " << a << " b = " << b << " c = " << c;
	}
}

/* Check that the total error is not greater than half an ULP of the high double */
TEST(effma, total_error) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		const double c = rng();
		double e_hi, e_lo;
		const double acc = effma(a, b, c, &e_hi, &e_lo);
		EXPECT_LE(fabs(e_lo + e_hi), 0.5 * ulp(acc)) << "a = " << a << " b = " << b << " c = " << c;
	}
}

/* Check that the high error is at least as large, in magnitude, as the low error */
TEST(effma, high_low_error) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		const double c = rng();
		double e_hi, e_lo;
		effma(a, b, c, &e_hi, &e_lo);
		EXPECT_LE(fabs(e_lo), fabs(e_hi)) << "a = " << a << " b = " << b << " c = " << c;
	}
}

/* Check that the sum of the outputs equals the exact fused multiply-add of the inputs */
TEST(effma, error_free) {
	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
	auto rng = std::bind(std::uniform_real_distribution<double>(), std::mt19937(seed));

	mpfr_t acc_abc, sum_acc_error;
	mpfr_init2(acc_abc, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	mpfr_init2(sum_acc_error, DBL_MANT_DIG + DBL_MAX_EXP - DBL_MIN_EXP);
	for (size_t iteration = 0; iteration < 1000; iteration++) {
		const double a = rng();
		const double b = rng();
		const double c = rng();
		double e_hi, e_lo;
		const double acc = effma(a, b, c, &e_hi, &e_lo);

		MPFR_DECL_INIT(mp_a, DBL_MANT_DIG);
		MPFR_DECL_INIT(mp_b, DBL_MANT_DIG);
		MPFR_DECL_INIT(mp_c, DBL_MANT_DIG);
		mpfr_set_d(mp_a, a, MPFR_RNDN);
		mpfr_set_d(mp_b, b, MPFR_RNDN);
		mpfr_set_d(mp_c, c, MPFR_RNDN);

		mpfr_fma(acc_abc, mp_a, mp_b, mp_c, MPFR_RNDN);

		mpfr_set_d(sum_acc_error, acc, MPFR_RNDN);
		mpfr_add_d(sum_acc_error, sum_acc_error, e_hi, MPFR_RNDN);
		mpfr_add_d(sum_acc_error, sum_acc_error, e_lo, MPFR_RNDN);

		EXPECT_TRUE(mpfr_equal_p(acc_abc, sum_acc_error)) << "a = " << a << " b = " << b << " c = " << c;
	}
	mpfr_clear(acc_abc);
	mpfr_clear(sum_acc_error);
}

int main(int ac, char* av[]) {
	testing::InitGoogleTest(&ac, av);
	return RUN_ALL_TESTS();
}
--------------------------------------------------------------------------------