├── .gitignore
├── AUTHORS.md
├── CMakeLists.txt
├── LICENSE
├── README.md
├── examples
    ├── CMakeLists.txt
    ├── DemoNewton.cpp
    ├── common_type.cpp
    ├── example.cpp
    └── example2.cpp
├── floatx.png
├── src
    └── floatx.hpp
├── test
    ├── CMakeLists.txt
    ├── IEEEHelper.cpp
    ├── IEEEHelper.h
    ├── NanInf.cpp
    ├── arithmetic.cpp
    ├── assignment.cpp
    ├── conversion.cpp
    ├── rel_ops.cpp
    ├── round_nearest.cpp
    ├── std_integration.cpp
    ├── stream.cpp
    ├── value_representation.cpp
    ├── value_representation_bits.cpp
    └── value_representation_half.cpp
├── testx
    ├── CMakeLists.txt
    ├── add_000.cpp
    ├── div_000.cpp
    ├── mul_000.cpp
    └── sub_000.cpp
└── third_party
    ├── CMakeLists.txt
    ├── DownloadCMakeLists.txt.in
    ├── git-cmake-format
        └── CMakeLists.txt
    ├── gtest
        └── CMakeLists.txt
    └── package_loader.cmake


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | Goran Flegar, Universitat Jaume I, <flegar@uji.es>  
2 | Florian Scheidegger, IBM Research GmbH, <eid@zurich.ibm.com>  
3 | Vedran Novakovic, Universitat Jaume I, <novakoni@uji.es>  
4 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.1)
 2 | project(FloatX)
 3 | 
 4 | # Switches selecting which optional parts of the build are generated.
 5 | option(BUILD_TESTS "Generate build files for unit tests" ON)
 6 | option(BUILD_EXHAUSTIVE_TESTS "Generate build files for exhaustive tests" OFF)
 7 | option(DEVEL_TOOLS "Include development tools in build system" ON)
 8 | option(BUILD_EXAMPLES "Build examples in the examples/ directory" ON)
 9 | 
10 | # Default every target created below to C++11 and refuse older standards.
11 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
12 | set(CMAKE_CXX_STANDARD 11)
13 | 
14 | add_subdirectory(third_party)  # third party tools and libraries
15 | 
16 | # floatx is header-only: the INTERFACE target only exports the src/ include
17 | # directory to the targets that link against it.
18 | add_library(floatx INTERFACE)
19 | target_include_directories(floatx INTERFACE src/)
20 | 
21 | # Enable CTest once, here at the top level, if either test suite is built.
22 | if(BUILD_TESTS OR BUILD_EXHAUSTIVE_TESTS)
23 |     enable_testing()
24 | endif()
25 | 
26 | # Unit tests.
27 | if(BUILD_TESTS)
28 |     add_subdirectory(test)
29 | endif()
30 | 
31 | # Exhaustive tests (off by default).
32 | if(BUILD_EXHAUSTIVE_TESTS)
33 |     add_subdirectory(testx)
34 | endif()
35 | 
36 | # Example programs.
37 | if(BUILD_EXAMPLES)
38 |     add_subdirectory(examples)
39 | endif()
40 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Copyright 2018 - The OPRECOMP Project Consortium, IBM Research GmbH, University Jaume I. All rights reserved.
  2 | 
  3 |                                  Apache License
  4 |                            Version 2.0, January 2004
  5 |                         http://www.apache.org/licenses/
  6 | 
  7 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  8 | 
  9 |    1. Definitions.
 10 | 
 11 |       "License" shall mean the terms and conditions for use, reproduction,
 12 |       and distribution as defined by Sections 1 through 9 of this document.
 13 | 
 14 |       "Licensor" shall mean the copyright owner or entity authorized by
 15 |       the copyright owner that is granting the License.
 16 | 
 17 |       "Legal Entity" shall mean the union of the acting entity and all
 18 |       other entities that control, are controlled by, or are under common
 19 |       control with that entity. For the purposes of this definition,
 20 |       "control" means (i) the power, direct or indirect, to cause the
 21 |       direction or management of such entity, whether by contract or
 22 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 23 |       outstanding shares, or (iii) beneficial ownership of such entity.
 24 | 
 25 |       "You" (or "Your") shall mean an individual or Legal Entity
 26 |       exercising permissions granted by this License.
 27 | 
 28 |       "Source" form shall mean the preferred form for making modifications,
 29 |       including but not limited to software source code, documentation
 30 |       source, and configuration files.
 31 | 
 32 |       "Object" form shall mean any form resulting from mechanical
 33 |       transformation or translation of a Source form, including but
 34 |       not limited to compiled object code, generated documentation,
 35 |       and conversions to other media types.
 36 | 
 37 |       "Work" shall mean the work of authorship, whether in Source or
 38 |       Object form, made available under the License, as indicated by a
 39 |       copyright notice that is included in or attached to the work
 40 |       (an example is provided in the Appendix below).
 41 | 
 42 |       "Derivative Works" shall mean any work, whether in Source or Object
 43 |       form, that is based on (or derived from) the Work and for which the
 44 |       editorial revisions, annotations, elaborations, or other modifications
 45 |       represent, as a whole, an original work of authorship. For the purposes
 46 |       of this License, Derivative Works shall not include works that remain
 47 |       separable from, or merely link (or bind by name) to the interfaces of,
 48 |       the Work and Derivative Works thereof.
 49 | 
 50 |       "Contribution" shall mean any work of authorship, including
 51 |       the original version of the Work and any modifications or additions
 52 |       to that Work or Derivative Works thereof, that is intentionally
 53 |       submitted to Licensor for inclusion in the Work by the copyright owner
 54 |       or by an individual or Legal Entity authorized to submit on behalf of
 55 |       the copyright owner. For the purposes of this definition, "submitted"
 56 |       means any form of electronic, verbal, or written communication sent
 57 |       to the Licensor or its representatives, including but not limited to
 58 |       communication on electronic mailing lists, source code control systems,
 59 |       and issue tracking systems that are managed by, or on behalf of, the
 60 |       Licensor for the purpose of discussing and improving the Work, but
 61 |       excluding communication that is conspicuously marked or otherwise
 62 |       designated in writing by the copyright owner as "Not a Contribution."
 63 | 
 64 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 65 |       on behalf of whom a Contribution has been received by Licensor and
 66 |       subsequently incorporated within the Work.
 67 | 
 68 |    2. Grant of Copyright License. Subject to the terms and conditions of
 69 |       this License, each Contributor hereby grants to You a perpetual,
 70 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 71 |       copyright license to reproduce, prepare Derivative Works of,
 72 |       publicly display, publicly perform, sublicense, and distribute the
 73 |       Work and such Derivative Works in Source or Object form.
 74 | 
 75 |    3. Grant of Patent License. Subject to the terms and conditions of
 76 |       this License, each Contributor hereby grants to You a perpetual,
 77 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 78 |       (except as stated in this section) patent license to make, have made,
 79 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 80 |       where such license applies only to those patent claims licensable
 81 |       by such Contributor that are necessarily infringed by their
 82 |       Contribution(s) alone or by combination of their Contribution(s)
 83 |       with the Work to which such Contribution(s) was submitted. If You
 84 |       institute patent litigation against any entity (including a
 85 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 86 |       or a Contribution incorporated within the Work constitutes direct
 87 |       or contributory patent infringement, then any patent licenses
 88 |       granted to You under this License for that Work shall terminate
 89 |       as of the date such litigation is filed.
 90 | 
 91 |    4. Redistribution. You may reproduce and distribute copies of the
 92 |       Work or Derivative Works thereof in any medium, with or without
 93 |       modifications, and in Source or Object form, provided that You
 94 |       meet the following conditions:
 95 | 
 96 |       (a) You must give any other recipients of the Work or
 97 |           Derivative Works a copy of this License; and
 98 | 
 99 |       (b) You must cause any modified files to carry prominent notices
100 |           stating that You changed the files; and
101 | 
102 |       (c) You must retain, in the Source form of any Derivative Works
103 |           that You distribute, all copyright, patent, trademark, and
104 |           attribution notices from the Source form of the Work,
105 |           excluding those notices that do not pertain to any part of
106 |           the Derivative Works; and
107 | 
108 |       (d) If the Work includes a "NOTICE" text file as part of its
109 |           distribution, then any Derivative Works that You distribute must
110 |           include a readable copy of the attribution notices contained
111 |           within such NOTICE file, excluding those notices that do not
112 |           pertain to any part of the Derivative Works, in at least one
113 |           of the following places: within a NOTICE text file distributed
114 |           as part of the Derivative Works; within the Source form or
115 |           documentation, if provided along with the Derivative Works; or,
116 |           within a display generated by the Derivative Works, if and
117 |           wherever such third-party notices normally appear. The contents
118 |           of the NOTICE file are for informational purposes only and
119 |           do not modify the License. You may add Your own attribution
120 |           notices within Derivative Works that You distribute, alongside
121 |           or as an addendum to the NOTICE text from the Work, provided
122 |           that such additional attribution notices cannot be construed
123 |           as modifying the License.
124 | 
125 |       You may add Your own copyright statement to Your modifications and
126 |       may provide additional or different license terms and conditions
127 |       for use, reproduction, or distribution of Your modifications, or
128 |       for any such Derivative Works as a whole, provided Your use,
129 |       reproduction, and distribution of the Work otherwise complies with
130 |       the conditions stated in this License.
131 | 
132 |    5. Submission of Contributions. Unless You explicitly state otherwise,
133 |       any Contribution intentionally submitted for inclusion in the Work
134 |       by You to the Licensor shall be under the terms and conditions of
135 |       this License, without any additional terms or conditions.
136 |       Notwithstanding the above, nothing herein shall supersede or modify
137 |       the terms of any separate license agreement you may have executed
138 |       with Licensor regarding such Contributions.
139 | 
140 |    6. Trademarks. This License does not grant permission to use the trade
141 |       names, trademarks, service marks, or product names of the Licensor,
142 |       except as required for reasonable and customary use in describing the
143 |       origin of the Work and reproducing the content of the NOTICE file.
144 | 
145 |    7. Disclaimer of Warranty. Unless required by applicable law or
146 |       agreed to in writing, Licensor provides the Work (and each
147 |       Contributor provides its Contributions) on an "AS IS" BASIS,
148 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 |       implied, including, without limitation, any warranties or conditions
150 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 |       PARTICULAR PURPOSE. You are solely responsible for determining the
152 |       appropriateness of using or redistributing the Work and assume any
153 |       risks associated with Your exercise of permissions under this License.
154 | 
155 |    8. Limitation of Liability. In no event and under no legal theory,
156 |       whether in tort (including negligence), contract, or otherwise,
157 |       unless required by applicable law (such as deliberate and grossly
158 |       negligent acts) or agreed to in writing, shall any Contributor be
159 |       liable to You for damages, including any direct, indirect, special,
160 |       incidental, or consequential damages of any character arising as a
161 |       result of this License or out of the use or inability to use the
162 |       Work (including but not limited to damages for loss of goodwill,
163 |       work stoppage, computer failure or malfunction, or any and all
164 |       other commercial damages or losses), even if such Contributor
165 |       has been advised of the possibility of such damages.
166 | 
167 |    9. Accepting Warranty or Additional Liability. While redistributing
168 |       the Work or Derivative Works thereof, You may choose to offer,
169 |       and charge a fee for, acceptance of support, warranty, indemnity,
170 |       or other liability obligations and/or rights consistent with this
171 |       License. However, in accepting such obligations, You may act only
172 |       on Your own behalf and on Your sole responsibility, not on behalf
173 |       of any other Contributor, and only if You agree to indemnify,
174 |       defend, and hold each Contributor harmless for any liability
175 |       incurred by, or claims asserted against, such Contributor by reason
176 |       of your accepting any such warranty or additional liability.
177 | 
178 |    END OF TERMS AND CONDITIONS
179 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | FloatX (Float eXtended)
  2 | =======================
  3 | 
  4 | FloatX is a header-only C++ library which extends floating point types beyond
  5 | the native single and double (and on some hardware half) precision types. It
  6 | provides template types which allow the user to select the number of bits used
  7 | for the exponent and significand parts of the floating point number.
  8 | The idea of FloatX is based on the FlexFloat library, but, instead of
  9 | implementing the functionality in C and providing C++ wrappers, FloatX is
 10 | written completely in C++, which makes it more natural to the end user.
 11 | In addition, FloatX provides a superset of FlexFloat's functionalities.
 12 | 
 13 | 
 14 | ![](./floatx.png)
 15 | 
 16 | 
 17 | Features
 18 | --------
 19 | 
 20 | This section lists the functionalities provided by FloatX. Functionalities that
 21 | are also provided by FlexFloat have (_flexfloat_) appended to the description.
 22 | 
 23 | *   header-only library, without a compiled component, and heavy inlining,
 24 |     resulting in relatively high performance
 25 | *   `floatx<exp_bits, sig_bits, backend_float>` class template, which allows
 26 |     emulation of non-native types with `exp_bits` exponent bits and `sig_bits`
 27 |     significand bits using a natively supported `backend_float` type to perform
 28 |     arithmetic operations (_flexfloat_ - provides a similar functionality in
 29 |     the C++ wrapper, but the memory consumption of the flexfloat C++ class was
 30 |     suboptimal).
 31 | *   `floatxr<backend_float>` class template, which provides the same
 32 |     functionality as `floatx`, but allows changing the precision of the type
 33 |     at runtime. This class is easier to experiment with, but is not as
 34 |     efficient as `floatx` in terms of both performance and memory
 35 |     consumption. (_flexfloat_ - provides a type that has a comparable memory
 36 |     consumption with the precision selectable at runtime in the C library only)
 37 | *   conversions between builtin types and `floatx`
 38 |     (_flexfloat_ - had a bug where NaN can be cast to Inf during conversion)
 39 | *   assignments on `floatx` and `floatxr` types (_flexfloat_)
 40 | *   relational operations on `floatx` and `floatxr` types
 41 |     (_flexfloat_ - did not handle NaN properly)
 42 | *   relational operations between different types
 43 | *   arithmetic operations on `floatx` and `floatxr` types (_flexfloat_)
 44 | *   arithmetic operations between different types with implicit type promotion
 45 | *   `std::ostream& operator <<(std::ostream&, floatx[r])` (_flexfloat_)
 46 | *   `std::istream& operator >>(std::istream&, floatx[r])`
 47 | *   CUDA support
 48 | 
 49 | 
 50 | What FloatX is NOT
 51 | ------------------
 52 | 
 53 | FloatX does not implement arbitrary floating point types. The only supported
 54 | types are "subtypes" of those natively supported by the hardware.
 55 | In case you need implementations of larger types, consider using the SoftFloat
 56 | library.
 57 | 
 58 | FloatX __emulates__ the types of custom precision, subject to the constraints
 59 | above, and, while trying to achieve as high performance as possible, it is
 60 | __not__ capable of magically delivering better performance than natively
 61 | supported types. Thus, do not expect `floatx<3, 3>` to consume less memory, or
 62 | be faster than e.g. `float`, though `floatx<11, 52>` should deliver similar
 63 | performance as `double`.
 64 | 
 65 | That being said, it is not likely that FloatX will be useful in production
 66 | codes. On the other hand, it can be handy in research projects which aim to
 67 | study the effects of using different precisions.
 68 | 
 69 | Installation
 70 | ------------
 71 | 
 72 | To use the library, just make sure that a directory containing `floatx.hpp` is
 73 | in your include path (here, it is in `src/` subdirectory).
 74 | 
 75 | Alternatively, if you are using CMake, a `CMakeLists.txt` file is provided.
 76 | You can download the repository into your project and use the following code to
 77 | depend on the floatx target:
 78 | 
 79 | ```
 80 | add_subdirectory(floatx)
 81 | target_link_libraries(my_target PRIVATE floatx)
 82 | ```
 83 | 
 84 | ### Building the examples / unit tests
 85 | 
 86 | A standard CMake command line sequence should do:
 87 | 
 88 | ```
 89 | mkdir build && cd build && cmake .. && make
 90 | ```
 91 | 
 92 | To run all the tests:
 93 | 
 94 | ```
 95 | make test
 96 | ```
 97 | 
 98 | This will (hopefully) output a summary of the form:
 99 | 
100 | ```
101 | test_<testname>............ Passed
102 | ```
103 | 
104 | To run only one of the tests (and see more detailed output):
105 | 
106 | ```
107 | ./test/<testname>
108 | ```
109 | 
110 | 
111 | Examples
112 | --------
113 | 
114 | Some sample code using floatx:
115 | ```
116 | 1:  flx::floatx<7, 12> a = 1.2; // 7 exponent bits, 12 significand bits
117 | 2:  flx::floatx<7, 12> b = 3; // 7 exponent bits, 12 significand bits
118 | 3:  flx::floatx<10, 9> c; // 10 exponent bits, 9 significand bits
119 | 4:  float d = 3.2;
120 | 5:  double e = 5.2;
121 | 6:
122 | 7:  std :: cin >> c;
123 | 8:  c = a + b; // decltype (a + b) == floatx <7, 12>
124 | 9:  bool t = a < b;
125 | 10:  a += c;
126 | 11:  d = a / c; // decltype (a / c) == floatx <10 , 12>
127 | 12:  e = c - d; // decltype (c - d) == floatx <10 , 23>
128 | 13:  c = a * e; // decltype (a * e) == floatx <11 , 52>
129 | 14:  std :: cout << c;
130 | ```
131 | 
132 | Lines 1, 2, 3, and 7 show how floatx numbers can be constructed
133 | from built-in types (floating-point numbers and integers) and read
134 | from C++ streams. Lines 8 and 9 show how these objects are used
135 | to perform basic arithmetic and relational operations. Lines 10-13
136 | demonstrate the interoperability between different floatx and built-in
137 | types. The comments on the right specify the return type of the
138 | operation. Note, that T == U, where T and U are types, is used to
139 | convey that these two types are the same, i.e., that std::is_same<T,
140 | U>::value evaluates to true. Lines 8 and 11-13 also show that floatx
141 | types can be implicitly converted to other floatx types or built-in
142 | types. Finally, line 14 shows how floatx types can be written to an
143 | output stream.
144 | 
145 | 
146 | ## Authors and contacts
147 |  - Goran Flegar, Departamento de Ingeniería y Ciencia de Computadores, Universidad Jaime I, Spain, flegar@uji.es
148 |  - Florian Scheidegger, IBM Research - Zurich, eid@zurich.ibm.com
149 |  - Vedran Novakovic, Departamento de Ingeniería y Ciencia de Computadores, Universidad Jaime I, Spain
150 |  - Giovani Mariani, IBM Research - Zurich,
151 |  - Andres E. Tomas, Departamento de Ingeniería y Ciencia de Computadores, Universidad Jaime I, Spain,tomasan@uji.es
152 |  - A. Cristiano I. Malossi, IBM Research - Zurich, acm@zurich.ibm.com
153 |  - Enrique S. Quintana-Orti, Departamento de Informática de Sistemas y Computadores,Universitat Politècnica de València, Spain, quintana@icc.uji.es
154 | 
155 | 
156 | ## Reference
157 | 
158 | The full text of our paper explaining floatx in detail is available under the following link: https://dl.acm.org/doi/pdf/10.1145/3368086?download=true.
159 | 
160 | Please, if you like and use our work, cite our paper as follows:
161 | 
162 | ```
163 | @article{flegar2019floatx,
164 | author={Flegar, Goran and Scheidegger, Florian and Novakovi{\'c}, Vedran and Mariani, Giovani and Tom{\'a}s, Andr{\'e}s E and Malossi, A Cristiano I and Quintana-Ort{\'\i}, Enrique S},
165 |  title = {FloatX: A C++ Library for Customized Floating-Point Arithmetic},
166 |  year = {2019},
167 |  issue_date = {December 2019},
168 |  publisher = {Association for Computing Machinery},
169 |  address = {New York, NY, USA},
170 |  volume = {45},
171 |  number = {4},
172 |  issn = {0098-3500},
173 |  url = {https://doi.org/10.1145/3368086},
174 |  doi = {10.1145/3368086},
175 |  journal={ACM Transactions on Mathematical Software (TOMS)},
176 |  month = dec,
177 |  articleno = {Article 40},
178 |  numpages = {23},
179 | }
180 | ```
181 | 
182 | ## Acknowledgments
183 | 
184 | This work was funded by the European Union’s H2020 research and innovation program under grant
185 | agreement No 732631, project OPRECOMP.
186 | 
187 | For details visit http://oprecomp.eu/. 
188 | 
189 | 
190 | 
191 | 
192 | 


--------------------------------------------------------------------------------
/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Build every example program as its own executable; each one links only
 2 | # against the floatx target.
 3 | foreach(example_name example example2 common_type DemoNewton)
 4 |     add_executable(${example_name} ${example_name}.cpp)
 5 |     target_link_libraries(${example_name} PRIVATE floatx)
 6 | endforeach()


--------------------------------------------------------------------------------
/examples/DemoNewton.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | /*
 19 |  * compile this file with:
 20 |  * g++ -std=c++11 -Wall -o DemoNewton -I ../src DemoNewton.cpp
 21 |  */
 22 | 
 23 | #include <floatx.hpp>
 24 | #include <iostream>
 25 | 
 26 | // Babylonian method (Newton's iteration for x^2 = a), float-only version.
 27 | // Starts at a0, prints every iterate and stops once two consecutive
 28 | // iterates differ by no more than tol; returns the last iterate.
 29 | // See: https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Example
 30 | float myroot(float a, float a0, float tol)
 31 | {
 32 |     float current = a0;
 33 |     int iteration = 0;
 34 |     for (;;) {
 35 |         // Babylonian update: average the estimate with a / estimate.
 36 |         const float next = 0.5 * (current + a / current);
 37 |         const float step = fabs(current - next);
 38 |         printf("[k=%i]: %f %e\n", iteration++, next, step);
 39 |         current = next;
 40 |         if (!(step > tol)) {  // written negated so a NaN step also stops
 41 |             return current;
 42 |         }
 43 |     }
 44 | }
 45 | 
 46 | // Generic variant of myroot: the same Babylonian iteration carried out in type
 47 | // T. Returns the last iterate; with T = float it matches myroot above.
 48 | template <typename T>
 49 | T myroot_general(T a, T a0, T tol)
 50 | {
 51 |     T x = a0;  // current estimate, seeded with the initial guess
 52 |     T xnext;
 53 |     T err;
 54 |     // k counts iterations for the progress output only
 55 |     int k = 0;
 56 |     do {
 57 |         xnext = 0.5 * (x + a / x);
 58 |         // fabs(...) is not assumed to exist for T (e.g. the floatx type),
 59 |         // so the step is measured in double and converted back to T
 60 |         err = (T)fabs(double(x - xnext));
 61 |         printf("[k=%i]: %f %e\n", k++, double(xnext), double(err));
 62 |         x = xnext;
 63 |     } while (err > tol);  // stop once the step size is no larger than tol
 64 |     return xnext;
 65 | }
 66 | // Usage: DemoNewton <a> <a0> - approximates sqrt(a) from the guess a0 twice.
 67 | int main(int argc, char** argv)
 68 | {
 69 |     printf(
 70 |         "floatx working "
 71 |         "example\n==============================================\n");
 72 |     printf("Iteratively compute the square root of a\n");
 73 | 
 74 |     if (argc != 3) {
 75 |         printf("Usage: %s <a> <a0>\n computes root(a) by Newton Iterations.\n",
 76 |                argv[0]);
 77 |         printf("Example: \n %s 2 1\n", argv[0]);
 78 |         exit(-1);
 79 |     }
 80 | 
 81 |     float a = atof(argv[1]);
 82 |     float a0 = atof(argv[2]);
 83 | 
 84 |     // Announce the section before iterating so that the per-iteration log of
 85 |     // myroot appears under its heading, as it does for the floatx run below.
 86 |     printf("\n\nBaseline version (float)\n\n");
 87 |     float res = myroot(a, a0, 1e-6);
 88 |     printf("==============================================\n");
 89 |     printf("Result Computed (float):      %.20f\n", res);
 90 |     float ref = sqrt(a);
 91 |     printf("Reference:                    %.20f\n", ref);
 92 |     printf("==============================================\n");
 93 |     printf("Error:                        %e\n", ref - res);
 94 |     printf("==============================================\n");
 95 | 
 96 |     // Second run: the same iteration carried out in flx::floatx<5, 10>.
 97 |     printf("\n\nFloatx Version IEEE 16bit, e.g., floatx<5,10>\n\n");
 98 |     res = myroot_general<flx::floatx<5, 10>>(a, a0, 1e-6);
 99 |     printf("==============================================\n");
100 |     printf("Result Computed (floatx<E,M>: %.20f\n", res);
101 |     printf("Reference:                    %.20f\n", ref);
102 |     printf("==============================================\n");
103 |     printf("Error:                        %e\n", ref - res);
104 |     printf("==============================================\n");
105 | 
106 |     return 0;
107 | }


--------------------------------------------------------------------------------
/examples/common_type.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
 3 |                     IBM Research GmbH. All rights reserved.
 4 | 
 5 |    Licensed under the Apache License, Version 2.0 (the "License");
 6 |    you may not use this file except in compliance with the License.
 7 |    You may obtain a copy of the License at
 8 | 
 9 |        http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |    Unless required by applicable law or agreed to in writing, software
12 |    distributed under the License is distributed on an "AS IS" BASIS,
13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |    See the License for the specific language governing permissions and
15 |    limitations under the License.
16 | */
17 | 
18 | #include <iostream>
19 | 
20 | 
21 | // Uncomment to disable common type resolution.
22 | // #define FLOATX_NO_TYPE_RESOLUTION
23 | #include <floatx.hpp>
24 | 
25 | 
26 | int main()
27 | {
28 |     typedef flx::floatx<5, 7> float1;
29 |     typedef flx::floatx<4, 8> float2;
30 | 
31 |     // Both operands share one type, so this sum always compiles.
32 |     const auto same_type_sum = float1(2.6) + float1(6.2);
33 |     std::cout << same_type_sum << std::endl;
34 | 
35 |     // Mixed operand types rely on common type resolution; this line fails to
36 |     // compile when FLOATX_NO_TYPE_RESOLUTION is defined.
37 |     const auto mixed_type_sum = float1(2.6) + float2(6.2);
38 |     std::cout << mixed_type_sum << std::endl;
39 | }
40 | 


--------------------------------------------------------------------------------
/examples/example.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
 3 |                     IBM Research GmbH. All rights reserved.
 4 | 
 5 |    Licensed under the Apache License, Version 2.0 (the "License");
 6 |    you may not use this file except in compliance with the License.
 7 |    You may obtain a copy of the License at
 8 | 
 9 |        http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |    Unless required by applicable law or agreed to in writing, software
12 |    distributed under the License is distributed on an "AS IS" BASIS,
13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |    See the License for the specific language governing permissions and
15 |    limitations under the License.
16 | */
17 | 
18 | /*
19 |  * Example of using the FloatX library. Compile with
20 |  * g++ -std=c++11 -I ../src example.cpp
21 |  */
22 | #include <iostream>
23 | 
24 | 
25 | #include <floatx.hpp>
26 | 
27 | 
28 | int main()
29 | {
30 |     // Report the storage footprint of the compile-time and runtime variants.
31 |     std::cout << "sizeof(floatx)  = " << sizeof(flx::floatx<11, 52>);
32 |     std::cout << "\nsizeof(floatxr) = " << sizeof(flx::floatxr<>) << std::endl;
33 | 
34 |     // Precision fixed at compile time through the template arguments.
35 |     flx::floatx<11, 52> f;      // same exponent/significand widths as double
36 |     flx::floatx<7, 22> g(5.3);  // 7 exponent bits, 22 significand bits
37 | 
38 |     // Precision passed to the constructor at runtime.
39 |     flx::floatxr<> fr(11, 52);
40 |     flx::floatxr<> gr(7, 22, 5.3);
41 | 
42 |     std::cout << std::scientific;
43 | 
44 |     // Every variant converts explicitly to double.
45 |     std::cout << double(f) << std::endl;
46 |     std::cout << double(g) << std::endl;
47 |     std::cout << double(fr) << std::endl;
48 |     std::cout << double(gr) << std::endl;
49 | 
50 |     // Convert both values to a much narrower floatx<3, 2>.
51 |     flx::floatx<3, 2> lg(g);
52 |     flx::floatx<3, 2> lgr(gr);
53 | 
54 |     std::cout << double(lg) << ", precision = "
55 |               << "(" << get_exp_bits(lg) << ", " << get_sig_bits(lg) << ")\n";
56 |     std::cout << double(lgr) << ", precision = "
57 |               << "(" << get_exp_bits(lgr) << ", " << get_sig_bits(lgr) << ")"
58 |               << std::endl;
59 |     return 0;
60 | }
61 | 


--------------------------------------------------------------------------------
/examples/example2.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | /*
 19 |  * Example of using the FloatX library. Compile with
 20 |  * g++ -std=c++11 -I ../src example2.cpp
 21 |  */
 22 | #include <iostream>
 23 | 
 24 | 
 25 | #include <floatx.hpp>
 26 | 
 27 | template <typename T>
 28 | void foo(T* a, int n)
 29 | {
 30 |     // Fills a[0..n) with 1, 1, 2, 3, ...; a signed index keeps n <= 0 a no-op.
 31 |     std::cout << "HI FOO ROUTINE\n";
 32 |     for (int i = 0; i < n && i < 2; ++i) {
 33 |         a[i] = 1;
 34 |     }
 35 |     for (int i = 2; i < n; ++i) {
 36 |         a[i] = a[i - 1] + a[i - 2];
 37 |     }
 38 | }
 39 | 
 40 | void compileExample()
 41 | {
 42 |     // Multiplies a floatx<11, 48> value by a plain double and prints it.
 43 |     using T = flx::floatx<11, 48>;
 44 |     const double a = 3.1;
 45 |     const double b = 5.2;
 46 |     // Only `a` is converted to T, so the product mixes T and double.
 47 |     // NOTE(review): the original comment on this statement just said "ERROR".
 48 |     T res = 0;
 49 |     res = T(a) * b;
 50 |     std::cout << "[cout] res = " << res << std::endl;
 51 | }
 52 | 
 53 | void compileExample2()
 54 | {
 55 |     // Equality comparisons between a floatx<11, 48> value and built-in
 56 |     // numbers; prints the outcome of each comparison.
 57 |     using T = flx::floatx<11, 48>;
 58 |     T res = 33.45;
 59 |     int i = 4;
 60 | 
 61 |     std::cout << "[cout] res = " << res << " and i = " << i << std::endl;
 62 | 
 63 |     // TODO: comparing with the int directly (without the cast to double)
 64 |     // does not work yet.
 65 |     const bool equals_int = (res == static_cast<double>(i));
 66 |     const char* int_verdict = equals_int ? "TRUE\n" : "FALSE\n";
 67 |     std::cout << int_verdict;
 68 | 
 69 |     // Comparison against a double literal, no cast involved.
 70 |     const bool equals_three = (res == 3.0);
 71 |     const char* literal_verdict = equals_three ? "TRUE 2\n" : "FALSE 2\n";
 72 |     std::cout << literal_verdict;
 73 | }
 74 | 
 75 | int main()
 76 | {
 77 |     compileExample();
 78 |     compileExample2();
 79 |     // std::cout rather than printf: this file does not include <cstdio>.
 80 |     std::cout << "--------------------------\n";
 81 | 
 82 |     // Three compile-time formats with different widths. (Simpler setups use
 83 |     // one format for all three, e.g. floatx<11, 52> or floatx<5, 30>.)
 84 |     flx::floatx<11, 48> ff_a;
 85 |     flx::floatx<5, 5> ff_b;
 86 |     flx::floatx<11, 30> ff_c;
 87 | 
 88 |     // Assignment from double literals.
 89 |     ff_a = 10.4;
 90 |     ff_b = 11.5;
 91 | 
 92 |     // Overloaded operators with a double operand.
 93 |     // NOTE(review): the original notes state that int operands
 94 |     // (`ff_b += 2`, `ff_b + 2`) do not work; not re-checked here.
 95 |     ff_b += 2.0;
 96 |     ff_b = ff_b + 2.0;
 97 | 
 98 |     // Operands of two different formats, result stored in a third one.
 99 |     ff_c = ff_a + ff_b;
100 | 
101 |     // C++ output stream: explicit output as double.
102 |     std::cout << "output after double cast:\n";
103 |     std::cout << "[cout] ff_a = " << double(ff_a) << std::endl;
104 |     std::cout << "[cout] ff_b = " << double(ff_b) << std::endl;
105 |     std::cout << "[cout] ff_c = " << double(ff_c) << std::endl;
106 | 
107 |     // Implicit output works as well.
108 |     std::cout << "Output:\n";
109 |     std::cout << "[cout] ff_a = " << ff_a << std::endl;
110 |     std::cout << "[cout] ff_b = " << ff_b << std::endl;
111 |     std::cout << "[cout] ff_c = " << ff_c << std::endl;
112 | 
113 |     std::cout << "Get information about type:\n";
114 |     std::cout << "[cout] ff_a = " << ff_a << " <" << get_exp_bits(ff_a) << ","
115 |               << get_sig_bits(ff_a) << ">" << std::endl;
116 |     std::cout << "[cout] ff_b = " << ff_b << " <" << get_exp_bits(ff_b) << ","
117 |               << get_sig_bits(ff_b) << ">" << std::endl;
118 |     std::cout << "[cout] ff_c = " << ff_c << " <" << get_exp_bits(ff_c) << ","
119 |               << get_sig_bits(ff_c) << ">" << std::endl;
120 | 
121 |     std::cout << "Sizeof Results (it is the static case):\n";
122 |     std::cout << "sizeof( ff_a ) = " << sizeof(ff_a) << "\n";
123 |     std::cout << "sizeof( ff_b ) = " << sizeof(ff_b) << "\n";
124 |     std::cout << "sizeof( ff_c ) = " << sizeof(ff_c) << "\n";
125 | 
126 |     // Array of floatx<5, 12> values, first filled with 0, 1, ..., n - 1.
127 |     // Signed loop counters avoid comparing an unsigned index with the int n.
128 |     const int n = 100;
129 |     flx::floatx<5, 12>* a = new flx::floatx<5, 12>[n];
130 |     for (int i = 0; i < n; ++i) {
131 |         a[i] = i;
132 |     }
133 | 
134 |     foo(a, n);  // T is deduced as flx::floatx<5, 12>
135 | 
136 |     for (int i = 0; i < n; ++i) {
137 |         std::cout << i << ":\t" << double(a[i]) << std::endl;
138 |     }
139 |     delete[] a;
140 |     return 0;
141 | }
142 | 


--------------------------------------------------------------------------------
/floatx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oprecomp/FloatX/a67318fbedf0cebd5da277f3633275b905a5c12a/floatx.png


--------------------------------------------------------------------------------
/src/floatx.hpp:
--------------------------------------------------------------------------------
   1 | /*
   2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
   3 |                     IBM Research GmbH. All rights reserved.
   4 | 
   5 |    Licensed under the Apache License, Version 2.0 (the "License");
   6 |    you may not use this file except in compliance with the License.
   7 |    You may obtain a copy of the License at
   8 | 
   9 |        http://www.apache.org/licenses/LICENSE-2.0
  10 | 
  11 |    Unless required by applicable law or agreed to in writing, software
  12 |    distributed under the License is distributed on an "AS IS" BASIS,
  13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 |    See the License for the specific language governing permissions and
  15 |    limitations under the License.
  16 | */
  17 | 
  18 | #ifndef FLOATX_FLOATX_HPP_
  19 | #define FLOATX_FLOATX_HPP_
  20 | 
  21 | 
  22 | #ifdef __CUDA_ARCH__
  23 | #include "cuda_runtime.h"
  24 | #endif  // __CUDA_ARCH__
  25 | 
  26 | #include <climits>
  27 | 
  28 | #if CHAR_BIT != 8
  29 | #error Expecting 8 bits in a char!
  30 | #endif  // ?CHAR_BIT
  31 | 
  32 | #include <cassert>
  33 | #include <cmath>
  34 | #include <cstdint>
  35 | 
  36 | #include <bitset>
  37 | #include <istream>
  38 | #include <ostream>
  39 | #include <string>
  40 | #include <type_traits>
  41 | 
  42 | 
  43 | #ifdef __CUDA_ARCH__
  44 | #define FLOATX_ATTRIBUTES __host__ __device__
  45 | #define FLOATX_INLINE __forceinline__
  46 | #else  // __CUDA_ARCH__
  47 | #define FLOATX_ATTRIBUTES
  48 | #define FLOATX_INLINE inline
  49 | #endif  // __CUDA_ARCH__
  50 | 
  51 | 
  52 | #define USE_BUILTINS
  53 | 
  54 | 
  55 | namespace flx {
  56 | 
  57 | 
  58 | namespace detail {
  59 | 
  60 | 
  61 | static constexpr int bits_in_byte = CHAR_BIT;
  62 | 
  63 | // Forward declaration: base class template that floatx and floatxr derive from.
  64 | template <typename>
  65 | class floatx_base;
  66 | 
  67 | // Maps a size in bytes to the fixed-width unsigned integer of that size.
  68 | template <int size>
  69 | struct bits_type {};  // no `type` member for sizes without a specialization
  70 | 
  71 | // Specializes bits_type for a width given in bits (used for 8, 16, 32, 64).
  72 | #define ENABLE_STANDARD_BITS_TYPE(_size)     \
  73 |     template <>                              \
  74 |     struct bits_type<_size / bits_in_byte> { \
  75 |         using type = std::uint##_size##_t;   \
  76 |     }
  77 | 
  78 | ENABLE_STANDARD_BITS_TYPE(8);
  79 | ENABLE_STANDARD_BITS_TYPE(16);
  80 | ENABLE_STANDARD_BITS_TYPE(32);
  81 | ENABLE_STANDARD_BITS_TYPE(64);
  82 | 
  83 | #undef ENABLE_STANDARD_BITS_TYPE
  84 | 
  85 | 
  86 | }  // namespace detail
  87 | 
  88 | // Adds bit positions, unshifted masks, bias and bits_type to a traits class.
  89 | #define FLOATX_USE_DEFAULT_TRAITS(_type)                                 \
  90 |     static const auto sig_pos = 0;                                       \
  91 |     static const auto exp_pos = float_traits<_type>::sig_bits;           \
  92 |     static const auto sgn_pos = exp_pos + float_traits<_type>::exp_bits; \
  93 |     static const auto sig_mask =                                         \
  94 |         (UINT64_C(1) << float_traits<_type>::sig_bits) - UINT64_C(1);    \
  95 |     static const auto exp_mask =                                         \
  96 |         (UINT64_C(1) << float_traits<_type>::exp_bits) - UINT64_C(1);    \
  97 |     static const auto sgn_mask = UINT64_C(1);                            \
  98 |     static const auto bias = exp_mask >> 1;                              \
  99 |     using bits_type = typename detail::bits_type<sizeof(_type)>::type
 100 | 
 101 | // Format description of a type T; the primary template provides no members.
 102 | template <typename T, typename = void>
 103 | struct float_traits {};
 104 | 
 105 | template <typename T>  // traits for built-in integral types
 106 | struct float_traits<T,
 107 |                     typename std::enable_if<std::is_integral<T>::value>::type> {
 108 |     static const bool is_floatx = false;
 109 |     static const bool is_runtime = false;
 110 |     static const int exp_bits = 0;  // no exponent field
 111 |     static const int sig_bits = 0;  // no significand field
 112 |     using backend_float = T;        // the integral type is its own backend
 113 | 
 114 |     FLOATX_USE_DEFAULT_TRAITS(T);  // positions, masks, bias and bits_type
 115 | };
 116 | 
 117 | template <>  // traits for the built-in float type
 118 | struct float_traits<float, void> {
 119 |     static const bool is_floatx = false;
 120 |     static const bool is_runtime = false;
 121 |     static const int exp_bits = 8;   // assumes an 8-bit exponent field
 122 |     static const int sig_bits = 23;  // assumes a 23-bit significand field
 123 |     using backend_float = float;
 124 | 
 125 |     FLOATX_USE_DEFAULT_TRAITS(float);  // positions, masks, bias and bits_type
 126 | };
 127 | 
 128 | template <>  // traits for the built-in double type
 129 | struct float_traits<double, void> {
 130 |     static const bool is_floatx = false;
 131 |     static const bool is_runtime = false;
 132 |     static const int exp_bits = 11;  // assumes an 11-bit exponent field
 133 |     static const int sig_bits = 52;  // assumes a 52-bit significand field
 134 |     using backend_float = double;
 135 | 
 136 |     FLOATX_USE_DEFAULT_TRAITS(double);  // positions, masks, bias and bits_type
 137 | };
 138 | 
 139 | // Defines get_<prop>(f): the traits constant, or f.get_<prop>() if is_runtime.
 140 | #define ENABLE_PROPERTY(_prop)                                              \
 141 |     template <typename Float>                                               \
 142 |     FLOATX_ATTRIBUTES FLOATX_INLINE constexpr auto get_##_prop(             \
 143 |         const Float&) noexcept->                                            \
 144 |         typename std::enable_if<!float_traits<Float>::is_runtime,           \
 145 |                                 decltype(float_traits<Float>::_prop)>::type \
 146 |     {                                                                       \
 147 |         return float_traits<Float>::_prop;                                  \
 148 |     }                                                                       \
 149 |     template <typename RuntimeFloat>                                        \
 150 |     FLOATX_ATTRIBUTES FLOATX_INLINE constexpr auto get_##_prop(             \
 151 |         const RuntimeFloat& f) noexcept->                                   \
 152 |         typename std::enable_if<float_traits<RuntimeFloat>::is_runtime,     \
 153 |                                 decltype(f.get_##_prop())>::type            \
 154 |     {                                                                       \
 155 |         return f.get_##_prop();                                             \
 156 |     }
 157 | 
 158 | ENABLE_PROPERTY(exp_bits);  // get_exp_bits(f)
 159 | ENABLE_PROPERTY(sig_bits);  // get_sig_bits(f)
 160 | 
 161 | #undef ENABLE_PROPERTY
 162 | 
 163 | // Floating-point type whose exponent/significand widths are template arguments.
 164 | template <int ExpBits, int SigBits, typename BackendFloat = double>
 165 | class floatx
 166 |     : public detail::floatx_base<floatx<ExpBits, SigBits, BackendFloat>> {
 167 | private:
 168 |     using backend_float = typename float_traits<floatx>::backend_float;
 169 | 
 170 | public:
 171 |     FLOATX_ATTRIBUTES floatx() noexcept  // starts from backend_float(0.0)
 172 |         : detail::floatx_base<floatx>(backend_float(0.0))
 173 |     {
 174 |         this->initialize();  // provided by floatx_base (not defined here)
 175 |     }
 176 | 
 177 |     template <typename T>  // implicit conversion from any T via backend_float
 178 |     FLOATX_ATTRIBUTES floatx(const T& other) noexcept
 179 |         : detail::floatx_base<floatx>(backend_float(other))
 180 |     {
 181 |         this->initialize();
 182 |     }
 183 | 
 184 |     // The implicitly generated copy/move constructors and assignments are used.
 185 | 
 186 |     template <typename T>
 187 |     FLOATX_ATTRIBUTES floatx& operator=(const T& other) noexcept
 188 |     {
 189 |         return *this = floatx(other);  // convert first, then copy-assign
 190 |     }
 191 | };
 192 | 
 193 | // Traits of floatx: widths from the template arguments, layout of the backend.
 194 | template <int ExpBits, int SigBits, typename BackendFloat>
 195 | struct float_traits<floatx<ExpBits, SigBits, BackendFloat>, void> {
 196 |     static const bool is_floatx = true;
 197 |     static const bool is_runtime = false;
 198 |     static const int exp_bits = ExpBits;
 199 |     static const int sig_bits = SigBits;
 200 |     using backend_float = BackendFloat;
 201 | 
 202 |     FLOATX_USE_DEFAULT_TRAITS(backend_float);  // derived from backend_float
 203 | };
 204 | 
 205 | // Floating-point type whose exponent/significand widths are set at run time.
 206 | template <typename BackendFloat = double, typename MetadataType = short>
 207 | class floatxr
 208 |     : public detail::floatx_base<floatxr<BackendFloat, MetadataType>> {
 209 | private:
 210 |     using backend_float = typename float_traits<floatxr>::backend_float;
 211 | 
 212 | public:
 213 |     using metadata_type = MetadataType;  // type used to store the two widths
 214 | 
 215 |     FLOATX_ATTRIBUTES  // starts from backend_float(0.0) with the given widths
 216 |     floatxr(metadata_type exp_bits, metadata_type sig_bits) noexcept
 217 |         : detail::floatx_base<floatxr>(backend_float(0.0)),
 218 |           exp_bits_(exp_bits),
 219 |           sig_bits_(sig_bits)
 220 |     {
 221 |         this->initialize();  // provided by floatx_base (not defined here)
 222 |     }
 223 | 
 224 |     // The implicitly generated copy/move constructors are used.
 225 | 
 226 |     template <typename T>  // value of `other` with explicitly given widths
 227 |     FLOATX_ATTRIBUTES floatxr(metadata_type exp_bits, metadata_type sig_bits,
 228 |                               const T& other) noexcept
 229 |         : detail::floatx_base<floatxr>(backend_float(other)),
 230 |           exp_bits_(exp_bits),
 231 |           sig_bits_(sig_bits)
 232 |     {
 233 |         this->initialize();
 234 |     }
 235 | 
 236 |     template <typename T>  // takes over both the value and the widths of `other`
 237 |     FLOATX_ATTRIBUTES floatxr(const T& other) noexcept
 238 |         : detail::floatx_base<floatxr>(backend_float(other)),
 239 |           exp_bits_(flx::get_exp_bits(other)),
 240 |           sig_bits_(flx::get_sig_bits(other))
 241 |     {
 242 |         /* no initialize() call: the widths are the ones `other` already has */
 243 |     }
 244 | 
 245 |     // Assignment keeps the current widths of *this; only the value changes.
 246 |     template <typename T>
 247 |     FLOATX_ATTRIBUTES floatxr& operator=(const T& other) noexcept
 248 |     {
 249 |         return *this = floatxr(flx::get_exp_bits(*this),
 250 |                                flx::get_sig_bits(*this), backend_float(other));
 251 |     }
 252 | 
 253 |     FLOATX_ATTRIBUTES void set_precision(metadata_type exp_bits,
 254 |                                          metadata_type sig_bits)
 255 |     {
 256 |         exp_bits_ = exp_bits;
 257 |         sig_bits_ = sig_bits;
 258 |         this->initialize();  // re-run after the widths have changed
 259 |     }
 260 | 
 261 |     FLOATX_ATTRIBUTES constexpr metadata_type get_exp_bits() const noexcept
 262 |     {
 263 |         return exp_bits_;
 264 |     }
 265 | 
 266 |     FLOATX_ATTRIBUTES constexpr metadata_type get_sig_bits() const noexcept
 267 |     {
 268 |         return sig_bits_;
 269 |     }
 270 | 
 271 | private:
 272 |     metadata_type exp_bits_;  // current exponent width
 273 |     metadata_type sig_bits_;  // current significand width
 274 | };
 275 | 
 276 | // Traits of floatxr; exp_bits/sig_bits here are those of the backend type.
 277 | template <typename BackendFloat, typename MetadataType>
 278 | struct float_traits<floatxr<BackendFloat, MetadataType>, void> {
 279 |     static const bool is_floatx = true;
 280 |     static const bool is_runtime = true;  // real widths come from the object
 281 |     static const int exp_bits = float_traits<BackendFloat>::exp_bits;
 282 |     static const int sig_bits = float_traits<BackendFloat>::sig_bits;
 283 |     using backend_float = BackendFloat;
 284 | 
 285 |     FLOATX_USE_DEFAULT_TRAITS(backend_float);  // derived from backend_float
 286 | };
 287 | 
 288 | // Result type of a binary operation between FloatX1 and FloatX2.
 289 | template <typename FloatX1, typename FloatX2, typename BackendFloat>
 290 | struct supertype {
 291 | private:
 292 |     static constexpr int max(int x, int y) { return (x > y) ? x : y; }
 293 | 
 294 | public:
 295 | #ifdef FLOATX_NO_TYPE_RESOLUTION
 296 |     static_assert(std::is_same<FloatX1, FloatX2>::value,
 297 |                   "Common type detection is disabled by the user"
 298 |                   " [FLOATX_NO_TYPE_RESOLUTION]");
 299 | #endif  // FLOATX_NO_TYPE_RESOLUTION
 300 |     // floatxr if either operand is runtime, else floatx with the larger widths.
 301 |     using type = typename std::enable_if<
 302 |         float_traits<FloatX1>::is_floatx || float_traits<FloatX2>::is_floatx,
 303 |         typename std::conditional<float_traits<FloatX1>::is_runtime ||
 304 |                                       float_traits<FloatX2>::is_runtime,
 305 |                                   floatxr<BackendFloat>,
 306 |                                   floatx<max(float_traits<FloatX1>::exp_bits,
 307 |                                              float_traits<FloatX2>::exp_bits),
 308 |                                          max(float_traits<FloatX1>::sig_bits,
 309 |                                              float_traits<FloatX2>::sig_bits),
 310 |                                          BackendFloat>>::type>::type;
 311 |     static constexpr int max_exp_bits(FloatX1 x, FloatX2 y)
 312 |     {
 313 |         return max(get_exp_bits(x), get_exp_bits(y));  // widths of the objects
 314 |     }
 315 |     static constexpr int max_sig_bits(FloatX1 x, FloatX2 y)
 316 |     {
 317 |         return max(get_sig_bits(x), get_sig_bits(y));  // widths of the objects
 318 |     }
 319 | };
 320 | 
 321 | // Relational operators: compare both operands converted to their backend types.
 322 | #define ENABLE_RELATIONAL_OPERATOR(_op)                              \
 323 |     template <typename Float1, typename Float2>                      \
 324 |     FLOATX_ATTRIBUTES FLOATX_INLINE                                  \
 325 |         typename std::enable_if<float_traits<Float1>::is_floatx ||   \
 326 |                                     float_traits<Float2>::is_floatx, \
 327 |                                 bool>::type                          \
 328 |         operator _op(const Float1& x, const Float2& y)               \
 329 |     {                                                                \
 330 |         return typename float_traits<Float1>::backend_float(x) _op   \
 331 |             typename float_traits<Float2>::backend_float(y);         \
 332 |     }
 333 | 
 334 | ENABLE_RELATIONAL_OPERATOR(==)
 335 | ENABLE_RELATIONAL_OPERATOR(!=)
 336 | ENABLE_RELATIONAL_OPERATOR(<)
 337 | ENABLE_RELATIONAL_OPERATOR(>)
 338 | ENABLE_RELATIONAL_OPERATOR(<=)
 339 | ENABLE_RELATIONAL_OPERATOR(>=)
 340 | 
 341 | #undef ENABLE_RELATIONAL_OPERATOR
 342 | 
 343 | // Defines `_op` for compile-time and for runtime operands, plus `_op=`.
 344 | #define ENABLE_ARITHMETIC_OPERATOR(_op)                                        \
 345 |     template <typename Float1, typename Float2>                                \
 346 |     FLOATX_ATTRIBUTES FLOATX_INLINE typename std::enable_if<                   \
 347 |         (float_traits<Float1>::is_floatx ||                                    \
 348 |          float_traits<Float2>::is_floatx) &&                                   \
 349 |             !float_traits<Float1>::is_runtime &&                               \
 350 |             !float_traits<Float2>::is_runtime,                                 \
 351 |         typename supertype<                                                    \
 352 |             Float1, Float2,                                                    \
 353 |             decltype(typename float_traits<Float1>::backend_float() _op        \
 354 |                      typename float_traits<Float2>::backend_float())>::type>:: \
 355 |         type                                                                   \
 356 |         operator _op(const Float1& x, const Float2& y)                         \
 357 |     {                                                                          \
 358 |         using bf = decltype(typename float_traits<Float1>::backend_float(      \
 359 |             x) _op typename float_traits<Float2>::backend_float(y));           \
 360 |         using st = typename supertype<Float1, Float2, bf>::type;               \
 361 |         return st(bf(x) _op bf(y));                                            \
 362 |     }                                                                          \
 363 |                                                                                \
 364 |     template <typename Float1, typename Float2>                                \
 365 |     FLOATX_ATTRIBUTES FLOATX_INLINE typename std::enable_if<                   \
 366 |         float_traits<Float1>::is_runtime || float_traits<Float2>::is_runtime,  \
 367 |         typename supertype<                                                    \
 368 |             Float1, Float2,                                                    \
 369 |             decltype(typename float_traits<Float1>::backend_float() _op        \
 370 |                      typename float_traits<Float2>::backend_float())>::type>:: \
 371 |         type                                                                   \
 372 |         operator _op(const Float1& x, const Float2& y)                         \
 373 |     {                                                                          \
 374 |         using bf = decltype(typename float_traits<Float1>::backend_float(      \
 375 |             x) _op typename float_traits<Float2>::backend_float(y));           \
 376 |         using st = supertype<Float1, Float2, bf>;                              \
 377 |         return typename st::type(st::max_exp_bits(x, y),                       \
 378 |                                  st::max_sig_bits(x, y), bf(x) _op bf(y));     \
 379 |     }                                                                          \
 380 |                                                                                \
 381 |     template <typename Float1, typename Float2>                                \
 382 |     FLOATX_ATTRIBUTES FLOATX_INLINE                                            \
 383 |         typename std::enable_if<float_traits<Float1>::is_floatx ||             \
 384 |                                     float_traits<Float2>::is_floatx,           \
 385 |                                 Float1&>::type                                 \
 386 |         operator _op##=(Float1& x, const Float2& y)                            \
 387 |     {                                                                          \
 388 |         return x = Float1(x _op y);                                            \
 389 |     }
 390 | 
 391 | ENABLE_ARITHMETIC_OPERATOR(+)
 392 | ENABLE_ARITHMETIC_OPERATOR(-)
 393 | ENABLE_ARITHMETIC_OPERATOR(*)
 394 | ENABLE_ARITHMETIC_OPERATOR(/)
 395 | 
 396 | #undef ENABLE_ARITHMETIC_OPERATOR
 397 | 
 398 | // Writes `f` to `os` as a value of its backend_float type.
 399 | template <typename FloatX>
 400 | FLOATX_INLINE typename std::enable_if<float_traits<FloatX>::is_floatx,
 401 |                                       std::ostream&>::type
 402 | operator<<(std::ostream& os, const FloatX& f) noexcept
 403 | {
 404 |     return os << static_cast<typename float_traits<FloatX>::backend_float>(f);
 405 | }
 406 | 
 407 | // Extracts a backend_float value from `is` and assigns it to `f`.
 408 | template <typename FloatX>
 409 | FLOATX_INLINE typename std::enable_if<float_traits<FloatX>::is_floatx,
 410 |                                       std::istream&>::type
 411 | operator>>(std::istream& is, FloatX& f) noexcept
 412 | {
 413 |     typename float_traits<FloatX>::backend_float parsed;
 414 |     is >> parsed;
 415 |     f = parsed;  // converted by the assignment operator of FloatX
 416 |     return is;
 417 | }
 418 | 
 419 | // Returns a bitset read from the backend_float value of `x`.
 420 | template <typename Float>
 421 | FLOATX_ATTRIBUTES FLOATX_INLINE
 422 |     std::bitset<sizeof(typename float_traits<Float>::backend_float)>
 423 |     bits(const Float& x) noexcept
 424 | {
 425 |     using bf = typename float_traits<Float>::backend_float;
 426 |     using bitset = std::bitset<sizeof(bf)>;  // NOTE(review): sized in bytes?
 427 |     bf val = bf(x);
 428 |     return *reinterpret_cast<bitset*>(&val);  // NOTE(review): aliasing cast
 429 | }
 430 | 
 431 | 
 432 | namespace detail {
 433 | 
 434 | // Reinterprets the storage of `val` as float_traits<Float>::bits_type.
 435 | template <typename Float>
 436 | constexpr FLOATX_ATTRIBUTES FLOATX_INLINE
 437 |     typename float_traits<Float>::bits_type
 438 |     reinterpret_as_bits(Float val)
 439 | {
 440 |     using bits_t = typename float_traits<Float>::bits_type;
 441 |     return *reinterpret_cast<const bits_t*>(&val);
 442 | }
 443 | 
 444 | // Inverse of reinterpret_as_bits: reinterprets the storage of `bits` as Float.
 445 | template <typename Float>
 446 | constexpr FLOATX_ATTRIBUTES FLOATX_INLINE Float
 447 | reinterpret_bits_as(typename float_traits<Float>::bits_type bits)
 448 | {
 449 |     return *reinterpret_cast<const Float*>(&bits);
 450 | }
 451 | 
 452 | // Returns 1 if round-to-nearest (ties to even) has to round `sig` up, else 0.
 453 | template <typename SignificandType>
 454 | constexpr FLOATX_ATTRIBUTES FLOATX_INLINE SignificandType
 455 | get_round_nearest_correction(SignificandType sig, SignificandType lsb_mask,
 456 |                              SignificandType after_lsb_mask,
 457 |                              SignificandType rest_mask)
 458 | {
 459 |     return (sig & after_lsb_mask) && (sig & (rest_mask | lsb_mask));
 460 | }
 461 | 
 462 | // Mask of all bits below the single bit set in MASK_AFTER_LSB (0 if none).
 463 | FLOATX_ATTRIBUTES FLOATX_INLINE constexpr uint64_t
 464 | generate_rest_mask_fast_shift_less64(uint64_t MASK_AFTER_LSB)
 465 | {
 466 |     return (MASK_AFTER_LSB != 0) ? (MASK_AFTER_LSB - UINT64_C(1))
 467 |                                  : UINT64_C(0);
 468 | }
 469 | 
 470 | // Rounds `mant` to a multiple of 2^SHIFT (round to nearest, ties to even).
 471 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t round_nearest(uint64_t mant,
 472 |                                                        uint16_t SHIFT)
 473 | {
 474 |     // Bit 63 must be clear: the increment below then cannot overflow, and
 475 |     // `mant >> 63` is already zero, so larger shifts are clamped to 63.
 476 |     assert(!(mant & (UINT64_C(0x1) << 63)));
 477 |     if (SHIFT >= 64) {
 478 |         SHIFT = 63;
 479 |     }
 480 |     // Nothing is dropped for SHIFT == 0. Returning early also avoids
 481 |     // `1 << (SHIFT - 1)` with a negative count, which is undefined behaviour.
 482 |     if (SHIFT == 0) {
 483 |         return mant;
 484 |     }
 485 | 
 486 |     const uint64_t MASK_LSB = UINT64_C(1) << SHIFT;  // lowest kept bit
 487 |     // First dropped bit, and all bits below it.
 488 |     const uint64_t MASK_AFTER_LSB = UINT64_C(1) << (SHIFT - 1);
 489 |     const uint64_t MASK_REST = MASK_AFTER_LSB - UINT64_C(1);
 490 |     uint64_t mant_res = mant >> SHIFT;
 491 | 
 492 |     // Round up when the dropped part is above one half, or is exactly one
 493 |     // half while the kept part is odd (ties to even).
 494 |     if ((mant & MASK_AFTER_LSB) && (mant & (MASK_REST | MASK_LSB))) {
 495 |         mant_res += 0x1;
 496 |     }
 497 |     return mant_res << SHIFT;
 498 | }
 499 | 
 500 | 
 501 | // Constants describing the bit layout used when the backend type is double.
 502 | const uint64_t MASK_MANTISSA = UINT64_C(0x000FFFFFFFFFFFFF);  // bits 0..51
 503 | const uint64_t MASK_EXPONENT = UINT64_C(0x7FF0000000000000);  // bits 52..62
 504 | const uint64_t MASK_SIGN = UINT64_C(0x8000000000000000);  // bit 63
 505 | const uint64_t MASK_MANTISSA_OVERFLOW = UINT64_C(0x0010000000000000);  // bit 52
 506 | const uint64_t POS_INF_PATTERN = UINT64_C(0x7ff0000000000000);
 507 | const uint64_t NEG_INF_PATTERN = UINT64_C(0xfff0000000000000);
 508 | const uint64_t BACKEND_BIAS =
 509 |     UINT64_C(1023);  // that value is 2^(BACKEND_E-1)-1.
 510 | const int BACKEND_E = 11;  // exponent bits of the backend
 511 | const int BACKEND_M = 52;  // mantissa bits of the backend
 512 | 
 513 | // True if all bits of MASK_EXPONENT (the double exponent field) are set.
 514 | template <typename BitsType>
 515 | constexpr FLOATX_ATTRIBUTES FLOATX_INLINE bool is_nan_or_inf(BitsType number)
 516 | {
 517 |     return (number & MASK_EXPONENT) == MASK_EXPONENT;
 518 | }
 519 | 
 520 | // True if the exponent `e` lies below the smallest exponent `emin`.
 521 | template <typename BitsType, typename ExpType>
 522 | constexpr FLOATX_ATTRIBUTES FLOATX_INLINE bool is_small(BitsType e,
 523 |                                                         ExpType emin)
 524 | {
 525 |     return emin > e;
 526 | }
 527 | 
 528 | // Clears the mantissa bits of a NaN/INF pattern that do not fit into M bits.
 529 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t
 530 | convert_nan_or_inf_to_backend(const uint64_t number, const uint8_t M)
 531 | {
 532 |     // The following mask deletes any additional information that might be
 533 |     // coded in the NaN bits. NaN bits towards the MSB of the mantissa that fit
 534 |     // into the target type are preserved.
 535 |     const uint64_t MASK_DELETE = UINT64_C(0xFFFFFFFFFFFFFFFF)
 536 |                                  << (BACKEND_M - M);
 537 |     // +INF and -INF have a zero mantissa, so masking does not affect them.
 538 |     // NOTE(review): a NaN whose set mantissa bits all lie below the kept
 539 |     // range comes out with a zero mantissa, i.e. as INF - confirm intent.
 540 |     return (number & MASK_DELETE);
 541 | }
 542 | 
 543 | FLOATX_ATTRIBUTES FLOATX_INLINE void convert_subnormal_mantissa_and_exp(
 544 |     const uint64_t number, const uint8_t M, const int16_t emin, const int e,
 545 |     uint64_t& mant, uint64_t& exp)
 546 | {
 547 |     int t = emin - e;  // how far e lies below emin; added to the shift below
 548 |     // NOTE: the parameter `number` is not used by this function.
 549 |     // The hidden one might have an influence on rounding, hence add the
 550 |     // hidden one to the mantissa.
 551 |     mant = mant | MASK_MANTISSA_OVERFLOW;
 552 | 
 553 |     // Perform IEEE 754 rounding with TiesToEven.
 554 |     mant = round_nearest(mant, BACKEND_M - M + t);
 555 | 
 556 |     // Handle the case where the number is rounded to exactly 0, i.e. it was
 557 |     // smaller than half of the smallest subnormal before rounding.
 558 |     if (mant == 0x0) {
 559 |         exp = 0x0;
 560 |     }
 561 | 
 562 |     // Remove the hidden one from the mantissa again.
 563 |     mant = mant & ~MASK_MANTISSA_OVERFLOW;
 564 | }
 565 | 
 566 | FLOATX_ATTRIBUTES FLOATX_INLINE void fix_too_large_mantissa(const int M, int& e,
 567 |                                                             uint64_t& mant,
 568 |                                                             uint64_t& exp)
 569 | {
 570 |     // The mantissa was rounded up to a value that no longer fits into its
 571 |     // field: move on to the next exponent, encode it with the backend bias
 572 |     // and clear the mantissa. (M is not used here.)
 573 |     ++e;
 574 |     exp = (static_cast<uint64_t>(e) + BACKEND_BIAS) << BACKEND_M;
 575 |     mant = UINT64_C(0);
 576 | }
 577 | 
 578 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t assemble_regular_number(
 579 |     const uint64_t sign_pattern, const uint64_t mant, const uint64_t exp)
 580 | {
 581 |     // Both fields must already sit at their final bit positions and must
 582 |     // not contain bits outside of their own mask.
 583 |     assert((mant & ~MASK_MANTISSA) == 0x0);
 584 |     assert((exp & ~MASK_EXPONENT) == 0x0);
 585 | 
 586 |     // Combine the original sign with the current exponent and mantissa.
 587 |     const uint64_t magnitude = exp | mant;
 588 |     return sign_pattern | magnitude;
 589 | }
 590 | 
 591 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t
 592 | assemble_inf_number(const uint64_t sign_pattern)
 593 | {
 594 |     // The code of rounding operates on the magnitude, here we still need to at
 595 |     // the right sign for the final number
 596 |     return sign_pattern | POS_INF_PATTERN;
 597 | }
 598 | 
 599 | // // functionality to get bit representations.
 600 | // bool get_sign_from_backend(const double d);
 601 | 
 602 | // // That functions return the bit representation embedded in a data word. A
 603 | // backend representation of type <E,M>
 604 | // // will return the full representation of 1+E+M bits in the LSB : LSB+1+E+M
 605 | // bit positions of the embedding dataword (e.g. uint64_t). uint16_t
 606 | // get_exponent_from_backend(const double d, const uint8_t E, const uint8_t M);
 607 | // uint64_t get_mantissa_from_backend(const double d, const uint8_t E, const
 608 | // uint8_t M); uint64_t get_fullbit_representation(const double d, const uint8_t
 609 | // E, const uint8_t M);
 610 | 
 611 | // // The reverse operation generates constructs a given number of exponent and
 612 | // mantissa bits.
 613 | // // Note, that the input is encoded into the embedding type as follows:
 614 | // //  exp:  bits (E-1) downto 0
 615 | // //  mant: bits (M-1) downto 0
 616 | // //  -> bits at higher positions are required to be 0. (?) or neglected?
 617 | // double construct_number(bool sign, uint16_t exp, uint64_t mant, const uint8_t
 618 | // E, const uint8_t M);
 619 | 
 620 | // functionality to get bit representations.
 621 | FLOATX_ATTRIBUTES FLOATX_INLINE bool get_sign_from_backend(const double d)
 622 | {
 623 |     uint64_t number = flx::detail::reinterpret_as_bits(d);
 624 |     return (number & MASK_SIGN);
 625 | }
 626 | 
 627 | FLOATX_ATTRIBUTES FLOATX_INLINE constexpr bool is_zero_or_nan_or_inf_exp(
 628 |     const uint64_t exp)
 629 | {
 630 |     return ((exp == 0x0) || (exp == MASK_EXPONENT));
 631 | }
 632 | 
 633 | FLOATX_ATTRIBUTES FLOATX_INLINE uint16_t
 634 | get_exponent_zero_or_nan_or_inf_exp(const uint64_t exp, const uint8_t E)
 635 | {
 636 |     uint16_t target_exp = (uint16_t)(exp >> BACKEND_M);
 637 |     // if it is an inf or nan delete any additional ones in the format.
 638 |     // (exponent requires E 1's)
 639 |     target_exp = target_exp & ((0x1 << E) - 1);
 640 | 
 641 |     // assert no bits are set at positions 15:E.
 642 |     // information is encoded only at positons E-1:0.
 643 |     assert(target_exp < (0x1 << E));
 644 |     return target_exp;
 645 | }
 646 | 
 647 | FLOATX_ATTRIBUTES FLOATX_INLINE uint16_t
 648 | get_exponent_regular_backend_exp(const uint64_t exp, const uint8_t E)
 649 | {
 650 |     // That is the double exponent.
 651 |     // Interpret the exponent.
 652 |     uint16_t target_exp = 0x0;
 653 |     int e = (exp >> BACKEND_M) - BACKEND_BIAS;
 654 | 
 655 |     // TARGET FORMAT (emax and emin depends on E)
 656 |     // IEEE 754 STANDARD
 657 |     int16_t emax = (0x1 << (E - 1)) - 1;
 658 |     int16_t emin = 1 - emax;
 659 | 
 660 |     // Target bias is the same as emax.
 661 |     if (e < emin) {
 662 |         // a regular case in the backend, but a subnormal in the target format.
 663 |         target_exp = 0x0;  // subnormals have a zero exponent.
 664 |     } else {
 665 |         // Encode the exponent in target format.
 666 |         target_exp = (uint16_t)(e + emax);
 667 |     }
 668 | 
 669 |     // assert no bits are set at positions 15:E.
 670 |     // information is encoded only at positons E-1:0.
 671 |     assert(target_exp < (0x1 << E));
 672 |     return target_exp;
 673 | }
 674 | 
 675 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t
 676 | get_mantissa_zero_or_nan_or_inf_exp(const uint64_t mant, const uint8_t M)
 677 | {
 678 |     uint64_t ret = mant >> (BACKEND_M - M);
 679 | 
 680 |     assert(ret < (UINT64_C(0x1) << M));
 681 |     return ret;
 682 | }
 683 | 
 684 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t get_mantissa_regular_backend_exp(
 685 |     const uint64_t exp, const uint64_t mant, const uint8_t E, const uint8_t M)
 686 | {
 687 |     // That is the double exponent.
 688 |     // Interpret the exponent.
 689 |     int e = (exp >> BACKEND_M) - BACKEND_BIAS;
 690 | 
 691 |     // TARGET FORMAT (emax and emin depends on E)
 692 |     // IEEE 754 STANDARD
 693 |     int16_t emax = (0x1 << (E - 1)) - 1;
 694 |     int16_t emin = 1 - emax;
 695 |     // Target bias is the same as emax.
 696 | 
 697 |     uint64_t ret;
 698 | 
 699 |     if (e < emin) {
 700 |         int t = emin - e;
 701 |         // Subnormal. The backend mantissa needs the hidden 1 that is visible in
 702 |         // the subnormal representation of the target format.
 703 |         ret = (mant | MASK_MANTISSA_OVERFLOW) >> (BACKEND_M - M + t);
 704 |     } else {
 705 |         ret = mant >> (BACKEND_M - M);
 706 |     }
 707 | 
 708 |     assert(ret < (UINT64_C(0x1) << M));
 709 |     return ret;
 710 | }
 711 | 
 712 | FLOATX_ATTRIBUTES FLOATX_INLINE uint16_t
 713 | get_exponent_from_backend(const double d, const uint8_t E)
 714 | {
 715 |     uint64_t number = flx::detail::reinterpret_as_bits(d);
 716 |     uint64_t exp = number & MASK_EXPONENT;
 717 | 
 718 |     // detects, zero, denormals, infs and nans in the backend double.
 719 |     if (is_zero_or_nan_or_inf_exp(exp)) {
 720 |         return get_exponent_zero_or_nan_or_inf_exp(exp, E);
 721 |     } else {
 722 |         return get_exponent_regular_backend_exp(exp, E);
 723 |     }
 724 | }
 725 | 
 726 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t
 727 | get_mantissa_from_backend(const double d, const uint8_t E, const uint8_t M)
 728 | {
 729 |     uint64_t number = flx::detail::reinterpret_as_bits(d);
 730 |     uint64_t exp = number & MASK_EXPONENT;
 731 |     uint64_t mant = number & MASK_MANTISSA;
 732 | 
 733 |     if (is_zero_or_nan_or_inf_exp(exp)) {
 734 |         return get_mantissa_zero_or_nan_or_inf_exp(mant, M);
 735 |     } else {
 736 |         return get_mantissa_regular_backend_exp(exp, mant, E, M);
 737 |     }
 738 | }
 739 | 
 740 | FLOATX_ATTRIBUTES FLOATX_INLINE uint8_t
 741 | count_leading_zeros(const uint64_t data) noexcept
 742 | {
 743 | #ifdef USE_BUILTINS
 744 | #ifdef __CUDA_ARCH__
 745 |     return __clzll(data);
 746 | #else   // !__CUDA_ARCH__
 747 |     return __builtin_clzl(data);
 748 | #endif  // ?__CUDA_ARCH__
 749 | #else   // !USE_BUILTINS
 750 |     uint8_t t = 0u;  // t will be the number of zero bits on the left
 751 |     for (t = 0u; t < 64u; ++t) {
 752 |         if (data & (UINT64_C(0x1) << (63u - t))) {
 753 |             break;
 754 |         }
 755 |     }
 756 |     return t;
 757 | #endif  // ?USE_BUILTINS
 758 | }
 759 | 
 760 | 
 761 | FLOATX_ATTRIBUTES FLOATX_INLINE void construct_number_subormal(
 762 |     uint64_t& backend_exp, uint64_t& mant, const int16_t emin, const uint8_t M)
 763 | {
 764 |     // Zero and Subnormal.
 765 |     if (mant == UINT64_C(0x0)) {
 766 |         // real zero.
 767 |         backend_exp = 0x0;
 768 |         mant = 0x0;
 769 |     } else {
 770 |         // a subnormal in the target fromat, but result in a regular number in
 771 |         // the backend fromat.
 772 |         uint8_t t = count_leading_zeros(mant);
 773 |         t = t - (63 - M);
 774 | 
 775 |         // interpret exponent in the <E,M> format.
 776 | 
 777 |         int e = emin - t;
 778 | 
 779 |         // rewrite the exponent in the backend format.
 780 |         backend_exp = ((uint64_t)e + BACKEND_BIAS) << BACKEND_M;
 781 | 
 782 |         // mantissa.
 783 |         mant = mant << (BACKEND_M - M + t);
 784 |         mant = mant & ~MASK_MANTISSA_OVERFLOW;
 785 |     }
 786 | }
 787 | 
 788 | FLOATX_ATTRIBUTES FLOATX_INLINE void construct_number_nan_or_inf(
 789 |     uint64_t& backend_exp, uint64_t& mant, const uint8_t M)
 790 | {
 791 |     if (mant == 0x0) {
 792 |         // Inf
 793 |         backend_exp = MASK_EXPONENT;  // encode a backend inf.
 794 |     } else {
 795 |         // Nan
 796 |         backend_exp = MASK_EXPONENT;  // encode nan
 797 |         mant = mant << (BACKEND_M - M);
 798 |     }
 799 | }
 800 | 
 801 | FLOATX_ATTRIBUTES FLOATX_INLINE void construct_number_regular(
 802 |     uint64_t& backend_exp, uint64_t& mant, uint16_t const exp, int16_t emax,
 803 |     const uint8_t M)
 804 | {
 805 |     mant = mant << (BACKEND_M - M);
 806 | 
 807 |     // interpret exponent in the <E,M> format.
 808 |     int e = exp - emax;
 809 | 
 810 |     // rewrite the exponent in the backend format.
 811 |     backend_exp = ((uint64_t)e + BACKEND_BIAS) << BACKEND_M;
 812 | }
 813 | 
 814 | FLOATX_ATTRIBUTES FLOATX_INLINE double construct_number(bool sign, uint16_t exp,
 815 |                                                         uint64_t mant,
 816 |                                                         const uint8_t E,
 817 |                                                         const uint8_t M)
 818 | {
 819 |     uint64_t backend_exp = 0x0;
 820 | 
 821 |     // use emax as bias for the <E,M> format.
 822 |     int16_t emax = (0x1 << (E - 1)) - 1;
 823 |     int16_t emin = 1 - emax;
 824 | 
 825 |     if (exp == 0x0) {
 826 |         construct_number_subormal(backend_exp, mant, emin, M);
 827 |     } else if (exp == ((0x1 << E) - 0x1)) {
 828 |         construct_number_nan_or_inf(backend_exp, mant, M);
 829 |     } else {
 830 |         construct_number_regular(backend_exp, mant, exp, emax, M);
 831 |     }
 832 | 
 833 |     uint64_t sign_bit = MASK_SIGN;
 834 |     sign_bit *= sign;
 835 | 
 836 |     uint64_t number = sign_bit | backend_exp | mant;
 837 |     double res = reinterpret_bits_as<double>(number);
 838 |     return res;
 839 | }
 840 | 
 841 | FLOATX_ATTRIBUTES FLOATX_INLINE double construct_number(
 842 |     uint64_t fullbit_representation, const uint8_t E, const uint8_t M)
 843 | {
 844 |     bool sign = (fullbit_representation & (UINT64_C(0x1) << (E + M)));
 845 |     uint64_t exp =
 846 |         fullbit_representation & (((UINT64_C(0x1) << E) - UINT64_C(1)) << M);
 847 |     exp = exp >> M;
 848 |     uint64_t mant =
 849 |         fullbit_representation & ((UINT64_C(0x1) << M) - UINT64_C(1));
 850 |     return construct_number(sign, (uint16_t)exp, mant, E, M);
 851 | }
 852 | 
 853 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t construct_fullbit_representation(
 854 |     bool sign, uint16_t exp, uint64_t mant, const uint8_t E, const uint8_t M)
 855 | {
 856 |     assert(exp < (0x1 << E));
 857 |     assert(mant < (UINT64_C(0x1) << M));
 858 | 
 859 |     uint64_t sign_bit = UINT64_C(0x1) << (E + M);
 860 |     sign_bit *= sign;
 861 | 
 862 |     uint64_t target_exp = (uint64_t)exp;
 863 |     target_exp = target_exp << M;
 864 | 
 865 |     // Note that the words have information encoded at different positions
 866 |     // [63:E+M+1]   free
 867 |     // E+M          sign_bit
 868 |     // E+M-1:M      target_exp
 869 |     // M-1:0        mantissa
 870 |     mant = sign_bit | target_exp | mant;
 871 | 
 872 |     return mant;
 873 | }
 874 | 
 875 | // That functions return the bit representation embedded in a data word. A
 876 | // backend representation of type <E,M> will return the full representation of
 877 | // 1+E+M bits in the LSB : LSB+1+E+M bit positions of the embedding dataword
 878 | // (e.g. uint64_t). Encoding of the result: [63:E+M+1]   free E+M
 879 | // sign_bit E+M-1:M      target_exp M-1:0        mantissa
 880 | FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t
 881 | get_fullbit_representation(const double d, const uint8_t E, const uint8_t M)
 882 | {
 883 |     return construct_fullbit_representation(
 884 |         get_sign_from_backend(d), get_exponent_from_backend(d, E),
 885 |         get_mantissa_from_backend(d, E, M), E, M);
 886 | }
 887 | 
 888 | // Bitset wrappers.
 889 | template <uint8_t E>
 890 | FLOATX_ATTRIBUTES FLOATX_INLINE std::bitset<E> get_exponent_from_backend_BS(
 891 |     const double d)
 892 | {
 893 |     return std::bitset<E>(get_exponent_from_backend(d, E));
 894 | }
 895 | 
 896 | template <uint8_t E, uint8_t M>
 897 | FLOATX_ATTRIBUTES FLOATX_INLINE std::bitset<M> get_mantissa_from_backend_BS(
 898 |     const double d)
 899 | {
 900 |     return std::bitset<M>(get_mantissa_from_backend(d, E, M));
 901 | }
 902 | 
 903 | template <uint8_t E, uint8_t M>
 904 | FLOATX_ATTRIBUTES FLOATX_INLINE std::bitset<1 + E + M>
 905 | get_fullbit_representation_BS(const double d)
 906 | {
 907 |     return std::bitset<1 + E + M>(get_fullbit_representation(d, E, M));
 908 | }
 909 | 
 910 | template <uint8_t E, uint8_t M>
 911 | FLOATX_ATTRIBUTES FLOATX_INLINE double construct_number(bool sign,
 912 |                                                         std::bitset<E> exp,
 913 |                                                         std::bitset<M> mant)
 914 | {
 915 |     return construct_number(sign, exp.to_ulong(), mant.to_ulong(), E, M);
 916 | }
 917 | 
 918 | template <uint8_t E, uint8_t M>
 919 | FLOATX_ATTRIBUTES FLOATX_INLINE double construct_number(
 920 |     std::bitset<1 + E + M> fullbit_representation)
 921 | {
 922 |     return construct_number(fullbit_representation.to_ulong(), E, M);
 923 | }
 924 | 
// Generates a function template `extract_<part>_bits(val)` that reinterprets
// the storage of `val` as a uint64_t, shifts it right by
// float_traits<T>::<part>_pos and masks it with float_traits<T>::<part>_mask.
//
// NOTE(review): the generated code always reads a uint64_t through the
// address of `val`, independent of sizeof(T) -- confirm that it is only
// instantiated for types that are (at least) 64 bits wide.
#define ENABLE_EXTRACT_PART(_part)                                   \
    template <typename T>                                            \
    FLOATX_ATTRIBUTES FLOATX_INLINE uint64_t extract_##_part##_bits( \
        const T& val) noexcept                                       \
    {                                                                \
        return (*reinterpret_cast<const uint64_t*>(&val) >>          \
                float_traits<T>::_part##_pos) &                      \
               float_traits<T>::_part##_mask;                        \
    }

// Instantiate the extractors: extract_sgn_bits, extract_exp_bits and
// extract_sig_bits.
ENABLE_EXTRACT_PART(sgn);
ENABLE_EXTRACT_PART(exp);
ENABLE_EXTRACT_PART(sig);

// The generator macro is not needed past this point.
#undef ENABLE_EXTRACT_PART
 940 | 
// Base class that stores a value in the backend floating-point type and can
// round it to the precision of the concrete type.
//
// ConcreteFloatX is the type this object is cast to in self(); the backend
// type and the integer type holding its bits are taken from
// float_traits<ConcreteFloatX>.
template <typename ConcreteFloatX>
class floatx_base {
private:
    using backend_float = typename float_traits<ConcreteFloatX>::backend_float;
    using bits_type = typename float_traits<ConcreteFloatX>::bits_type;

public:
    // Stores `value` as is; no rounding happens here (see initialize()).
    FLOATX_ATTRIBUTES floatx_base(const backend_float& value) noexcept
        : value_(value)
    {}

    // Replaces the stored value by its rounded counterpart.
    FLOATX_ATTRIBUTES void initialize() noexcept
    {
        value_ = this->enforce_rounding(value_);
    }

    // Conversion to the backend type returns the stored value unchanged.
    FLOATX_ATTRIBUTES constexpr operator backend_float() const noexcept
    {
        return value_;
    }

    // Conversion to any other type T goes through T(value_).
    template <typename T>
    FLOATX_ATTRIBUTES constexpr operator T() const noexcept
    {
        return T(value_);
    }

private:
    // Access to this object as the concrete type (const and non-const).
    FLOATX_ATTRIBUTES const ConcreteFloatX& self() const noexcept
    {
        return *static_cast<const ConcreteFloatX*>(this);
    }

    FLOATX_ATTRIBUTES ConcreteFloatX& self() noexcept
    {
        return *static_cast<ConcreteFloatX*>(this);
    }

    // Layout constants of the backend type, taken from
    // float_traits<backend_float>. The *_mask members defined here are
    // shifted to the position of their field inside the bit pattern.
    static constexpr auto backend_sig_pos =
        float_traits<backend_float>::sig_pos;
    static constexpr auto backend_exp_pos =
        float_traits<backend_float>::exp_pos;
    static constexpr auto backend_sgn_pos =
        float_traits<backend_float>::sgn_pos;
    static constexpr auto backend_sig_mask =
        float_traits<backend_float>::sig_mask << backend_sig_pos;
    static constexpr auto backend_exp_mask =
        float_traits<backend_float>::exp_mask << backend_exp_pos;
    static constexpr auto backend_sig_bits =
        float_traits<backend_float>::sig_bits;
    static constexpr auto backend_exp_bits =
        float_traits<backend_float>::exp_bits;
    static constexpr auto backend_bias = float_traits<backend_float>::bias;
    // One past the largest significand value, at the significand position.
    static constexpr auto backend_sig_overflow_mask =
        (float_traits<backend_float>::sig_mask + 1) << backend_sig_pos;
    static constexpr auto backend_sgn_mask =
        float_traits<backend_float>::sgn_mask << backend_sgn_pos;

    // Returns `value` rounded to the exponent / significand widths reported
    // by get_exp_bits(self()) and get_sig_bits(self()).
    FLOATX_ATTRIBUTES
    backend_float enforce_rounding(backend_float value) const noexcept
    {
        const auto exp_bits = get_exp_bits(self());
        const auto sig_bits = get_sig_bits(self());
        // Split the backend bit pattern into its fields. `sig` is shifted
        // down by backend_sig_pos, `raw_exp` and `sgn` stay in place.
        bits_type bits = reinterpret_as_bits(value);
        auto sig = (bits & backend_sig_mask) >> backend_sig_pos;
        auto raw_exp = bits & backend_exp_mask;
        const auto sgn = bits & backend_sgn_mask;

        // Unbiased exponent of the input.
        int exp = (raw_exp >> backend_exp_pos) - backend_bias;

        // Exponent range of the target format.
        const int emax = (1 << (exp_bits - 1)) - 1;
        const int emin = 1 - emax;

        if (is_nan_or_inf(bits)) {
            bits = convert_nan_or_inf_to_backend(bits, sig_bits);
        } else {
            if (is_small(exp, emin)) {
                // presumably exp < emin, i.e. subnormal in the target format
                // -- is_small is defined outside this class
                convert_subnormal_mantissa_and_exp(bits, sig_bits, emin, exp,
                                                   sig, raw_exp);
            } else {
                // Drop the significand bits the target format cannot hold.
                sig = round_nearest(sig, backend_sig_bits - sig_bits);
            }
            // Rounding up may have carried out of the significand field.
            if (significand_is_out_of_range(sig)) {
                fix_too_large_mantissa(sig_bits, exp, sig, raw_exp);
            }
            // An exponent above emax cannot be represented: result is inf.
            if (exponent_is_out_of_range(exp, emax)) {
                bits = assemble_inf_number(sgn);
            } else {
                bits = assemble_regular_number(sgn, sig, raw_exp);
            }
        }

        return reinterpret_bits_as<backend_float>(bits);
    }

    // Reads the object representation of `val` as a bits_type.
    static constexpr FLOATX_ATTRIBUTES bits_type
    reinterpret_as_bits(backend_float val)
    {
        return *reinterpret_cast<const bits_type*>(&val);
    }

    // True when `sig` no longer fits below the overflow mask.
    static constexpr FLOATX_ATTRIBUTES bool significand_is_out_of_range(
        bits_type sig)
    {
        return sig >= backend_sig_overflow_mask;
    }

    // True when `exp` exceeds the largest exponent `emax`.
    static constexpr FLOATX_ATTRIBUTES bool exponent_is_out_of_range(int exp,
                                                                     int emax)
    {
        return exp > emax;
    }

protected:
    // The stored value, kept in the backend type.
    backend_float value_;
};
1057 | 
1058 | 
1059 | }  // namespace detail
1060 | 
1061 | 
1062 | template <typename Float>
1063 | FLOATX_ATTRIBUTES FLOATX_INLINE std::string bitstring(const Float& x) noexcept
1064 | {
1065 |     using bf = typename float_traits<Float>::backend_float;
1066 |     const uint64_t one = UINT64_C(1);
1067 |     const char map[] = {'0', '1'};
1068 |     const int eb = get_exp_bits(x);
1069 |     const int sb = get_sig_bits(x);
1070 |     const int beb = get_exp_bits(bf(x));
1071 |     const int bsb = get_sig_bits(bf(x));
1072 | 
1073 |     std::string s(sb + eb + 3, '-');
1074 |     auto sgn = detail::extract_sgn_bits(bf(x));
1075 |     auto exp = detail::extract_exp_bits(bf(x));
1076 |     auto sig = detail::extract_sig_bits(bf(x));
1077 | 
1078 |     int i = 0;
1079 |     s[i++] = map[bool(sgn & UINT64_C(1))];  // sign bit
1080 |     ++i;  // leave '-' between sign and exponent parts
1081 |     s[i++] = map[bool(exp & (one << (beb - 1)))];  // bias bit
1082 |     for (auto mask = (one << (eb - 2)); mask > 0; mask >>= 1) {
1083 |         s[i++] = map[bool(exp & mask)];
1084 |     }
1085 |     ++i;  // leave '-' between exponent and significand parts
1086 |     for (auto mask = (one << (bsb - 1)); i < s.size(); mask >>= 1) {
1087 |         s[i++] = map[bool(sig & mask)];
1088 |     }
1089 |     return s;
1090 | }
1091 | 
1092 | 
1093 | };  // namespace flx
1094 | 
1095 | 
1096 | #undef FLOATX_ATTRIBUTES
1097 | #undef FLOATX_INLINE
1098 | #undef USE_BUILTINS
1099 | 
1100 | 
1101 | #endif  // FLOATX_FLOATX_HPP_
1102 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | function(create_test test_name)
 2 |     add_executable(${test_name} ${test_name}.cpp)
 3 |     target_link_libraries(${test_name} PRIVATE floatx gtest_main)
 4 |     file(RELATIVE_PATH REL_BINARY_DIR
 5 |          ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
 6 |     add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${test_name})
 7 | endfunction(create_test)
 8 | 
 9 | add_library(ieee_helper STATIC IEEEHelper.cpp)
10 | 
11 | create_test(conversion)
12 | create_test(assignment)
13 | create_test(rel_ops)
14 | create_test(arithmetic)
15 | create_test(stream)
16 | create_test(std_integration)
17 | create_test(NanInf)
18 | create_test(round_nearest)
19 | create_test(value_representation)
20 | target_link_libraries(value_representation PRIVATE ieee_helper)
21 | create_test(value_representation_half)
22 | target_link_libraries(value_representation_half PRIVATE ieee_helper)
23 | create_test(value_representation_bits)
24 | target_link_libraries(value_representation_bits PRIVATE ieee_helper)
25 | 


--------------------------------------------------------------------------------
/test/IEEEHelper.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include "IEEEHelper.h"
 19 | 
 20 | 
 21 | #include <bitset>
 22 | #include <cassert>
 23 | #include <cmath>
 24 | #include <cstdio>
 25 | #include <cstdlib>
 26 | #include <iostream>
 27 | 
 28 | 
 29 | void IEEEHelper::showConfig()
 30 | {
 31 |     printf("Configuration of format<%u,%u>:\n", _e, _m);
 32 |     printf(" p = %u\n", _m + 1);
 33 |     printf(" emax = %i\n", getEmax());
 34 |     printf(" emin = %i\n", getEmin());
 35 |     printf(" bias = %i\n", getBias());
 36 |     printf("Limits:\n");
 37 |     printf(" max: %5e \t = %.20f\n", maxValue(), maxValue());
 38 |     printf(" sNr: %5e \t = %.20f\n", smallestNormalValue(),
 39 |            smallestNormalValue());
 40 |     printf(" lSN: %5e \t = %.20f\n", maxSubnormalValue(), maxSubnormalValue());
 41 |     printf(" sSN: %5e \t = %.20f\n", minSubnormalValue(), minSubnormalValue());
 42 |     printf("Cases (one-sided):\n");
 43 |     printf(" Normal: %i \t = %u*%u\n", countNormalRange(), countExpRange(),
 44 |            countSubnormalRange());
 45 |     printf(" Subnormal: %i\n", countSubnormalRange());
 46 |     printf(" NAN/INFs: %i\n", countSubnormalRange());
 47 |     printf(" 2^(E+M) = %i = (sum over #cases one side) = %u\n",
 48 |            (int)pow(2, _e + _m),
 49 |            countNormalRange() + 2 * countSubnormalRange());
 50 |     printf("\n");
 51 | }
 52 | 
 53 | double IEEEHelper::iterateNormalRange(int ie, int im)
 54 | {
 55 |     assert(ie >= 0);
 56 |     assert(ie < _NnormalExp);
 57 |     assert(im >= 0);
 58 |     assert(im < _Nsubnormal);
 59 | 
 60 |     double m = 1.0 + im * pow(2.0, -_m);
 61 |     // printf("im = %i, m = %f\n", im, m );
 62 |     return pow(2.0, ie + getEmin()) * m;
 63 | }
 64 | 
 65 | double IEEEHelper::iterateSubnormalRange(int im)
 66 | {
 67 |     assert(im >= 0);
 68 |     assert(im < _Nsubnormal);
 69 | 
 70 |     double m = 0.0 + im * pow(2.0, -_m);
 71 |     return pow(2.0, getEmin()) * m;
 72 | }
 73 | 
 74 | void show(uint64_t u)
 75 | {
 76 |     printf("%016llx\t", u);
 77 |     std::cout << std::bitset<64>(u) << std::endl;
 78 | }
 79 | 
 80 | #define CAST_DOUBLE_TO_UINT64(d) (*((uint64_t*)(&(d))))
 81 | #define CAST_UINT64_TO_DOUBLE(d) (*((double*)(&(d))))
 82 | 
 83 | void show(double d)
 84 | {
 85 |     printf("%.20e\t", d);
 86 |     uint64_t u = CAST_DOUBLE_TO_UINT64(d);
 87 |     printf("0x%016llx\t", u);
 88 |     std::cout << std::bitset<64>(u) << std::endl;
 89 | }
 90 | 
 91 | void showTable(IEEEHelper& h)
 92 | {
 93 |     int ne = h.countExpRange();
 94 |     int nm = h.countSubnormalRange();
 95 | 
 96 |     printf("Subnormal Range:\n");
 97 | 
 98 |     for (int im = 0; im < nm; ++im) {
 99 |         double d = h.iterateSubnormalRange(im);
100 |         printf("%5i/%5i: \t %.20e \t %f \n", im, nm, d, d);
101 |     }
102 | 
103 |     printf("Normal Range:\n");
104 |     for (int ie = 0; ie < ne; ++ie) {
105 |         for (int im = 0; im < nm; ++im) {
106 |             double d = h.iterateNormalRange(ie, im);
107 |             printf("(%5i,%5i)/(%5i,%5i): \t %.20e \t %f \n", ie, im, ne, nm, d,
108 |                    d);
109 |         }
110 |     }
111 | }
112 | 
113 | 
114 | // int main(int argc, char **argv)
115 | // {
116 | //     if( argc != 2+1 )
117 | //     {
118 | //         printf("Usage: %s <E> <M>\n", argv[0]);
119 | //         exit(1);
120 | //     }
121 | 
122 | //     int e = atoi( argv[1]);
123 | //     int m = atoi( argv[2]);
124 | 
125 | // 	IEEEHelper h = IEEEHelper(e,m);
126 | 
127 | // 	h.showConfig();
128 | // 	show( h );
129 | 
130 | // 	return 0;
131 | // }
132 | 


--------------------------------------------------------------------------------
/test/IEEEHelper.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cmath>
 4 | 
 5 | // ### IEEE 754 HELPER FUNCITONS
 6 | 
 7 | class IEEEHelper {
 8 | private:
 9 |     int _e;
10 |     int _m;
11 |     int _emax;
12 |     int _emin;
13 | 
14 |     long long int _Nnormal;
15 |     long long int _NnormalExp;
16 |     long long int _Nsubnormal;
17 | 
18 | public:
19 |     IEEEHelper(int e, int m)
20 |     {
21 |         _e = e;
22 |         _m = m;
23 | 
24 |         _Nnormal = (getEmax() - getEmin() + 1) * pow(2.0, _m);
25 |         _NnormalExp = (getEmax() - getEmin() + 1);
26 |         _Nsubnormal = pow(2.0, _m);
27 |     }
28 | 
29 |     inline int getEmax() { return pow(2.0, _e - 1) - 1; }
30 |     inline int getEmin() { return 1 - getEmax(); }
31 |     inline int getBias() { return getEmax(); }
32 | 
33 |     inline double getMmin() { return pow(2.0, -_m); }            // use p = m+1
34 |     inline double getMmaxNormal() { return 2 - pow(2.0, -_m); }  // use p = m+1
35 |     inline double getMmaxSubnormal()
36 |     {
37 |         return 1 - pow(2.0, -_m);
38 |     }  // use p = m+1
39 | 
40 |     inline double maxValue() { return pow(2.0, getEmax()) * getMmaxNormal(); }
41 |     inline double smallestNormalValue() { return pow(2.0, getEmin()); }
42 |     inline double maxSubnormalValue()
43 |     {
44 |         return pow(2.0, getEmin()) * getMmaxSubnormal();
45 |     }
46 |     inline double minSubnormalValue()
47 |     {
48 |         return pow(2.0, getEmin()) * getMmin();
49 |     }
50 | 
51 |     void showConfig();
52 | 
53 |     int countNormalRange() { return _Nnormal; }
54 |     int countExpRange() { return _NnormalExp; }
55 |     int countSubnormalRange() { return _Nsubnormal; }
56 | 
57 |     double iterateNormalRange(int ie, int im);
58 |     double iterateSubnormalRange(int i);
59 | };
60 | 
61 | void show(double d);
62 | void showTable(IEEEHelper& h);
63 | 


--------------------------------------------------------------------------------
/test/NanInf.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include <gtest/gtest.h>
 19 | #include <floatx.hpp>
 20 | 
 21 | // #include <cmath> // defines NAN
 22 | // #define nan NAN
 23 | #define nan double(0.0 / 0.0)
 24 | #define inf double(1.0 / 0.0)
 25 | 
 26 | namespace {
 27 | 
 28 | void show(double d)
 29 | {
 30 |     printf("%.20e\t", d);
 31 |     uint64_t u = flx::detail::reinterpret_as_bits(d);
 32 |     printf("0x%016llx\t", u);
 33 |     std::cout << std::bitset<64>(u) << std::endl;
 34 | }
 35 | 
 36 | 
// System representation of NaNs: prints the bit patterns of a NaN written as
// a constant expression and of one computed from a variable, and checks that
// each compares unequal to the others.
TEST(FloatxNanInfTest, system_nans)
{
    // NaN written as a constant expression.
    double constnan = 0.0 / 0.0;
    printf("constnan: ");
    show((double)constnan);

    double zero;
    // Try to prevent the compiler from figuring out at compile time what
    // dynamicnan should be: `zero` is first filled with ~0 bytes via memset
    // and then assigned 0.0 through the pointer memset returns.
    *(double*)memset(&zero, ~0, sizeof(zero)) = 0.0;
    double dynamicnan = zero / zero;
    printf("dynamicnan: ");
    show((double)dynamicnan);

    EXPECT_NE(constnan, dynamicnan);  // holds only for NANs
    EXPECT_NE(constnan, nan);         // holds only for NANs
    EXPECT_NE(dynamicnan, nan);       // holds only for NANs
}
 56 | 
 57 | // See Intel 64 and IA-32 Architectures Software Developer's Manual
 58 | // Vol. 1, Appendix E Sect. 4.2.2 Table E-1 for a discussion of a
 59 | // type of NaN returned for an invalid operation (e.g., 0/0).  It
 60 | // seems that always a particular encoding ("QNaN indefinite") is
 61 | // used in such cases, but what happens generally (see TODOs below)?
 62 | 
// A NAN CASE: a NaN stored in a floatx and then assigned to a floatx of a
// different format stays a NaN.
TEST(FloatxNanInfTest, cast_nans)
{
    using T1 = flx::floatx<2, 3>;
    using T2 = flx::floatx<10, 50>;
    T1 a = 0.0 / 0.0;
    T2 b = 0.0;
    b = a;  // assignment across formats

    double constnan =
        nan;  // note, the way how that nan is generated is relevant!

    EXPECT_NE(a, a);    // holds only for NANs
    EXPECT_NE(a, nan);  // holds only for NANs
    // Compares the first 8 bytes of `a` with the bits of constnan.
    // TODO: is the following expectation true generally?
    EXPECT_EQ(*reinterpret_cast<uint64_t*>(&a),
              *reinterpret_cast<uint64_t*>(&constnan));

    EXPECT_NE(b, b);    // holds only for NANs
    EXPECT_NE(b, nan);  // holds only for NANs
    // Compares the first 8 bytes of `b` with the bits of constnan.
    // TODO: is the following expectation true generally?
    EXPECT_EQ(*reinterpret_cast<uint64_t*>(&b),
              *reinterpret_cast<uint64_t*>(&constnan));

    // Different bit representations exist for NaNs; this checks that `a` and
    // `b` nevertheless carry the same one.
    // TODO: is the following expectation true generally?
    EXPECT_EQ(*reinterpret_cast<uint64_t*>(&b),
              *reinterpret_cast<uint64_t*>(&a));
}
 92 | 
 93 | // A NAN CASE: both operands are expected to become zero, so c = a / b is NaN.
 94 | TEST(FloatxNanInfTest, DIV_2_47_simple)
 95 | {
 96 |     using T = flx::floatx<2, 47>;
 97 |     T a = -(
 98 |         7.105427e-15 / 2 -
 99 |         1e-17);  // a bit smaller than half of the smallest subnormal in <2,47>
100 |     T b = -(7.105427e-15 / 12.0);
101 |     T c = 0;
102 |     c = a / b;
103 |     EXPECT_EQ(double(a), 0.00000000000000000000);
104 |     EXPECT_EQ(double(b), 0.00000000000000000000);
105 |     EXPECT_NE(c, c);    // holds only for NANs
106 |     EXPECT_NE(c, nan);  // holds only for NANs
107 | 
108 |     double zero;
109 |     // Set zero to 0.0 through the pointer memset returns, to try to keep
110 |     // the compiler from evaluating dynamicnan at compile time.
111 |     *(double*)memset(&zero, ~0, sizeof(zero)) = 0.0;
112 |     double dynamicnan = zero / zero;
113 | 
114 |     EXPECT_NE(c, dynamicnan);  // holds only for NANs
115 |     // TODO: is the following expectation true generally?
116 |     EXPECT_EQ(*reinterpret_cast<uint64_t*>(&c),
117 |               *reinterpret_cast<uint64_t*>(&dynamicnan));
118 | }
119 | 
120 | // A REGULAR CASE (rounding the operands into <3,3> does not cause the inf case here)
121 | TEST(FloatxNanInfTest, DIV_3_3_simple)
122 | {
123 |     using T = flx::floatx<3, 3>;
124 |     T a = 0.33333333333333331483;
125 |     T b = 0.11111111111111110494;
126 |     T c = 0;
127 |     c = a / b;
128 |     EXPECT_EQ(double(a), 3.43750000000000000000e-01);  // a as stored in <3,3>
129 |     EXPECT_EQ(double(b), 1.25000000000000000000e-01);  // b as stored in <3,3>
130 |     EXPECT_EQ(double(c), 2.7500000000000000000e-00);   // 0.34375 / 0.125
131 | }
132 | 
133 | // AN INF CASE: the divisor is expected to become zero, so a / b is inf.
134 | TEST(FloatxNanInfTest, DIV_3_3_simple_inf)
135 | {
136 |     using T = flx::floatx<3, 3>;
137 |     T a = 0.33333333333333331483;
138 |     T b =
139 |         (0.03125000000000000000 / 2 -
140 |          1e-17);  // a bit smaller than half of the smallest subnormal in <3,3>
141 |     T c = 0;
142 |     c = a / b;
143 | 
144 |     // printf("a: 			"); show((double)a);
145 |     // printf("b: 			"); show((double)b);
146 |     // printf("c:   		"); show((double)c);
147 |     // printf("inf: 		"); show((double)inf);
148 | 
149 |     EXPECT_EQ(double(a), 3.43750000000000000000e-01);
150 |     EXPECT_EQ(double(b), 0.00000000000000000000);  // double literal, not an octal int
151 |     EXPECT_EQ(double(c), inf);
152 | }
153 | 
154 | }  // namespace
155 | 


--------------------------------------------------------------------------------
/test/arithmetic.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include <gtest/gtest.h>
 19 | #include <floatx.hpp>
 20 | 
 21 | 
 22 | namespace {
 23 | 
 24 | 
 25 | TEST(FloatxArithmeticTest, ResultHasCorrectType)
 26 | {
 27 |     using doublex = flx::floatx<11, 52>;
 28 |     using floatx = flx::floatx<8, 23>;
 29 | 
 30 |     ::testing::StaticAssertTypeEq<doublex, decltype(doublex() + doublex())>();
 31 |     ::testing::StaticAssertTypeEq<doublex, decltype(doublex() - doublex())>();
 32 |     ::testing::StaticAssertTypeEq<doublex, decltype(doublex() * doublex())>();
 33 |     ::testing::StaticAssertTypeEq<doublex, decltype(doublex() / doublex())>();
 34 | 
 35 |     ::testing::StaticAssertTypeEq<floatx, decltype(floatx() + floatx())>();
 36 |     ::testing::StaticAssertTypeEq<floatx, decltype(floatx() - floatx())>();
 37 |     ::testing::StaticAssertTypeEq<floatx, decltype(floatx() * floatx())>();
 38 |     ::testing::StaticAssertTypeEq<floatx, decltype(floatx() / floatx())>();
 39 | 
 40 |     doublex dlhs;
 41 |     ::testing::StaticAssertTypeEq<doublex&, decltype(dlhs += doublex())>();
 42 |     ::testing::StaticAssertTypeEq<doublex&, decltype(dlhs -= doublex())>();
 43 |     ::testing::StaticAssertTypeEq<doublex&, decltype(dlhs *= doublex())>();
 44 |     ::testing::StaticAssertTypeEq<doublex&, decltype(dlhs /= doublex())>();
 45 |     floatx flhs;
 46 |     ::testing::StaticAssertTypeEq<floatx&, decltype(flhs += floatx())>();
 47 |     ::testing::StaticAssertTypeEq<floatx&, decltype(flhs -= floatx())>();
 48 |     ::testing::StaticAssertTypeEq<floatx&, decltype(flhs *= floatx())>();
 49 |     ::testing::StaticAssertTypeEq<floatx&, decltype(flhs /= floatx())>();
 50 | }
 51 | 
 52 | 
 53 | TEST(FloatxArithmeticTest, PromotesTypes)
 54 | {
 55 |     using flx1 = flx::floatx<9, 7>;
 56 |     using flx2 = flx::floatx<6, 13>;
 57 |     using supertype = flx::floatx<9, 13>;
 58 |     ::testing::StaticAssertTypeEq<supertype, decltype(flx1() + flx2())>();
 59 |     ::testing::StaticAssertTypeEq<supertype, decltype(flx1() - flx2())>();
 60 |     ::testing::StaticAssertTypeEq<supertype, decltype(flx1() * flx2())>();
 61 |     ::testing::StaticAssertTypeEq<supertype, decltype(flx1() / flx2())>();
 62 | 
 63 |     flx1 flhs;
 64 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs += flx2())>();
 65 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs -= flx2())>();
 66 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs *= flx2())>();
 67 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs /= flx2())>();
 68 | 
 69 |     using flx3 = flx::floatx<9, 23>;
 70 |     ::testing::StaticAssertTypeEq<flx3, decltype(flx1() + float())>();
 71 |     ::testing::StaticAssertTypeEq<flx3, decltype(flx1() - float())>();
 72 |     ::testing::StaticAssertTypeEq<flx3, decltype(flx1() * float())>();
 73 |     ::testing::StaticAssertTypeEq<flx3, decltype(flx1() / float())>();
 74 | 
 75 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs += float())>();
 76 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs -= float())>();
 77 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs *= float())>();
 78 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs /= float())>();
 79 | 
 80 |     using doublex = flx::floatx<11, 52>;
 81 |     ::testing::StaticAssertTypeEq<doublex, decltype(flx1() + double())>();
 82 |     ::testing::StaticAssertTypeEq<doublex, decltype(flx1() - double())>();
 83 |     ::testing::StaticAssertTypeEq<doublex, decltype(flx1() * double())>();
 84 |     ::testing::StaticAssertTypeEq<doublex, decltype(flx1() / double())>();
 85 | 
 86 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs += double())>();
 87 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs -= double())>();
 88 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs *= double())>();
 89 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs /= double())>();
 90 | 
 91 |     ::testing::StaticAssertTypeEq<flx1, decltype(flx1() + int())>();
 92 |     ::testing::StaticAssertTypeEq<flx1, decltype(flx1() - int())>();
 93 |     ::testing::StaticAssertTypeEq<flx1, decltype(flx1() * int())>();
 94 |     ::testing::StaticAssertTypeEq<flx1, decltype(flx1() / int())>();
 95 | 
 96 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs += int())>();
 97 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs -= int())>();
 98 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs *= int())>();
 99 |     ::testing::StaticAssertTypeEq<flx1&, decltype(flhs /= int())>();
100 | }
101 | 
102 | 
103 | TEST(FloatxrArithmeticTest, ResultHasCorrectType)
104 | {
105 |     auto fxr = []() { return flx::floatxr<>(8, 23); };
106 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() + fxr())>();
107 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() - fxr())>();
108 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() * fxr())>();
109 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() / fxr())>();
110 | 
111 |     flx::floatxr<> dlhs(8, 23);
112 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&, decltype(dlhs += fxr())>();
113 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&, decltype(dlhs -= fxr())>();
114 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&, decltype(dlhs *= fxr())>();
115 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&, decltype(dlhs /= fxr())>();
116 | }
117 | 
118 | 
119 | TEST(FloatxrArithmeticTest, PromotesTypes)
120 | {
121 |     auto fxr = []() { return flx::floatxr<>(8, 23); };
122 |     using floatx = flx::floatx<9, 12>;
123 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() + floatx())>();
124 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() - floatx())>();
125 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() * floatx())>();
126 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() / floatx())>();
127 | 
128 |     flx::floatxr<> dlhs(8, 23);
129 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&,
130 |                                   decltype(dlhs += floatx())>();
131 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&,
132 |                                   decltype(dlhs -= floatx())>();
133 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&,
134 |                                   decltype(dlhs *= floatx())>();
135 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&,
136 |                                   decltype(dlhs /= floatx())>();
137 | 
138 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() + double())>();
139 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() - double())>();
140 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() * double())>();
141 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() / double())>();
142 | 
143 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&,
144 |                                   decltype(dlhs += double())>();
145 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&,
146 |                                   decltype(dlhs -= double())>();
147 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&,
148 |                                   decltype(dlhs *= double())>();
149 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&,
150 |                                   decltype(dlhs /= double())>();
151 | 
152 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() + int())>();
153 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() - int())>();
154 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() * int())>();
155 |     ::testing::StaticAssertTypeEq<flx::floatxr<>, decltype(fxr() / int())>();
156 | 
157 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&, decltype(dlhs += int())>();
158 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&, decltype(dlhs -= int())>();
159 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&, decltype(dlhs *= int())>();
160 |     ::testing::StaticAssertTypeEq<flx::floatxr<>&, decltype(dlhs /= int())>();
161 | }
162 | 
163 | 
164 | }  // namespace
165 | 


--------------------------------------------------------------------------------
/test/assignment.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
 3 |                     IBM Research GmbH. All rights reserved.
 4 | 
 5 |    Licensed under the Apache License, Version 2.0 (the "License");
 6 |    you may not use this file except in compliance with the License.
 7 |    You may obtain a copy of the License at
 8 | 
 9 |        http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |    Unless required by applicable law or agreed to in writing, software
12 |    distributed under the License is distributed on an "AS IS" BASIS,
13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |    See the License for the specific language governing permissions and
15 |    limitations under the License.
16 | */
17 | 
18 | #include <gtest/gtest.h>
19 | #include <floatx.hpp>
20 | 
21 | 
22 | namespace {
23 | 
24 | 
25 | TEST(FloatxAssignmentTest, PreservesPrecision)
26 | {
27 |     const double val = 1.0 + 1e-15;
28 |     flx::floatx<11, 52> fx_val;
29 |     fx_val = val;
30 |     EXPECT_EQ(val, double(fx_val));
31 | }
32 | 
33 | TEST(FloatxAssignmentTest, LowersPrecision)
34 | {
35 |     const double val = 1.0 + 1e-15;
36 |     flx::floatx<8, 23> fx_val;
37 |     fx_val = val;
38 |     EXPECT_NE(val, double(fx_val));  // round to float
39 | }
40 | 
41 | 
42 | TEST(FloatxAssignmentTest, AssignsBetweenFormats)
43 | {
44 |     const double val = 1.0 + 1e-15;
45 |     flx::floatx<11, 52> d_val(val);
46 |     flx::floatx<8, 23> s_val;
47 |     s_val = d_val;
48 |     EXPECT_NE(val, double(s_val));
49 |     EXPECT_EQ(float(val), float(s_val));
50 | }
51 | 
52 | 
53 | TEST(FloatxrAssignmentTest, PreservesPrecision)
54 | {
55 |     const double val = 1.0 + 1e-15;
56 |     flx::floatxr<> fx_val(11, 52);
57 |     fx_val = val;
58 |     EXPECT_EQ(val, double(fx_val));
59 | }
60 | 
61 | TEST(FloatxrAssignmentTest, LowersPrecision)
62 | {
63 |     const double val = 1.0 + 1e-15;
64 |     flx::floatxr<> fx_val(8, 23);
65 |     fx_val = val;
66 |     EXPECT_NE(val, double(fx_val));  // round to float
67 | }
68 | 
69 | 
70 | TEST(FloatxrAssignmentTest, AssignsBetweenFormats)
71 | {
72 |     const double val = 1.0 + 1e-15;
73 |     flx::floatx<11, 52> d_val(val);
74 |     flx::floatxr<> s_val(8, 23);
75 |     s_val = d_val;
76 |     EXPECT_NE(val, double(s_val));
77 |     EXPECT_EQ(float(val), float(s_val));
78 | }
79 | 
80 | 
81 | }  // namespace
82 | 


--------------------------------------------------------------------------------
/test/conversion.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include <bitset>
 19 | 
 20 | 
 21 | #include <gtest/gtest.h>
 22 | #include <floatx.hpp>
 23 | 
 24 | 
 25 | namespace {
 26 | 
 27 | 
 28 | TEST(FloatxConversionTest, PreservesDoublePrecision)
 29 | {
 30 |     const double val = 1.0 + 1e-15;
 31 |     EXPECT_EQ(val, double(flx::floatx<11, 52>(val)));
 32 | }
 33 | 
 34 | TEST(FloatxConversionTest, LowersPrecision)
 35 | {
 36 |     const double val = 1.0 + 1e-15;
 37 |     EXPECT_NE(val, double(flx::floatx<8, 23>(val)));  // round to float
 38 | }
 39 | 
 40 | 
 41 | TEST(FloatxConversionTest, HandlesDenormals)
 42 | {
 43 |     EXPECT_EQ(0.25, double(flx::floatx<2, 3>(0.25)));
 44 |     EXPECT_EQ(0.75, double(flx::floatx<2, 3>(0.75)));
 45 | }
 46 | 
 47 | 
 48 | TEST(FloatxConversionTest, ConvertsBetweenFloatX)
 49 | {
 50 |     const double val = 1.0 + 1e-15;
 51 |     flx::floatx<11, 52> d_val(val);
 52 |     flx::floatx<8, 23> s_val(d_val);
 53 |     EXPECT_NE(val, double(s_val));
 54 |     EXPECT_EQ(float(val), float(s_val));
 55 | }
 56 | 
 57 | TEST(FloatxConversionTest, ConvertsToBits)
 58 | {
 59 |     const double val = 1.0 + 1e-15;
 60 |     ::testing::StaticAssertTypeEq<std::bitset<sizeof(val)>,
 61 |                                   decltype(bits(flx::floatx<11, 52>(val)))>();
 62 |     ::testing::StaticAssertTypeEq<std::bitset<sizeof(val)>,
 63 |                                   decltype(bits(flx::floatx<8, 23>(val)))>();
 64 |     EXPECT_EQ(flx::bits(val), bits(flx::floatx<11, 52>(val)));
 65 |     EXPECT_NE(flx::bits(val), bits(flx::floatx<8, 23>(val)));
 66 | }
 67 | 
 68 | TEST(FloatxConversionTest, ConvertsToString)
 69 | {
 70 |     flx::floatx<4, 5> val1 = 1.0;
 71 |     EXPECT_EQ("0-0111-00000", bitstring(val1));
 72 |     flx::floatx<3, 2> val2 = 1.75;
 73 |     EXPECT_EQ("0-011-11", bitstring(val2));
 74 |     flx::floatx<5, 7> val3 = 0.0;
 75 |     EXPECT_EQ("0-00000-0000000", bitstring(val3));
 76 | }
 77 | 
 78 | TEST(FloatxrConversionTest, PreservesDoublePrecision)
 79 | {
 80 |     const double val = 1.0 + 1e-15;
 81 |     EXPECT_EQ(val, double(flx::floatxr<>(11, 52, val)));
 82 | }
 83 | 
 84 | TEST(FloatxrConversionTest, LowersPrecision)
 85 | {
 86 |     const double val = 1.0 + 1e-15;
 87 |     EXPECT_NE(val, double(flx::floatxr<>(8, 23, val)));  // round to float
 88 | }
 89 | 
 90 | TEST(FloatxrConversionTest, InheritsPrecision)
 91 | {
 92 |     const double val = 1.0 + 1e-15;
 93 |     EXPECT_EQ(val, double(flx::floatxr<>(val)));
 94 | }
 95 | 
 96 | TEST(FloatxrConversionTest, ChangesPrecision)
 97 | {
 98 |     const double val = 1.0 + 1e-15;
 99 |     flx::floatxr<> fxr_val(val);
100 |     fxr_val.set_precision(8, 23);
101 |     EXPECT_NE(val, double(fxr_val));
102 |     EXPECT_EQ(float(val), float(fxr_val));
103 | }
104 | 
105 | TEST(FloatxrConversionTest, ConvertsBetweenFloatX)
106 | {
107 |     const double val = 1.0 + 1e-15;
108 |     flx::floatx<11, 52> d_val(val);
109 |     flx::floatxr<> s_val(8, 23, d_val);
110 |     EXPECT_NE(val, double(s_val));
111 |     EXPECT_EQ(float(val), float(s_val));
112 | }
113 | 
114 | TEST(FloatxrConversionTest, ConvertsToBits)
115 | {
116 |     const double val = 1.0 + 1e-15;
117 |     ::testing::StaticAssertTypeEq<std::bitset<sizeof(val)>,
118 |                                   decltype(bits(flx::floatxr<>(val)))>();
119 |     EXPECT_EQ(flx::bits(val), bits(flx::floatxr<>(val)));
120 |     EXPECT_NE(flx::bits(val), bits(flx::floatxr<>(8, 23, val)));
121 | }
122 | 
123 | 
124 | TEST(FloatxrConversionTest, ConvertsToString)
125 | {
126 |     flx::floatxr<> val1(4, 5, 1.0);
127 |     EXPECT_EQ("0-0111-00000", bitstring(val1));
128 |     flx::floatxr<> val2(3, 2, 1.75);
129 |     EXPECT_EQ("0-011-11", bitstring(val2));
130 |     flx::floatxr<> val3(5, 7, 0.0);
131 |     EXPECT_EQ("0-00000-0000000", bitstring(val3));
132 | }
133 | 
134 | }  // namespace
135 | 


--------------------------------------------------------------------------------
/test/rel_ops.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include <gtest/gtest.h>
 19 | #include <floatx.hpp>
 20 | 
 21 | 
 22 | namespace {
 23 | 
 24 | 
 25 | TEST(FloatxRelOpsTest, Equal)
 26 | {
 27 |     using doublex = flx::floatx<11, 52>;
 28 |     using floatx = flx::floatx<8, 23>;
 29 |     const double val1 = 1.0 + 1e-15;
 30 |     const double val2 = 1.0 + 2e-15;
 31 |     EXPECT_TRUE(doublex(val1) == doublex(val1));
 32 |     EXPECT_FALSE(doublex(val1) == doublex(val2));
 33 |     EXPECT_TRUE(floatx(val1) == floatx(val1));
 34 |     EXPECT_TRUE(floatx(val1) == floatx(val2));    // due to rounding
 35 |     EXPECT_FALSE(floatx(val1) == doublex(val1));  // due to rounding
 36 |     EXPECT_FALSE(floatx(val1) == doublex(val2));  // due to rounding
 37 | }
 38 | 
 39 | 
 40 | TEST(FloatxRelOpsTest, NotEqual)
 41 | {
 42 |     using doublex = flx::floatx<11, 52>;
 43 |     using floatx = flx::floatx<8, 23>;
 44 |     const double val1 = 1.0 + 1e-15;
 45 |     const double val2 = 1.0 + 2e-15;
 46 |     EXPECT_FALSE(doublex(val1) != doublex(val1));
 47 |     EXPECT_TRUE(doublex(val1) != doublex(val2));
 48 |     EXPECT_FALSE(floatx(val1) != floatx(val1));
 49 |     EXPECT_FALSE(floatx(val1) != floatx(val2));  // due to rounding
 50 |     EXPECT_TRUE(floatx(val1) != doublex(val1));  // due to rounding
 51 |     EXPECT_TRUE(floatx(val1) != doublex(val2));  // due to rounding
 52 | }
 53 | 
 54 | 
 55 | TEST(FloatxRelOpsTest, LessThan)
 56 | {
 57 |     using doublex = flx::floatx<11, 52>;
 58 |     using floatx = flx::floatx<8, 23>;
 59 |     const double val1 = 1.0 + 1e-15;
 60 |     const double val2 = 1.0 + 2e-15;
 61 |     EXPECT_FALSE(doublex(val1) < doublex(val1));
 62 |     EXPECT_FALSE(doublex(val2) < doublex(val1));
 63 |     EXPECT_TRUE(doublex(val1) < doublex(val2));
 64 |     EXPECT_FALSE(floatx(val1) < floatx(val1));
 65 |     EXPECT_FALSE(floatx(val2) < floatx(val1));
 66 |     EXPECT_FALSE(floatx(val1) < floatx(val2));  // due to rounding
 67 |     EXPECT_TRUE(floatx(val1) < doublex(val1));  // due to rounding
 68 |     EXPECT_TRUE(floatx(val2) < doublex(val1));  // due to rounding
 69 |     EXPECT_TRUE(floatx(val1) < doublex(val2));  // due to rounding
 70 | }
 71 | 
 72 | 
 73 | TEST(FloatxRelOpsTest, LessOrEqual)
 74 | {
 75 |     using doublex = flx::floatx<11, 52>;
 76 |     using floatx = flx::floatx<8, 23>;
 77 |     const double val1 = 1.0 + 1e-15;
 78 |     const double val2 = 1.0 + 2e-15;
 79 |     EXPECT_TRUE(doublex(val1) <= doublex(val1));
 80 |     EXPECT_FALSE(doublex(val2) <= doublex(val1));
 81 |     EXPECT_TRUE(doublex(val1) <= doublex(val2));
 82 |     EXPECT_TRUE(floatx(val1) <= floatx(val1));
 83 |     EXPECT_TRUE(floatx(val2) <= floatx(val1));  // due to rounding
 84 |     EXPECT_TRUE(floatx(val1) <= floatx(val2));
 85 |     EXPECT_TRUE(floatx(val1) <= doublex(val1));
 86 |     EXPECT_TRUE(floatx(val2) <= doublex(val1));  // due to rounding
 87 |     EXPECT_TRUE(floatx(val1) <= doublex(val2));
 88 | }
 89 | 
 90 | 
 91 | TEST(FloatxRelOpsTest, GreaterThan)
 92 | {
 93 |     using doublex = flx::floatx<11, 52>;
 94 |     using floatx = flx::floatx<8, 23>;
 95 |     const double val1 = 1.0 + 1e-15;
 96 |     const double val2 = 1.0 + 2e-15;
 97 |     EXPECT_FALSE(doublex(val1) > doublex(val1));
 98 |     EXPECT_TRUE(doublex(val2) > doublex(val1));
 99 |     EXPECT_FALSE(doublex(val1) > doublex(val2));
100 |     EXPECT_FALSE(floatx(val1) > floatx(val1));
101 |     EXPECT_FALSE(floatx(val2) > floatx(val1));  // due to rounding
102 |     EXPECT_FALSE(floatx(val1) > floatx(val2));
103 |     EXPECT_FALSE(floatx(val1) > doublex(val1));
104 |     EXPECT_FALSE(floatx(val2) > doublex(val1));  // due to rounding
105 |     EXPECT_FALSE(floatx(val1) > doublex(val2));
106 | }
107 | 
108 | 
109 | TEST(FloatxRelOpsTest, GreaterOrEqual)
110 | {
111 |     using doublex = flx::floatx<11, 52>;
112 |     using floatx = flx::floatx<8, 23>;
113 |     const double val1 = 1.0 + 1e-15;
114 |     const double val2 = 1.0 + 2e-15;
115 |     EXPECT_TRUE(doublex(val1) >= doublex(val1));
116 |     EXPECT_TRUE(doublex(val2) >= doublex(val1));
117 |     EXPECT_FALSE(doublex(val1) >= doublex(val2));
118 |     EXPECT_TRUE(floatx(val1) >= floatx(val1));
119 |     EXPECT_TRUE(floatx(val2) >= floatx(val1));    // due to rounding
120 |     EXPECT_TRUE(floatx(val1) >= floatx(val2));    // due to rounding
121 |     EXPECT_FALSE(floatx(val1) >= doublex(val1));  // due to rounding
122 |     EXPECT_FALSE(floatx(val2) >= doublex(val1));  // due to rounding
123 |     EXPECT_FALSE(floatx(val1) >= doublex(val2));
124 | }
125 | 
126 | 
127 | TEST(FloatxrRelOpsTest, Equal)
128 | {
129 |     auto doublex = [](double a) { return flx::floatxr<>(11, 52, a); };
130 |     auto floatx = [](double a) { return flx::floatxr<>(8, 23, a); };
131 |     const double val1 = 1.0 + 1e-15;
132 |     const double val2 = 1.0 + 2e-15;
133 |     EXPECT_TRUE(doublex(val1) == doublex(val1));
134 |     EXPECT_FALSE(doublex(val1) == doublex(val2));
135 |     EXPECT_TRUE(floatx(val1) == floatx(val1));
136 |     EXPECT_TRUE(floatx(val1) == floatx(val2));    // due to rounding
137 |     EXPECT_FALSE(floatx(val1) == doublex(val1));  // due to rounding
138 |     EXPECT_FALSE(floatx(val1) == doublex(val2));  // due to rounding
139 | }
140 | 
141 | 
142 | TEST(FloatxrRelOpsTest, NotEqual)
143 | {
144 |     auto doublex = [](double a) { return flx::floatxr<>(11, 52, a); };
145 |     auto floatx = [](double a) { return flx::floatxr<>(8, 23, a); };
146 |     const double val1 = 1.0 + 1e-15;
147 |     const double val2 = 1.0 + 2e-15;
148 |     EXPECT_FALSE(doublex(val1) != doublex(val1));
149 |     EXPECT_TRUE(doublex(val1) != doublex(val2));
150 |     EXPECT_FALSE(floatx(val1) != floatx(val1));
151 |     EXPECT_FALSE(floatx(val1) != floatx(val2));  // due to rounding
152 |     EXPECT_TRUE(floatx(val1) != doublex(val1));  // due to rounding
153 |     EXPECT_TRUE(floatx(val1) != doublex(val2));  // due to rounding
154 | }
155 | 
156 | 
157 | TEST(FloatxrRelOpsTest, LessThan)
158 | {
159 |     auto doublex = [](double a) { return flx::floatxr<>(11, 52, a); };
160 |     auto floatx = [](double a) { return flx::floatxr<>(8, 23, a); };
161 |     const double val1 = 1.0 + 1e-15;
162 |     const double val2 = 1.0 + 2e-15;
163 |     EXPECT_FALSE(doublex(val1) < doublex(val1));
164 |     EXPECT_FALSE(doublex(val2) < doublex(val1));
165 |     EXPECT_TRUE(doublex(val1) < doublex(val2));
166 |     EXPECT_FALSE(floatx(val1) < floatx(val1));
167 |     EXPECT_FALSE(floatx(val2) < floatx(val1));
168 |     EXPECT_FALSE(floatx(val1) < floatx(val2));  // due to rounding
169 |     EXPECT_TRUE(floatx(val1) < doublex(val1));  // due to rounding
170 |     EXPECT_TRUE(floatx(val2) < doublex(val1));  // due to rounding
171 |     EXPECT_TRUE(floatx(val1) < doublex(val2));  // due to rounding
172 | }
173 | 
174 | 
175 | TEST(FloatxrRelOpsTest, LessOrEqual)
176 | {
177 |     auto doublex = [](double a) { return flx::floatxr<>(11, 52, a); };
178 |     auto floatx = [](double a) { return flx::floatxr<>(8, 23, a); };
179 |     const double val1 = 1.0 + 1e-15;
180 |     const double val2 = 1.0 + 2e-15;
181 |     EXPECT_TRUE(doublex(val1) <= doublex(val1));
182 |     EXPECT_FALSE(doublex(val2) <= doublex(val1));
183 |     EXPECT_TRUE(doublex(val1) <= doublex(val2));
184 |     EXPECT_TRUE(floatx(val1) <= floatx(val1));
185 |     EXPECT_TRUE(floatx(val2) <= floatx(val1));  // due to rounding
186 |     EXPECT_TRUE(floatx(val1) <= floatx(val2));
187 |     EXPECT_TRUE(floatx(val1) <= doublex(val1));
188 |     EXPECT_TRUE(floatx(val2) <= doublex(val1));  // due to rounding
189 |     EXPECT_TRUE(floatx(val1) <= doublex(val2));
190 | }
191 | 
192 | 
193 | TEST(FloatxrRelOpsTest, GreaterThan)
194 | {
195 |     auto doublex = [](double a) { return flx::floatxr<>(11, 52, a); };
196 |     auto floatx = [](double a) { return flx::floatxr<>(8, 23, a); };
197 |     const double val1 = 1.0 + 1e-15;
198 |     const double val2 = 1.0 + 2e-15;
199 |     EXPECT_FALSE(doublex(val1) > doublex(val1));
200 |     EXPECT_TRUE(doublex(val2) > doublex(val1));
201 |     EXPECT_FALSE(doublex(val1) > doublex(val2));
202 |     EXPECT_FALSE(floatx(val1) > floatx(val1));
203 |     EXPECT_FALSE(floatx(val2) > floatx(val1));  // due to rounding
204 |     EXPECT_FALSE(floatx(val1) > floatx(val2));
205 |     EXPECT_FALSE(floatx(val1) > doublex(val1));
206 |     EXPECT_FALSE(floatx(val2) > doublex(val1));  // due to rounding
207 |     EXPECT_FALSE(floatx(val1) > doublex(val2));
208 | }
209 | 
210 | 
211 | TEST(FloatxrRelOpsTest, GreaterOrEqual)
212 | {
213 |     auto doublex = [](double a) { return flx::floatxr<>(11, 52, a); };
214 |     auto floatx = [](double a) { return flx::floatxr<>(8, 23, a); };
215 |     const double val1 = 1.0 + 1e-15;
216 |     const double val2 = 1.0 + 2e-15;
217 |     EXPECT_TRUE(doublex(val1) >= doublex(val1));
218 |     EXPECT_TRUE(doublex(val2) >= doublex(val1));
219 |     EXPECT_FALSE(doublex(val1) >= doublex(val2));
220 |     EXPECT_TRUE(floatx(val1) >= floatx(val1));
221 |     EXPECT_TRUE(floatx(val2) >= floatx(val1));    // due to rounding
222 |     EXPECT_TRUE(floatx(val1) >= floatx(val2));    // due to rounding
223 |     EXPECT_FALSE(floatx(val1) >= doublex(val1));  // due to rounding
224 |     EXPECT_FALSE(floatx(val2) >= doublex(val1));  // due to rounding
225 |     EXPECT_FALSE(floatx(val1) >= doublex(val2));
226 | }
227 | 
228 | 
229 | }  // namespace
230 | 


--------------------------------------------------------------------------------
/test/round_nearest.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
 3 |                     IBM Research GmbH. All rights reserved.
 4 | 
 5 |    Licensed under the Apache License, Version 2.0 (the "License");
 6 |    you may not use this file except in compliance with the License.
 7 |    You may obtain a copy of the License at
 8 | 
 9 |        http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |    Unless required by applicable law or agreed to in writing, software
12 |    distributed under the License is distributed on an "AS IS" BASIS,
13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |    See the License for the specific language governing permissions and
15 |    limitations under the License.
16 | */
17 | 
18 | #include <gtest/gtest.h>
19 | #include <floatx.hpp>
20 | 
21 | // Check internal functions.
22 | // Rounding of a uint64_t type value, as done by a routine described as
23 | // inline uint64_t SHIFT_RIGHT_ROUND_NEAREST(uint64_t mant, uint16_t SHIFT)
24 | // (the tests below call flx::detail::round_nearest; the name above may be
25 | // outdated).  It implements rounding according to the IEEE 754 standard with
26 | // a NEAREST policy; ties are resolved to even.
27 | 
28 | TEST(RoundNearest, down)
29 | {
30 |     // The low 10 bits (0x19a) are below the halfway point 0x200: round down.
31 |     // number: 0x3e6999999999999a
32 |     // 0011111001101001100110011001100110011001100110011001100110011010
33 |     // >> 10 : 0x000f9a6666666666
34 |     // 0000000000001111100110100110011001100110011001100110011001100110
35 |     // RND 10: 0x000f9a6666666666
36 |     // 0000000000001111100110100110011001100110011001100110011001100110
37 |     uint64_t number = 0x3e6999999999999a;
38 |     int shift_amount = 10;
39 |     uint64_t expected = 0x000f9a6666666666 << shift_amount;
40 | 
41 |     EXPECT_EQ(expected, flx::detail::round_nearest(number, shift_amount));
42 | }
43 | 
44 | TEST(RoundNearest, up)
45 | {
46 |     // The low 12 bits (0x99a) are above the halfway point 0x800: round up.
47 |     // number: 0x3e6999999999999a
48 |     // 0011111001101001100110011001100110011001100110011001100110011010
49 |     // >> 12 : 0x0003e69999999999
50 |     // 0000000000000011111001101001100110011001100110011001100110011001
51 |     // RND 12: 0x0003e6999999999a
52 |     // 0000000000000011111001101001100110011001100110011001100110011010
53 |     uint64_t number = 0x3e6999999999999a;
54 |     int shift_amount = 12;
55 |     uint64_t expected = 0x0003e6999999999a << shift_amount;
56 | 
57 |     EXPECT_EQ(expected, flx::detail::round_nearest(number, shift_amount));
58 | }
59 | 
60 | TEST(RoundNearest, RoundNearestWithTiesToEvenRoundsUp)
61 | {
62 |     // The 4 discarded low bits are exactly 1000 (a tie) and the lowest kept
63 |     // bit is 1 (odd), so ties-to-even rounds up.
64 |     //   input:        0x0ffffffff00000f8
65 |     //   input >> 4:   0x00ffffffff00000f
66 |     //   rounded:      0x00ffffffff000010
67 |     uint64_t input = 0x0ffffffff00000f8;
68 |     int dropped_bits = 4;
69 |     // The result is expected to keep the retained bits in place.
70 |     uint64_t rounded = 0x00ffffffff000010;
71 |     rounded <<= dropped_bits;
72 | 
73 |     EXPECT_EQ(rounded, flx::detail::round_nearest(input, dropped_bits));
74 | }
75 | 
76 | TEST(RoundNearest, RoundNearestWithTiesToEvenRoundsDown)
77 | {
78 |     // The 4 discarded low bits are exactly 1000 (a tie) and the lowest kept
79 |     // bit is 0 (even), so ties-to-even leaves the kept bits unchanged.
80 |     //   input:        0x0ffffffff00000e8
81 |     //   input >> 4:   0x00ffffffff00000e
82 |     //   rounded:      0x00ffffffff00000e
83 |     uint64_t input = 0x0ffffffff00000e8;
84 |     int dropped_bits = 4;
85 |     // The result is expected to keep the retained bits in place.
86 |     uint64_t rounded = 0x00ffffffff00000e;
87 |     rounded <<= dropped_bits;
88 | 
89 |     EXPECT_EQ(rounded, flx::detail::round_nearest(input, dropped_bits));
90 | }
91 | 


--------------------------------------------------------------------------------
/test/std_integration.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include <gtest/gtest.h>
 19 | #include <floatx.hpp>
 20 | 
 21 | 
 22 | #include <algorithm>
 23 | #include <complex>
 24 | #include <tuple>
 25 | #include <vector>
 26 | 
 27 | 
 28 | namespace {
 29 | 
 30 | 
 31 | using doublex = flx::floatx<11, 52>;  // 11 exponent, 52 significand bits
 32 | using floatx = flx::floatx<8, 23>;    // 8 exponent, 23 significand bits
 33 | 
 34 | 
 35 | TEST(Tuple, CanCreateFloatXTuple)
 36 | {
 37 |     // Both floatx aliases are stored side by side in one tuple.
 38 |     auto values = std::make_tuple(doublex{3.2}, floatx{5.2});
 39 |     ASSERT_NEAR(std::get<0>(values), 3.2, 1e-15);
 40 |     ASSERT_NEAR(std::get<1>(values), 5.2, 1e-6);
 41 | }
 42 | 
 43 | 
 44 | TEST(TupleVector, CanCreateVectorOfTuples)
 45 | {
 46 |     std::vector<std::tuple<doublex, floatx>> entries{
 47 |         std::make_tuple(doublex{3.2}, floatx{5.2}),
 48 |         std::make_tuple(doublex{0.5}, floatx{1.2})};
 49 |     // Two elements were stored, so front() and back() are elements 0 and 1.
 50 |     ASSERT_NEAR(std::get<0>(entries.front()), 3.2, 1e-15);
 51 |     ASSERT_NEAR(std::get<1>(entries.front()), 5.2, 1e-6);
 52 |     ASSERT_NEAR(std::get<0>(entries.back()), 0.5, 1e-15);
 53 |     ASSERT_NEAR(std::get<1>(entries.back()), 1.2, 1e-6);
 54 | }
 55 | 
 56 | 
 57 | TEST(TupleVector, CanIterateThroughVector)
 58 | {
 59 |     std::vector<std::tuple<doublex, floatx>> entries{
 60 |         std::make_tuple(doublex{3.2}, floatx{5.2}),
 61 |         std::make_tuple(doublex{0.5}, floatx{1.2})};
 62 |     // Bump only the first component of every tuple, in place.
 63 |     for (auto it = entries.begin(); it != entries.end(); ++it) {
 64 |         std::get<0>(*it) += 1;
 65 |     }
 66 | 
 67 |     ASSERT_NEAR(std::get<0>(entries.front()), 4.2, 1e-15);
 68 |     ASSERT_NEAR(std::get<1>(entries.front()), 5.2, 1e-6);
 69 |     ASSERT_NEAR(std::get<0>(entries.back()), 1.5, 1e-15);
 70 |     ASSERT_NEAR(std::get<1>(entries.back()), 1.2, 1e-6);
 71 | }
 72 | 
 73 | 
 74 | TEST(TupleVector, CanSortTupleVector)
 75 | {
 76 |     std::vector<std::tuple<doublex, floatx>> entries{
 77 |         std::make_tuple(doublex{3.2}, floatx{5.2}),
 78 |         std::make_tuple(doublex{0.5}, floatx{1.2})};
 79 |     // Sorting is expected to move the (0.5, 1.2) tuple to the front.
 80 |     std::sort(entries.begin(), entries.end());
 81 | 
 82 |     ASSERT_NEAR(std::get<0>(entries.front()), 0.5, 1e-15);
 83 |     ASSERT_NEAR(std::get<1>(entries.front()), 1.2, 1e-6);
 84 |     ASSERT_NEAR(std::get<0>(entries.back()), 3.2, 1e-15);
 85 |     ASSERT_NEAR(std::get<1>(entries.back()), 5.2, 1e-6);
 86 | }
 87 | 
 88 | 
 89 | // NOTE: this is non-standard behavior, a conformant implementation is allowed
 90 | // to have undefined behavior for std::complex<flx::floatx<exp, sig>>
 91 | TEST(Complex, CanCreateComplexFloatX)
 92 | {
 93 |     std::complex<floatx> z(3.2, 2.5);
 94 |     // Both parts must match the constructor arguments within 1e-7.
 95 |     ASSERT_NEAR(z.real(), 3.2, 1e-7);
 96 |     ASSERT_NEAR(z.imag(), 2.5, 1e-7);
 97 | }
 98 | 
 99 | 
100 | TEST(Complex, CanAddComplexFloatX)
101 | {
102 |     std::complex<floatx> lhs(3.2, 2.5);
103 |     std::complex<floatx> rhs(2.3, 1.4);
104 | 
105 |     // (3.2 + 2.5i) + (2.3 + 1.4i) = 5.5 + 3.9i
106 |     std::complex<floatx> sum = lhs + rhs;
107 |     ASSERT_NEAR(sum.real(), 5.5, 1e-7);
108 |     ASSERT_NEAR(sum.imag(), 3.9, 1e-7);
109 | }
110 | 
111 | 
112 | TEST(Complex, CanSubstractComplexFloatX)
113 | {
114 |     std::complex<floatx> lhs(3.2, 2.5);
115 |     std::complex<floatx> rhs(2.3, 1.4);
116 | 
117 |     // (3.2 + 2.5i) - (2.3 + 1.4i) = 0.9 + 1.1i
118 |     std::complex<floatx> difference = lhs - rhs;
119 |     ASSERT_NEAR(difference.real(), 0.9, 1e-7);
120 |     ASSERT_NEAR(difference.imag(), 1.1, 1e-7);
121 | }
122 | 
123 | 
124 | TEST(Complex, CanMultiplyComplexFloatX)
125 | {
126 |     std::complex<floatx> lhs(3.0, 2.0);
127 |     std::complex<floatx> rhs(2.0, 1.0);
128 | 
129 |     // (3 + 2i) * (2 + i) = 4 + 7i
130 |     std::complex<floatx> product = lhs * rhs;
131 |     ASSERT_NEAR(product.real(), 4.0, 1e-7);
132 |     ASSERT_NEAR(product.imag(), 7.0, 1e-7);
133 | }
134 | 
135 | 
136 | TEST(Complex, CanDivideComplexFloatX)
137 | {
138 |     std::complex<floatx> lhs(3.0, 2.0);
139 |     std::complex<floatx> rhs(2.0, 1.0);
140 | 
141 |     // (3 + 2i) / (2 + i) = 1.6 + 0.2i
142 |     std::complex<floatx> quotient = lhs / rhs;
143 |     ASSERT_NEAR(quotient.real(), 1.6, 1e-7);
144 |     ASSERT_NEAR(quotient.imag(), 0.2, 1e-7);
145 | }
146 | 
147 | 
148 | }  // namespace
149 | 


--------------------------------------------------------------------------------
/test/stream.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
 3 |                     IBM Research GmbH. All rights reserved.
 4 | 
 5 |    Licensed under the Apache License, Version 2.0 (the "License");
 6 |    you may not use this file except in compliance with the License.
 7 |    You may obtain a copy of the License at
 8 | 
 9 |        http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |    Unless required by applicable law or agreed to in writing, software
12 |    distributed under the License is distributed on an "AS IS" BASIS,
13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |    See the License for the specific language governing permissions and
15 |    limitations under the License.
16 | */
17 | 
18 | #include <sstream>
19 | 
20 | 
21 | #include <gtest/gtest.h>
22 | #include <floatx.hpp>
23 | 
24 | 
25 | namespace {
26 | 
27 | 
28 | TEST(FloatxStreamTest, WritesToOutputStream)
29 | {
30 |     std::stringstream sink;
31 |     flx::floatx<8, 23> value = 1.0 + 1e-15;
32 |     sink << value;  // expected to be written as plain "1"
33 |     EXPECT_EQ("1", sink.str());
34 | }
35 | 
36 | 
37 | TEST(FloatxStreamTest, ReadsFromOutputStream)
38 | {
39 |     std::stringstream source("1.00000000000001");
40 |     flx::floatx<8, 23> parsed;
41 |     source >> parsed;  // the parsed value is expected to compare equal to 1.0
42 |     EXPECT_EQ(1.0, parsed);
43 | }
44 | 
45 | 
46 | TEST(FloatxrStreamTest, WritesToOutputStream)
47 | {
48 |     std::stringstream sink;
49 |     flx::floatxr<> value = 1.0 + 1e-15;
50 |     value.set_precision(8, 23);  // precision is chosen at run time here
51 |     sink << value;
52 |     EXPECT_EQ("1", sink.str());
53 | }
54 | 
55 | 
56 | TEST(FloatxrStreamTest, ReadsFromOutputStream)
57 | {
58 |     std::stringstream source("1.00000000000001");
59 |     flx::floatxr<> parsed(8, 23);
60 |     source >> parsed;  // the parsed value is expected to compare equal to 1.0
61 |     EXPECT_EQ(1.0, parsed);
62 | }
63 | 
64 | 
65 | }  // namespace
66 | 


--------------------------------------------------------------------------------
/test/value_representation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include <gtest/gtest.h>
 19 | #include <floatx.hpp>
 20 | 
 21 | #include "IEEEHelper.h"
 22 | 
 23 | // TEST(IEEE_helper, demonstrate ) {
 24 | // 	uint16_t e = 3;
 25 | // 	uint16_t m = 5;
 26 | // 	IEEEHelper h = IEEEHelper(e,m);
 27 | // 	h.showConfig();
 28 | // 	EXPECT_EQ(1, 1);
 29 | // }
 30 | 
 31 | class CheckValidRepresentationOfFloatx5_10  // fixture; parameter is (e, m)
 32 |     : public ::testing::TestWithParam<std::tuple<uint16_t, uint16_t>> {};
 33 | 
 34 | class CheckValidRepresentationOfFloatx4_8  // fixture; parameter is (e, m)
 35 |     : public ::testing::TestWithParam<std::tuple<uint16_t, uint16_t>> {};
 36 | 
 37 | template <typename T>
 38 | void test(double d)
 39 | {
 40 |     // Round-trip d through T; the result must compare equal to the input.
 41 |     T converted = d;
 42 |     double recovered = static_cast<double>(converted);
 43 | 
 44 |     // A failing ASSERT_* returns from this helper only, not from the caller.
 45 |     ASSERT_EQ(recovered, d);
 46 | }
 47 | 
 48 | TEST_P(CheckValidRepresentationOfFloatx5_10, subnormal)
 49 | {
 50 |     // The parameter holds the (e, m) pair handed to IEEEHelper.
 51 |     const auto& format = GetParam();
 52 |     uint16_t const e = std::get<0>(format);
 53 |     uint16_t const m = std::get<1>(format);
 54 |     IEEEHelper helper(e, m);
 55 | 
 56 |     // Every value of the helper's subnormal range must round-trip through
 57 |     // flx::floatx<5, 10> unchanged.
 58 |     int const count = helper.countSubnormalRange();
 59 |     for (int idx = 0; idx < count; ++idx) {
 60 |         test<flx::floatx<5, 10>>(helper.iterateSubnormalRange(idx));
 61 |     }
 62 | }
 63 | 
 64 | TEST_P(CheckValidRepresentationOfFloatx5_10, regular)
 65 | {
 66 |     // The parameter holds the (e, m) pair handed to IEEEHelper.
 67 |     const auto& format = GetParam();
 68 |     uint16_t const e = std::get<0>(format);
 69 |     uint16_t const m = std::get<1>(format);
 70 |     IEEEHelper helper(e, m);
 71 | 
 72 |     // Loop bounds; note that the mantissa bound is countSubnormalRange().
 73 |     int const exp_count = helper.countExpRange();
 74 |     int const mant_count = helper.countSubnormalRange();
 75 | 
 76 |     // Every (exponent, mantissa) combination must round-trip unchanged.
 77 |     for (int ie = 0; ie < exp_count; ++ie) {
 78 |         for (int im = 0; im < mant_count; ++im) {
 79 |             test<flx::floatx<5, 10>>(helper.iterateNormalRange(ie, im));
 80 |         }
 81 |     }
 82 | }
 83 | 
 84 | TEST_P(CheckValidRepresentationOfFloatx4_8, subnormal)
 85 | {
 86 |     // The parameter holds the (e, m) pair handed to IEEEHelper.
 87 |     const auto& format = GetParam();
 88 |     uint16_t const e = std::get<0>(format);
 89 |     uint16_t const m = std::get<1>(format);
 90 |     IEEEHelper helper(e, m);
 91 | 
 92 |     // Every value of the helper's subnormal range must round-trip through
 93 |     // flx::floatx<4, 8> unchanged.
 94 |     int const count = helper.countSubnormalRange();
 95 |     for (int idx = 0; idx < count; ++idx) {
 96 |         test<flx::floatx<4, 8>>(helper.iterateSubnormalRange(idx));
 97 |     }
 98 | }
 99 | 
100 | TEST_P(CheckValidRepresentationOfFloatx4_8, regular)
101 | {
102 |     // The parameter holds the (e, m) pair handed to IEEEHelper.
103 |     const auto& format = GetParam();
104 |     uint16_t const e = std::get<0>(format);
105 |     uint16_t const m = std::get<1>(format);
106 |     IEEEHelper helper(e, m);
107 | 
108 |     // Loop bounds; note that the mantissa bound is countSubnormalRange().
109 |     int const exp_count = helper.countExpRange();
110 |     int const mant_count = helper.countSubnormalRange();
111 | 
112 |     // Every (exponent, mantissa) combination must round-trip unchanged.
113 |     for (int ie = 0; ie < exp_count; ++ie) {
114 |         for (int im = 0; im < mant_count; ++im) {
115 |             test<flx::floatx<4, 8>>(helper.iterateNormalRange(ie, im));
116 |         }
117 |     }
118 | }
119 | 
120 | // TESTS ON FLOATX TYPE HALF <5,10>: THE FULL FORMAT, THEN SUBSET FORMATS
121 | INSTANTIATE_TEST_CASE_P(TestParams_full_subnormal_range,
122 |                         CheckValidRepresentationOfFloatx5_10,
123 |                         testing::Values(::testing::make_tuple(5, 10)));
124 | 
125 | INSTANTIATE_TEST_CASE_P(  // (e, m) formats with e <= 5 and m <= 10
126 |     TestParams_subset_subnormal_range, CheckValidRepresentationOfFloatx5_10,
127 |     testing::Values(::testing::make_tuple(2, 3), ::testing::make_tuple(3, 4),
128 |                     ::testing::make_tuple(4, 2), ::testing::make_tuple(4, 8),
129 |                     ::testing::make_tuple(5, 8), ::testing::make_tuple(2, 10)));
130 | 
131 | // TESTS ON FLOATX TYPE <4,8>: THE FULL FORMAT, THEN SUBSET FORMATS
132 | INSTANTIATE_TEST_CASE_P(TestParams_full_subnormal_range,
133 |                         CheckValidRepresentationOfFloatx4_8,
134 |                         testing::Values(::testing::make_tuple(4, 8)));
135 | 
136 | INSTANTIATE_TEST_CASE_P(TestParams_subset_subnormal_range,
137 |                         CheckValidRepresentationOfFloatx4_8,  // e<=4, m<=8
138 |                         testing::Values(::testing::make_tuple(2, 3),
139 |                                         ::testing::make_tuple(3, 8),
140 |                                         ::testing::make_tuple(4, 3)));
141 | 


--------------------------------------------------------------------------------
/test/value_representation_bits.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include <gtest/gtest.h>
 19 | #include <floatx.hpp>
 20 | 
 21 | TEST(MyTestBF, BF_3_5)
 22 | {
 23 |     // NOTE(review): the test name says 3_5 but M is 3 -- confirm which is meant.
 24 |     const uint8_t E = 3;
 25 |     const uint8_t M = 3;
 26 | 
 27 |     // Exhaustively walk every bit pattern of the (1 + E + M)-bit format.
 28 |     const uint64_t mx = uint64_t{1} << (1 + E + M);
 29 |     for (uint64_t cnt = 0; cnt < mx; ++cnt) {
 30 |         // define input pattern
 31 |         std::bitset<1 + E + M> pattern(cnt);
 32 | 
 33 |         // get a backend number
 34 |         double bd = flx::detail::construct_number<E, M>(pattern);
 35 | 
 36 |         // Converting to floatx and back must not change the value. The raw
 37 |         // bits are compared through the library's bit-cast helper because
 38 |         // dereferencing a reinterpret_cast<uint64_t*> of a double violates
 39 |         // the strict aliasing rules.
 40 |         flx::floatx<E, M> fx = bd;
 41 |         double r = double(fx);
 42 |         EXPECT_EQ(flx::detail::reinterpret_as_bits(r),
 43 |                   flx::detail::reinterpret_as_bits(bd));
 44 | 
 45 |         // get the reverse functionality
 46 |         std::bitset<1 + E + M> out =
 47 |             flx::detail::get_fullbit_representation_BS<E, M>(r);
 48 | 
 49 |         EXPECT_EQ(pattern, out);
 50 |     }
 51 | }
 52 | 
 53 | TEST(MyTestBF, BF_5_2)
 54 | {
 55 |     const uint8_t E = 5;
 56 |     const uint8_t M = 2;
 57 | 
 58 |     // Exhaustively walk every bit pattern of the (1 + E + M)-bit format.
 59 |     const uint64_t mx = uint64_t{1} << (1 + E + M);
 60 |     for (uint64_t cnt = 0; cnt < mx; ++cnt) {
 61 |         // define input pattern
 62 |         std::bitset<1 + E + M> pattern(cnt);
 63 | 
 64 |         // get a backend number
 65 |         double bd = flx::detail::construct_number<E, M>(pattern);
 66 | 
 67 |         // Converting to floatx and back must not change the value. The raw
 68 |         // bits are compared through the library's bit-cast helper because
 69 |         // dereferencing a reinterpret_cast<uint64_t*> of a double violates
 70 |         // the strict aliasing rules.
 71 |         flx::floatx<E, M> fx = bd;
 72 |         double r = double(fx);
 73 |         EXPECT_EQ(flx::detail::reinterpret_as_bits(r),
 74 |                   flx::detail::reinterpret_as_bits(bd));
 75 | 
 76 |         // get the reverse functionality
 77 |         std::bitset<1 + E + M> out =
 78 |             flx::detail::get_fullbit_representation_BS<E, M>(r);
 79 | 
 80 |         // the recovered pattern must equal the one we started from
 81 |         EXPECT_EQ(pattern, out);
 82 |     }
 83 | }
 84 | 
 85 | TEST(MyTestBF, BF_5_10)
 86 | {
 87 |     const uint8_t E = 5;
 88 |     const uint8_t M = 10;
 89 | 
 90 |     // Exhaustively walk every bit pattern of the (1 + E + M)-bit format.
 91 |     const uint64_t mx = uint64_t{1} << (1 + E + M);
 92 |     for (uint64_t cnt = 0; cnt < mx; ++cnt) {
 93 |         // define input pattern
 94 |         std::bitset<1 + E + M> pattern(cnt);
 95 | 
 96 |         // get a backend number
 97 |         double bd = flx::detail::construct_number<E, M>(pattern);
 98 | 
 99 |         // Converting to floatx and back must not change the value. The raw
100 |         // bits are compared through the library's bit-cast helper because
101 |         // dereferencing a reinterpret_cast<uint64_t*> of a double violates
102 |         // the strict aliasing rules.
103 |         flx::floatx<E, M> fx = bd;
104 |         double r = double(fx);
105 |         EXPECT_EQ(flx::detail::reinterpret_as_bits(r),
106 |                   flx::detail::reinterpret_as_bits(bd));
107 | 
108 |         // get the reverse functionality
109 |         std::bitset<1 + E + M> out =
110 |             flx::detail::get_fullbit_representation_BS<E, M>(r);
111 | 
112 |         // the recovered pattern must equal the one we started from
113 |         EXPECT_EQ(pattern, out);
114 |     }
115 | }


--------------------------------------------------------------------------------
/test/value_representation_half.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright 2018 - The OPRECOMP Project Consortium, Universitat Jaume I,
  3 |                     IBM Research GmbH. All rights reserved.
  4 | 
  5 |    Licensed under the Apache License, Version 2.0 (the "License");
  6 |    you may not use this file except in compliance with the License.
  7 |    You may obtain a copy of the License at
  8 | 
  9 |        http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |    Unless required by applicable law or agreed to in writing, software
 12 |    distributed under the License is distributed on an "AS IS" BASIS,
 13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |    See the License for the specific language governing permissions and
 15 |    limitations under the License.
 16 | */
 17 | 
 18 | #include <gtest/gtest.h>
 19 | #include <floatx.hpp>
 20 | 
 21 | void show(double d)
 22 | {
 23 |     // Print d as decimal, hex and bit string; %llx needs unsigned long long.
 24 |     const unsigned long long bits = flx::detail::reinterpret_as_bits(d);
 25 |     printf("%.20e\t0x%016llx\t", d, bits);
 26 |     std::cout << std::bitset<64>(bits) << std::endl;
 27 | }
 28 | 
 29 | class MyTest : public ::testing::TestWithParam<std::tuple<uint64_t, uint64_t>> {
 30 | };  // parameter: (input bit pattern, expected output bit pattern)
 31 | 
 32 | TEST_P(MyTest, TestFormula)
 33 | {
 34 |     // Parameter layout: (bit pattern of the input double, bit pattern of the
 35 |     // double expected after a round trip through flx::floatx<5, 10>).
 36 |     uint64_t const in_number = std::get<0>(GetParam());
 37 |     uint64_t const out_number = std::get<1>(GetParam());
 38 | 
 39 |     double d = flx::detail::reinterpret_bits_as<double>(in_number);
 40 | 
 41 |     // IEEE half.
 42 |     flx::floatx<5, 10> fx = d;
 43 |     double recovered_fx = static_cast<double>(fx);
 44 | 
 45 |     // Debugging aid: show(d); show(recovered_fx);
 46 | 
 47 |     // Compare bit patterns rather than values: a value comparison does not
 48 |     // work for NaNs, and comparing bits also enforces the same representation
 49 |     // for NaNs. The expected value goes first.
 50 |     uint64_t out = flx::detail::reinterpret_as_bits(recovered_fx);
 51 |     ASSERT_EQ(out_number, out);
 52 | }
 53 | 
 54 | INSTANTIATE_TEST_CASE_P(
 55 |     TestWithParameters_manual_sampels, MyTest,
 56 |     testing::Values(
 57 | 
 58 |         // Case triggering >> 64 (which generates wrong masks)
 59 |         // number:     	6.09397888183593736447e-05	0x3f0ff33333333333
 60 |         // 0011111100001111111100110011001100110011001100110011001100110011
 61 |         // recoverd_sf:	6.09159469604492187500e-05	0x3f0ff00000000000
 62 |         // 0011111100001111111100000000000000000000000000000000000000000000
 63 |         ::testing::make_tuple(0x3f0ff33333333333, 0x3f0ff00000000000),
 64 | 
 65 |         // Case where rounding twice in succession is not the same as
 66 |         // rounding once (sticky bit)
 67 |         // number:     	6.09517097473144544803e-05	0x3f0ff4cccccccccd
 68 |         // 0011111100001111111101001100110011001100110011001100110011001101
 69 |         // recoverd_sf:	6.09755516052246093750e-05	0x3f0ff80000000000
 70 |         // 0011111100001111111110000000000000000000000000000000000000000000
 71 |         ::testing::make_tuple(0x3f0ff4cccccccccd, 0x3f0ff80000000000),
 72 | 
 73 |         // Case that triggers a MANTISSA overflow due to rounding, which
 74 |         // requires the exponent to be changed.
 75 |         // number:     	3.05056571960449225526e-05	0x3efffccccccccccd
 76 |         // 0011111011111111111111001100110011001100110011001100110011001101
 77 |         // recoverd_sf:	3.05175781250000000000e-05	0x3f00000000000000
 78 |         // 0011111100000000000000000000000000000000000000000000000000000000
 79 |         ::testing::make_tuple(0x3efffccccccccccd, 0x3f00000000000000),
 80 | 
 81 |         // Super ugly (and rare) case! A very small sub-normal number requires
 82 |         // the mantissa bits almost to be moved out of the storage range.
 83 |         // Hence, the correct rounding should go either up or down. In that
 84 |         // case, the rounding depends on the hidden 1, not explicitly stored
 85 |         // in the mantissa. (BUGFIX) add the hidden one, perform correct
 86 |         // rounding and go further in the routine. That requires changing the
 87 |         // exponent due to rounding at a later point.
 88 |         // number:     	4.76837158203125026470e-08	0x3e6999999999999a
 89 |         // 0011111001101001100110011001100110011001100110011001100110011010
 90 |         // recoverd_sf:	5.96046447753906250000e-08	0x3e70000000000000
 91 |         // 0011111001110000000000000000000000000000000000000000000000000000
 92 |         ::testing::make_tuple(0x3e6999999999999a, 0x3e70000000000000),
 93 | 
 94 |         // A number smaller than the smallest subnormal, requires a full flush
 95 |         // to 0.
 96 |         // number:     	2.38418579101562513235e-08	0x3e5999999999999a
 97 |         // 0011111001011001100110011001100110011001100110011001100110011010
 98 |         // recoverd_sf:	0.00000000000000000000e+00	0x0000000000000000
 99 |         // 0000000000000000000000000000000000000000000000000000000000000000
100 |         ::testing::make_tuple(0x3e5999999999999a, 0x0000000000000000)));
101 | 
102 | 
103 | INSTANTIATE_TEST_CASE_P(
104 |     // Cases showing failures during a float - half - float brute force test!
105 |     TestWithParameters_manual_sampels_002, MyTest,
106 |     testing::Values(
107 |         // number:     	1.58346726468704329014e-43	0x370c400000000000
108 |         // 0011011100001100010000000000000000000000000000000000000000000000
109 |         // recoverd_sf:	0.00000000000000000000e+00	0x0000000000000000
110 |         // 0000000000000000000000000000000000000000000000000000000000000000
111 |         ::testing::make_tuple(0x370c400000000000, 0x0000000000000000),
112 |         // number:     	4.88279038108885288239e-04	0x3f3ffff680000000
113 |         // 0011111100111111111111111111011010000000000000000000000000000000
114 |         // recoverd_sf:	4.88281250000000000000e-04	0x3f40000000000000
115 |         // 0011111101000000000000000000000000000000000000000000000000000000
116 |         ::testing::make_tuple(0x3f3ffff680000000, 0x3f40000000000000),
117 |         // number:     	6.60882968750000000000e+04	0x40f02284c0000000
118 |         // 0100000011110000001000101000010011000000000000000000000000000000
119 |         // recoverd_sf:	inf	0x7ff0000000000000
120 |         // 0111111111110000000000000000000000000000000000000000000000000000
121 |         ::testing::make_tuple(0x40f02284c0000000, 0x7ff0000000000000),
122 |         // number:     	nan	0x7ff80e1780000000
123 |         // 0111111111111000000011100001011110000000000000000000000000000000
124 |         // recoverd_sf:	nan	0x7ff80c0000000000
125 |         // 0111111111111000000011000000000000000000000000000000000000000000
126 |         ::testing::make_tuple(0x7ff80e1780000000, 0x7ff80c0000000000),
127 |         // number:     	nan	0x7ff80eeeeeeeeeee
128 |         // 0111111111111000000011101110111011101110111011101110111011101110
129 |         // recoverd_sf:	nan	0x7ff80c0000000000
130 |         // 0111111111111000000011000000000000000000000000000000000000000000
131 |         ::testing::make_tuple(0x7ff80eeeeeeeeeee, 0x7ff80c0000000000),
132 |         // number:     	-2.98454239100465201773e-08	0xbe6005ec80000000
133 |         // 1011111001100000000001011110110010000000000000000000000000000000
134 |         // recoverd_sf:	-5.96046447753906250000e-08	0xbe70000000000000
135 |         // 1011111001110000000000000000000000000000000000000000000000000000
136 |         ::testing::make_tuple(0xbe6005ec80000000, 0xbe70000000000000)));
137 | 
138 | 
139 | INSTANTIATE_TEST_CASE_P(
140 |     TestWithParametersC1, MyTest,
141 |     testing::Values(  // the reciprocals 1/1, 1/2, ..., 1/9
142 |         // number:     	1.00000000000000000000e+00	0x3ff0000000000000
143 |         // 0011111111110000000000000000000000000000000000000000000000000000
144 |         // recoverd_sf:	1.00000000000000000000e+00	0x3ff0000000000000
145 |         // 0011111111110000000000000000000000000000000000000000000000000000
146 |         ::testing::make_tuple(0x3ff0000000000000, 0x3ff0000000000000),
147 |         // number:     	5.00000000000000000000e-01	0x3fe0000000000000
148 |         // 0011111111100000000000000000000000000000000000000000000000000000
149 |         // recoverd_sf:	5.00000000000000000000e-01	0x3fe0000000000000
150 |         // 0011111111100000000000000000000000000000000000000000000000000000
151 |         ::testing::make_tuple(0x3fe0000000000000, 0x3fe0000000000000),
152 |         // number:     	3.33333333333333314830e-01	0x3fd5555555555555
153 |         // 0011111111010101010101010101010101010101010101010101010101010101
154 |         // recoverd_sf:	3.33251953125000000000e-01	0x3fd5540000000000
155 |         // 0011111111010101010101000000000000000000000000000000000000000000
156 |         ::testing::make_tuple(0x3fd5555555555555, 0x3fd5540000000000),
157 |         // number:     	2.50000000000000000000e-01	0x3fd0000000000000
158 |         // 0011111111010000000000000000000000000000000000000000000000000000
159 |         // recoverd_sf:	2.50000000000000000000e-01	0x3fd0000000000000
160 |         // 0011111111010000000000000000000000000000000000000000000000000000
161 |         ::testing::make_tuple(0x3fd0000000000000, 0x3fd0000000000000),
162 |         // number:     	2.00000000000000011102e-01	0x3fc999999999999a
163 |         // 0011111111001001100110011001100110011001100110011001100110011010
164 |         // recoverd_sf:	1.99951171875000000000e-01	0x3fc9980000000000
165 |         // 0011111111001001100110000000000000000000000000000000000000000000
166 |         ::testing::make_tuple(0x3fc999999999999a, 0x3fc9980000000000),
167 |         // number:     	1.66666666666666657415e-01	0x3fc5555555555555
168 |         // 0011111111000101010101010101010101010101010101010101010101010101
169 |         // recoverd_sf:	1.66625976562500000000e-01	0x3fc5540000000000
170 |         // 0011111111000101010101000000000000000000000000000000000000000000
171 |         ::testing::make_tuple(0x3fc5555555555555, 0x3fc5540000000000),
172 |         // number:     	1.42857142857142849213e-01	0x3fc2492492492492
173 |         // 0011111111000010010010010010010010010010010010010010010010010010
174 |         // recoverd_sf:	1.42822265625000000000e-01	0x3fc2480000000000
175 |         // 0011111111000010010010000000000000000000000000000000000000000000
176 |         ::testing::make_tuple(0x3fc2492492492492, 0x3fc2480000000000),
177 |         // number:     	1.25000000000000000000e-01	0x3fc0000000000000
178 |         // 0011111111000000000000000000000000000000000000000000000000000000
179 |         // recoverd_sf:	1.25000000000000000000e-01	0x3fc0000000000000
180 |         // 0011111111000000000000000000000000000000000000000000000000000000
181 |         ::testing::make_tuple(0x3fc0000000000000, 0x3fc0000000000000),
182 |         // number:     	1.11111111111111104943e-01	0x3fbc71c71c71c71c
183 |         // 0011111110111100011100011100011100011100011100011100011100011100
184 |         // recoverd_sf:	1.11083984375000000000e-01	0x3fbc700000000000
185 |         // 0011111110111100011100000000000000000000000000000000000000000000
186 |         ::testing::make_tuple(0x3fbc71c71c71c71c, 0x3fbc700000000000)));
187 | 
188 | // Brute force snippets extracted from softlow
189 | INSTANTIATE_TEST_CASE_P(
190 |     TestWithParameters_BF_001, MyTest,
191 |     testing::Values(
192 |         // Brute-force sample range -- start: 0, stop: 50, inc: 1.
193 |         // Every input below is expected to be flushed to +0
194 |         // (expected bit pattern 0x0000000000000000).
195 |         ::testing::make_tuple(0x0000000000000000, 0x0000000000000000),
196 |         ::testing::make_tuple(0x36a0000000000000, 0x0000000000000000),
197 |         ::testing::make_tuple(0x36b0000000000000, 0x0000000000000000),
198 |         ::testing::make_tuple(0x36b8000000000000, 0x0000000000000000),
199 |         ::testing::make_tuple(0x36c0000000000000, 0x0000000000000000),
200 |         ::testing::make_tuple(0x36c4000000000000, 0x0000000000000000),
201 |         ::testing::make_tuple(0x36c8000000000000, 0x0000000000000000),
202 |         ::testing::make_tuple(0x36cc000000000000, 0x0000000000000000),
203 |         ::testing::make_tuple(0x36d0000000000000, 0x0000000000000000),
204 |         ::testing::make_tuple(0x36d2000000000000, 0x0000000000000000),
205 |         ::testing::make_tuple(0x36d4000000000000, 0x0000000000000000),
206 |         ::testing::make_tuple(0x36d6000000000000, 0x0000000000000000),
207 |         ::testing::make_tuple(0x36d8000000000000, 0x0000000000000000),
208 |         ::testing::make_tuple(0x36da000000000000, 0x0000000000000000),
209 |         ::testing::make_tuple(0x36dc000000000000, 0x0000000000000000),
210 |         ::testing::make_tuple(0x36de000000000000, 0x0000000000000000),
211 |         ::testing::make_tuple(0x36e0000000000000, 0x0000000000000000),
212 |         ::testing::make_tuple(0x36e1000000000000, 0x0000000000000000),
213 |         ::testing::make_tuple(0x36e2000000000000, 0x0000000000000000),
214 |         ::testing::make_tuple(0x36e3000000000000, 0x0000000000000000),
215 |         ::testing::make_tuple(0x36e4000000000000, 0x0000000000000000),
216 |         ::testing::make_tuple(0x36e5000000000000, 0x0000000000000000),
217 |         ::testing::make_tuple(0x36e6000000000000, 0x0000000000000000),
218 |         ::testing::make_tuple(0x36e7000000000000, 0x0000000000000000),
219 |         ::testing::make_tuple(0x36e8000000000000, 0x0000000000000000),
220 |         ::testing::make_tuple(0x36e9000000000000, 0x0000000000000000),
221 |         ::testing::make_tuple(0x36ea000000000000, 0x0000000000000000),
222 |         ::testing::make_tuple(0x36eb000000000000, 0x0000000000000000),
223 |         ::testing::make_tuple(0x36ec000000000000, 0x0000000000000000),
224 |         ::testing::make_tuple(0x36ed000000000000, 0x0000000000000000),
225 |         ::testing::make_tuple(0x36ee000000000000, 0x0000000000000000),
226 |         ::testing::make_tuple(0x36ef000000000000, 0x0000000000000000),
227 |         ::testing::make_tuple(0x36f0000000000000, 0x0000000000000000),
228 |         ::testing::make_tuple(0x36f0800000000000, 0x0000000000000000),
229 |         ::testing::make_tuple(0x36f1000000000000, 0x0000000000000000),
230 |         ::testing::make_tuple(0x36f1800000000000, 0x0000000000000000),
231 |         ::testing::make_tuple(0x36f2000000000000, 0x0000000000000000),
232 |         ::testing::make_tuple(0x36f2800000000000, 0x0000000000000000),
233 |         ::testing::make_tuple(0x36f3000000000000, 0x0000000000000000),
234 |         ::testing::make_tuple(0x36f3800000000000, 0x0000000000000000),
235 |         ::testing::make_tuple(0x36f4000000000000, 0x0000000000000000),
236 |         ::testing::make_tuple(0x36f4800000000000, 0x0000000000000000),
237 |         ::testing::make_tuple(0x36f5000000000000, 0x0000000000000000),
238 |         ::testing::make_tuple(0x36f5800000000000, 0x0000000000000000),
239 |         ::testing::make_tuple(0x36f6000000000000, 0x0000000000000000),
240 |         ::testing::make_tuple(0x36f6800000000000, 0x0000000000000000),
241 |         ::testing::make_tuple(0x36f7000000000000, 0x0000000000000000),
242 |         ::testing::make_tuple(0x36f7800000000000, 0x0000000000000000),
243 |         ::testing::make_tuple(0x36f8000000000000, 0x0000000000000000),
244 |         ::testing::make_tuple(0x36f8800000000000, 0x0000000000000000)));
245 | 
246 | INSTANTIATE_TEST_CASE_P(
247 |     TestWithParameters_BF_002, MyTest,
248 |     testing::Values(
249 |         // start: 97495757619  -- sweep parameters for this table; (stop - start) / inc == 50, the number of rows below
250 |         // stop:  97495757669  -- each row holds two 64-bit patterns, presumably (input, expected) for MyTest; TODO confirm
251 |         // inc:   1            -- first element grows by 0x20000000 per row; second element is 0xbe70000000000000 throughout
252 |         ::testing::make_tuple(0xbe66666660000000, 0xbe70000000000000),
253 |         ::testing::make_tuple(0xbe66666680000000, 0xbe70000000000000),
254 |         ::testing::make_tuple(0xbe666666a0000000, 0xbe70000000000000),
255 |         ::testing::make_tuple(0xbe666666c0000000, 0xbe70000000000000),
256 |         ::testing::make_tuple(0xbe666666e0000000, 0xbe70000000000000),
257 |         ::testing::make_tuple(0xbe66666700000000, 0xbe70000000000000),
258 |         ::testing::make_tuple(0xbe66666720000000, 0xbe70000000000000),
259 |         ::testing::make_tuple(0xbe66666740000000, 0xbe70000000000000),
260 |         ::testing::make_tuple(0xbe66666760000000, 0xbe70000000000000),
261 |         ::testing::make_tuple(0xbe66666780000000, 0xbe70000000000000),
262 |         ::testing::make_tuple(0xbe666667a0000000, 0xbe70000000000000),
263 |         ::testing::make_tuple(0xbe666667c0000000, 0xbe70000000000000),
264 |         ::testing::make_tuple(0xbe666667e0000000, 0xbe70000000000000),
265 |         ::testing::make_tuple(0xbe66666800000000, 0xbe70000000000000),
266 |         ::testing::make_tuple(0xbe66666820000000, 0xbe70000000000000),
267 |         ::testing::make_tuple(0xbe66666840000000, 0xbe70000000000000),
268 |         ::testing::make_tuple(0xbe66666860000000, 0xbe70000000000000),
269 |         ::testing::make_tuple(0xbe66666880000000, 0xbe70000000000000),
270 |         ::testing::make_tuple(0xbe666668a0000000, 0xbe70000000000000),
271 |         ::testing::make_tuple(0xbe666668c0000000, 0xbe70000000000000),
272 |         ::testing::make_tuple(0xbe666668e0000000, 0xbe70000000000000),
273 |         ::testing::make_tuple(0xbe66666900000000, 0xbe70000000000000),
274 |         ::testing::make_tuple(0xbe66666920000000, 0xbe70000000000000),
275 |         ::testing::make_tuple(0xbe66666940000000, 0xbe70000000000000),
276 |         ::testing::make_tuple(0xbe66666960000000, 0xbe70000000000000),
277 |         ::testing::make_tuple(0xbe66666980000000, 0xbe70000000000000),
278 |         ::testing::make_tuple(0xbe666669a0000000, 0xbe70000000000000),
279 |         ::testing::make_tuple(0xbe666669c0000000, 0xbe70000000000000),
280 |         ::testing::make_tuple(0xbe666669e0000000, 0xbe70000000000000),
281 |         ::testing::make_tuple(0xbe66666a00000000, 0xbe70000000000000),
282 |         ::testing::make_tuple(0xbe66666a20000000, 0xbe70000000000000),
283 |         ::testing::make_tuple(0xbe66666a40000000, 0xbe70000000000000),
284 |         ::testing::make_tuple(0xbe66666a60000000, 0xbe70000000000000),
285 |         ::testing::make_tuple(0xbe66666a80000000, 0xbe70000000000000),
286 |         ::testing::make_tuple(0xbe66666aa0000000, 0xbe70000000000000),
287 |         ::testing::make_tuple(0xbe66666ac0000000, 0xbe70000000000000),
288 |         ::testing::make_tuple(0xbe66666ae0000000, 0xbe70000000000000),
289 |         ::testing::make_tuple(0xbe66666b00000000, 0xbe70000000000000),
290 |         ::testing::make_tuple(0xbe66666b20000000, 0xbe70000000000000),
291 |         ::testing::make_tuple(0xbe66666b40000000, 0xbe70000000000000),
292 |         ::testing::make_tuple(0xbe66666b60000000, 0xbe70000000000000),
293 |         ::testing::make_tuple(0xbe66666b80000000, 0xbe70000000000000),
294 |         ::testing::make_tuple(0xbe66666ba0000000, 0xbe70000000000000),
295 |         ::testing::make_tuple(0xbe66666bc0000000, 0xbe70000000000000),
296 |         ::testing::make_tuple(0xbe66666be0000000, 0xbe70000000000000),
297 |         ::testing::make_tuple(0xbe66666c00000000, 0xbe70000000000000),
298 |         ::testing::make_tuple(0xbe66666c20000000, 0xbe70000000000000),
299 |         ::testing::make_tuple(0xbe66666c40000000, 0xbe70000000000000),
300 |         ::testing::make_tuple(0xbe66666c60000000, 0xbe70000000000000),
301 |         ::testing::make_tuple(0xbe66666c80000000, 0xbe70000000000000)));
302 | 
303 | INSTANTIATE_TEST_CASE_P(
304 |     TestWithParameters_BF_003, MyTest,
305 |     testing::Values(
306 |         // start: 214318868070  -- sweep parameters for this table; (stop - start) / inc == 50, the number of rows below
307 |         // stop:  214318868120  -- each row holds two 64-bit patterns, presumably (input, expected) for MyTest; TODO confirm
308 |         // inc:   1             -- first element grows by 0x20000000 per row; second element is 0xfff0000000000000 throughout
309 |         ::testing::make_tuple(0xc4ccccccc0000000, 0xfff0000000000000),
310 |         ::testing::make_tuple(0xc4cccccce0000000, 0xfff0000000000000),
311 |         ::testing::make_tuple(0xc4cccccd00000000, 0xfff0000000000000),
312 |         ::testing::make_tuple(0xc4cccccd20000000, 0xfff0000000000000),
313 |         ::testing::make_tuple(0xc4cccccd40000000, 0xfff0000000000000),
314 |         ::testing::make_tuple(0xc4cccccd60000000, 0xfff0000000000000),
315 |         ::testing::make_tuple(0xc4cccccd80000000, 0xfff0000000000000),
316 |         ::testing::make_tuple(0xc4cccccda0000000, 0xfff0000000000000),
317 |         ::testing::make_tuple(0xc4cccccdc0000000, 0xfff0000000000000),
318 |         ::testing::make_tuple(0xc4cccccde0000000, 0xfff0000000000000),
319 |         ::testing::make_tuple(0xc4ccccce00000000, 0xfff0000000000000),
320 |         ::testing::make_tuple(0xc4ccccce20000000, 0xfff0000000000000),
321 |         ::testing::make_tuple(0xc4ccccce40000000, 0xfff0000000000000),
322 |         ::testing::make_tuple(0xc4ccccce60000000, 0xfff0000000000000),
323 |         ::testing::make_tuple(0xc4ccccce80000000, 0xfff0000000000000),
324 |         ::testing::make_tuple(0xc4cccccea0000000, 0xfff0000000000000),
325 |         ::testing::make_tuple(0xc4cccccec0000000, 0xfff0000000000000),
326 |         ::testing::make_tuple(0xc4cccccee0000000, 0xfff0000000000000),
327 |         ::testing::make_tuple(0xc4cccccf00000000, 0xfff0000000000000),
328 |         ::testing::make_tuple(0xc4cccccf20000000, 0xfff0000000000000),
329 |         ::testing::make_tuple(0xc4cccccf40000000, 0xfff0000000000000),
330 |         ::testing::make_tuple(0xc4cccccf60000000, 0xfff0000000000000),
331 |         ::testing::make_tuple(0xc4cccccf80000000, 0xfff0000000000000),
332 |         ::testing::make_tuple(0xc4cccccfa0000000, 0xfff0000000000000),
333 |         ::testing::make_tuple(0xc4cccccfc0000000, 0xfff0000000000000),
334 |         ::testing::make_tuple(0xc4cccccfe0000000, 0xfff0000000000000),
335 |         ::testing::make_tuple(0xc4ccccd000000000, 0xfff0000000000000),
336 |         ::testing::make_tuple(0xc4ccccd020000000, 0xfff0000000000000),
337 |         ::testing::make_tuple(0xc4ccccd040000000, 0xfff0000000000000),
338 |         ::testing::make_tuple(0xc4ccccd060000000, 0xfff0000000000000),
339 |         ::testing::make_tuple(0xc4ccccd080000000, 0xfff0000000000000),
340 |         ::testing::make_tuple(0xc4ccccd0a0000000, 0xfff0000000000000),
341 |         ::testing::make_tuple(0xc4ccccd0c0000000, 0xfff0000000000000),
342 |         ::testing::make_tuple(0xc4ccccd0e0000000, 0xfff0000000000000),
343 |         ::testing::make_tuple(0xc4ccccd100000000, 0xfff0000000000000),
344 |         ::testing::make_tuple(0xc4ccccd120000000, 0xfff0000000000000),
345 |         ::testing::make_tuple(0xc4ccccd140000000, 0xfff0000000000000),
346 |         ::testing::make_tuple(0xc4ccccd160000000, 0xfff0000000000000),
347 |         ::testing::make_tuple(0xc4ccccd180000000, 0xfff0000000000000),
348 |         ::testing::make_tuple(0xc4ccccd1a0000000, 0xfff0000000000000),
349 |         ::testing::make_tuple(0xc4ccccd1c0000000, 0xfff0000000000000),
350 |         ::testing::make_tuple(0xc4ccccd1e0000000, 0xfff0000000000000),
351 |         ::testing::make_tuple(0xc4ccccd200000000, 0xfff0000000000000),
352 |         ::testing::make_tuple(0xc4ccccd220000000, 0xfff0000000000000),
353 |         ::testing::make_tuple(0xc4ccccd240000000, 0xfff0000000000000),
354 |         ::testing::make_tuple(0xc4ccccd260000000, 0xfff0000000000000),
355 |         ::testing::make_tuple(0xc4ccccd280000000, 0xfff0000000000000),
356 |         ::testing::make_tuple(0xc4ccccd2a0000000, 0xfff0000000000000),
357 |         ::testing::make_tuple(0xc4ccccd2c0000000, 0xfff0000000000000),
358 |         ::testing::make_tuple(0xc4ccccd2e0000000, 0xfff0000000000000)));
359 | 
360 | INSTANTIATE_TEST_CASE_P(
361 |     TestWithParameters_BF_004, MyTest,
362 |     testing::Values(
363 |         // start: 429492434632  -- sweep parameters for this table; (stop - start) / inc == 50, the number of rows below
364 |         // stop:  429492434682  -- each row holds two 64-bit patterns, presumably (input, expected) for MyTest; TODO confirm
365 |         // inc:   1             -- first element grows by 0x20000000 per row; second element is 0xffffcc0000000000 throughout
366 |         ::testing::make_tuple(0xffffced900000000, 0xffffcc0000000000),
367 |         ::testing::make_tuple(0xffffced920000000, 0xffffcc0000000000),
368 |         ::testing::make_tuple(0xffffced940000000, 0xffffcc0000000000),
369 |         ::testing::make_tuple(0xffffced960000000, 0xffffcc0000000000),
370 |         ::testing::make_tuple(0xffffced980000000, 0xffffcc0000000000),
371 |         ::testing::make_tuple(0xffffced9a0000000, 0xffffcc0000000000),
372 |         ::testing::make_tuple(0xffffced9c0000000, 0xffffcc0000000000),
373 |         ::testing::make_tuple(0xffffced9e0000000, 0xffffcc0000000000),
374 |         ::testing::make_tuple(0xffffceda00000000, 0xffffcc0000000000),
375 |         ::testing::make_tuple(0xffffceda20000000, 0xffffcc0000000000),
376 |         ::testing::make_tuple(0xffffceda40000000, 0xffffcc0000000000),
377 |         ::testing::make_tuple(0xffffceda60000000, 0xffffcc0000000000),
378 |         ::testing::make_tuple(0xffffceda80000000, 0xffffcc0000000000),
379 |         ::testing::make_tuple(0xffffcedaa0000000, 0xffffcc0000000000),
380 |         ::testing::make_tuple(0xffffcedac0000000, 0xffffcc0000000000),
381 |         ::testing::make_tuple(0xffffcedae0000000, 0xffffcc0000000000),
382 |         ::testing::make_tuple(0xffffcedb00000000, 0xffffcc0000000000),
383 |         ::testing::make_tuple(0xffffcedb20000000, 0xffffcc0000000000),
384 |         ::testing::make_tuple(0xffffcedb40000000, 0xffffcc0000000000),
385 |         ::testing::make_tuple(0xffffcedb60000000, 0xffffcc0000000000),
386 |         ::testing::make_tuple(0xffffcedb80000000, 0xffffcc0000000000),
387 |         ::testing::make_tuple(0xffffcedba0000000, 0xffffcc0000000000),
388 |         ::testing::make_tuple(0xffffcedbc0000000, 0xffffcc0000000000),
389 |         ::testing::make_tuple(0xffffcedbe0000000, 0xffffcc0000000000),
390 |         ::testing::make_tuple(0xffffcedc00000000, 0xffffcc0000000000),
391 |         ::testing::make_tuple(0xffffcedc20000000, 0xffffcc0000000000),
392 |         ::testing::make_tuple(0xffffcedc40000000, 0xffffcc0000000000),
393 |         ::testing::make_tuple(0xffffcedc60000000, 0xffffcc0000000000),
394 |         ::testing::make_tuple(0xffffcedc80000000, 0xffffcc0000000000),
395 |         ::testing::make_tuple(0xffffcedca0000000, 0xffffcc0000000000),
396 |         ::testing::make_tuple(0xffffcedcc0000000, 0xffffcc0000000000),
397 |         ::testing::make_tuple(0xffffcedce0000000, 0xffffcc0000000000),
398 |         ::testing::make_tuple(0xffffcedd00000000, 0xffffcc0000000000),
399 |         ::testing::make_tuple(0xffffcedd20000000, 0xffffcc0000000000),
400 |         ::testing::make_tuple(0xffffcedd40000000, 0xffffcc0000000000),
401 |         ::testing::make_tuple(0xffffcedd60000000, 0xffffcc0000000000),
402 |         ::testing::make_tuple(0xffffcedd80000000, 0xffffcc0000000000),
403 |         ::testing::make_tuple(0xffffcedda0000000, 0xffffcc0000000000),
404 |         ::testing::make_tuple(0xffffceddc0000000, 0xffffcc0000000000),
405 |         ::testing::make_tuple(0xffffcedde0000000, 0xffffcc0000000000),
406 |         ::testing::make_tuple(0xffffcede00000000, 0xffffcc0000000000),
407 |         ::testing::make_tuple(0xffffcede20000000, 0xffffcc0000000000),
408 |         ::testing::make_tuple(0xffffcede40000000, 0xffffcc0000000000),
409 |         ::testing::make_tuple(0xffffcede60000000, 0xffffcc0000000000),
410 |         ::testing::make_tuple(0xffffcede80000000, 0xffffcc0000000000),
411 |         ::testing::make_tuple(0xffffcedea0000000, 0xffffcc0000000000),
412 |         ::testing::make_tuple(0xffffcedec0000000, 0xffffcc0000000000),
413 |         ::testing::make_tuple(0xffffcedee0000000, 0xffffcc0000000000),
414 |         ::testing::make_tuple(0xffffcedf00000000, 0xffffcc0000000000),
415 |         ::testing::make_tuple(0xffffcedf20000000, 0xffffcc0000000000)));
416 | 
417 | 
418 | INSTANTIATE_TEST_CASE_P(
419 |     TestWithParameters_BF_005, MyTest,
420 |     testing::Values(
421 |         // start: 97495757619  -- sweep parameters for this table; (stop - start) / inc == 50, the number of rows below
422 |         // stop:  97548186419  -- each row holds two 64-bit patterns, presumably (input, expected) for MyTest; TODO confirm
423 |         // inc:   1048576      -- first element grows by 0x0002000000000000 per row; the second element changes along the sweep
424 |         ::testing::make_tuple(0xbe66666660000000, 0xbe70000000000000),
425 |         ::testing::make_tuple(0xbe68666660000000, 0xbe70000000000000),
426 |         ::testing::make_tuple(0xbe6a666660000000, 0xbe70000000000000),
427 |         ::testing::make_tuple(0xbe6c666660000000, 0xbe70000000000000),
428 |         ::testing::make_tuple(0xbe6e666660000000, 0xbe70000000000000),
429 |         ::testing::make_tuple(0xbe70666660000000, 0xbe70000000000000),
430 |         ::testing::make_tuple(0xbe72666660000000, 0xbe70000000000000),
431 |         ::testing::make_tuple(0xbe74666660000000, 0xbe70000000000000),
432 |         ::testing::make_tuple(0xbe76666660000000, 0xbe70000000000000),
433 |         ::testing::make_tuple(0xbe78666660000000, 0xbe80000000000000),
434 |         ::testing::make_tuple(0xbe7a666660000000, 0xbe80000000000000),
435 |         ::testing::make_tuple(0xbe7c666660000000, 0xbe80000000000000),
436 |         ::testing::make_tuple(0xbe7e666660000000, 0xbe80000000000000),
437 |         ::testing::make_tuple(0xbe80666660000000, 0xbe80000000000000),
438 |         ::testing::make_tuple(0xbe82666660000000, 0xbe80000000000000),
439 |         ::testing::make_tuple(0xbe84666660000000, 0xbe88000000000000),
440 |         ::testing::make_tuple(0xbe86666660000000, 0xbe88000000000000),
441 |         ::testing::make_tuple(0xbe88666660000000, 0xbe88000000000000),
442 |         ::testing::make_tuple(0xbe8a666660000000, 0xbe88000000000000),
443 |         ::testing::make_tuple(0xbe8c666660000000, 0xbe90000000000000),
444 |         ::testing::make_tuple(0xbe8e666660000000, 0xbe90000000000000),
445 |         ::testing::make_tuple(0xbe90666660000000, 0xbe90000000000000),
446 |         ::testing::make_tuple(0xbe92666660000000, 0xbe94000000000000),
447 |         ::testing::make_tuple(0xbe94666660000000, 0xbe94000000000000),
448 |         ::testing::make_tuple(0xbe96666660000000, 0xbe98000000000000),
449 |         ::testing::make_tuple(0xbe98666660000000, 0xbe98000000000000),
450 |         ::testing::make_tuple(0xbe9a666660000000, 0xbe9c000000000000),
451 |         ::testing::make_tuple(0xbe9c666660000000, 0xbe9c000000000000),
452 |         ::testing::make_tuple(0xbe9e666660000000, 0xbea0000000000000),
453 |         ::testing::make_tuple(0xbea0666660000000, 0xbea0000000000000),
454 |         ::testing::make_tuple(0xbea2666660000000, 0xbea2000000000000),
455 |         ::testing::make_tuple(0xbea4666660000000, 0xbea4000000000000),
456 |         ::testing::make_tuple(0xbea6666660000000, 0xbea6000000000000),
457 |         ::testing::make_tuple(0xbea8666660000000, 0xbea8000000000000),
458 |         ::testing::make_tuple(0xbeaa666660000000, 0xbeaa000000000000),
459 |         ::testing::make_tuple(0xbeac666660000000, 0xbeac000000000000),
460 |         ::testing::make_tuple(0xbeae666660000000, 0xbeae000000000000),
461 |         ::testing::make_tuple(0xbeb0666660000000, 0xbeb0000000000000),
462 |         ::testing::make_tuple(0xbeb2666660000000, 0xbeb2000000000000),
463 |         ::testing::make_tuple(0xbeb4666660000000, 0xbeb4000000000000),
464 |         ::testing::make_tuple(0xbeb6666660000000, 0xbeb6000000000000),
465 |         ::testing::make_tuple(0xbeb8666660000000, 0xbeb8000000000000),
466 |         ::testing::make_tuple(0xbeba666660000000, 0xbeba000000000000),
467 |         ::testing::make_tuple(0xbebc666660000000, 0xbebc000000000000),
468 |         ::testing::make_tuple(0xbebe666660000000, 0xbebe000000000000),
469 |         ::testing::make_tuple(0xbec0666660000000, 0xbec0800000000000),
470 |         ::testing::make_tuple(0xbec2666660000000, 0xbec2800000000000),
471 |         ::testing::make_tuple(0xbec4666660000000, 0xbec4800000000000),
472 |         ::testing::make_tuple(0xbec6666660000000, 0xbec6800000000000),
473 |         ::testing::make_tuple(0xbec8666660000000, 0xbec8800000000000)));
474 | 


--------------------------------------------------------------------------------
/testx/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Build <test_name>.cpp, link it to floatx and gtest_main, and register it with CTest.
 2 | function(create_test test_name)
 3 |     file(RELATIVE_PATH test_subdir ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
 4 |     add_executable(${test_name} ${test_name}.cpp)
 5 |     target_link_libraries(${test_name} PRIVATE floatx gtest_main)
 6 |     add_test(NAME ${test_subdir}/${test_name} COMMAND ${test_name})  # name = <dir relative to build root>/<test_name>
 7 | endfunction()
 8 | 
 9 | # One test executable per source file: add_000, sub_000, mul_000, div_000.
10 | foreach(operation add sub mul div)
11 |     create_test(${operation}_000)
12 | endforeach()
13 | 


--------------------------------------------------------------------------------
/third_party/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Helper script that defines load_git_package(), used by the subdirectories below.
 2 | include(${CMAKE_CURRENT_LIST_DIR}/package_loader.cmake)
 3 | 
 4 | # googletest is only added when BUILD_TESTS is enabled.
 5 | if(BUILD_TESTS)
 6 |     add_subdirectory(gtest)
 7 | endif()
 8 | 
 9 | # git-cmake-format is only added when DEVEL_TOOLS is enabled.
10 | if(DEVEL_TOOLS)
11 |     add_subdirectory(git-cmake-format)
12 | endif()
13 | 


--------------------------------------------------------------------------------
/third_party/DownloadCMakeLists.txt.in:
--------------------------------------------------------------------------------
 1 | # Template for a small stand-alone CMake project whose only job is to check
 2 | # out a git repository. It is instantiated with configure_file(), which fills
 3 | # in the package_name, package_url and package_tag placeholders as well as
 4 | # the binary directory of the directory that requested the package.
 5 | #
 6 | # The floor matches the parent project (3.1). The upper bound opts in to newer
 7 | # policies so that CMake releases which dropped compatibility with versions
 8 | # older than 3.5 still accept this file.
 9 | cmake_minimum_required(VERSION 3.1...3.10)
10 | 
11 | # Nothing is compiled here, so enable no languages and skip compiler detection.
12 | project(${package_name}-download NONE)
13 | 
14 | include(ExternalProject)
15 | # Download only: the configure, build, install and test steps are all empty.
16 | # The checkout lands in src/; build/ is reserved as its binary directory.
17 | ExternalProject_Add(${package_name}
18 |     GIT_REPOSITORY    "${package_url}"
19 |     GIT_TAG           "${package_tag}"
20 |     SOURCE_DIR        "${CMAKE_CURRENT_BINARY_DIR}/src"
21 |     BINARY_DIR        "${CMAKE_CURRENT_BINARY_DIR}/build"
22 |     CONFIGURE_COMMAND ""
23 |     BUILD_COMMAND     ""
24 |     INSTALL_COMMAND   ""
25 |     TEST_COMMAND      ""
26 | )
27 | 


--------------------------------------------------------------------------------
/third_party/git-cmake-format/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | load_git_package(git-cmake-format  # fetched while configuring, then added to the build
2 |     "https://github.com/kbenzie/git-cmake-format.git"
3 |     "master")  # NOTE(review): moving ref; consider pinning a tag or commit
4 | 
5 | 


--------------------------------------------------------------------------------
/third_party/gtest/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Prevent overriding the parent project's compiler/linker
2 | # settings on Windows
3 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
4 | 
5 | # Pin a release tag instead of the moving "master" ref (upstream now develops on "main").
6 | # NOTE(review): 1.8.1 chosen because the tests use INSTANTIATE_TEST_CASE_P and ::testing::make_tuple; confirm before upgrading.
7 | load_git_package(gtest
8 |     "https://github.com/google/googletest.git"
9 |     "release-1.8.1")


--------------------------------------------------------------------------------
/third_party/package_loader.cmake:
--------------------------------------------------------------------------------
 1 | # Template from which load_git_package() generates each download project.
 2 | set(PACKAGE_DOWNLOADER_SCRIPT
 3 |     "${CMAKE_CURRENT_LIST_DIR}/DownloadCMakeLists.txt.in")
 4 | 
 5 | # load_git_package(<package_name> <package_url> <package_tag>)
 6 | #
 7 | # Fetches a git repository while the outer project is being configured and
 8 | # adds it to the build. Everything lives under the caller's
 9 | # CMAKE_CURRENT_BINARY_DIR:
10 | #   download/  generated download project (from PACKAGE_DOWNLOADER_SCRIPT)
11 | #   src/       directory handed to add_subdirectory() as the package sources
12 | #   build/     binary directory for the added package
13 | # Stops configuration with FATAL_ERROR when either nested CMake run fails.
14 | function(load_git_package package_name package_url package_tag)
15 |     set(package_download_dir ${CMAKE_CURRENT_BINARY_DIR}/download)
16 | 
17 |     # The template reads package_name, package_url and package_tag.
18 |     configure_file(${PACKAGE_DOWNLOADER_SCRIPT}
19 |                    ${package_download_dir}/CMakeLists.txt)
20 | 
21 |     # Configure the download project with the outer build's generator.
22 |     execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
23 |       WORKING_DIRECTORY ${package_download_dir}
24 |       RESULT_VARIABLE result)
25 |     if(result)
26 |       message(FATAL_ERROR
27 |         "CMake step for ${package_name}/download failed: ${result}")
28 |     endif()
29 | 
30 |     # Build the download project (presumably the step that performs the checkout).
31 |     execute_process(COMMAND ${CMAKE_COMMAND} --build .
32 |       WORKING_DIRECTORY ${package_download_dir}
33 |       RESULT_VARIABLE result)
34 |     if(result)
35 |       message(FATAL_ERROR
36 |         "Build step for ${package_name}/download failed: ${result}")
37 |     endif()
38 | 
39 |     # Add the src/ tree to the current build, using build/ as its binary dir.
40 |     add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/src
41 |                      ${CMAKE_CURRENT_BINARY_DIR}/build)
42 | endfunction()
43 | 


--------------------------------------------------------------------------------