├── .github └── workflows │ ├── deploy_docs.yml │ ├── run_c_tests.yml │ └── run_octave_tests.yml ├── .gitmodules ├── LICENSE.md ├── Makefile ├── README.md ├── docs ├── Doxyfile-project └── source │ ├── conf.py │ ├── index.rst │ └── readme.rst ├── examples └── example_manuscript.c ├── license.spdx ├── mex ├── cpfloat.c ├── cpfloat.m ├── cpfloat_autotune.m ├── cpfloat_compile.m └── cpfloat_compile_nomake.m ├── src ├── cpfloat_autotune.c ├── cpfloat_binary32.h ├── cpfloat_binary64.h ├── cpfloat_definitions.h ├── cpfloat_docmacros.h ├── cpfloat_template.h ├── cpfloat_threshold_binary32.h └── cpfloat_threshold_binary64.h ├── test ├── cpfloat_test.m └── cpfloat_test.ts └── util └── generate_spdx.sh /.github/workflows/deploy_docs.yml: -------------------------------------------------------------------------------- 1 | name: documentation 2 | on: 3 | push: 4 | jobs: 5 | build-and-deploy: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/setup-python@v5 9 | - uses: actions/checkout@v4 10 | # Install the documentation toolchain from apt, then build the docs with `make docs`. 11 | - name: Install and Build 12 | run: | 13 | sudo apt update && sudo apt install -y doxygen graphviz python3-sphinx python3-breathe python3-sphinx-rtd-theme python3-exhale python3-myst-parser 14 | make docs 15 | 16 | - name: Deploy 17 | uses: JamesIves/github-pages-deploy-action@v4 18 | with: 19 | branch: gh-pages # The branch the action should deploy to.
20 | folder: docs/html -------------------------------------------------------------------------------- /.github/workflows/run_c_tests.yml: -------------------------------------------------------------------------------- 1 | name: run-c-tests 2 | on: 3 | push: 4 | jobs: 5 | build-and-run-c-tests: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Check out repository 9 | uses: actions/checkout@v4 10 | - name: Install check library 11 | run: | 12 | sudo apt update 13 | sudo apt install -y check 14 | - name: Build and run C tests 15 | run: make ctest 16 | - name: Build and run library tests 17 | run: make libtest -------------------------------------------------------------------------------- /.github/workflows/run_octave_tests.yml: -------------------------------------------------------------------------------- 1 | name: run-octave-tests 2 | on: 3 | push: 4 | jobs: 5 | build-and-run-octave-tests: 6 | runs-on: ubuntu-22.04 7 | steps: 8 | - name: Check out repository 9 | uses: actions/checkout@v4 10 | - name: Install Octave and corresponding packages and libraries 11 | run: | 12 | sudo apt update 13 | sudo apt install -y octave-parallel liboctave-dev 14 | curl -fL "https://master.dl.sourceforge.net/project/octave/Octave%20Forge%20Packages/Individual%20Package%20Releases/fenv-0.1.0.tar.gz?viasf=1" -o fenv.tar.gz 15 | octave --eval "pkg install fenv.tar.gz" 16 | - name: Build MEX interface for Octave 17 | run: make mexoct 18 | - name: Run tests for MEX interface in Octave 19 | run: make otest -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "include/pcg-c"] 2 | path = deps/pcg-c 3 | url = https://github.com/imneme/pcg-c.git 4 | [submodule "include/float_params"] 5 | path = deps/float_params 6 | url = https://github.com/higham/float_params.git 7 | -------------------------------------------------------------------------------- 
/LICENSE.md: -------------------------------------------------------------------------------- 1 | GNU Lesser General Public License 2 | ================================= 3 | 4 | _Version 2.1, February 1999_ 5 | _Copyright © 1991, 1999 Free Software Foundation, Inc._ 6 | _51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA_ 7 | 8 | Everyone is permitted to copy and distribute verbatim copies 9 | of this license document, but changing it is not allowed. 10 | 11 | _This is the first released version of the Lesser GPL. It also counts 12 | as the successor of the GNU Library Public License, version 2, hence 13 | the version number 2.1._ 14 | 15 | ### Preamble 16 | 17 | The licenses for most software are designed to take away your 18 | freedom to share and change it. By contrast, the GNU General Public 19 | Licenses are intended to guarantee your freedom to share and change 20 | free software--to make sure the software is free for all its users. 21 | 22 | This license, the Lesser General Public License, applies to some 23 | specially designated software packages--typically libraries--of the 24 | Free Software Foundation and other authors who decide to use it. You 25 | can use it too, but we suggest you first think carefully about whether 26 | this license or the ordinary General Public License is the better 27 | strategy to use in any particular case, based on the explanations below. 28 | 29 | When we speak of free software, we are referring to freedom of use, 30 | not price. Our General Public Licenses are designed to make sure that 31 | you have the freedom to distribute copies of free software (and charge 32 | for this service if you wish); that you receive source code or can get 33 | it if you want it; that you can change the software and use pieces of 34 | it in new free programs; and that you are informed that you can do 35 | these things. 
36 | 37 | To protect your rights, we need to make restrictions that forbid 38 | distributors to deny you these rights or to ask you to surrender these 39 | rights. These restrictions translate to certain responsibilities for 40 | you if you distribute copies of the library or if you modify it. 41 | 42 | For example, if you distribute copies of the library, whether gratis 43 | or for a fee, you must give the recipients all the rights that we gave 44 | you. You must make sure that they, too, receive or can get the source 45 | code. If you link other code with the library, you must provide 46 | complete object files to the recipients, so that they can relink them 47 | with the library after making changes to the library and recompiling 48 | it. And you must show them these terms so they know their rights. 49 | 50 | We protect your rights with a two-step method: **(1)** we copyright the 51 | library, and **(2)** we offer you this license, which gives you legal 52 | permission to copy, distribute and/or modify the library. 53 | 54 | To protect each distributor, we want to make it very clear that 55 | there is no warranty for the free library. Also, if the library is 56 | modified by someone else and passed on, the recipients should know 57 | that what they have is not the original version, so that the original 58 | author's reputation will not be affected by problems that might be 59 | introduced by others. 60 | 61 | Finally, software patents pose a constant threat to the existence of 62 | any free program. We wish to make sure that a company cannot 63 | effectively restrict the users of a free program by obtaining a 64 | restrictive license from a patent holder. Therefore, we insist that 65 | any patent license obtained for a version of the library must be 66 | consistent with the full freedom of use specified in this license. 67 | 68 | Most GNU software, including some libraries, is covered by the 69 | ordinary GNU General Public License. 
This license, the GNU Lesser 70 | General Public License, applies to certain designated libraries, and 71 | is quite different from the ordinary General Public License. We use 72 | this license for certain libraries in order to permit linking those 73 | libraries into non-free programs. 74 | 75 | When a program is linked with a library, whether statically or using 76 | a shared library, the combination of the two is legally speaking a 77 | combined work, a derivative of the original library. The ordinary 78 | General Public License therefore permits such linking only if the 79 | entire combination fits its criteria of freedom. The Lesser General 80 | Public License permits more lax criteria for linking other code with 81 | the library. 82 | 83 | We call this license the “Lesser” General Public License because it 84 | does Less to protect the user's freedom than the ordinary General 85 | Public License. It also provides other free software developers Less 86 | of an advantage over competing non-free programs. These disadvantages 87 | are the reason we use the ordinary General Public License for many 88 | libraries. However, the Lesser license provides advantages in certain 89 | special circumstances. 90 | 91 | For example, on rare occasions, there may be a special need to 92 | encourage the widest possible use of a certain library, so that it becomes 93 | a de-facto standard. To achieve this, non-free programs must be 94 | allowed to use the library. A more frequent case is that a free 95 | library does the same job as widely used non-free libraries. In this 96 | case, there is little to gain by limiting the free library to free 97 | software only, so we use the Lesser General Public License. 98 | 99 | In other cases, permission to use a particular library in non-free 100 | programs enables a greater number of people to use a large body of 101 | free software. 
For example, permission to use the GNU C Library in 102 | non-free programs enables many more people to use the whole GNU 103 | operating system, as well as its variant, the GNU/Linux operating 104 | system. 105 | 106 | Although the Lesser General Public License is Less protective of the 107 | users' freedom, it does ensure that the user of a program that is 108 | linked with the Library has the freedom and the wherewithal to run 109 | that program using a modified version of the Library. 110 | 111 | The precise terms and conditions for copying, distribution and 112 | modification follow. Pay close attention to the difference between a 113 | “work based on the library” and a “work that uses the library”. The 114 | former contains code derived from the library, whereas the latter must 115 | be combined with the library in order to run. 116 | 117 | ### TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 118 | 119 | **0.** This License Agreement applies to any software library or other 120 | program which contains a notice placed by the copyright holder or 121 | other authorized party saying it may be distributed under the terms of 122 | this Lesser General Public License (also called “this License”). 123 | Each licensee is addressed as “you”. 124 | 125 | A “library” means a collection of software functions and/or data 126 | prepared so as to be conveniently linked with application programs 127 | (which use some of those functions and data) to form executables. 128 | 129 | The “Library”, below, refers to any such software library or work 130 | which has been distributed under these terms. A “work based on the 131 | Library” means either the Library or any derivative work under 132 | copyright law: that is to say, a work containing the Library or a 133 | portion of it, either verbatim or with modifications and/or translated 134 | straightforwardly into another language. 
(Hereinafter, translation is 135 | included without limitation in the term “modification”.) 136 | 137 | “Source code” for a work means the preferred form of the work for 138 | making modifications to it. For a library, complete source code means 139 | all the source code for all modules it contains, plus any associated 140 | interface definition files, plus the scripts used to control compilation 141 | and installation of the library. 142 | 143 | Activities other than copying, distribution and modification are not 144 | covered by this License; they are outside its scope. The act of 145 | running a program using the Library is not restricted, and output from 146 | such a program is covered only if its contents constitute a work based 147 | on the Library (independent of the use of the Library in a tool for 148 | writing it). Whether that is true depends on what the Library does 149 | and what the program that uses the Library does. 150 | 151 | **1.** You may copy and distribute verbatim copies of the Library's 152 | complete source code as you receive it, in any medium, provided that 153 | you conspicuously and appropriately publish on each copy an 154 | appropriate copyright notice and disclaimer of warranty; keep intact 155 | all the notices that refer to this License and to the absence of any 156 | warranty; and distribute a copy of this License along with the 157 | Library. 158 | 159 | You may charge a fee for the physical act of transferring a copy, 160 | and you may at your option offer warranty protection in exchange for a 161 | fee. 162 | 163 | **2.** You may modify your copy or copies of the Library or any portion 164 | of it, thus forming a work based on the Library, and copy and 165 | distribute such modifications or work under the terms of Section 1 166 | above, provided that you also meet all of these conditions: 167 | 168 | * **a)** The modified work must itself be a software library. 
169 | * **b)** You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | * **c)** You must cause the whole of the work to be licensed at no 172 | charge to all third parties under the terms of this License. 173 | * **d)** If a facility in the modified Library refers to a function or a 174 | table of data to be supplied by an application program that uses 175 | the facility, other than as an argument passed when the facility 176 | is invoked, then you must make a good faith effort to ensure that, 177 | in the event an application does not supply such function or 178 | table, the facility still operates, and performs whatever part of 179 | its purpose remains meaningful. 180 | (For example, a function in a library to compute square roots has 181 | a purpose that is entirely well-defined independent of the 182 | application. Therefore, Subsection 2d requires that any 183 | application-supplied function or table used by this function must 184 | be optional: if the application does not supply it, the square 185 | root function must still compute square roots.) 186 | 187 | These requirements apply to the modified work as a whole. If 188 | identifiable sections of that work are not derived from the Library, 189 | and can be reasonably considered independent and separate works in 190 | themselves, then this License, and its terms, do not apply to those 191 | sections when you distribute them as separate works. But when you 192 | distribute the same sections as part of a whole which is a work based 193 | on the Library, the distribution of the whole must be on the terms of 194 | this License, whose permissions for other licensees extend to the 195 | entire whole, and thus to each and every part regardless of who wrote 196 | it. 
197 | 198 | Thus, it is not the intent of this section to claim rights or contest 199 | your rights to work written entirely by you; rather, the intent is to 200 | exercise the right to control the distribution of derivative or 201 | collective works based on the Library. 202 | 203 | In addition, mere aggregation of another work not based on the Library 204 | with the Library (or with a work based on the Library) on a volume of 205 | a storage or distribution medium does not bring the other work under 206 | the scope of this License. 207 | 208 | **3.** You may opt to apply the terms of the ordinary GNU General Public 209 | License instead of this License to a given copy of the Library. To do 210 | this, you must alter all the notices that refer to this License, so 211 | that they refer to the ordinary GNU General Public License, version 2, 212 | instead of to this License. (If a newer version than version 2 of the 213 | ordinary GNU General Public License has appeared, then you can specify 214 | that version instead if you wish.) Do not make any other change in 215 | these notices. 216 | 217 | Once this change is made in a given copy, it is irreversible for 218 | that copy, so the ordinary GNU General Public License applies to all 219 | subsequent copies and derivative works made from that copy. 220 | 221 | This option is useful when you wish to copy part of the code of 222 | the Library into a program that is not a library. 223 | 224 | **4.** You may copy and distribute the Library (or a portion or 225 | derivative of it, under Section 2) in object code or executable form 226 | under the terms of Sections 1 and 2 above provided that you accompany 227 | it with the complete corresponding machine-readable source code, which 228 | must be distributed under the terms of Sections 1 and 2 above on a 229 | medium customarily used for software interchange. 
230 | 231 | If distribution of object code is made by offering access to copy 232 | from a designated place, then offering equivalent access to copy the 233 | source code from the same place satisfies the requirement to 234 | distribute the source code, even though third parties are not 235 | compelled to copy the source along with the object code. 236 | 237 | **5.** A program that contains no derivative of any portion of the 238 | Library, but is designed to work with the Library by being compiled or 239 | linked with it, is called a “work that uses the Library”. Such a 240 | work, in isolation, is not a derivative work of the Library, and 241 | therefore falls outside the scope of this License. 242 | 243 | However, linking a “work that uses the Library” with the Library 244 | creates an executable that is a derivative of the Library (because it 245 | contains portions of the Library), rather than a “work that uses the 246 | library”. The executable is therefore covered by this License. 247 | Section 6 states terms for distribution of such executables. 248 | 249 | When a “work that uses the Library” uses material from a header file 250 | that is part of the Library, the object code for the work may be a 251 | derivative work of the Library even though the source code is not. 252 | Whether this is true is especially significant if the work can be 253 | linked without the Library, or if the work is itself a library. The 254 | threshold for this to be true is not precisely defined by law. 255 | 256 | If such an object file uses only numerical parameters, data 257 | structure layouts and accessors, and small macros and small inline 258 | functions (ten lines or less in length), then the use of the object 259 | file is unrestricted, regardless of whether it is legally a derivative 260 | work. (Executables containing this object code plus portions of the 261 | Library will still fall under Section 6.) 
262 | 263 | Otherwise, if the work is a derivative of the Library, you may 264 | distribute the object code for the work under the terms of Section 6. 265 | Any executables containing that work also fall under Section 6, 266 | whether or not they are linked directly with the Library itself. 267 | 268 | **6.** As an exception to the Sections above, you may also combine or 269 | link a “work that uses the Library” with the Library to produce a 270 | work containing portions of the Library, and distribute that work 271 | under terms of your choice, provided that the terms permit 272 | modification of the work for the customer's own use and reverse 273 | engineering for debugging such modifications. 274 | 275 | You must give prominent notice with each copy of the work that the 276 | Library is used in it and that the Library and its use are covered by 277 | this License. You must supply a copy of this License. If the work 278 | during execution displays copyright notices, you must include the 279 | copyright notice for the Library among them, as well as a reference 280 | directing the user to the copy of this License. Also, you must do one 281 | of these things: 282 | 283 | * **a)** Accompany the work with the complete corresponding 284 | machine-readable source code for the Library including whatever 285 | changes were used in the work (which must be distributed under 286 | Sections 1 and 2 above); and, if the work is an executable linked 287 | with the Library, with the complete machine-readable “work that 288 | uses the Library”, as object code and/or source code, so that the 289 | user can modify the Library and then relink to produce a modified 290 | executable containing the modified Library. (It is understood 291 | that the user who changes the contents of definitions files in the 292 | Library will not necessarily be able to recompile the application 293 | to use the modified definitions.) 
294 | * **b)** Use a suitable shared library mechanism for linking with the 295 | Library. A suitable mechanism is one that (1) uses at run time a 296 | copy of the library already present on the user's computer system, 297 | rather than copying library functions into the executable, and (2) 298 | will operate properly with a modified version of the library, if 299 | the user installs one, as long as the modified version is 300 | interface-compatible with the version that the work was made with. 301 | * **c)** Accompany the work with a written offer, valid for at 302 | least three years, to give the same user the materials 303 | specified in Subsection 6a, above, for a charge no more 304 | than the cost of performing this distribution. 305 | * **d)** If distribution of the work is made by offering access to copy 306 | from a designated place, offer equivalent access to copy the above 307 | specified materials from the same place. 308 | * **e)** Verify that the user has already received a copy of these 309 | materials or that you have already sent this user a copy. 310 | 311 | For an executable, the required form of the “work that uses the 312 | Library” must include any data and utility programs needed for 313 | reproducing the executable from it. However, as a special exception, 314 | the materials to be distributed need not include anything that is 315 | normally distributed (in either source or binary form) with the major 316 | components (compiler, kernel, and so on) of the operating system on 317 | which the executable runs, unless that component itself accompanies 318 | the executable. 319 | 320 | It may happen that this requirement contradicts the license 321 | restrictions of other proprietary libraries that do not normally 322 | accompany the operating system. Such a contradiction means you cannot 323 | use both them and the Library together in an executable that you 324 | distribute. 
325 | 326 | **7.** You may place library facilities that are a work based on the 327 | Library side-by-side in a single library together with other library 328 | facilities not covered by this License, and distribute such a combined 329 | library, provided that the separate distribution of the work based on 330 | the Library and of the other library facilities is otherwise 331 | permitted, and provided that you do these two things: 332 | 333 | * **a)** Accompany the combined library with a copy of the same work 334 | based on the Library, uncombined with any other library 335 | facilities. This must be distributed under the terms of the 336 | Sections above. 337 | * **b)** Give prominent notice with the combined library of the fact 338 | that part of it is a work based on the Library, and explaining 339 | where to find the accompanying uncombined form of the same work. 340 | 341 | **8.** You may not copy, modify, sublicense, link with, or distribute 342 | the Library except as expressly provided under this License. Any 343 | attempt otherwise to copy, modify, sublicense, link with, or 344 | distribute the Library is void, and will automatically terminate your 345 | rights under this License. However, parties who have received copies, 346 | or rights, from you under this License will not have their licenses 347 | terminated so long as such parties remain in full compliance. 348 | 349 | **9.** You are not required to accept this License, since you have not 350 | signed it. However, nothing else grants you permission to modify or 351 | distribute the Library or its derivative works. These actions are 352 | prohibited by law if you do not accept this License. Therefore, by 353 | modifying or distributing the Library (or any work based on the 354 | Library), you indicate your acceptance of this License to do so, and 355 | all its terms and conditions for copying, distributing or modifying 356 | the Library or works based on it. 
357 | 358 | **10.** Each time you redistribute the Library (or any work based on the 359 | Library), the recipient automatically receives a license from the 360 | original licensor to copy, distribute, link with or modify the Library 361 | subject to these terms and conditions. You may not impose any further 362 | restrictions on the recipients' exercise of the rights granted herein. 363 | You are not responsible for enforcing compliance by third parties with 364 | this License. 365 | 366 | **11.** If, as a consequence of a court judgment or allegation of patent 367 | infringement or for any other reason (not limited to patent issues), 368 | conditions are imposed on you (whether by court order, agreement or 369 | otherwise) that contradict the conditions of this License, they do not 370 | excuse you from the conditions of this License. If you cannot 371 | distribute so as to satisfy simultaneously your obligations under this 372 | License and any other pertinent obligations, then as a consequence you 373 | may not distribute the Library at all. For example, if a patent 374 | license would not permit royalty-free redistribution of the Library by 375 | all those who receive copies directly or indirectly through you, then 376 | the only way you could satisfy both it and this License would be to 377 | refrain entirely from distribution of the Library. 378 | 379 | If any portion of this section is held invalid or unenforceable under any 380 | particular circumstance, the balance of the section is intended to apply, 381 | and the section as a whole is intended to apply in other circumstances. 382 | 383 | It is not the purpose of this section to induce you to infringe any 384 | patents or other property right claims or to contest validity of any 385 | such claims; this section has the sole purpose of protecting the 386 | integrity of the free software distribution system which is 387 | implemented by public license practices. 
Many people have made 388 | generous contributions to the wide range of software distributed 389 | through that system in reliance on consistent application of that 390 | system; it is up to the author/donor to decide if he or she is willing 391 | to distribute software through any other system and a licensee cannot 392 | impose that choice. 393 | 394 | This section is intended to make thoroughly clear what is believed to 395 | be a consequence of the rest of this License. 396 | 397 | **12.** If the distribution and/or use of the Library is restricted in 398 | certain countries either by patents or by copyrighted interfaces, the 399 | original copyright holder who places the Library under this License may add 400 | an explicit geographical distribution limitation excluding those countries, 401 | so that distribution is permitted only in or among countries not thus 402 | excluded. In such case, this License incorporates the limitation as if 403 | written in the body of this License. 404 | 405 | **13.** The Free Software Foundation may publish revised and/or new 406 | versions of the Lesser General Public License from time to time. 407 | Such new versions will be similar in spirit to the present version, 408 | but may differ in detail to address new problems or concerns. 409 | 410 | Each version is given a distinguishing version number. If the Library 411 | specifies a version number of this License which applies to it and 412 | “any later version”, you have the option of following the terms and 413 | conditions either of that version or of any later version published by 414 | the Free Software Foundation. If the Library does not specify a 415 | license version number, you may choose any version ever published by 416 | the Free Software Foundation. 417 | 418 | **14.** If you wish to incorporate parts of the Library into other free 419 | programs whose distribution conditions are incompatible with these, 420 | write to the author to ask for permission. 
For software which is 421 | copyrighted by the Free Software Foundation, write to the Free 422 | Software Foundation; we sometimes make exceptions for this. Our 423 | decision will be guided by the two goals of preserving the free status 424 | of all derivatives of our free software and of promoting the sharing 425 | and reuse of software generally. 426 | 427 | ### NO WARRANTY 428 | 429 | **15.** BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 430 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 431 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 432 | OTHER PARTIES PROVIDE THE LIBRARY “AS IS” WITHOUT WARRANTY OF ANY 433 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 434 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 435 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 436 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 437 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 438 | 439 | **16.** IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 440 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 441 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 442 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 443 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 444 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 445 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 446 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 447 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 448 | DAMAGES. 
449 | 450 | _END OF TERMS AND CONDITIONS_ 451 | 452 | ### How to Apply These Terms to Your New Libraries 453 | 454 | If you develop a new library, and you want it to be of the greatest 455 | possible use to the public, we recommend making it free software that 456 | everyone can redistribute and change. You can do so by permitting 457 | redistribution under these terms (or, alternatively, under the terms of the 458 | ordinary General Public License). 459 | 460 | To apply these terms, attach the following notices to the library. It is 461 | safest to attach them to the start of each source file to most effectively 462 | convey the exclusion of warranty; and each file should have at least the 463 | “copyright” line and a pointer to where the full notice is found. 464 | 465 | <one line to give the library's name and a brief idea of what it does.> 466 | Copyright (C) <year> <name of author> 467 | 468 | This library is free software; you can redistribute it and/or 469 | modify it under the terms of the GNU Lesser General Public 470 | License as published by the Free Software Foundation; either 471 | version 2.1 of the License, or (at your option) any later version. 472 | 473 | This library is distributed in the hope that it will be useful, 474 | but WITHOUT ANY WARRANTY; without even the implied warranty of 475 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 476 | Lesser General Public License for more details. 477 | 478 | You should have received a copy of the GNU Lesser General Public 479 | License along with this library; if not, write to the Free Software 480 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 481 | 482 | Also add information on how to contact you by electronic and paper mail. 483 | 484 | You should also get your employer (if you work as a programmer) or your 485 | school, if any, to sign a “copyright disclaimer” for the library, if 486 | necessary. 
Here is a sample; alter the names: 487 | 488 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 489 | library `Frob' (a library for tweaking knobs) written by James Random Hacker. 490 | 491 | , 1 April 1990 492 | Ty Coon, President of Vice 493 | 494 | That's all there is to it! 495 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis 2 | # SPDX-License-Identifier: LGPL-2.1-or-later 3 | 4 | ROOTDIR=$(shell pwd) 5 | DEPSDIR=$(ROOTDIR)/deps/ 6 | SRCDIR=$(ROOTDIR)/src/ 7 | DOCDIR=$(ROOTDIR)/docs/ 8 | MEXDIR=$(ROOTDIR)/mex/ 9 | EXAMPLEDIR=$(ROOTDIR)/examples/ 10 | TESTDIR=$(ROOTDIR)/test/ 11 | UTILDIR=$(ROOTDIR)/util/ 12 | BINDIR=$(ROOTDIR)/bin/ 13 | BUILDDIR=$(ROOTDIR)/build/ 14 | PREFIX?=/usr/local/ 15 | LIBDIR=$(BUILDDIR)lib/ 16 | INCDIR=$(BUILDDIR)include/ 17 | DATDIR=$(ROOTDIR)/datfiles/ 18 | 19 | PCG_HEADER=$(DEPSDIR)pcg-c/include/pcg_variants.h 20 | 21 | SHELL:=/bin/sh 22 | CP:=cp 23 | MKDIR:=mkdir 24 | MV:=mv 25 | RM:=rm -f 26 | 27 | CHECKMK:=checkmk 28 | CC:=gcc 29 | CCOV:=gcov 30 | 31 | DOXYGEN:=doxygen 32 | SPHINXBUILD:=sphinx-build 33 | GIT:=git 34 | MATLAB:=$(shell which matlab) -nodesktop -nosplash 35 | MEXEXT:=$(shell which mexext) 36 | OCTAVE:=octave 37 | 38 | WFLAGS=-Wall -Wextra -pedantic 39 | ARCHFLAGS=-march=native 40 | CFLAGS=$(WFLAGS) $(ARCHFLAGS) -std=gnu99 -I $(SRCDIR) \ 41 | -I $(PREFIX)include -L $(PREFIX)lib 42 | COPTIM=-O3 43 | CCOVFLAGS=-Og -g --coverage 44 | CLIBS=-lm -fopenmp 45 | CHECKLIBS=-lcheck -lpthread -lsubunit 46 | PCG_INCLUDE=-include $(PCG_HEADER) 47 | PCG_LIB=-L $(DEPSDIR)pcg-c/src -lpcg_random 48 | PCG_FLAGS=$(PCG_INCLUDE) $(PCG_LIB) 49 | 50 | .PRECIOUS: %.o 51 | 52 | 53 | 54 | 55 | 56 | .PHONY: all 57 | all: autotune lib mexmat mexoct 58 | 59 | 60 | 61 | 62 | 63 | init(%): 64 | $(GIT) submodule update --init deps/$% 65 | 
#
# Dependencies and autotuning
#

# Build the PCG static library inside the submodule checkout.  $(MAKE) -C is
# used instead of "cd dir; make" so that a missing directory is an error rather
# than a recursive invocation of this Makefile, and so that -j/-n propagate.
$(DEPSDIR)pcg-c/src/libpcg_random.a: init(pcg-c)
	$(MAKE) -C $(DEPSDIR)pcg-c

.PHONY: libpcg
libpcg: $(DEPSDIR)pcg-c/src/libpcg_random.a

# Catch-all rule that creates the directories used as prerequisites below
# ($(BINDIR), $(BUILDDIR), $(INCDIR), $(LIBDIR), ...).
$(ROOTDIR)%:
	$(MKDIR) -p $@

$(BINDIR)cpfloat_autotune: $(SRCDIR)cpfloat_autotune.c $(BINDIR) libpcg
	$(CC) $(CFLAGS) $(COPTIM) -o $@ $< $(CLIBS) $(PCG_FLAGS)

# Run the autotuner and move the generated threshold headers into src/.
.PHONY: autotune
autotune: $(BINDIR)cpfloat_autotune
	$<
	$(MV) cpfloat_threshold_*.h $(SRCDIR)



#
# Shared and static libraries
#

# Copy the *contents* of build/include and build/lib below $(PREFIX).
# $(PREFIX) must end with a slash, like its default value.
.PHONY: install
install: lib
	$(MKDIR) -p $(PREFIX)include/ $(PREFIX)lib/
	$(CP) $(INCDIR)* $(PREFIX)include/
	$(CP) $(LIBDIR)* $(PREFIX)lib/

.PHONY: lib
lib: autotune $(INCDIR)cpfloat_definitions.h $(INCDIR)cpfloat_docmacros.h \
	$(INCDIR)cpfloat.h \
	$(INCDIR)cpfloat_threshold_binary32.h \
	$(INCDIR)cpfloat_threshold_binary64.h \
	$(LIBDIR)libcpfloat.so $(LIBDIR)libcpfloat.a

HEADERS=$(INCDIR)cpfloat_definitions.h $(INCDIR)cpfloat_docmacros.h \
	$(INCDIR)cpfloat_threshold_binary32.h $(INCDIR)cpfloat_threshold_binary64.h

# Headers that are copied verbatim from src/ to build/include/.
$(HEADERS):$(INCDIR)cpfloat_%.h:$(SRCDIR)cpfloat_%.h $(INCDIR)
	$(CP) $< $@

# Concatenate the binary32 and binary64 headers, dropping the include guards
# and the docmacros/definitions includes, and strip "static inline".
# $(BUILDDIR) is an order-only prerequisite: it must exist, but its timestamp
# must not trigger a rebuild.
$(BUILDDIR)cpfloat.tmp: $(SRCDIR)cpfloat_binary32.h $(SRCDIR)cpfloat_binary64.h | $(BUILDDIR)
	sed '/CPFLOAT_BINARY\|^#include "cpfloat_\(doc\|def\)/d' \
	$(SRCDIR)cpfloat_binary32.h > $(BUILDDIR)cpfloat.tmp
	sed '/CPFLOAT_BINARY\|^#include "cpfloat_\(doc\|def\)/d' \
	$(SRCDIR)cpfloat_binary64.h >> $(BUILDDIR)cpfloat.tmp
	sed 's/static inline //g' $(BUILDDIR)cpfloat.tmp > $(BUILDDIR)cpfloat.tmpfinal
	$(MV) $(BUILDDIR)cpfloat.tmpfinal $(BUILDDIR)cpfloat.tmp

$(BUILDDIR)cpfloat_template.c: $(SRCDIR)cpfloat_template.h | $(BUILDDIR)
	sed 's/static inline//g' $< > $@

# Translation unit compiled into the libraries.
$(BUILDDIR)cpfloat.c: $(BUILDDIR)cpfloat.tmp $(BUILDDIR)cpfloat_template.c
	printf "#include \"cpfloat_docmacros.h\"\n\
#include \"cpfloat_definitions.h\"\n" > $@
	sed 's/template.h/template.c/' $(BUILDDIR)cpfloat.tmp >> $@

# Public header: the concatenated declarations with the Doxygen blocks and
# comments removed, wrapped in an include guard.
$(INCDIR)cpfloat.h: $(BUILDDIR)cpfloat.tmp $(BUILDDIR) $(INCDIR)
	sed '/^\/\*\* @/,/^\/\*\* @/d' $< > $(BUILDDIR)cpfloat-h.tmp
	sed '/^ \*\|\/\*/d' $(BUILDDIR)cpfloat-h.tmp > $(BUILDDIR)cpfloat-h.tmpfinal
	$(MV) $(BUILDDIR)cpfloat-h.tmpfinal $(BUILDDIR)cpfloat-h.tmp
	printf "/* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */\n\
/* SPDX-License-Identifier: LGPL-2.1-or-later */\n\
\n\
/**\n\
 * @file cpfloat.h\n\
 * @brief CPFloat header file.\n\
 */\n\
\n\
#ifndef _CPFLOAT_\n\
#define _CPFLOAT_\n\
\n\
#include \"cpfloat_docmacros.h\"\n\
#include \"cpfloat_definitions.h\"\n\
\n" > $@
	cat $(BUILDDIR)cpfloat-h.tmp >> $@
	printf "#endif /* #ifndef _CPFLOAT_ */" >> $@

HEADER_DEPS=$(INCDIR)cpfloat_threshold_binary32.h \
	$(INCDIR)cpfloat_threshold_binary64.h \
	$(DEPSDIR)pcg-c/include/pcg_variants.h

$(BUILDDIR)cpfloat-shared.o: $(BUILDDIR)cpfloat.c $(HEADER_DEPS)
	$(CC) $(CFLAGS) $(COPTIM) -fPIC -c $< $(PCG_INCLUDE) -o $@

$(BUILDDIR)cpfloat-static.o: $(BUILDDIR)cpfloat.c $(HEADER_DEPS)
	$(CC) $(CFLAGS) $(COPTIM) -c $< $(PCG_INCLUDE) -o $@

# PCG object files that are bundled into libcpfloat.
LIBPCG_OBJ=$(DEPSDIR)pcg-c/src/pcg-global-32.o \
	$(DEPSDIR)pcg-c/src/pcg-advance-64.o \
	$(DEPSDIR)pcg-c/src/pcg-global-64.o \
	$(DEPSDIR)pcg-c/src/pcg-advance-128.o

$(LIBDIR)libcpfloat.so: $(BUILDDIR)cpfloat-shared.o libpcg $(LIBDIR)
	$(CC) -shared -o $@ $< $(LIBPCG_OBJ) $(CLIBS) $(PCG_LIB)

$(LIBDIR)libcpfloat.a: $(BUILDDIR)cpfloat-static.o libpcg $(LIBDIR)
	ar -cr $@ $< $(LIBPCG_OBJ)



#
# MEX interface
#

# NOTE(review): the back-ticks are only expanded by the shell inside recipes;
# in target names the value is taken literally.  Defined once (the original
# repeated this assignment further down).
MEXEXTENSION:=`$(MEXEXT)`

.PHONY: mexmat
mexmat: $(BINDIR)cpfloat.m $(BINDIR)cpfloat.$(MEXEXTENSION)

.PHONY: mexoct
mexoct: $(BINDIR)cpfloat.m $(BINDIR)cpfloat.mex

$(BINDIR)cpfloat.m: $(MEXDIR)cpfloat.m $(BINDIR)
	$(CP) $< $@

# MATLAB/Octave commands: compile, and if the first compilation reports
# success, autotune and compile again.
MEXSTRING="cd $(MEXDIR); \
	retval = cpfloat_compile('cpfloatdir', '$(SRCDIR)', \
	'pcgpath', '$(DEPSDIR)pcg-c/', \
	'compilerpath', '$(CC)'); \
	if retval \
	rehash(); \
	cpfloat_autotune('cpfloatdir', '$(SRCDIR)'); \
	cpfloat_compile('cpfloatdir', '$(SRCDIR)', \
	'pcgpath', '$(DEPSDIR)pcg-c/', \
	'compilerpath', '$(CC)'); \
	end; \
	exit;"

$(BINDIR)cpfloat.$(MEXEXTENSION): $(MEXDIR)cpfloat.c libpcg $(BINDIR)
	$(MATLAB) -r $(MEXSTRING)
	$(MV) $(MEXDIR)cpfloat.$(MEXEXTENSION) $@

$(BINDIR)cpfloat.mex: $(MEXDIR)cpfloat.c libpcg $(BINDIR)
	$(OCTAVE) --eval $(MEXSTRING)
	$(MV) $(MEXDIR)cpfloat.mex $@



#
# Tests
#

.PHONY: test
test: ctest libtest mtest otest

# Generate the C test driver from the checkmk specification.
$(TESTDIR)cpfloat_test.c: $(TESTDIR)cpfloat_test.ts
	$(CHECKMK) clean_mode=1 $< > $@

$(BINDIR)cpfloat_test: $(TESTDIR)cpfloat_test.c libpcg $(BINDIR)
	$(CC) $(CFLAGS) $(COPTIM) -fsanitize=undefined -o $@ $< \
	$(CHECKLIBS) $(CLIBS) $(PCG_FLAGS)

.PHONY: ctest
ctest: $(BINDIR)cpfloat_test
	$<
	$(MV) cpfloat_test.log $(TESTDIR)

# Same tests, but including the installed header cpfloat.h instead of the
# header-only sources.
$(TESTDIR)libcpfloat_test.c: $(TESTDIR)cpfloat_test.c
	sed '/#include "cpfloat_binary32.h"/d' $< > $@
	sed 's/#include "cpfloat_binary64.h"/#include "cpfloat.h"/g' $@ > cpfloath.temp
	$(MV) cpfloath.temp $@

$(BINDIR)libcpfloat_static_test: $(TESTDIR)libcpfloat_test.c lib
	$(CC) $(CFLAGS) $(COPTIM) -fsanitize=undefined -static -o $@ $< \
	-I$(INCDIR) -L$(LIBDIR) -lcpfloat $(CHECKLIBS)

$(BINDIR)libcpfloat_shared_test: $(TESTDIR)libcpfloat_test.c lib
	$(CC) $(CFLAGS) $(COPTIM) -fsanitize=undefined -o $@ $< \
	-I$(INCDIR) -L$(LIBDIR) -lcpfloat $(CHECKLIBS) -lm

.PHONY: libtest
libtest: libtest-shared libtest-static

# "$$" passes a literal "$" to the shell; a single "$" would make make expand
# the (empty) variable "L" and discard the caller's LD_LIBRARY_PATH.
.PHONY: libtest-shared
libtest-shared: $(BINDIR)libcpfloat_shared_test
	export LD_LIBRARY_PATH=$$LD_LIBRARY_PATH:$(LIBDIR); $<
	$(MV) cpfloat_test.log $(TESTDIR)libcpfloat_dinamic_test.log

.PHONY: libtest-static
libtest-static: $(BINDIR)libcpfloat_static_test
	$<
	$(MV) cpfloat_test.log $(TESTDIR)libcpfloat_static_test.log

.PHONY: mtest
mtest: MTESTSTRING="addpath('$(DEPSDIR)float_params'); \
	addpath('$(BINDIR)'); \
	cd $(TESTDIR); \
	cpfloat_test; \
	exit;"

mtest: $(BINDIR)cpfloat.$(MEXEXTENSION) $(BINDIR)cpfloat.m init(float_params)
	$(MATLAB) -r $(MTESTSTRING)

# Install the Octave package fenv from Octave Forge if it is not listed, then
# run the test suite.
.PHONY: otest
otest: OTESTSTRING="pkglist=pkg('list'); \
	no_fenv=true; \
	for i=1:length(pkglist); \
	if strcmp(pkglist{i}.name, 'fenv'); \
	no_fenv = false; \
	break; \
	end; \
	end; \
	if no_fenv; \
	pkg install -forge fenv; \
	end; \
	pkg load fenv; \
	addpath('$(DEPSDIR)float_params'); \
	addpath('$(BINDIR)'); \
	cd $(TESTDIR); \
	cpfloat_test; \
	exit;"

otest: $(BINDIR)cpfloat.mex $(BINDIR)cpfloat.m init(float_params)
	$(OCTAVE) --eval $(OTESTSTRING)



#
# Documentation, coverage, and examples
#

.PHONY: docs
docs: $(DOCDIR)html

$(DOCDIR)Doxyfile:
	$(DOXYGEN) -g $(DOCDIR)Doxyfile

$(DOCDIR)xml: $(DOCDIR)Doxyfile $(DOCDIR)Doxyfile-project
	$(DOXYGEN) $(DOCDIR)Doxyfile-project

$(DOCDIR)html: $(DOCDIR)xml
	$(SPHINXBUILD) -M html "$(DOCDIR)source" "$(DOCDIR)"

.PHONY: coverage
coverage: $(TESTDIR)cpfloat_test.c libpcg
	$(CC) $(CFLAGS) $(CCOVFLAGS) -o $(TESTDIR)cpfloat_test $< \
	$(CHECKLIBS) $(CLIBS) $(PCG_FLAGS)
	$(TESTDIR)cpfloat_test
	$(CP) $(TESTDIR)cpfloat_test.c .
	$(CCOV) cpfloat_test.c

.PHONY: example
example: $(BINDIR)example_manuscript

$(BINDIR)example_manuscript: $(EXAMPLEDIR)example_manuscript.c libpcg $(BINDIR)
	$(CC) $(CFLAGS) $(COPTIM) -o $@ $< $(CLIBS) $(PCG_FLAGS)



#
# Cleaning
#

.PHONY: cleanall
cleanall: clean cleanlib cleandeps cleantest cleancoverage cleandocs

.PHONY: clean
clean:
	$(RM) $(BINDIR)*

.PHONY: cleanlib
cleanlib:
	$(RM) -r $(BUILDDIR)*

# The rule used to be called "cleandep" while "cleanall" and .PHONY referred
# to "cleandeps"; both names are accepted now.
.PHONY: cleandeps cleandep
cleandeps cleandep:
	$(MAKE) -C $(DEPSDIR)pcg-c clean

.PHONY: cleantest
cleantest:
	$(RM) $(TESTDIR)cpfloat_test $(TESTDIR)*.c $(TESTDIR)*.log

.PHONY: cleancoverage
cleancoverage:
	$(RM) cpfloat_test.c cpfloat_test.log *.gcno *.gcda *.gcov

.PHONY: cleandocs
cleandocs:
	$(RM) -r $(DOCDIR)Doxyfile $(DOCDIR)xml
	$(RM) -r $(DOCDIR)html $(DOCDIR)source/cpfloat



#
# Licensing
#

license.spdx:
	$(UTILDIR)generate_spdx.sh > $@

# CPFloat - Custom Precision Floating-point numbers.
#
# Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
#
# This library is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 2.1 of the License, or (at your option)
# any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
358 | # 359 | # You should have received a copy of the GNU Lesser General Public License along 360 | # with this library; if not, write to the Free Software Foundation, Inc., 51 361 | # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 362 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Version](https://img.shields.io/github/v/tag/north-numerical-computing/cpfloat?label=version)](https://github.com/north-numerical-computing/cpfloat/tags) 2 | [![C tests](https://img.shields.io/github/actions/workflow/status/north-numerical-computing/cpfloat/run_c_tests.yml?branch=main&label=c_tests)](https://github.com/north-numerical-computing/cpfloat/actions/workflows/run_c_tests.yml) 3 | [![Octave tests](https://img.shields.io/github/actions/workflow/status/north-numerical-computing/cpfloat/run_octave_tests.yml?branch=main&label=octave_tests)](https://github.com/north-numerical-computing/cpfloat/actions/workflows/run_octave_tests.yml) 4 | [![GitHub](https://img.shields.io/github/license/north-numerical-computing/cpfloat)](LICENSE.md) 5 | 6 | # CPFloat: Custom-Precision Floating-Point numbers 7 | 8 | CPFloat is a C library for simulating low-precision floating-point arithmetics. CPFloat provides efficient routines for rounding, performing arithmetic operations, evaluating mathematical functions, and querying properties of the simulated low-precision format. Internally, numbers are stored in `float` or `double` arrays.
The low-precision format (target format) follows an extension of the formats defined in the IEEE 754 standard [[5]](#ref5) and is entirely specified by four parameters: 9 | * a positive integer *p*, which represents the number of digits of precision; 10 | * an integer *e*min, which represents the minimum supported exponent; 11 | * a positive integer *e*max, which represents the maximum supported exponent; and 12 | * a Boolean variable σ, set to **true** if subnormals are supported and to **false** otherwise. 13 | 14 | Valid choices of *p*, *e*min, and *e*max depend on the format in which the converted numbers are to be stored (storage format). A more extensive description of the characteristics of the low-precision formats that can be used, together with more details on admissible values for *p*, *e*min, *e*max, and *σ* can be found in [[1]](#ref1). 15 | 16 | The library was originally intended as a faster version of the MATLAB function `chop` [[2]](#ref2), which is [available on GitHub](https://github.com/higham/chop). 17 | The latest versions of the library have a variety of subtle differences compared with `chop`. 18 | * Since [14 June 2022](https://github.com/higham/chop/commit/1d37238067042416a3554a1f5e6cdd248b613999), `chop` supports specifying the function for generating random numbers. The MEX interface of CPFloat does not offer this capability, as the pseudo-random numbers used are generated in C and not in MATLAB. 19 | * Since v0.6.0, CPFloat allows users to specify *e*min and *e*max separately. In earlier versions, users can only specify *e*max, while *e*min is set to 1 – *e*max. 20 | * Since v0.6.0, the default 8-bit format `E4M3` has *e*max = 8 and *e*min = –6, which is consistent with the homonymous format in the December 2023 revision of the OCP 8-bit Floating Point Specification (OFP8) [[3]](#ref3). In `chop`, *e*max = 7 and *e*min = –6.
21 | 22 | The code to reproduce the results of the tests in [[1]](#ref1) is [available on GitHub](https://github.com/north-numerical-computing/cpfloat_experiments). 23 | 24 | 25 | # Dependencies 26 | 27 | The only (optional) dependency of CPFloat is the [C implementation](https://github.com/imneme/pcg-c) of the [PCG Library](https://www.pcg-random.org), which provides a variety of high-quality pseudo-random number generators. For an in-depth discussion of the algorithms underlying the PCG Library, we recommend the [paper](https://www.pcg-random.org/paper.html) by [Melissa O'Neill](https://www.cs.hmc.edu/~oneill) [[4]](#ref4). If the header file `pcg_variants.h` in `deps/pcg-c/include/pcg_variants.h` is not included at compile-time with the `--include` option, then CPFloat relies on the default C pseudo-random number generator. 28 | 29 | The PCG Library is free software (see the [Licensing information](#licensing-information) below), and its generators are more efficient, reliable, and flexible than any combination of the functions `srand`, `rand`, and `rand_r` from the C standard library. A warning is issued at compile time if the location of `pcg_variants.h` is not specified correctly. 30 | 31 | Compiling the MEX interface requires a reasonably recent version of MATLAB or Octave. 32 | 33 | # Developer dependencies 34 | 35 | Testing the MEX interface requires the function `float_params`, which is [available on GitHub](https://github.com/higham/float_params). The unit tests for the C implementation in `test/cpfloat_test.ts` require the [check unit testing framework for C](https://libcheck.github.io/check), including the [`checkmk`](https://github.com/libcheck/check/tree/master/checkmk) script, and the [subunit protocol](https://github.com/testing-cabal/subunit). 36 | 37 | # Installation 38 | 39 | No installation is needed in order to use CPFloat as a header-only library.
The shared and static libraries can be built with 40 | ```console 41 | make lib 42 | ``` 43 | If the compilation is successful, the header and library files of CPFloat will be located in the `build/include` and `build/lib` directories, respectively. 44 | The library can be installed in `<path>` with 45 | ```console 46 | make install PREFIX=<path>/ 47 | ``` 48 | which copies the header and library files to `<path>/include` and `<path>/lib`, respectively. 49 | The default value of `<path>`, which is used if the `PREFIX` variable is not set, is `/usr/local`. The value of `PREFIX` must end with a slash, because the `Makefile` appends `include/` and `lib/` to it directly. 50 | 51 | ## MEX interface 52 | 53 | The MEX interface can be compiled automatically with either 54 | ```console 55 | make mexmat # Compile MEX interface for MATLAB. 56 | ``` 57 | or 58 | ```console 59 | make mexoct # Compile MEX interface for Octave. 60 | 61 | ``` 62 | These two commands compile and autotune the MEX interface in MATLAB and Octave, respectively, by using the functions `mex/cpfloat_compile.m` and `mex/cpfloat_autotune.m`. To use the interface, the `bin/` folder must be in MATLAB's search path. 63 | 64 | On a system where the `make` build automation tool is not available, we recommend building the MEX interface by running the script `cpfloat_compile_nomake.m` in the `mex/` folder. The script attempts to compile and autotune the MEX interface using the default C compiler. The following code will download the repository as a ZIP file, inflate it, and try to compile it: 65 | 66 | ```matlab 67 | zip_url = 'https://codeload.github.com/north-numerical-computing/cpfloat/zip/refs/heads/main'; 68 | unzip(zip_url); 69 | movefile('cpfloat-main', 'cpfloat') 70 | cd('cpfloat/mex'); 71 | cpfloat_compile_nomake 72 | ``` 73 | 74 | A different compiler can be used by setting the value of the variable `compilerpath` appropriately. 75 | If the chosen compiler does not support OpenMP, only the sequential version of the algorithm will be produced and no autotuning will take place.
76 | 77 | On Windows, we have not been able to compile the PCG Library using the C compiler recommended by MATLAB. Therefore, the script uses the pseudo-random number generator in the C standard library by default. 78 | 79 | ## Autotuning 80 | 81 | CPFloat provides a sequential and a parallel implementation of the rounding functions. OpenMP introduces some overhead, and using a single thread is typically faster for arrays with few elements. Therefore, the library provides a facility to switch between the single-threaded and the multi-threaded variants automatically, depending on the size of the input. The threshold is machine-dependent, and the best value for a given system can be found by invoking 82 | ```console 83 | make autotune 84 | ``` 85 | which compiles the file `src/cpfloat_autotune.c`, runs it, and updates the files `src/cpfloat_threshold_binary32.h` and `src/cpfloat_threshold_binary64.h`. This procedure is run automatically when building the shared and static libraries. 86 | 87 | ## Documentation 88 | 89 | The documentation of CPFloat can be generated with the command 90 | ```console 91 | make docs 92 | ``` 93 | which relies on [Doxygen](https://www.doxygen.nl) to format the Javadoc-style comments in the source files, and on [Sphinx](https://www.sphinx-doc.org), with the [Breathe](https://breathe.readthedocs.io) and [Exhale](https://exhale.readthedocs.io) extensions, to generate the HTML version of the documentation that can be found in the `docs/html/` directory. 94 | 95 | # Using CPFloat 96 | 97 | CPFloat can be used as a header-only, shared, or static library. Examples for these three scenarios can be found in the `Makefile` (cf. targets `$(BINDIR)cpfloat_test`, `$(BINDIR)libcpfloat_shared_test`, and `$(BINDIR)libcpfloat_static_test`, respectively). Here we provide a brief summary. 98 | 99 | * **Header-only library.** The only requirement is that the files in the `src/` directory be in the include path of the compiler. 
In order to use the PCG Library, one can either: 100 | - specify the path of the file `pcg_variants.h` using the preprocessor option `--include` (see the variable `PCG_INCLUDE` in the `Makefile` for an example); or 101 | - make sure that `pcg_variants.h` is in the include path and uncomment the preprocessor instruction on line 34 of `src/cpfloat_definitions.h`, that is, `/* #include "pcg_variants.h" */`. In either case, it is necessary to link the executable against the `pcg_random` library, which can be obtained by passing the option `-lpcg_random` to the linker. The library `libpcg_random.a` must be in the load path. 102 | 103 | * **Shared library.** The five header files in the `build/include` directory must be in the include path of the compiler. The options `-lcpfloat` and `-lm` must be passed to the linker, and the libraries `libcpfloat.so` and `libm.so` must be in the load path. 104 | 105 | * **Static library.** The static library uses the same five header files as the shared library, which are located in the `build/include` directory and must be in the include path of the compiler. Executables must be linked with the `-static` and `-lcpfloat` options, and the library file `libcpfloat.a` must be in the load path. Linking against the math library is not needed in this case. 106 | 107 | # Code validation 108 | 109 | The `test/` directory contains two sets of tests, one for the C library and one for the MEX interface. The unit tests for the C implementation require the `check` library, and can be run with 110 | ```console 111 | make ctest 112 | ``` 113 | for the header-only library or with 114 | ```console 115 | make libtest 116 | ``` 117 | for the shared and static libraries. The two commands use the same batch of unit tests, which is generated from the file `test/cpfloat_test.ts` using the `checkmk` script. 118 | The Makefile target `coverage` measures the code coverage using GNU `gcov` on the same set of tests.
119 | 120 | The MEX interface can be tested by using either 121 | ```console 122 | make mtest # Test MEX interface using MATLAB. 123 | ``` 124 | or 125 | ```console 126 | make otest # Test MEX interface using Octave. 127 | ``` 128 | These two commands run, in MATLAB and Octave respectively, the function `test/cpfloat_test.m`. This set of tests is based on the MATLAB script `test_chop.m`, [available on GitHub](https://github.com/higham/chop/blob/master/test_chop.m): some changes were necessary in order to make it compatible with Octave. 129 | 130 | 131 | # References 132 | 133 | [1] Massimiliano Fasi and Mantas Mikaitis. [CPFloat: A C library for simulating low-precision arithmetic](https://doi.org/10.1145/3585515). ACM Trans. Math. Softw., 49(2), Article No.: 18, June 2023. 134 | 135 | [2] Nicholas J. Higham and Srikara Pranesh, [Simulating Low Precision Floating-Point Arithmetic](https://doi.org/10.1137/19M1251308), SIAM J. Sci. Comput., 41, C585-C602, 2019. 136 | 137 | [3] Paulius Micikevicius, Stuart Oberman, Pradeep Dubey, Marius Cornea, Andres Rodriguez, Ian Bratt, Richard Grisenthwaite, Norm Jouppi, Chiachen Chou, Amber Huffman, Michael Schulte, Ralph Wittig, Dharmesh Jani, Summer Deng. [OCP 8-bit Floating Point Specification (OFP8)](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-12-01-pdf-1), pp. 1–16, Revision 1.0, Open Compute Project, June 2023. Revised December 2023. 138 | 139 | [4] Melissa E. O'Neill, [PCG: A family of simple fast space-efficient statistically good algorithms for random number generation](https://www.pcg-random.org/paper.html), Technical report HMC-CS-2014-0905, Harvey Mudd College, Claremont, CA, September 2014. 140 | 141 | [5] [754-2019 IEEE Standard for Floating-Point Arithmetic](https://doi.org/10.1109/IEEESTD.2019.8766229), pp. 1–84, Institute of Electrical and Electronics Engineers, July 2019. Revision of IEEE Std 754-2008. 
142 | 143 | # Acknowledgements 144 | 145 | The library was written by Massimiliano Fasi and Mantas Mikaitis. We thank Nicolas Louvet, Theo Mary, Ian McInerney, and Siegfried Rump for reporting bugs and suggesting improvements. 146 | 147 | # Licensing information 148 | 149 | CPFloat is distributed under the GNU Lesser General Public License, Version 2.1 150 | or later (see [LICENSE.md](LICENSE.md)). Please contact us if you would like to use CPFloat in an open source project distributed under the terms of a license that is incompatible with the GNU LGPL. We might be able to relicense the software for you. 151 | 152 | The PCG Library is distributed under the terms of either the [Apache License, Version 2.0](https://raw.githubusercontent.com/imneme/pcg-c/master/LICENSE-APACHE.txt) or the [Expat License](https://raw.githubusercontent.com/imneme/pcg-c/master/LICENSE-MIT.txt), at the option of the user. 153 | 154 | The MATLAB function `float_params` is distributed under the terms of the [BSD 2-Clause "Simplified" License](https://raw.githubusercontent.com/higham/float_params/master/license.txt). 155 | 156 | The MATLAB function `chop` is distributed under the terms of the [BSD 2-Clause "Simplified" License](https://raw.githubusercontent.com/higham/chop/master/license.txt). 157 | -------------------------------------------------------------------------------- /docs/Doxyfile-project: -------------------------------------------------------------------------------- 1 | @INCLUDE = "./docs/Doxyfile" 2 | GENERATE_HTML = NO 3 | GENERATE_LATEX = NO 4 | GENERATE_XML = YES 5 | XML_PROGRAMLISTING = NO 6 | 7 | # Project configuration. 
8 | PROJECT_NAME = "CPFloat" 9 | PROJECT_NUMBER = "0.5.0" 10 | PROJECT_BRIEF = "Custom precision floating-point numbers" 11 | OUTPUT_DIRECTORY = "./docs/" 12 | 13 | # Inputs 14 | INPUT = ./src/cpfloat_definitions.h \ 15 | ./src/cpfloat_binary32.h \ 16 | ./src/cpfloat_binary64.h 17 | RECURSIVE = NO 18 | 19 | # Options 20 | EXTENSION_MAPPING = h=C 21 | MACRO_EXPANSION = YES 22 | OPTIMIZE_OUTPUT_FOR_C = YES 23 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'CPFloat' 21 | copyright = '2020, Massimiliano Fasi and Mantas Mikaitis' 22 | author = 'Massimiliano Fasi and Mantas Mikaitis' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = 'latest' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 
# Extension modules loaded by Sphinx for this project.
extensions = ["sphinx.ext.githubpages", "breathe", "exhale", "myst_parser"]

# Breathe: map the project name to the directory holding the Doxygen XML.
breathe_projects = dict(CPFloat="../xml")
breathe_default_project = "CPFloat"

# Exhale: the first five keys are the ones marked as required.
exhale_args = dict(
    containmentFolder="./cpfloat",
    rootFileName="cpfloat_root.rst",
    rootFileTitle="CPFloat API",
    doxygenStripFromPath="..",
    createTreeView=True,
    # TIP: if using the sphinx-bootstrap-theme, you need
    # treeViewIsBootstrap=True,
)

# The documented sources are C, so use the C domain and C highlighting.
primary_domain = "c"
highlight_language = "c"

# Template directories, relative to this directory.
templates_path = ["_templates"]

# Language of the content autogenerated by Sphinx (also used by gettext-based
# translation; usually overridden on the command line in that case).
language = "en"

# Patterns, relative to the source directory, of files and directories that
# are ignored when looking for sources.  Also applies to html_static_path and
# html_extra_path.
exclude_patterns = list()

# -- Options for HTML output -------------------------------------------------

# Theme used for HTML and HTML Help pages.
html_theme = "sphinx_rtd_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
86 | html_static_path = ['_static'] 87 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. CPFloat documentation master file. 2 | 3 | CPFloat documentation 4 | ===================== 5 | 6 | .. toctree:: 7 | :hidden: 8 | 9 | self 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | ./readme 15 | 16 | .. toctree:: 17 | :hidden: 18 | 19 | ./cpfloat/cpfloat_root 20 | 21 | .. toctree:: 22 | :hidden: 23 | 24 | * :ref:`genindex` 25 | -------------------------------------------------------------------------------- /docs/source/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../README.md 2 | :parser: myst_parser.docutils_ 3 | -------------------------------------------------------------------------------- /examples/example_manuscript.c: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | #include 5 | #include "cpfloat_binary64.h" 6 | 7 | #define N 3 8 | 9 | int main () { 10 | // Allocate the data structure for target formats and rounding parameters. 11 | optstruct *fpopts = init_optstruct(); 12 | 13 | // Set up the parameters for binary16 target format. 14 | fpopts->precision = 11; // Bits in the significand + 1. 15 | fpopts->emin = -14; // The minimum exponent value. 16 | fpopts->emax = 15; // The maximum exponent value. 17 | fpopts->subnormal = CPFLOAT_SUBN_USE; // Support for subnormals is on. 18 | fpopts->round = CPFLOAT_RND_TP; // Round toward +infinity. 19 | fpopts->flip = CPFLOAT_SOFTERR_NO; // Bit flips are off. 20 | fpopts->p = 0; // Bit flip probability (not used). 21 | fpopts->explim = CPFLOAT_EXPRANGE_TARG; // Limited exponent in target format. 22 | 23 | // Validate the parameters in fpopts. 
24 | int retval = cpfloat_validate_optstruct(fpopts); 25 | printf("The validation function returned %d.\n", retval); 26 | 27 | // Initialize C array with arbitrary elements. 28 | double X[N] = { (double)5/3, M_PI, M_E }; 29 | double Y[N] = { 1.5, 1.5, 1.5 }; 30 | double Z[N]; 31 | printf("X in binary64:\n %.15e %.15e %.15e\n", X[0], X[1], X[2]); 32 | 33 | // Round the values of X to the binary16 format and store in Z. 34 | cpfloat(Z, X, N, fpopts); 35 | printf("X rounded to binary16:\n %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]); 36 | 37 | // Round the sum of X and Y. 38 | cpf_add(Z, X, Y, N, fpopts); 39 | printf("Sum rounded to binary16:\n %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]); 40 | 41 | // Round the product of X and Y. 42 | cpf_mul(Z, X, Y, N, fpopts); 43 | printf("Product rounded to binary16:\n %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]); 44 | 45 | // Round the logarithm of X. 46 | cpf_log(Z, X, N, fpopts); 47 | printf("Log rounded to binary16:\n %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]); 48 | 49 | // Round the 2-argument arctangent of X and Y. 50 | cpf_atan2(Z, X, Y, N, fpopts); 51 | printf("Angle rounded to binary16:\n %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]); 52 | 53 | free_optstruct(fpopts); 54 | } 55 | 56 | /* 57 | * CPFloat - Custom Precision Floating-point numbers. 58 | * 59 | * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 60 | * 61 | * This library is free software; you can redistribute it and/or modify it under 62 | * the terms of the GNU Lesser General Public License as published by the Free 63 | * Software Foundation; either version 2.1 of the License, or (at your option) 64 | * any later version. 65 | * 66 | * This library is distributed in the hope that it will be useful, but WITHOUT 67 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 68 | * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 69 | * details. 
70 | * 71 | * You should have received a copy of the GNU Lesser General Public License along 72 | * with this library; if not, write to the Free Software Foundation, Inc., 51 73 | * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 74 | */ 75 | -------------------------------------------------------------------------------- /license.spdx: -------------------------------------------------------------------------------- 1 | ## 2 | ## Document Creation Information 3 | ## 4 | 5 | SPDXVersion: SPDX-2.2 6 | DataLicense: CC0-1.0 7 | SPDXID: SPDXRef-DOCUMENT 8 | DocumentName: cpfloat-0.5.0 9 | DocumentNamespace: https://raw.githubusercontent.com/north-numerical-computing/cpfloat/master/license.spdx 10 | Creator: Person: Massimiliano Fasi (massimiliano.fasi@durham.ac.uk) 11 | Creator: Person: Mantas Mikaitis (mantas.mikaitis@manchester.ac.uk) 12 | Created: 2022-05-13T07:37:31Z 13 | 14 | 15 | 16 | ## 17 | ## Package Information 18 | ## 19 | 20 | PackageName: cpfloat 21 | SPDXID: SPDXRef-1 22 | PackageVersion: 0.5.0 23 | PackageDownloadLocation: git://github.com/north-numerical-computing/cpfloat 24 | PackageVerificationCode: e7abe3759c76a48cf70348f35121079126b5846e (excludes: ./license.spdx) 25 | PackageHomePage: https://github.com/north-numerical-computing/cpfloat 26 | PackageLicenseConcluded: LGPL-2.1-or-later 27 | PackageLicenseInfoFromFiles: LGPL-2.1-or-later 28 | PackageLicenseDeclared: LGPL-2.1-or-later 29 | PackageCopyrightText: Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 30 | PackageSummary:Custom Precision Floating-point numbers. 
31 | 32 | 33 | 34 | ## 35 | ## File Information 36 | ## 37 | 38 | FileName: ./src/cpfloat_binary64.h 39 | SPDXID: SPDXRef-1-1 40 | FileType: SOURCE 41 | FileChecksum: SHA1: a12904f3e5531acf30bcd087c18d3d728adc26ef 42 | FileChecksum: MD5: a048b24c8c72370e134004985b956b51 43 | LicenseConcluded: LGPL-2.1-or-later 44 | LicenseInfoInFile: LGPL-2.1-or-later 45 | 46 | FileName: ./src/cpfloat_binary32.h 47 | SPDXID: SPDXRef-1-2 48 | FileType: SOURCE 49 | FileChecksum: SHA1: 0659f31bfaa75fb0bee0af49916b29e87481b902 50 | FileChecksum: MD5: ace80b75d287d8c86459430c24608cba 51 | LicenseConcluded: LGPL-2.1-or-later 52 | LicenseInfoInFile: LGPL-2.1-or-later 53 | 54 | FileName: ./src/cpfloat_threshold_binary64.h 55 | SPDXID: SPDXRef-1-3 56 | FileType: SOURCE 57 | FileChecksum: SHA1: 84d787a6e3dd3bc8d8615575e0d18272e2378b9d 58 | FileChecksum: MD5: 68ac8be2a018baae453d325084221bdd 59 | LicenseConcluded: LGPL-2.1-or-later 60 | LicenseInfoInFile: LGPL-2.1-or-later 61 | 62 | FileName: ./src/cpfloat_template.h 63 | SPDXID: SPDXRef-1-4 64 | FileType: SOURCE 65 | FileChecksum: SHA1: 35d1080ced6bff3dc57d96e2d01af65aa4fec0ab 66 | FileChecksum: MD5: a024bf10be172b3e9e42acae6bd84c40 67 | LicenseConcluded: LGPL-2.1-or-later 68 | LicenseInfoInFile: LGPL-2.1-or-later 69 | 70 | FileName: ./src/cpfloat_definitions.h 71 | SPDXID: SPDXRef-1-5 72 | FileType: SOURCE 73 | FileChecksum: SHA1: 44a75ba26b33d3b08771eafef03b62e15a419cea 74 | FileChecksum: MD5: 3c7e1ee37e5f04399dabf6862cff2062 75 | LicenseConcluded: LGPL-2.1-or-later 76 | LicenseInfoInFile: LGPL-2.1-or-later 77 | 78 | FileName: ./src/cpfloat_autotune.c 79 | SPDXID: SPDXRef-1-6 80 | FileType: SOURCE 81 | FileChecksum: SHA1: 6790f95b7876857496fc2ab77650edbeefedb8e5 82 | FileChecksum: MD5: 96bf1b5da96174fa8cbf51f33586389d 83 | LicenseConcluded: LGPL-2.1-or-later 84 | LicenseInfoInFile: LGPL-2.1-or-laterLGPL-2.1-or-later 85 | 86 | FileName: ./src/cpfloat_docmacros.h 87 | SPDXID: SPDXRef-1-7 88 | FileType: DOCUMENTATION 89 | FileChecksum: SHA1: 
f34e8ed8a5205320f6c1401f0ed45e5f068f59e2 90 | FileChecksum: MD5: 41b2b3faaad6061c9f5f1aa6bed6f634 91 | LicenseConcluded: LGPL-2.1-or-later 92 | LicenseInfoInFile: LGPL-2.1-or-later 93 | 94 | FileName: ./src/cpfloat_threshold_binary32.h 95 | SPDXID: SPDXRef-1-8 96 | FileType: SOURCE 97 | FileChecksum: SHA1: abf295420daa865fc903e28d6b60e05d7e569b80 98 | FileChecksum: MD5: bfca1b34e8098b5f0e667983e2ac5813 99 | LicenseConcluded: LGPL-2.1-or-later 100 | LicenseInfoInFile: LGPL-2.1-or-later 101 | 102 | FileName: ./LICENSE.md 103 | SPDXID: SPDXRef-1-9 104 | FileType: TEXT 105 | FileChecksum: SHA1: b386b371ce94933e63ced1052aa72a60da5485ff 106 | FileChecksum: MD5: 1803fa9c2c3ce8cb06b4861d75310742 107 | LicenseConcluded: LGPL-2.1-or-later 108 | LicenseInfoInFile: NONE 109 | 110 | FileName: ./examples/example_manuscript.c 111 | SPDXID: SPDXRef-1-10 112 | FileType: SOURCE 113 | FileChecksum: SHA1: 933d2b2eeac0f9b4e41c539d358c2f747ae11f40 114 | FileChecksum: MD5: 3c68422c91c8ca3fca3e3bf165d0a094 115 | LicenseConcluded: LGPL-2.1-or-later 116 | LicenseInfoInFile: LGPL-2.1-or-later 117 | 118 | FileName: ./Makefile 119 | SPDXID: SPDXRef-1-11 120 | FileType: OTHER 121 | FileChecksum: SHA1: 53accae589b9c06617915432474ffe91ecc72921 122 | FileChecksum: MD5: 26cebbb7f04a8cece8f6af36ad8557fb 123 | LicenseConcluded: LGPL-2.1-or-later 124 | LicenseInfoInFile: LGPL-2.1-or-laterLGPL-2.1-or-later 125 | 126 | FileName: ./.circleci/config.yml 127 | SPDXID: SPDXRef-1-12 128 | FileType: OTHER 129 | FileChecksum: SHA1: 0a0b634f2a0eda8c23f4107e3c7066a741593b2b 130 | FileChecksum: MD5: 24d7ea179d64d1a084aab198ddfc2569 131 | LicenseConcluded: LGPL-2.1-or-later 132 | LicenseInfoInFile: NONE 133 | 134 | FileName: ./test/cpfloat_test.m 135 | SPDXID: SPDXRef-1-13 136 | FileType: SOURCE 137 | FileChecksum: SHA1: fb9b197d5f5e6da78ae851a3e59ec70d7bccf056 138 | FileChecksum: MD5: 19db4e188c505d7c4931a57d4eb5b7f6 139 | LicenseConcluded: LGPL-2.1-or-later 140 | LicenseInfoInFile: LGPL-2.1-or-later 141 | 142 | 
FileName: ./test/cpfloat_test.ts 143 | SPDXID: SPDXRef-1-14 144 | FileType: SOURCE 145 | FileChecksum: SHA1: 7fafef4423e408ff0b96569248e2fe6386c7cffe 146 | FileChecksum: MD5: 8deece3b4537745bbaa9abfbddbc1957 147 | LicenseConcluded: LGPL-2.1-or-later 148 | LicenseInfoInFile: LGPL-2.1-or-later 149 | 150 | FileName: ./README.md 151 | SPDXID: SPDXRef-1-15 152 | FileType: TEXT 153 | FileChecksum: SHA1: 42d03ec1e4c3a135169cd9d5391b22b7ab64cd39 154 | FileChecksum: MD5: 30b0c4e7d8454b5af00c78f1cddad335 155 | LicenseConcluded: LGPL-2.1-or-later 156 | LicenseInfoInFile: NONE 157 | 158 | FileName: ./docs/source/conf.py 159 | SPDXID: SPDXRef-1-16 160 | FileType: DOCUMENTATION 161 | FileChecksum: SHA1: 0f5066f0d287b33573181ff889ed443a081713e1 162 | FileChecksum: MD5: 6b70370507cd57c2dd530f89e7513bc2 163 | LicenseConcluded: LGPL-2.1-or-later 164 | LicenseInfoInFile: NONE 165 | 166 | FileName: ./docs/source/readme.rst 167 | SPDXID: SPDXRef-1-17 168 | FileType: DOCUMENTATION 169 | FileChecksum: SHA1: 49d43efb093c16b067c2aee7caaaf0533e44e797 170 | FileChecksum: MD5: e251e18defff7c8f718661967ee61f9d 171 | LicenseConcluded: LGPL-2.1-or-later 172 | LicenseInfoInFile: NONE 173 | 174 | FileName: ./docs/source/index.rst 175 | SPDXID: SPDXRef-1-18 176 | FileType: DOCUMENTATION 177 | FileChecksum: SHA1: d35eb28b1379ca0e7a2de0de003fe7a72f2eb47f 178 | FileChecksum: MD5: 2e1701ca9f6ea0d59e669d0a4fe031fd 179 | LicenseConcluded: LGPL-2.1-or-later 180 | LicenseInfoInFile: NONE 181 | 182 | FileName: ./docs/Doxyfile-project 183 | SPDXID: SPDXRef-1-19 184 | FileType: DOCUMENTATION 185 | FileChecksum: SHA1: c86f08fbca4704f813dbe94b02699e8a74859337 186 | FileChecksum: MD5: ac6eb56dbf3895c4ea6e2ca1cf1899d3 187 | LicenseConcluded: LGPL-2.1-or-later 188 | LicenseInfoInFile: NONE 189 | 190 | FileName: ./mex/cpfloat_compile_nomake.m 191 | SPDXID: SPDXRef-1-20 192 | FileType: SOURCE 193 | FileChecksum: SHA1: bda8544e616b10b9062187facb703db1ea507ae6 194 | FileChecksum: MD5: 222dd89700f55d7e2e472edff1ef9f51 
195 | LicenseConcluded: LGPL-2.1-or-later 196 | LicenseInfoInFile: LGPL-2.1-or-later 197 | 198 | FileName: ./mex/cpfloat_autotune.m 199 | SPDXID: SPDXRef-1-21 200 | FileType: SOURCE 201 | FileChecksum: SHA1: bb687d02ee15d94776b7d381fc906ced380df785 202 | FileChecksum: MD5: c05a7f8d328b78ac514f1098c0f62d3d 203 | LicenseConcluded: LGPL-2.1-or-later 204 | LicenseInfoInFile: LGPL-2.1-or-laterLGPL-2.1-or-later 205 | 206 | FileName: ./mex/cpfloat_compile.m 207 | SPDXID: SPDXRef-1-22 208 | FileType: SOURCE 209 | FileChecksum: SHA1: c95b613db79fd5cbf87ea400940487bdee3ba618 210 | FileChecksum: MD5: 426237158bd953e9cb686686e3c18129 211 | LicenseConcluded: LGPL-2.1-or-later 212 | LicenseInfoInFile: LGPL-2.1-or-later 213 | 214 | FileName: ./mex/cpfloat.m 215 | SPDXID: SPDXRef-1-23 216 | FileType: SOURCE 217 | FileChecksum: SHA1: fff373eccbb247b46122a789ce322866040471e9 218 | FileChecksum: MD5: ac5180f0c0af963a7943d852303e9d39 219 | LicenseConcluded: LGPL-2.1-or-later 220 | LicenseInfoInFile: LGPL-2.1-or-later 221 | 222 | FileName: ./mex/cpfloat.c 223 | SPDXID: SPDXRef-1-24 224 | FileType: SOURCE 225 | FileChecksum: SHA1: dbece5f24974ceeb3bfeb81ebbd975b469665b16 226 | FileChecksum: MD5: f99f0653546b1e3d5c00ea0321a13049 227 | LicenseConcluded: LGPL-2.1-or-later 228 | LicenseInfoInFile: LGPL-2.1-or-later 229 | 230 | FileName: ./util/generate_spdx.sh 231 | SPDXID: SPDXRef-1-25 232 | FileChecksum: SHA1: 021746898641abec17280530c3f26434038d2fd7 233 | FileChecksum: MD5: 605929ccf1b08e259e29dff8fbd1f067 234 | LicenseConcluded: LGPL-2.1-or-later 235 | LicenseInfoInFile: $file 236 | -------------------------------------------------------------------------------- /mex/cpfloat.c: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "mex.h" 11 | 
#include "cpfloat_binary32.h"
#include "cpfloat_binary64.h"

/* Options structure kept in persistent memory across calls to the MEX file. */
static optstruct *fpopts;

/* Exit handler registered with mexAtExit(): release the persistent options. */
void clearfpopts() {
  if (fpopts != NULL) {
    mxFree(fpopts);
    fpopts = NULL; /* Do not leave a dangling pointer behind. */
  }
}

/* Parameters of a predefined target format, keyed by one of its names. */
typedef struct {
  const char *name;
  int precision;
  int emin;
  int emax;
  bool inf_no_default;   /* "infinity" defaults to CPFLOAT_INF_NO. */
  bool subn_rnd_default; /* "subnormal" defaults to CPFLOAT_SUBN_RND. */
} cpfloat_mex_format;

static const cpfloat_mex_format cpfloat_mex_formats[] = {
  {"q43",             4,    -6,    8, true,  false},
  {"fp8-e4m3",        4,    -6,    8, true,  false},
  {"E4M3",            4,    -6,    8, true,  false},
  {"q52",             3,   -14,   15, false, false},
  {"fp8-e5m2",        3,   -14,   15, false, false},
  {"E5M2",            3,   -14,   15, false, false},
  {"b",               8,  -126,  127, false, true},
  {"bfloat16",        8,  -126,  127, false, true},
  {"bf16",            8,  -126,  127, false, true},
  {"h",              11,   -14,   15, false, false},
  {"half",           11,   -14,   15, false, false},
  {"binary16",       11,   -14,   15, false, false},
  {"fp16",           11,   -14,   15, false, false},
  {"t",              11,  -126,  127, false, false},
  {"TensorFloat-32", 11,  -126,  127, false, false},
  {"tf32",           11,  -126,  127, false, false},
  {"s",              24,  -126,  127, false, false},
  {"single",         24,  -126,  127, false, false},
  {"binary32",       24,  -126,  127, false, false},
  {"fp32",           24,  -126,  127, false, false},
  {"d",              53, -1022, 1023, false, false},
  {"double",         53, -1022, 1023, false, false},
  {"binary64",       53, -1022, 1023, false, false},
  {"fp64",           53, -1022, 1023, false, false}
};

/* Return the predefined format called `name`, or NULL if there is none. */
static const cpfloat_mex_format *find_format(const char *name) {
  const size_t count = sizeof(cpfloat_mex_formats)
    / sizeof(cpfloat_mex_formats[0]);
  for (size_t i = 0; i < count; i++)
    if (!strcmp(name, cpfloat_mex_formats[i].name))
      return &cpfloat_mex_formats[i];
  return NULL;
}

/* Return true if `name` selects a user-defined format. */
static bool is_custom_format(const char *name) {
  return !strcmp(name, "c") || !strcmp(name, "custom");
}

/* Return true if `name` is one of the names of binary64. */
static bool is_binary64_format(const char *name) {
  return !strcmp(name, "d") || !strcmp(name, "double") ||
    !strcmp(name, "binary64") || !strcmp(name, "fp64");
}

/*
 * Read the scalar field `name` of the struct `opts` into *value.
 *
 * An empty 0-by-0 field stores `empty_default` (for compatibility with chop);
 * a nonempty field of class double stores its first element; any other field
 * leaves *value untouched. Returns false if the field is absent from `opts`,
 * and true otherwise.
 */
static bool read_double_field(const mxArray *opts, const char *name,
                              double empty_default, double *value) {
  const mxArray *field = mxGetField(opts, 0, name);
  if (field == NULL)
    return false;
  if (mxGetM(field) == 0 && mxGetN(field) == 0)
    *value = empty_default;
  else if (mxGetClassID(field) == mxDOUBLE_CLASS
           && mxGetNumberOfElements(field) > 0) /* Guard e.g. zeros(1,0). */
    *value = *((double *)mxGetData(field));
  return true;
}

/*
 * Update the persistent options with the fields of the nonempty struct `opts`.
 * Raises a MEX error if the format or the custom parameters are invalid.
 */
static void parse_options(const mxArray *opts) {
  double value;

  mxArray *field = mxGetField(opts, 0, "format");
  if (field != NULL) {
    if (mxGetM(field) == 0 && mxGetN(field) == 0) {
      /* Set default format, for compatibility with chop. */
      strcpy(fpopts->format, "h");
    } else if (mxGetClassID(field) == mxCHAR_CLASS) {
      /* mxArrayToString() allocates: bound the copy and free the buffer. */
      char *name = mxArrayToString(field);
      if (name == NULL || strlen(name) >= sizeof(fpopts->format)) {
        if (name != NULL)
          mxFree(name);
        mexErrMsgIdAndTxt("cpfloat:invalidformat",
                          "Invalid floating-point format specified.");
      }
      strcpy(fpopts->format, name);
      mxFree(name);
    }
  }

  const bool custom = is_custom_format(fpopts->format);
  const mxArray *params = mxGetField(opts, 0, "params");
  if (params != NULL && !custom)
    mexWarnMsgIdAndTxt("cpfloat:ignoredparams",
                       "Floating-point parameters ignored.");

  /* Populate precision, emin, and emax according to fpopts->format. */
  bool is_subn_rnd_default = false;
  bool is_inf_no_default = false;
  const cpfloat_mex_format *format = find_format(fpopts->format);
  if (format != NULL) {
    fpopts->precision = format->precision;
    fpopts->emin = format->emin;
    fpopts->emax = format->emax;
    is_inf_no_default = format->inf_no_default;
    is_subn_rnd_default = format->subn_rnd_default;
  } else if (custom) {
    /* All three of [precision, emin, emax] must be there to be read. */
    if (params != NULL && mxGetClassID(params) == mxDOUBLE_CLASS
        && mxGetNumberOfElements(params) >= 3) {
      const double *values = (const double *)mxGetData(params);
      fpopts->precision = values[0];
      fpopts->emin = values[1];
      fpopts->emax = values[2];
    } else {
      mexErrMsgIdAndTxt("cpfloat:invalidparams",
                        "Invalid floating-point parameters specified.");
    }
  } else {
    mexErrMsgIdAndTxt("cpfloat:invalidformat",
                      "Invalid floating-point format specified.");
  }

  /* Set default values to be compatible with MATLAB chop. */
  value = fpopts->explim;
  read_double_field(opts, "explim", 1, &value);
  fpopts->explim = value;

  value = fpopts->infinity;
  if (!read_double_field(opts, "infinity", CPFLOAT_INF_USE, &value)
      && is_inf_no_default)
    value = CPFLOAT_INF_NO; /* Default for E4M3. */
  fpopts->infinity = value;

  value = fpopts->round;
  read_double_field(opts, "round", CPFLOAT_RND_NE, &value);
  fpopts->round = value;

  value = fpopts->saturation;
  read_double_field(opts, "saturation", CPFLOAT_SAT_NO, &value);
  fpopts->saturation = value;

  value = fpopts->subnormal;
  if (!read_double_field(opts, "subnormal", CPFLOAT_SUBN_USE, &value))
    value = is_subn_rnd_default
      ? CPFLOAT_SUBN_RND   /* Default for bfloat16. */
      : CPFLOAT_SUBN_USE;
  fpopts->subnormal = value;

  value = fpopts->flip;
  read_double_field(opts, "flip", CPFLOAT_SOFTERR_NO, &value);
  fpopts->flip = value;

  value = fpopts->p;
  read_double_field(opts, "p", 0.5, &value);
  fpopts->p = value;
}

/********************
 * GATEWAY FUNCTION *
 ********************/
/*
 * [Y, OPTIONS] = cpfloat(X, FPOPTS, ALGORITHM)
 *
 * prhs[0]  real 2D array of class single or double to be rounded (optional);
 * prhs[1]  struct whose fields update the persistent options (optional);
 * prhs[2]  integer-valued real scalar selecting the implementation and the
 *          number of OpenMP threads (optional, undocumented);
 * plhs[0]  rounded array, of the same size and class as prhs[0];
 * plhs[1]  struct describing the options currently in use.
 */
void mexFunction(int nlhs,
                 mxArray *plhs[],
                 int nrhs,
                 const mxArray *prhs[]) {

  /* Check for correct number of arguments, before doing any work. */
  if (nrhs > 3) {
    mexErrMsgIdAndTxt("cpfloat:nrhs",
                      "Chopfast requires at most three input arguments");
  }
  if (nlhs > 2)
    mexErrMsgIdAndTxt("cpfloat:invalidnargout",
                      "This function returns at most two values.");

  /* Allocate fpopts struct and set fields to default. */
  if (fpopts == NULL) {
    fpopts = mxCalloc(1, sizeof(optstruct));
    mexMakeMemoryPersistent(fpopts);
    mexAtExit(clearfpopts);

    strcpy(fpopts->format, "h");
    fpopts->precision = 11;
    fpopts->emin = -14;
    fpopts->emax = 15;
    fpopts->explim = CPFLOAT_EXPRANGE_TARG;
    fpopts->infinity = CPFLOAT_INF_USE;
    fpopts->round = CPFLOAT_RND_NE;
    fpopts->saturation = CPFLOAT_SAT_NO;
    fpopts->subnormal = CPFLOAT_SUBN_USE;

    fpopts->flip = CPFLOAT_SOFTERR_NO;
    fpopts->p = 0.5;

    fpopts->bitseed = NULL;
    fpopts->randseedf = NULL;
    fpopts->randseed = NULL;
  }

  /* Parse second argument and populate fpopts structure. */
  if (nrhs > 1) {
    if (!mxIsEmpty(prhs[1]) && !mxIsStruct(prhs[1])) {
      mexErrMsgIdAndTxt("cpfloat:invalidstruct",
                        "Second argument must be a struct.");
    } else if (!mxIsEmpty(prhs[1])) {
      parse_options(prhs[1]);
    }
  }

  /* UNDOCUMENTED FEATURE: force number of OpenMP threads.
   * If algorithm = 0, do not specify how many threads to use.
   * If algorithm > 0, use cpfloat() with specified number of threads.
   * If algorithm < 0, use cpfloat_parallel() with specified number of threads.
   */
  int algorithm = 0;
  if (nrhs > 2) {
    /* Validate class and size before touching the data. */
    if (!mxIsDouble(prhs[2]) || mxIsComplex(prhs[2])
        || mxGetM(prhs[2]) != 1 || mxGetN(prhs[2]) != 1)
      mexErrMsgIdAndTxt("cpfloat:invalidalgorithm",
                        "Third parameter must be an integer.");
    const double requested = *((double *)mxGetData(prhs[2]));
    if (requested != round(requested))
      mexErrMsgIdAndTxt("cpfloat:invalidalgorithm",
                        "Third parameter must be an integer.");
    algorithm = (int)requested;
  }

  /* Parse first argument. */
  if (nrhs > 0) {
    if (!mxIsNumeric(prhs[0])
        || (!mxIsDouble(prhs[0]) && !mxIsSingle(prhs[0]))
        || mxIsComplex(prhs[0])
        || (mxGetNumberOfDimensions(prhs[0]) != 2)) {
      mexErrMsgIdAndTxt("cpfloat:invalidmatrix",
                        "First argument must be a 2D real numeric array.");
    }

    /* Limits of the storage format. Signed, as the exponents are negative. */
    int maxfbits, minexp, maxexp;
    if (mxIsSingle(prhs[0])) {
      if (is_binary64_format(fpopts->format))
        mexErrMsgIdAndTxt("cpfloat:invalidformat",
                          "Target format is too large.");
      maxfbits = fpopts->round<=1 ? 11 : 23;
      minexp = -126;
      maxexp = 127;
    } else {
      maxfbits = fpopts->round<=1 ? 25 : 52;
      minexp = -1022;
      maxexp = 1023;
    }
    if ((fpopts->precision > maxfbits || fpopts->emin < minexp
         || fpopts->emax > maxexp)
        && is_custom_format(fpopts->format))
      mexErrMsgIdAndTxt("cpfloat:invalidparams",
                        "Invalid floating-point parameters selected.");

    /* Allocate and compute first output. */
    mwSize m, n;
    m = mxGetM(prhs[0]);
    n = mxGetN(prhs[0]);
    mwSize dims[2];
    dims[0] = m;
    dims[1] = n;

    if (mxGetClassID(prhs[0]) == mxDOUBLE_CLASS) {
      double *A = (double *)mxGetData(prhs[0]);
      plhs[0] = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);
      double *X = (double *)mxGetData(plhs[0]);
#ifdef _OPENMP
      if (algorithm == 0) {
        cpfloat(X, A, m*n, fpopts);
      } else if (algorithm == 1) {
        cpfloat_sequential(X, A, m*n, fpopts);
      } else if (algorithm > 0) {
        omp_set_num_threads(algorithm);
        cpfloat(X, A, m*n, fpopts);
      } else {
        omp_set_num_threads(-algorithm);
        cpfloat_parallel(X, A, m*n, fpopts);
      }
#else
      cpfloat(X, A, m*n, fpopts);
#endif
    } else if (mxGetClassID(prhs[0]) == mxSINGLE_CLASS) {
      float *A = (float *)mxGetData(prhs[0]);
      plhs[0] = mxCreateNumericArray(2, dims, mxSINGLE_CLASS, mxREAL);
      float *X = (float *)mxGetData(plhs[0]);
#ifdef _OPENMP
      if (algorithm == 0) {
        cpfloatf(X, A, m*n, fpopts);
      } else if (algorithm == 1) {
        cpfloatf_sequential(X, A, m*n, fpopts);
      } else if (algorithm > 0) {
        omp_set_num_threads(algorithm);
        cpfloatf(X, A, m*n, fpopts);
      } else {
        omp_set_num_threads(-algorithm);
        cpfloatf_parallel(X, A, m*n, fpopts);
      }
#else
      cpfloatf(X, A, m*n, fpopts);
#endif
    } else {
      mexErrMsgIdAndTxt("cpfloat:invalidmatrix",
                        "First argument must be a numeric array.");
    }
  } else {
    /* No input to round: return an empty 0-by-0 double array. */
    mwSize dims[2];
    dims[0] = 0;
    dims[1] = 0;
    plhs[0] = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);
  }

  /* Allocate and return second output. */
  if (nlhs > 1) {
    const char* field_names[] = {"format", "params", "explim", "infinity",
                                 "round", "saturation", "subnormal",
                                 "flip", "p"};
    mwSize dims[2] = {1, 1};
    plhs[1] = mxCreateStructArray(2, dims, 9, field_names);
    mxSetFieldByNumber(plhs[1], 0, 0, mxCreateString(fpopts->format));

    mxArray *outparams = mxCreateDoubleMatrix(1, 3, mxREAL);
    double *outparamsptr = mxGetData(outparams);
    outparamsptr[0] = fpopts->precision;
    outparamsptr[1] = fpopts->emin;
    outparamsptr[2] = fpopts->emax;
    mxSetFieldByNumber(plhs[1], 0, 1, outparams);

    /* The remaining fields are real scalars, in the order of field_names. */
    mxSetFieldByNumber(plhs[1], 0, 2, mxCreateDoubleScalar(fpopts->explim));
    mxSetFieldByNumber(plhs[1], 0, 3, mxCreateDoubleScalar(fpopts->infinity));
    mxSetFieldByNumber(plhs[1], 0, 4, mxCreateDoubleScalar(fpopts->round));
    mxSetFieldByNumber(plhs[1], 0, 5,
                       mxCreateDoubleScalar(fpopts->saturation));
    mxSetFieldByNumber(plhs[1], 0, 6, mxCreateDoubleScalar(fpopts->subnormal));
    mxSetFieldByNumber(plhs[1], 0, 7, mxCreateDoubleScalar(fpopts->flip));
    mxSetFieldByNumber(plhs[1], 0, 8, mxCreateDoubleScalar(fpopts->p));
  }
}

/*
 * CPFloat - Custom Precision Floating-point numbers.
 *
 * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License along
 * with this library; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

--------------------------------------------------------------------------------
/mex/cpfloat.m:
--------------------------------------------------------------------------------
1 | %CPFLOAT Round floating point numbers to lower precision.
2 | % [Y,OPTIONS] = CPFLOAT(X,FPOPTS) returns a matrix Y containing the elements
3 | % of X rounded to a lower-precision floating-point format (the target format).
4 | % The function can be used to simulate the occurrence of soft errors in the
5 | % rounded values. X must be a real matrix with entries of class 'single' or
6 | % 'double' (the storage format), and the output matrix Y will be a real matrix
7 | % of the same size with entries of the same class.
The parameters that 8 | % describe the target format, the rounding mode, and the likelihood of soft 9 | % errors are stored by the function in persistent memory, and are preserved 10 | % across multiple calls to CPFLOAT. The internal configuration can be modified 11 | % by means of the structure FPOPTS, whose fields are discussed in detail 12 | % below. The parameters for which a new configuration value is not specified 13 | % take the default value on the first invocation of CPFLOAT, and keep their 14 | % previous values on subsequent calls. The parameters of the current 15 | % configuration are returned in the second output argument OPTIONS, a 16 | % structure with the same fields as FPOPTS. 17 | % 18 | % The fields of FPOPTS are interpreted as follows. 19 | % 20 | % * The string FPOPTS.format specifies the target floating-point format. 21 | % Possible values are: 22 | % 'q43', 'fp8-e4m3', 'E4M3' for OCP specification E4M3; 23 | % 'q52', 'fp8-e5m2', 'E5M2' for OCP specification E5M2; 24 | % 'b', 'bf16', 'bfloat16' for Intel bfloat16; 25 | % 'h', 'fp16', 'binary16', 'half' for IEEE binary16 (half precision); 26 | % 't', 'tf32', 'TensorFloat-32' for NVIDIA TensorFloat-32; 27 | % 's', 'fp32', 'binary32', 'single' for IEEE binary32 (single precision); 28 | % 'd', 'fp64', 'binary64', 'double' for IEEE binary64 (double precision); 29 | % 'c', 'custom' for a custom-precision format. 30 | % In order to use a custom format, the parameters of the floating-point 31 | % format must be supplied using the FPOPTS.params field. The default value 32 | % of FPOPTS.format is 'h'. 33 | % 34 | % * The three-element vector FPOPTS.params specifies the parameters of the 35 | % target floating-point format, and is ignored unless FPOPTS.format is set 36 | % to either 'c' or 'custom'.
The vector has the form [PRECISION,EMIN,EMAX], 37 | % where PRECISION, EMIN and EMAX are integers representing the number 38 | % of binary digits in the significand, the minimum exponent, and the maximum 39 | % exponent of the target format, respectively. The default value is 40 | % the vector [11,-14,15]. 41 | % 42 | % * The scalar FPOPTS.explim specifies the support for an extended exponent 43 | % range. The target floating-point format will have the exponent range of 44 | % the storage format ('single' or 'double', depending on the class of X) if 45 | % this field is set to 0, and the exponent range of the format specified in 46 | % FPOPTS.format otherwise. The default value for this field is 1. 47 | % 48 | % * The scalar FPOPTS.infinity specifies whether infinities are supported. The 49 | % target floating-point format will support infinities if this field is set 50 | % to 1, and they will be replaced by NaNs otherwise. The default value for 51 | % this field is 0 if the target format is 'E4M3' and 1 otherwise. 52 | % 53 | % * The scalar FPOPTS.round specifies the rounding mode. Possible values are: 54 | % -1 for round-to-nearest with ties-to-away; 55 | % 0 for round-to-nearest with ties-to-zero; 56 | % 1 for round-to-nearest with ties-to-even; 57 | % 2 for round-toward-plus-infinity; 58 | % 3 for round-toward-minus-infinity; 59 | % 4 for round-toward-zero; 60 | % 5 for round-stochastic with proportional probabilities; 61 | % 6 for round-stochastic with equal probabilities; and 62 | % 7 for round-to-odd. 63 | % Any other value results in no rounding. The default value for this field 64 | % is 1. 65 | % 66 | % * The scalar FPOPTS.saturation specifies whether saturation arithmetic is in 67 | % use. On overflow, the target floating-point format will use the largest 68 | % representable floating-point if this field is set to 0, and infinity 69 | % otherwise. The default value for this field is 0.
70 | % 71 | % * The scalar FPOPTS.subnormal specifies the support for subnormal numbers. 72 | % The target floating-point format will not support subnormal numbers if 73 | % this field is set to 0, and will support them otherwise. The default value 74 | % for this field is 0 if the target format is 'bfloat16' and 1 otherwise. 75 | % 76 | % * The scalar FPOPTS.flip specifies whether the function should simulate the 77 | % occurrence of a single bit flip striking the floating-point representation 78 | % of elements of Y. Possible values are: 79 | % 0 no bit flips 80 | % 1 bit flips can occur in fraction of target-format representation 81 | % 2 bit flips can occur in any bit of target-format representation 82 | % The probability of a bit flip occurring in any element of Y is FPOPTS.p. 83 | % If the exponent range of the storage format is larger than that of the 84 | % target format, then subnormal numbers might be stored as normal numbers, 85 | % in which case the bit flip cannot strike the leading bit of the 86 | % representation. The default value for this field is 0. 87 | % 88 | % * The scalar FPOPTS.p specifies the probability of bit flips. If FPOPTS.flip 89 | % is not set to zero, then the value of this field must be a valid 90 | % probability, that is, a real number in the interval [0,1]. The default 91 | % value for this field is 0.5. 92 | % 93 | % The interface of CPFLOAT is mostly compatible with that of the MATLAB 94 | % function CHOP available at https://github.com/higham/chop. See 95 | % https://github.com/north-numerical-computing/cpfloat/blob/main/README.md 96 | % for an up-to-date list of differences. 97 | 98 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis 99 | % SPDX-License-Identifier: LGPL-2.1-or-later 100 | 101 | % CPFloat - Custom Precision Floating-point numbers. 
102 | %
103 | % Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
104 | %
105 | % This library is free software; you can redistribute it and/or modify it under
106 | % the terms of the GNU Lesser General Public License as published by the Free
107 | % Software Foundation; either version 2.1 of the License, or (at your option)
108 | % any later version.
109 | %
110 | % This library is distributed in the hope that it will be useful, but WITHOUT
111 | % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
112 | % FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
113 | % details.
114 | %
115 | % You should have received a copy of the GNU Lesser General Public License along
116 | % with this library; if not, write to the Free Software Foundation, Inc., 51
117 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
118 |
--------------------------------------------------------------------------------
/mex/cpfloat_autotune.m:
--------------------------------------------------------------------------------
% SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis
% SPDX-License-Identifier: LGPL-2.1-or-later

function cpfloat_autotune(varargin)
%CPFLOAT_AUTOTUNE Autotune MEX interface to the CPFloat Library.
%   CPFLOAT_AUTOTUNE() runs the function CPFLOAT with inputs of class 'single'
%   and 'double' and computes the size at which switching from the sequential to
%   the parallel implementation becomes beneficial. The function generates the
%   two files cpfloat_threshold_binary32.h and cpfloat_threshold_binary64.h in
%   the current working directory.
%
%   CPFLOAT_AUTOTUNE('cpfloatdir',CPFLOATDIR) places the output files in the
%   folder CPFLOATDIR instead of the current working directory. CPFLOATDIR must
%   be an existing folder.
%
%   CPFLOAT_AUTOTUNE('nthreads',NTHREADS) times the parallel implementation
%   using NTHREADS OpenMP threads. NTHREADS must be an integer-valued scalar.

  fpopts.format = 'h';
  fpopts.subnormal = 1;
  fpopts.round = 1;
  fpopts.flip = 0;
  fpopts.p = 0.5;
  fpopts.explim = 1;

  p = inputParser;
  addParameter(p, 'cpfloatdir', './', @ischar);
  if exist('maxNumCompThreads', 'builtin')
    addParameter(p, 'nthreads', maxNumCompThreads(), ...
                 @(x)(isscalar(x) && round(x) == x));
  else
    pkg load parallel
    addParameter(p, 'nthreads', parcellfun_set_nproc(Inf), ...
                 @(x)(isscalar(x) && round(x) == x));
  end
  parse(p,varargin{:});
  cpfloatdir = p.Results.cpfloatdir;
  nthreads = p.Results.nthreads;

  % Number of repetitions per size; only used by the tic/toc fallback.
  ntests = 100;

  fprintf('Test using %d OpenMP threads.\n', nthreads);
  if exist('timeit', 'builtin')
    parfaster = @(n, fpopts, ntests, fpclass)...
        parfaster_timeit(n, fpopts, ntests, nthreads, fpclass);
  else
    parfaster = @(n, fpopts, ntests, fpclass)...
        parfaster_tictoc(n, fpopts, ntests, nthreads, fpclass);
  end

  % Template of the generated header; the placeholders are filled by fprintf.
  docstring =[
      '/* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */\n',...
      '/* SPDX-License-Identifier: LGPL-2.1-or-later */\n',...
      '\n',...
      '/**\n',...
      ' * @file %s_threshold_%s.h\n',...
      ' * @brief Size of smallest `%s` array on which to use',...
      ' multiple OpenMP threads.\n',...
      ' */\n',...
      '\n',...
      '/**\n',...
      ' * @brief Size of smallest array on which %s() uses multiple threads.\n',...
      ' *\n',...
      ' * @details Threshold for switching between %s_sequential() and\n',...
      ' * %s_parallel() in %s(). The value of this constant is ignored\n',...
      ' * if the file that includes cpfloat_%s.h is compiled without OpenMP\n',...
      ' * support.\n',...
      ' */\n'];

  % Binary32
  fpclass = 'single';
  nmin = 1;
  nmax = 1;
  % Double the size until the parallel version wins...
  while(~parfaster(nmax, fpopts, ntests, fpclass))
    nmax = nmax * 2;
  end
  % ...then bisect [nmin, nmax] to locate the crossover.
  nmid = round((nmax + nmin) / 2);
  while(nmid ~= nmin && nmid ~= nmax)
    if(parfaster(nmid, fpopts, ntests, fpclass))
      nmax = nmid;
    else
      nmin = nmid;
    end
    nmid = round((nmax + nmin) / 2);
  end
  filename = sprintf('%s/cpfloat_threshold_binary32.h', cpfloatdir);
  fid = fopen(filename, 'w');
  if fid == -1
    error('cpfloat:autotune:fopen', 'Cannot open %s for writing.', filename);
  end
  fprintf(fid, docstring, 'cpfloat', 'binary32', 'float',...
          'cpfloatf', 'cpfloatf', 'cpfloatf', 'cpfloatf', 'binary32');
  fprintf(fid, "#define OPENMP_THRESHOLD_float %d", nmax);
  fclose(fid);

  % Binary64
  % Both searches below must time inputs of class 'double'.
  fpclass = 'double';
  nmin = 1;
  nmax = 1;
  while(~parfaster(nmax, fpopts, ntests, fpclass))
    nmax = nmax * 2;
  end
  nmid = round((nmax + nmin) / 2);
  while(nmid ~= nmin && nmid ~= nmax)
    if(parfaster(nmid, fpopts, ntests, fpclass))
      nmax = nmid;
    else
      nmin = nmid;
    end
    nmid = round((nmax + nmin) / 2);
  end
  filename = sprintf('%s/cpfloat_threshold_binary64.h', cpfloatdir);
  fid = fopen(filename, 'w');
  if fid == -1
    error('cpfloat:autotune:fopen', 'Cannot open %s for writing.', filename);
  end
  fprintf(fid, docstring, 'cpfloat', 'binary64', 'double',...
          'cpfloat', 'cpfloat', 'cpfloat', 'cpfloat', 'binary64');
  fprintf(fid, "#define OPENMP_THRESHOLD_double %d", nmax);
  fclose(fid);

  function res = parfaster_timeit(n, fpopts, ~, nthreads, fpclass)
  %PARFASTER_TIMEIT True if, according to timeit, rounding a random n-by-1
  %   array of class FPCLASS is faster with the parallel implementation on
  %   NTHREADS threads (third argument -NTHREADS) than with the sequential
  %   one (third argument 1).
    X = rand(n, 1, fpclass);
    funseq = @()(cpfloat(X, fpopts, 1));
    seqtime = timeit(funseq);
    funseq = @()(cpfloat(X, fpopts, -nthreads));
    partime = timeit(funseq);
    res = partime < seqtime;
    fprintf('[%7d] %.5e %.5e\n', n, seqtime, partime);
  end

  function res = parfaster_tictoc(n, fpopts, ntests, nthreads, fpclass)
  %PARFASTER_TICTOC Same comparison as PARFASTER_TIMEIT, but based on the
  %   median of NTESTS tic/toc measurements of each implementation.
    X = rand(n, 1, fpclass);
    seqtimings = zeros(1, ntests);
    partimings = zeros(1, ntests);
    for i = 1:ntests
      tic;
      Y = cpfloat(X, fpopts, 1);
      seqtimings(i) = toc();
      tic;
      Y = cpfloat(X, fpopts, -nthreads);
      partimings(i) = toc();
    end
    seqtime = median(seqtimings);
    partime = median(partimings);
    res = partime < seqtime;
    fprintf('[%7d] %.5e %.5e\n', n, seqtime, partime);
  end
end

% CPFloat - Custom Precision Floating-point numbers.
%
% Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
%
% This library is free software; you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation; either version 2.1 of the License, or (at your option)
% any later version.
%
% This library is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
% details.
154 | % 155 | % You should have received a copy of the GNU Lesser General Public License along 156 | % with this library; if not, write to the Free Software Foundation, Inc., 51 157 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 158 | -------------------------------------------------------------------------------- /mex/cpfloat_compile.m: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis 2 | % SPDX-License-Identifier: LGPL-2.1-or-later 3 | 4 | function retval = cpfloat_compile(varargin) 5 | %CPFLOAT_COMPILE Compile MEX interface to the CPFloat Library. 6 | % CPFLOAT_COMPILE() compiles the MEX function cpfloat using the default C 7 | % compiler. The function expects all the header files of the CPFloat Library 8 | % as well as the file pcg_variants.h from the PCG Library to be in the current 9 | % working directory. The function attempts to use the OpenMP library, if 10 | % available. 11 | % 12 | % CPFLOAT_COMPILE('cpfloatdir',CPFLOATDIR) looks for the header files of the 13 | % CPFloat Library in CPFLOATDIR rather than in the current working directory. 14 | % 15 | % CPFLOAT_COMPILE('pcgpath',PCGPATH) sets the root directory of the PCG 16 | % random number generator to PCGPATH instead of ./pcg-c/. 17 | % 18 | % CPFLOAT_COMPILE('pcgvariants',PCGVARIANTS) specifies that the path of the 19 | % header file pcg_variants.h is PCGVARIANTS. The default value is 20 | % PCGPATH/include/pcg_variants.h. 21 | % 22 | % CPFLOAT_COMPILE('pcglib',PCGLIB) specifies that the path of the library 23 | % libpcg_random.a is PCGLIB. The default value is PCGPATH/src/libpcg_random.a. 24 | % 25 | % CPFLOAT_COMPILE('compilerpath',COMPILERPATH) uses the compiler COMPILERPATH 26 | % instead of the default C compiler.
27 | % The output RETVAL is false if the OpenMP build failed and was retried without OpenMP, and true otherwise. 28 | retval = true; 29 | 30 | p = inputParser; 31 | addParameter(p, 'cpfloatdir', '', @ischar); 32 | addParameter(p, 'pcgpath', './pcg-c/', @ischar); 33 | addParameter(p, 'pcgvariants', '', @ischar); 34 | addParameter(p, 'pcglib', '', @ischar); 35 | addParameter(p, 'compilerpath', '', @ischar); 36 | parse(p,varargin{:}); 37 | cpfloatdir = p.Results.cpfloatdir; 38 | pcgpath = p.Results.pcgpath; 39 | pcgvariants = p.Results.pcgvariants; 40 | pcglib = p.Results.pcglib; 41 | compilerpath = p.Results.compilerpath; 42 | 43 | coptions = '-std=gnu99 -O3 -march=native'; 44 | 45 | % Try to find the PCG library. 46 | if (isempty(pcgvariants)) 47 | pcgvariants = sprintf('%s/include/pcg_variants.h', pcgpath); 48 | end 49 | if (isempty(pcglib)) 50 | pcglib = sprintf('%s/src/libpcg_random.a', pcgpath); 51 | end 52 | if exist(pcgvariants, 'file') && exist(pcglib, 'file') 53 | coptions = sprintf('%s -include%s', coptions, pcgvariants); 54 | clibs = sprintf('-L%s/', fileparts(pcglib)); 55 | else 56 | pcglib = ''; 57 | clibs = ''; 58 | end 59 | 60 | usingoctave = exist('OCTAVE_VERSION', 'builtin'); 61 | if usingoctave 62 | if ~isempty(compilerpath) 63 | setenv("CC", compilerpath); 64 | setenv("CXX", compilerpath); 65 | setenv("DL_LD", compilerpath); 66 | end 67 | if ~isempty(cpfloatdir) 68 | coptions = sprintf('%s -I%s', coptions, cpfloatdir); 69 | end 70 | setenv("CFLAGS", sprintf("-fopenmp %s", coptions)); 71 | libpath = deblank(evalc('mkoctfile --print OCTLIBDIR')); 72 | setenv("LDFLAGS", sprintf("-fopenmp %s -L%s", clibs, libpath)); 73 | if isempty(pcglib) 74 | [output, status] = mkoctfile('cpfloat.c', '--mex', '--verbose'); 75 | else 76 | [output, status] = mkoctfile('cpfloat.c', pcglib, '--mex', '--verbose'); 77 | end 78 | if status ~= 0 79 | warning('Compilation error, trying to compile without OpenMP.'); 80 | retval = false; 81 | setenv("CFLAGS", coptions); 82 | setenv("LDFLAGS", sprintf("%s -L%s", clibs, libpath)); 83 | if isempty(pcglib) 84 | [output, status] =
mkoctfile('cpfloat.c', '--mex', '--verbose'); 85 | else 86 | [output, status] = mkoctfile('cpfloat.c', pcglib, '--mex', '--verbose'); 87 | end 88 | end 89 | else 90 | if ~isempty(cpfloatdir) 91 | include_dir = sprintf('-I%s', cpfloatdir); 92 | else 93 | include_dir = ''; 94 | end 95 | if isempty(compilerpath) 96 | compiler_string = ''; 97 | else 98 | compiler_string = ['CC="' compilerpath '"']; 99 | end 100 | try 101 | mex('cpfloat.c', pcglib, '-silent',... 102 | compiler_string, include_dir,... 103 | [sprintf('CFLAGS=$CFLAGS %s -fopenmp ', coptions)],... 104 | [sprintf('LDFLAGS=$LDFLAGS %s -fopenmp ', clibs)]); 105 | catch 106 | warning('Compilation error, trying to compile without OpenMP.'); 107 | retval = false; 108 | mex('cpfloat.c', pcglib, '-silent',... 109 | compiler_string, include_dir,... 110 | [sprintf('CFLAGS=$CFLAGS %s ', coptions)],... 111 | [sprintf('LDFLAGS=$LDFLAGS %s ', clibs)]); 112 | end 113 | end 114 | end 115 | 116 | % CPFloat - Custom Precision Floating-point numbers. 117 | % 118 | % Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 119 | % 120 | % This library is free software; you can redistribute it and/or modify it under 121 | % the terms of the GNU Lesser General Public License as published by the Free 122 | % Software Foundation; either version 2.1 of the License, or (at your option) 123 | % any later version. 124 | % 125 | % This library is distributed in the hope that it will be useful, but WITHOUT 126 | % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 127 | % FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 128 | % details.
129 | % 130 | % You should have received a copy of the GNU Lesser General Public License along 131 | % with this library; if not, write to the Free Software Foundation, Inc., 51 132 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 133 | -------------------------------------------------------------------------------- /mex/cpfloat_compile_nomake.m: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis 2 | % SPDX-License-Identifier: LGPL-2.1-or-later 3 | 4 | % This MATLAB/Octave script attempts to build the MEX interface to CPFloat on 5 | % systems where the make tool is not available. The code provides only minimal 6 | % functionality, but should produce a MEX file on machines where the C 7 | % build environment is configured correctly. 8 | 9 | % Absolute path of the C compiler to be used to build the MEX interface. 10 | % If the string is left empty, the default C compiler will be used. 11 | compilerpath = ''; 12 | 13 | % Absolute path of the source code of cpfloat. By default, the script 14 | % assumes that it is being run from the cpfloat/mex/ folder. 15 | cpfloat_dir = fileparts(pwd); 16 | 17 | % Compile MEX interface. 18 | cpfloat_srcdir = fullfile(cpfloat_dir, 'src'); 19 | retval = cpfloat_compile('cpfloatdir', cpfloat_srcdir,... 20 | 'compilerpath', compilerpath); 21 | 22 | % If the OpenMP build was successful, auto-tune the thresholds and rebuild. 23 | if retval 24 | cpfloat_autotune('cpfloatdir', cpfloat_srcdir); 25 | cpfloat_compile('cpfloatdir', cpfloat_srcdir,... 26 | 'compilerpath', compilerpath); 27 | end 28 | 29 | % CPFloat - Custom Precision Floating-point numbers.
30 | % 31 | % Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 32 | % 33 | % This library is free software; you can redistribute it and/or modify it under 34 | % the terms of the GNU Lesser General Public License as published by the Free 35 | % Software Foundation; either version 2.1 of the License, or (at your option) 36 | % any later version. 37 | % 38 | % This library is distributed in the hope that it will be useful, but WITHOUT 39 | % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 40 | % FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 41 | % details. 42 | % 43 | % You should have received a copy of the GNU Lesser General Public License along 44 | % with this library; if not, write to the Free Software Foundation, Inc., 51 45 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 46 | -------------------------------------------------------------------------------- /src/cpfloat_autotune.c: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | /* 5 | * This file is part of CPFloat. 6 | * 7 | * Running this program will update the threshold values in 8 | * cpfloat_threshold_binary32.h and cpfloat_threshold_binary64.h. 
9 | */ 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "cpfloat_binary32.h" 16 | #include "cpfloat_binary64.h" 17 | /* qsort() comparator: orders doubles in ascending order. */ 18 | int cmpfun(const void *x, const void *y) { 19 | if (*(const double *)x < *(const double *)y) 20 | return -1; 21 | else if (*(const double *)x > *(const double *)y) 22 | return 1; 23 | else 24 | return 0; 25 | } 26 | /* Time elapsed between *start and *end, in seconds. */ 27 | double timedifference(struct timespec *start, struct timespec *end) { 28 | return 29 | (end->tv_sec - start->tv_sec) + 30 | (double)(end->tv_nsec - start->tv_nsec) * 1e-9; 31 | } 32 | 33 | /* Times ntests sequential and parallel roundings of n random doubles; true if the median parallel time is smaller. */ 34 | bool parfaster_double(size_t n, optstruct *fpopts, 35 | struct timespec *start, struct timespec *end, 36 | double *seqtimings, double *partimings, size_t ntests) { 37 | size_t i; 38 | double *Xd = malloc(n * sizeof(*Xd)); 39 | double *Yd = malloc(n * sizeof(*Yd)); 40 | for (i = 0; i < n; i++) 41 | Xd[i] = rand() / (double)RAND_MAX; 42 | 43 | for (i = 0; i < ntests; i++) { 44 | clock_gettime(CLOCK_MONOTONIC, start); 45 | cpfloat_sequential(Yd, Xd, n, fpopts); 46 | clock_gettime(CLOCK_MONOTONIC, end); 47 | seqtimings[i] = timedifference(start, end); 48 | clock_gettime(CLOCK_MONOTONIC, start); 49 | cpfloat_parallel(Yd, Xd, n, fpopts); 50 | clock_gettime(CLOCK_MONOTONIC, end); 51 | partimings[i] = timedifference(start, end); 52 | } 53 | free(Xd); 54 | free(Yd); 55 | qsort(seqtimings, ntests, sizeof(*seqtimings), cmpfun); 56 | double seqtime = seqtimings[ntests/2]; 57 | qsort(partimings, ntests, sizeof(*partimings), cmpfun); 58 | double partime = partimings[ntests/2]; 59 | printf("[%7zu] [%.2e, %.2e, %.2e] [%.2e, %.2e, %.2e]\n", 60 | n, seqtimings[0], seqtime, seqtimings[ntests-1], 61 | partimings[0], partime, partimings[ntests-1]); 62 | return partime < seqtime ?
true : false; 63 | } 64 | /* Times ntests sequential and parallel roundings of n random floats; true if the median parallel time is smaller. */ 65 | bool parfaster_float(size_t n, optstruct *fpopts, 66 | struct timespec *start, struct timespec *end, 67 | double *seqtimings, double *partimings, size_t ntests) { 68 | size_t i; 69 | float *Xd = malloc(n * sizeof(*Xd)); 70 | float *Yd = malloc(n * sizeof(*Yd)); 71 | for (i = 0; i < n; i++) 72 | Xd[i] = rand() / (float)RAND_MAX; 73 | for (i = 0; i < ntests; i++) { 74 | clock_gettime(CLOCK_MONOTONIC, start); 75 | cpfloatf_sequential(Yd, Xd, n, fpopts); 76 | clock_gettime(CLOCK_MONOTONIC, end); 77 | seqtimings[i] = timedifference(start, end); 78 | clock_gettime(CLOCK_MONOTONIC, start); 79 | cpfloatf_parallel(Yd, Xd, n, fpopts); 80 | clock_gettime(CLOCK_MONOTONIC, end); 81 | partimings[i] = timedifference(start, end); 82 | } 83 | free(Xd); 84 | free(Yd); 85 | qsort(seqtimings, ntests, sizeof(*seqtimings), cmpfun); 86 | double seqtime = seqtimings[ntests/2]; 87 | qsort(partimings, ntests, sizeof(*partimings), cmpfun); 88 | double partime = partimings[ntests/2]; 89 | printf("[%7zu] [%.2e, %.2e, %.2e] [%.2e, %.2e, %.2e]\n", 90 | n, seqtimings[0], seqtime, seqtimings[ntests-1], 91 | partimings[0], partime, partimings[ntests-1]); 92 | return partime < seqtime; 93 | } 94 | 95 | int main(void) { 96 | 97 | /* Allocate fpopts struct and set fields to default.
*/ 98 | static optstruct *fpopts; 99 | fpopts = malloc(sizeof(optstruct)); 100 | strcpy(fpopts->format,"s"); 101 | fpopts->precision = 24; 102 | fpopts->emax = 127; 103 | fpopts->emin = -126; 104 | fpopts->subnormal = 0; 105 | fpopts->round = 1; 106 | fpopts->flip = 0; 107 | fpopts->p = 0.5; 108 | fpopts->explim = 1; 109 | 110 | size_t nmin, nmax, nmid; 111 | size_t ntests = 1000; 112 | struct timespec *start = malloc(sizeof(struct timespec)); 113 | struct timespec *end = malloc(sizeof(struct timespec)); 114 | 115 | double *seqtimings = malloc(ntests * sizeof(*seqtimings)); 116 | double *partimings = malloc(ntests * sizeof(*partimings)); 117 | 118 | int maxnumthreads = omp_get_max_threads(); 119 | printf("Test using %d OpenMP threads.\n", maxnumthreads); 120 | 121 | char docstring [] = 122 | "/* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */\n" 123 | "/* SPDX-License-Identifier: LGPL-2.1-or-later */\n" 124 | "\n/**\n" 125 | " * @file %s_threshold_%s.h\n" 126 | " * @brief Size of smallest `%s` array on which to use" 127 | " multiple OpenMP threads.\n" 128 | " */\n" 129 | "\n" 130 | "/**\n" 131 | " * @brief Size of smallest array on which %s() uses multiple threads.\n" 132 | " *\n" 133 | " * @details Threshold for switching between %s_sequential() and\n" 134 | " * %s_parallel() in %s(). 
The value of this constant is ignored\n" 135 | " * if the file that includes cpfloat_%s.h is compiled without OpenMP\n" 136 | " * support.\n" 137 | " */\n"; 138 | 139 | /* Binary32 */ 140 | nmin = 1; 141 | nmax = 1; 142 | while(!parfaster_float(nmax, fpopts, start, end, 143 | seqtimings, partimings, ntests)) 144 | nmax *= 2; 145 | nmid = (nmax + nmin) / 2; 146 | while(nmid != nmin && nmid != nmax) { 147 | if(parfaster_float(nmid, fpopts, start, end, 148 | seqtimings, partimings, ntests)) 149 | nmax = nmid; 150 | else 151 | nmin = nmid; 152 | nmid = (nmax + nmin) / 2; 153 | } 154 | printf("Optimal threshold for single is %zu.\n", nmax); 155 | const char filenamef [] = "./cpfloat_threshold_binary32.h"; 156 | FILE *fidf = fopen(filenamef, "w"); if (fidf == NULL) { perror(filenamef); return 1; } /* do not write through a NULL stream */ 157 | fprintf(fidf, docstring, "cpfloat", "binary32", "float", 158 | "cpfloatf", "cpfloatf", "cpfloatf", "cpfloatf", "binary32"); 159 | fprintf(fidf, "#define OPENMP_THRESHOLD_float %zu\n", nmax); 160 | fclose(fidf); 161 | 162 | /* Binary64 */ 163 | nmin = 1; 164 | nmax = 1; 165 | while(!parfaster_double(nmax, fpopts, start, end, 166 | seqtimings, partimings, ntests)) 167 | nmax *= 2; 168 | nmid = (nmax + nmin) / 2; 169 | while(nmid != nmin && nmid != nmax) { 170 | /* printf("[%5zu, %5zu, %5zu]\n", nmin, nmid, nmax); */ 171 | if(parfaster_double(nmid, fpopts, start, end, 172 | seqtimings, partimings, ntests)) 173 | nmax = nmid; 174 | else 175 | nmin = nmid; 176 | nmid = (nmax + nmin) / 2; 177 | } 178 | printf("Optimal threshold for double is %zu.\n", nmax); 179 | const char filenamed [] = "./cpfloat_threshold_binary64.h"; 180 | FILE *fidd = fopen(filenamed, "w"); if (fidd == NULL) { perror(filenamed); return 1; } /* do not write through a NULL stream */ 181 | fprintf(fidd, docstring, "cpfloat", "binary64", "double", 182 | "cpfloat", "cpfloat", "cpfloat", "cpfloat", "binary64"); 183 | fprintf(fidd, "#define OPENMP_THRESHOLD_double %zu\n", nmax); 184 | fclose(fidd); 185 | free(seqtimings); free(partimings); free(start); free(end); free(fpopts); /* release the buffers allocated at the top of main */ 186 | return 0; 187 | } 188 | 189 | /* 190 | * CPFloat - Custom Precision Floating-point numbers.
191 | * 192 | * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 193 | * 194 | * This library is free software; you can redistribute it and/or modify it under 195 | * the terms of the GNU Lesser General Public License as published by the Free 196 | * Software Foundation; either version 2.1 of the License, or (at your option) 197 | * any later version. 198 | * 199 | * This library is distributed in the hope that it will be useful, but WITHOUT 200 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 201 | * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 202 | * details. 203 | * 204 | * You should have received a copy of the GNU Lesser General Public License along 205 | * with this library; if not, write to the Free Software Foundation, Inc., 51 206 | * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 207 | */ 208 | -------------------------------------------------------------------------------- /src/cpfloat_binary32.h: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | /** 5 | * @file cpfloat_binary32.h 6 | * @brief CPFloat functions for `float` arrays. 7 | */ 8 | 9 | #ifndef _CPFLOAT_BINARY32_ 10 | #define _CPFLOAT_BINARY32_ 11 | 12 | #include "cpfloat_definitions.h" 13 | #include "cpfloat_docmacros.h" 14 | 15 | /* Validation of floating-point parameters (binary32 storage format). */ 16 | doc_cpfloat_validate_optstruct(float, 12, 24, -126, 127) 17 | static inline int cpfloat_validate_optstructf(const optstruct *fpopts); 18 | 19 | /* Rounding functions.
*/ 20 | doc_cpfloat(float, 24, -126, 127) 21 | static inline int cpfloatf(float *X, const float *A, const size_t numelem, 22 | optstruct *fpopts); 23 | doc_cpfloat(float, 24, -126, 127) 24 | static inline int cpf_fproundf(float *X, const float *A, 25 | const size_t numelem, optstruct *fpopts); 26 | 27 | /* Elementary arithmetic operations. */ 28 | doc_cpf_bivariate(sum, \f$ X_i = A_i + B_i \f$, 24, -126, 127) 29 | static inline int cpf_addf(float *X, const float *A, const float *B, 30 | const size_t numelem, optstruct *fpopts); 31 | doc_cpf_bivariate(difference, \f$ X_i = A_i - B_i \f$, 24, -126, 127) 32 | static inline int cpf_subf(float *X, const float *A, const float *B, 33 | const size_t numelem, optstruct *fpopts); 34 | doc_cpf_bivariate(product, \f$ X_i = A_i \times B_i \f$, 24, -126, 127) 35 | static inline int cpf_mulf(float *X, const float *A, const float *B, 36 | const size_t numelem, optstruct *fpopts); 37 | doc_cpf_bivariate(ratio, \f$ X_i = A_i / B_i \f$, 24, -126, 127) 38 | static inline int cpf_divf(float *X, const float *A, const float *B, 39 | const size_t numelem, optstruct *fpopts); 40 | 41 | /* Trigonometric functions. 
*/ 42 | doc_cpf_univariate(trigonometric cosine, \f$ X_i = \cos(A_i) \f$, 24, -126, 127) 43 | static inline int cpf_cosf(float *X, const float *A, 44 | const size_t numelem, optstruct *fpopts); 45 | doc_cpf_univariate(trigonometric sine, \f$ X_i = \sin(A_i) \f$, 24, -126, 127) 46 | static inline int cpf_sinf(float *X, const float *A, 47 | const size_t numelem, optstruct *fpopts); 48 | doc_cpf_univariate(trigonometric tangent, \f$ X_i = \tan(A_i) \f$, 24, -126, 127) 49 | static inline int cpf_tanf(float *X, const float *A, 50 | const size_t numelem, optstruct *fpopts); 51 | 52 | doc_cpf_univariate(inverse trigonometric cosine, 53 | \f$ X_i = \mathrm{acos}(A_i) \f$, 24, -126, 127) 54 | static inline int cpf_acosf(float *X, const float *A, 55 | const size_t numelem, optstruct *fpopts); 56 | doc_cpf_univariate(inverse trigonometric sine, 57 | \f$ X_i = \mathrm{asin}(A_i) \f$, 24, -126, 127) 58 | static inline int cpf_asinf(float *X, const float *A, 59 | const size_t numelem, optstruct *fpopts); 60 | doc_cpf_univariate(inverse trigonometric tangent, 61 | \f$ X_i = \mathrm{atan}(A_i) \f$, 24, -126, 127) 62 | static inline int cpf_atanf(float *X, const float *A, 63 | const size_t numelem, optstruct *fpopts); 64 | doc_cpf_bivariate(2-argument arctangent, 65 | \f$ X_i = \mathrm{atan} (B_i / A_i) \f$, 24, -126, 127) 66 | static inline int cpf_atan2f(float *X, const float *A, const float *B, 67 | const size_t numelem, optstruct *fpopts); 68 | 69 | /* Hyperbolic functions. 
*/ 70 | doc_cpf_univariate(hyperbolic cosine, \f$ X_i = \mathrm{cosh}(A_i) \f$, 24, -126, 127) 71 | static inline int cpf_coshf(float *X, const float *A, 72 | const size_t numelem, optstruct *fpopts); 73 | doc_cpf_univariate(hyperbolic sine, \f$ X_i = \mathrm{sinh}(A_i) \f$, 24, -126, 127) 74 | static inline int cpf_sinhf(float *X, const float *A, 75 | const size_t numelem, optstruct *fpopts); 76 | doc_cpf_univariate(hyperbolic tangent , \f$ X_i = \mathrm{tanh}(A_i) \f$, 24, -126, 127) 77 | static inline int cpf_tanhf(float *X, const float *A, 78 | const size_t numelem, optstruct *fpopts); 79 | 80 | doc_cpf_univariate(inverse hyperbolic cosine, 81 | \f$ X_i = \mathrm{arcosh}(A_i) \f$, 24, -126, 127) 82 | static inline int cpf_acoshf(float *X, const float *A, 83 | const size_t numelem, optstruct *fpopts); 84 | doc_cpf_univariate(inverse hyperbolic sine, 85 | \f$ X_i = \mathrm{arsinh}(A_i) \f$, 24, -126, 127) 86 | static inline int cpf_asinhf(float *X, const float *A, 87 | const size_t numelem, optstruct *fpopts); 88 | doc_cpf_univariate(inverse hyperbolic tangent, 89 | \f$ X_i = \mathrm{artanh}(A_i) \f$, 24, -126, 127) 90 | static inline int cpf_atanhf(float *X, const float *A, 91 | const size_t numelem, optstruct *fpopts); 92 | 93 | /* Exponentiation and logarithmic functions. 
*/ 94 | doc_cpf_univariate(exponential, \f$ X_i = \exp(A_i) \f$, 24, -126, 127) 95 | static inline int cpf_expf(float *X, const float *A, 96 | const size_t numelem, optstruct *fpopts); 97 | 98 | doc_cpf_frexp(24, -126, 127) 99 | static inline int cpf_frexpf(float *X, int *exp, const float *A, 100 | const size_t numelem, optstruct *fpopts); 101 | doc_cpf_scaling(2, 24, -126, 127) 102 | static inline int cpf_ldexpf(float *X, const float *A, const int *exp, 103 | const size_t numelem, optstruct *fpopts); 104 | doc_cpf_univariate(natural logarithm, \f$ X_i = \log(A_i) \f$, 24, -126, 127) 105 | static inline int cpf_logf(float *X, const float *A, 106 | const size_t numelem, optstruct *fpopts); 107 | doc_cpf_univariate(base-10 logarithm, \f$ X_i = \log_{10}(A_i) \f$, 24, -126, 127) 108 | static inline int cpf_log10f(float *X, const float *A, 109 | const size_t numelem, optstruct *fpopts); 110 | doc_cpf_modf(24, -126, 127) 111 | static inline int cpf_modff(float *X, float *intpart, const float *A, 112 | const size_t numelem, optstruct *fpopts); 113 | doc_cpf_univariate(base-2 exponential, \f$ X_i = 2^{A_i} \f$, 24, -126, 127) 114 | static inline int cpf_exp2f(float *X, const float *A, 115 | const size_t numelem, optstruct *fpopts); 116 | doc_cpf_univariate(exp(x) - 1, \f$ X_i = \exp(A_i) - 1 \f$, 24, -126, 127) 117 | static inline int cpf_expm1f(float *X, const float *A, 118 | const size_t numelem, optstruct *fpopts); 119 | doc_cpf_ilogb(24, -126, 127) 120 | static inline int cpf_ilogbf(int *exp, const float *A, 121 | const size_t numelem, optstruct *fpopts); 122 | 123 | doc_cpf_univariate(natural logarithm of number shifted by one, 124 | \f$ X_i = \log(1+A_i) \f$, 24, -126, 127) 125 | static inline int cpf_log1pf(float *X, const float *A, 126 | size_t numelem, optstruct *fpopts); 127 | doc_cpf_univariate(base-2 logarithm, \f$ X_i = \log_2(A_i) \f$, 24, -126, 127) 128 | static inline int cpf_log2f(float *X, const float *A, 129 | const size_t numelem, optstruct *fpopts); 
130 | doc_cpf_univariate(base-FLT_RADIX logarithm of absolute value, 131 | \f$ X_i = \log(\lvert A_i \rvert) \f$, 24, -126, 127) 132 | static inline int cpf_logbf(float *X, const float *A, 133 | const size_t numelem, optstruct *fpopts); 134 | doc_cpf_scaling(FLT\_RADIX, 24, -126, 127) 135 | static inline int cpf_scalbnf(float *X, const float *A, const int *exp, 136 | const size_t numelem, optstruct *fpopts); 137 | doc_cpf_scaling(FLT\_RADIX, 24, -126, 127) 138 | static inline int cpf_scalblnf(float *X, const float *A, 139 | const long int *exp, const size_t numelem, 140 | optstruct *fpopts); 141 | 142 | /* Power functions. */ 143 | doc_cpf_bivariate(real powers, \f$ X_i = A_i^{B_i} \f$, 24, -126, 127) 144 | static inline int cpf_powf(float *X, const float *A, const float *B, 145 | const size_t numelem, optstruct *fpopts); 146 | doc_cpf_univariate(square root, \f$ X_i = \sqrt{A_i} \f$, 24, -126, 127) 147 | static inline int cpf_sqrtf(float *X, const float *A, 148 | const size_t numelem, optstruct *fpopts); 149 | doc_cpf_univariate(cube root, \f$ X_i = \sqrt[3]{A_i} \f$, 24, -126, 127) 150 | static inline int cpf_cbrtf(float *X, const float *A, 151 | const size_t numelem, optstruct *fpopts); 152 | doc_cpf_bivariate(hypotenuse of a right-angle triangle, 153 | \f$ X_i = \sqrt{A_i^2 + B_i^2} \f$, 24, -126, 127) 154 | static inline int cpf_hypotf(float *X, const float *A, const float *B, 155 | const size_t numelem, optstruct *fpopts); 156 | 157 | /* Error and gamma functions. 
*/ 158 | doc_cpf_univariate(error function, \f$ X_i = \mathrm{erf}(A_i) \f$, 24, -126, 127) 159 | static inline int cpf_erff(float *X, const float *A, 160 | const size_t numelem, optstruct *fpopts); 161 | doc_cpf_univariate(complementary error function, 162 | \f$ X_i = \mathrm{erfc}(A_i) \f$, 24, -126, 127) 163 | static inline int cpf_erfcf(float *X, const float *A, 164 | const size_t numelem, optstruct *fpopts); 165 | doc_cpf_univariate(gamma function, \f$ X_i = \Gamma(A_i) \f$, 24, -126, 127) 166 | static inline int cpf_tgammaf(float *X, const float *A, 167 | const size_t numelem, optstruct *fpopts); 168 | doc_cpf_univariate(natural logarithm of absolute value of gamma function, 169 | \f$ X_i = \log(\lvert \Gamma(A_i) \rvert) \f$, 24, -126, 127) 170 | static inline int cpf_lgammaf(float *X, const float *A, 171 | const size_t numelem, optstruct *fpopts); 172 | 173 | /* Rounding and remainder functions. */ 174 | doc_cpf_univariate(ceiling function, \f$ X_i = \lceil A_i \rceil \f$, 24, -126, 127) 175 | static inline int cpf_ceilf(float *X, const float *A, 176 | const size_t numelem, optstruct *fpopts); 177 | doc_cpf_univariate(floor function, \f$ X_i = \lfloor A_i \rfloor \f$, 24, -126, 127) 178 | static inline int cpf_floorf(float *X, const float *A, 179 | const size_t numelem, optstruct *fpopts); 180 | doc_cpf_bivariate(floating-point remainder of division, 181 | \f$ X_i = A_i \;\mathrm{mod}\; B_i \f$, 24, -126, 127) 182 | static inline int cpf_fmodf(float *X, const float *A, const float *B, 183 | const size_t numelem, optstruct *fpopts); 184 | doc_cpf_univariate(integer truncation, \f$ X_i = \mathrm{trunc}(A_i) \f$, 24, -126, 127) 185 | static inline int cpf_truncf(float *X, const float *A, 186 | const size_t numelem, optstruct *fpopts); 187 | 188 | doc_cpf_univariate(closest integer (with round-to-nearest), 189 | \f$ X_i = \mathrm{round}(A_i) \f$, 24, -126, 127) 190 | static inline int cpf_roundf(float *X, const float *A, 191 | const size_t numelem, optstruct 
*fpopts); 192 | doc_cpf_univariate(closest integer (with round-to-nearest), 193 | \f$ X_i = \mathrm{round}(A_i) \f$, 24, -126, 127) 194 | static inline int cpf_lroundf(long *X, const float *A, 195 | const size_t numelem, optstruct *fpopts); 196 | doc_cpf_univariate_nobitflip(closest integer (with round-to-nearest), 197 | \f$ X_i = \mathrm{round}(A_i) \f$, 24, -126, 127) 198 | static inline int cpf_llroundf(long long *X, const float *A, 199 | const size_t numelem, optstruct *fpopts); 200 | 201 | doc_cpf_rint(24, -126, 127) 202 | static inline int cpf_rintf(float *X, int *exception, const float *A, 203 | const size_t numelem, optstruct *fpopts); 204 | doc_cpf_rint(24, -126, 127) 205 | static inline int cpf_lrintf(long *X, int *exception, const float *A, 206 | const size_t numelem, optstruct *fpopts); 207 | doc_cpf_rint(24, -126, 127) 208 | static inline int cpf_llrintf(long long *X, int *exception, const float *A, 209 | const size_t numelem, optstruct *fpopts); 210 | doc_cpf_nearbyint(24, -126, 127) 211 | static inline int cpf_nearbyintf(float *X, const float *A, 212 | const size_t numelem, optstruct *fpopts); 213 | doc_cpf_bivariate(remainder of the floating point division, 214 | \f$ X_i = A_i - k \times B_i \f$ 215 | for largest \f$ k \f$ such that \f$ k \times B_i < A_i \f$, 216 | 24, -126, 127) 217 | static inline int cpf_remainderf(float *X, const float *A, const float *B, 218 | const size_t numelem, optstruct *fpopts); 219 | 220 | doc_cpf_remquo(24, -126, 127) 221 | static inline int cpf_remquof(float *X, int *quot, 222 | const float *A, const float *B, 223 | const size_t numelem, optstruct *fpopts); 224 | 225 | /* Floating-point manipulation functions.
*/ 226 | doc_cpf_bivariate(number from magnitude and sign, 227 | \f$ X_i = \mathrm{sign}(A_i) \times \lvert B_i \rvert \f$, 228 | 24, -126, 127) 229 | static inline int cpf_copysignf(float *X, const float *A, const float *B, 230 | const size_t numelem, optstruct *fpopts); 231 | doc_cpf_bivariate(next floating-point number in specified direction, 232 | the floating-point number closest to \f$ A_i \f$ in the 233 | direction of \f$ B_i \f$, 24, -126, 127) 234 | static inline int cpf_nextafterf(float *X, const float *A, const float *B, 235 | const size_t numelem, optstruct *fpopts); 236 | doc_cpf_bivariate(next floating-point number in specified direction, 237 | the floating-point number closest to \f$ A_i \f$ in the 238 | direction of \f$ B_i \f$, 24, -126, 127) 239 | static inline int cpf_nexttowardf(float *X, const float *A, 240 | const long double *B, 241 | const size_t numelem, 242 | optstruct *fpopts); 243 | 244 | /* Minimum, maximum, difference functions. */ 245 | doc_cpf_bivariate(positive difference, \f$ X_i = \lvert A_i - B_i \rvert \f$, 246 | 24, -126, 127) 247 | static inline int cpf_fdimf(float *X, const float *A, const float *B, 248 | const size_t numelem, optstruct *fpopts); 249 | doc_cpf_bivariate(element-wise maximum, \f$ X_i = \mathrm{max}(A_i, B_i) \f$, 250 | 24, -126, 127) 251 | static inline int cpf_fmaxf(float *X, const float *A, const float *B, 252 | const size_t numelem, optstruct *fpopts); 253 | doc_cpf_bivariate(element-wise minimum, \f$ X_i = \mathrm{min}(A_i, B_i) \f$, 254 | 24, -126, 127) 255 | static inline int cpf_fminf(float *X, const float *A, const float *B, 256 | const size_t numelem, optstruct *fpopts); 257 | 258 | /* Classification. 
*/ 259 | doc_cpf_fpclassify(24, -126, 127) 260 | static inline int cpf_fpclassifyf(int *r, const float *A, 261 | const size_t numelem, optstruct *fpopts); 262 | doc_cpf_isfun(finite, 24, -126, 127) 263 | static inline int cpf_isfinitef(int *r, const float *A, 264 | const size_t numelem, optstruct *fpopts); 265 | doc_cpf_isfun(infinite, 24, -126, 127) 266 | static inline int cpf_isinff(int *r, const float *A, 267 | const size_t numelem, optstruct *fpopts); 268 | doc_cpf_isfun(not a number, 24, -126, 127) 269 | static inline int cpf_isnanf(int *r, const float *A, 270 | const size_t numelem, optstruct *fpopts); 271 | doc_cpf_isfun(normal, 24, -126, 127) 272 | static inline int cpf_isnormalf(int *r, const float *A, 273 | const size_t numelem, optstruct *fpopts); 274 | 275 | /* Other functions. */ 276 | doc_cpf_univariate(absolute value, \f$ X_i = \lvert A_i \rvert \f$, 24, -126, 127) 277 | static inline int cpf_fabsf(float *X, const float *A, 278 | const size_t numelem, optstruct *fpopts); 279 | doc_cpf_trivariate(fused multiply-add , \f$ X_i = A_i \times B_i + C_i \f$, 280 | 24, -126, 127) 281 | static inline int cpf_fmaf(float *X, const float *A, const float *B, 282 | const float *C, const size_t numelem, 283 | optstruct *fpopts); 284 | 285 | /** @cond */ 286 | #define FUNSUFFIX f 287 | #define FPTYPE float 288 | #define INTTYPE uint32_t 289 | #define INTSUFFIX U 290 | 291 | #define DEFPREC 24 292 | #define DEFEMIN -126 293 | #define DEFEMAX 127 294 | #define NLEADBITS 9 295 | #define NBITS 32 296 | #define FULLMASK 0xFFFFFFFFU 297 | #define ABSMASK 0x7FFFFFFFU 298 | #define SIGNMASK 0x80000000U 299 | #define EXPMASK 0x7F800000U 300 | #define FRACMASK 0x007FFFFFU 301 | 302 | #ifdef PCG_VARIANTS_H_INCLUDED 303 | #define MAXRAND 0xFFFFFFFFU 304 | #define INITRAND(seed) pcg32_srandom_r(seed, time(NULL), (intptr_t)seed); 305 | #define ADVANCERAND(seed, thread, nloc) \ 306 | pcg32_advance_r(seed, thread * nloc - 1); 307 | #define GENRAND(seed) pcg32_random_r(seed) 308 | 
#else /* #ifdef PCG_VARIANTS_H_INCLUDED */ 309 | #warning "The default C random number generator is being used." 310 | #warning "Please compile with -include " 311 | #warning "and link with -L -lpcg_random." 312 | #define MAXRAND 0x7FFFFFFFU 313 | #ifdef _OPENMP 314 | #define INITRAND(seed) *seed = time(NULL); 315 | #define GEN_SINGLE_RAND(seed) ((INTTYPE)rand_r((unsigned int *)seed)) 316 | #else /*# ifdef _OPENMP */ 317 | #define INITRAND(seed) srand(time(NULL)); 318 | #define GEN_SINGLE_RAND(seed) ((INTTYPE)rand()) 319 | #endif /*# ifdef _OPENMP */ 320 | #endif /* #ifndef PCG_VARIANTS_H_INCLUDED */ 321 | 322 | #include "cpfloat_threshold_binary32.h" 323 | #include "cpfloat_template.h" 324 | /** @endcond */ 325 | 326 | #endif /* #ifndef _CPFLOAT_BINARY32_ */ 327 | 328 | /* 329 | * CPFloat - Custom Precision Floating-point numbers. 330 | * 331 | * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 332 | * 333 | * This library is free software; you can redistribute it and/or modify it under 334 | * the terms of the GNU Lesser General Public License as published by the Free 335 | * Software Foundation; either version 2.1 of the License, or (at your option) 336 | * any later version. 337 | * 338 | * This library is distributed in the hope that it will be useful, but WITHOUT 339 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 340 | * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 341 | * details. 
342 | * 343 | * You should have received a copy of the GNU Lesser General Public License along 344 | * with this library; if not, write to the Free Software Foundation, Inc., 51 345 | * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 346 | */ 347 | -------------------------------------------------------------------------------- /src/cpfloat_binary64.h: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | /** 5 | * @file cpfloat_binary64.h 6 | * @brief CPFloat functions for `double` arrays. 7 | */ 8 | 9 | #ifndef _CPFLOAT_BINARY64_ 10 | #define _CPFLOAT_BINARY64_ 11 | 12 | #include "cpfloat_definitions.h" 13 | #include "cpfloat_docmacros.h" 14 | 15 | /* Validation of floating-point parameters. */ 16 | doc_cpfloat_validate_optstruct(double, 26, 53, -1022, 1023) 17 | static inline int cpfloat_validate_optstruct(const optstruct *fpopts); 18 | 19 | /* Rounding functions. */ 20 | doc_cpfloat(double, 53, -1022, 1023) 21 | static inline int cpfloat(double *X, const double *A, const size_t numelem, 22 | optstruct *fpopts); 23 | doc_cpfloat(double, 53, -1022, 1023) 24 | static inline int cpf_fpround(double *X, const double *A, 25 | const size_t numelem, optstruct *fpopts); 26 | 27 | /* Elementary arithmetic operations. 
*/ 28 | doc_cpf_bivariate(sum, \f$ X_i = A_i + B_i \f$, 53, -1022, 1023) 29 | static inline int cpf_add(double *X, const double *A, const double *B, 30 | const size_t numelem, optstruct *fpopts); 31 | doc_cpf_bivariate(difference, \f$ X_i = A_i - B_i \f$, 53, -1022, 1023) 32 | static inline int cpf_sub(double *X, const double *A, const double *B, 33 | const size_t numelem, optstruct *fpopts); 34 | doc_cpf_bivariate(product, \f$ X_i = A_i \times B_i \f$, 53, -1022, 1023) 35 | static inline int cpf_mul(double *X, const double *A, const double *B, 36 | const size_t numelem, optstruct *fpopts); 37 | doc_cpf_bivariate(ratio, \f$ X_i = A_i / B_i \f$, 53, -1022, 1023) 38 | static inline int cpf_div(double *X, const double *A, const double *B, 39 | const size_t numelem, optstruct *fpopts); 40 | 41 | /* Trigonometric functions. */ 42 | doc_cpf_univariate(trigonometric cosine, \f$ X_i = \cos(A_i) \f$, 53, -1022, 1023) 43 | static inline int cpf_cos(double *X, const double *A, 44 | const size_t numelem, optstruct *fpopts); 45 | doc_cpf_univariate(trigonometric sine, \f$ X_i = \sin(A_i) \f$, 53, -1022, 1023) 46 | static inline int cpf_sin(double *X, const double *A, 47 | const size_t numelem, optstruct *fpopts); 48 | doc_cpf_univariate(trigonometric tangent, \f$ X_i = \tan(A_i) \f$, 53, -1022, 1023) 49 | static inline int cpf_tan(double *X, const double *A, 50 | const size_t numelem, optstruct *fpopts); 51 | 52 | doc_cpf_univariate(inverse trigonometric cosine, 53 | \f$ X_i = \mathrm{acos(A_i)} \f$, 53, -1022, 1023) 54 | static inline int cpf_acos(double *X, const double *A, 55 | const size_t numelem, optstruct *fpopts); 56 | doc_cpf_univariate(inverse trigonometric sine, 57 | \f$ X_i = \mathrm{asin}(A_i) \f$, 53, -1022, 1023) 58 | static inline int cpf_asin(double *X, const double *A, 59 | const size_t numelem, optstruct *fpopts); 60 | doc_cpf_univariate(inverse trigonometric tangent, 61 | \f$ X_i = \mathrm{atan}(A_i) \f$, 53, -1022, 1023) 62 | static inline int 
cpf_atan(double *X, const double *A, 63 | const size_t numelem, optstruct *fpopts); 64 | doc_cpf_bivariate(2-argument arctangent, 65 | \f$ X_i = \mathrm{atan}(B_i / A_i) \f$, 53, -1022, 1023) 66 | static inline int cpf_atan2(double *X, const double *A, const double *B, 67 | const size_t numelem, optstruct *fpopts); 68 | 69 | /* Hyperbolic functions. */ 70 | doc_cpf_univariate(hyperbolic cosine, \f$ X_i = \mathrm{cosh}(A_i) \f$, 71 | 53, -1022, 1023) 72 | static inline int cpf_cosh(double *X, const double *A, 73 | const size_t numelem, optstruct *fpopts); 74 | doc_cpf_univariate(hyperbolic sine, \f$ X_i = \mathrm{sinh}(A_i) \f$, 53, -1022, 1023) 75 | static inline int cpf_sinh(double *X, const double *A, 76 | const size_t numelem, optstruct *fpopts); 77 | doc_cpf_univariate(hyperbolic tangent , \f$ X_i = \mathrm{tanh}(A_i) \f$, 78 | 53, -1022, 1023) 79 | static inline int cpf_tanh(double *X, const double *A, 80 | const size_t numelem, optstruct *fpopts); 81 | 82 | doc_cpf_univariate(inverse hyperbolic cosine, 83 | \f$ X_i = \mathrm{arcosh}(A_i) \f$, 53, -1022, 1023) 84 | static inline int cpf_acosh(double *X, const double *A, 85 | const size_t numelem, optstruct *fpopts); 86 | doc_cpf_univariate(inverse hyperbolic sine, 87 | \f$ X_i = \mathrm{arsinh}(A_i) \f$, 53, -1022, 1023) 88 | static inline int cpf_asinh(double *X, const double *A, 89 | const size_t numelem, optstruct *fpopts); 90 | doc_cpf_univariate(inverse hyperbolic tangent, 91 | \f$ X_i = \mathrm{artanh}(A_i) \f$, 53, -1022, 1023) 92 | static inline int cpf_atanh(double *X, const double *A, 93 | const size_t numelem, optstruct *fpopts); 94 | 95 | /* Exponentiation and logarithmic functions. 
*/ 96 | doc_cpf_univariate(exponential, \f$ X_i = \exp(A_i) \f$, 53, -1022, 1023) 97 | static inline int cpf_exp(double *X, const double *A, 98 | const size_t numelem, optstruct *fpopts); 99 | 100 | doc_cpf_frexp(53, -1022, 1023) 101 | static inline int cpf_frexp(double *X, int *exp, const double *A, 102 | const size_t numelem, optstruct *fpopts); 103 | doc_cpf_scaling(2, 53, -1022, 1023) 104 | static inline int cpf_ldexp(double *X, const double *A, const int *exp, 105 | const size_t numelem, optstruct *fpopts); 106 | doc_cpf_univariate(natural logarithm, \f$ X_i = \log(A_i) \f$, 53, -1022, 1023) 107 | static inline int cpf_log(double *X, const double *A, 108 | const size_t numelem, optstruct *fpopts); 109 | doc_cpf_univariate(base - 10 logarithm, \f$ X_i = \log_{10}(A_i) \f$, 53, -1022, 1023) 110 | static inline int cpf_log10(double *X, const double *A, 111 | const size_t numelem, optstruct *fpopts); 112 | doc_cpf_modf(53, -1022, 1023) 113 | static inline int cpf_modf(double *X, double *intpart, const double *A, 114 | const size_t numelem, optstruct *fpopts); 115 | doc_cpf_univariate(base-2 exponential, \f$ X_i = 2^{A_i} \f$, 53, -1022, 1023) 116 | static inline int cpf_exp2(double *X, const double *A, 117 | const size_t numelem, optstruct *fpopts); 118 | doc_cpf_univariate(exp(x) - 1, \f$ X_i = \exp(A_i) - 1 \f$, 53, -1022, 1023) 119 | static inline int cpf_expm1(double *X, const double *A, 120 | const size_t numelem, optstruct *fpopts); 121 | doc_cpf_ilogb(53, -1022, 1023) 122 | static inline int cpf_ilogb(int *exp, const double *A, 123 | const size_t numelem, optstruct *fpopts); 124 | 125 | doc_cpf_univariate(natural logarithm of number shifted by one, 126 | \f$ X_i = \log(1+A_i) \f$, 53, -1022, 1023) 127 | static inline int cpf_log1p(double *X, const double *A, 128 | size_t numelem, optstruct *fpopts); 129 | doc_cpf_univariate(base-2 logarithm, \f$ X_i = \log_2(A_i) \f$, 53, -1022, 1023) 130 | static inline int cpf_log2(double *X, const double *A, 131 | const 
size_t numelem, optstruct *fpopts); 132 | doc_cpf_univariate(base-FLT_RADIX logarithm of absolute value, 133 | \f$ X_i = \log(\lvert A_i \rvert) \f$, 53, -1022, 1023) 134 | static inline int cpf_logb(double *X, const double *A, 135 | const size_t numelem, optstruct *fpopts); 136 | doc_cpf_scaling(FLT\_RADIX, 53, -1022, 1023) 137 | static inline int cpf_scalbn(double *X, const double *A, const int *exp, 138 | const size_t numelem, optstruct *fpopts); 139 | doc_cpf_scaling(FLT\_RADIX, 53, -1022, 1023) 140 | static inline int cpf_scalbln(double *X, const double *A, 141 | const long int *exp, const size_t numelem, 142 | optstruct *fpopts); 143 | 144 | /* Power functions. */ 145 | doc_cpf_bivariate(real powers, \f$ X_i = A_i^{B_i} \f$, 53, -1022, 1023) 146 | static inline int cpf_pow(double *X, const double *A, const double *B, 147 | const size_t numelem, optstruct *fpopts); 148 | doc_cpf_univariate(square root, \f$ X_i = \sqrt{A_i} \f$, 53, -1022, 1023) 149 | static inline int cpf_sqrt(double *X, const double *A, 150 | const size_t numelem, optstruct *fpopts); 151 | doc_cpf_univariate(cube root, \f$ X_i = \sqrt[3]{A_i} \f$, 53, -1022, 1023) 152 | static inline int cpf_cbrt(double *X, const double *A, 153 | const size_t numelem, optstruct *fpopts); 154 | doc_cpf_bivariate(hypotenuse of a right-angle triangle, 155 | \f$ X_i = \sqrt{A_i^2 + B_i^2} \f$, 53, -1022, 1023) 156 | static inline int cpf_hypot(double *X, const double *A, const double *B, 157 | const size_t numelem, optstruct *fpopts); 158 | 159 | /* Error and gamma functions. 
*/ 160 | doc_cpf_univariate(error function, \f$ X_i = \mathrm{erf}(A_i) \f$, 53, -1022, 1023) 161 | static inline int cpf_erf(double *X, const double *A, 162 | const size_t numelem, optstruct *fpopts); 163 | doc_cpf_univariate(complementary error function, 164 | \f$ X_i = \mathrm{erfc}(A_i) \f$, 53, -1022, 1023) 165 | static inline int cpf_erfc(double *X, const double *A, 166 | const size_t numelem, optstruct *fpopts); 167 | doc_cpf_univariate(gamma function, \f$ X_i = \Gamma(A_i) \f$, 53, -1022, 1023) 168 | static inline int cpf_tgamma(double *X, const double *A, 169 | const size_t numelem, optstruct *fpopts); 170 | doc_cpf_univariate(natural logarithm of absolute value of gamma function, 171 | \f$ X_i = \log(\lvert \Gamma(A_i) \rvert) \f$, 53, -1022, 1023) 172 | static inline int cpf_lgamma(double *X, const double *A, 173 | const size_t numelem, optstruct *fpopts); 174 | 175 | /* Rounding and remainder functions. */ 176 | doc_cpf_univariate(ceiling function, \f$ X_i = \lceil A_i \rceil \f$, 53, -1022, 1023) 177 | static inline int cpf_ceil(double *X, const double *A, 178 | const size_t numelem, optstruct *fpopts); 179 | doc_cpf_univariate(floor function, \f$ X_i = \lfloor A_i \rfloor \f$, 53, -1022, 1023) 180 | static inline int cpf_floor(double *X, const double *A, 181 | const size_t numelem, optstruct *fpopts); 182 | doc_cpf_bivariate(floating-point remainder of division, 183 | \f$ X_i = A_i \;\mathrm{mod}\; B_i \f$, 53, -1022, 1023) 184 | static inline int cpf_fmod(double *X, const double *A, const double *B, 185 | const size_t numelem, optstruct *fpopts); 186 | doc_cpf_univariate(integer truncation, \f$ X_i = \mathrm{trunc}(A_i) \f$, 187 | 53, -1022, 1023) 188 | static inline int cpf_trunc(double *X, const double *A, 189 | const size_t numelem, optstruct *fpopts); 190 | 191 | doc_cpf_univariate(closest integer (with round-to-nearest), 192 | \f$ X_i = \mathrm{round}(A_i) \f$, 53, -1022, 1023) 193 | static inline int cpf_round(double *X, const double *A, 194 | 
const size_t numelem, optstruct *fpopts); 195 | doc_cpf_univariate(closest integer (with round-to-nearest), 196 | \f$ X_i = \mathrm{round}(A_i) \f$, 53, -1022, 1023) 197 | static inline int cpf_lround(long *X, const double *A, 198 | const size_t numelem, optstruct *fpopts); 199 | doc_cpf_univariate_nobitflip(closest integer (with round-to-nearest), 200 | \f$ X_i = \mathrm{round}(A_i) \f$, 53, -1022, 1023) 201 | static inline int cpf_llround(long long *X, const double *A, 202 | const size_t numelem, optstruct *fpopts); 203 | 204 | doc_cpf_rint(53, -1022, 1023) 205 | static inline int cpf_rint(double *X, int *exception, const double *A, 206 | const size_t numelem, optstruct *fpopts); 207 | doc_cpf_rint(53, -1022, 1023) 208 | static inline int cpf_lrint(long *X, int *exception, const double *A, 209 | const size_t numelem, optstruct *fpopts); 210 | doc_cpf_rint(53, -1022, 1023) 211 | static inline int cpf_llrint(long long *X, int *exception, const double *A, 212 | const size_t numelem, optstruct *fpopts); 213 | doc_cpf_nearbyint(53, -1022, 1023) 214 | static inline int cpf_nearbyint(double *X, const double *A, 215 | const size_t numelem, optstruct *fpopts); 216 | doc_cpf_bivariate(remainder of the floating point division, 217 | \f$ X_i = A_i^2 - k \times B_i \f$ 218 | for largest \f$ k \f$ such that \f$ k \times B_i < A_i \f$, 219 | 53, -1022, 1023) 220 | static inline int cpf_remainder(double *X, const double *A, const double *B, 221 | const size_t numelem, optstruct *fpopts); 222 | 223 | doc_cpf_remquo(53, -1022, 1023) 224 | static inline int cpf_remquo(double *X, int *quot, 225 | const double *A, const double *B, 226 | const size_t numelem, optstruct *fpopts); 227 | 228 | /* Floating-point manipulation functions. 
*/ 229 | doc_cpf_bivariate(number from magnitude and sign, 230 | \f$ X_i = \mathrm{sign}(A_i) \times \lvert B_i \rvert \f$, 53, -1022, 1023) 231 | static inline int cpf_copysign(double *X, const double *A, const double *B, 232 | const size_t numelem, optstruct *fpopts); 233 | doc_cpf_bivariate(next floating-point number in specified direction, 234 | the floating-point number closest to \f$ A_i \f$ in the 235 | direction of \f$ B_i \f$, 53, -1022, 1023) 236 | static inline int cpf_nextafter(double *X, const double *A, const double *B, 237 | const size_t numelem, optstruct *fpopts); 238 | doc_cpf_bivariate(next floating-point number in specified direction, 239 | the floating-point number closest to \f$ A_i \f$ in the 240 | direction of \f$ B_i \f$, 53, -1022, 1023) 241 | static inline int cpf_nexttoward(double *X, const double *A, 242 | const long double *B, const size_t numelem, 243 | optstruct *fpopts); 244 | 245 | /* Minimum, maximum, difference functions. */ 246 | doc_cpf_bivariate(positive difference, \f$ X_i = \lvert A_i - B_i \rvert \f$, 247 | 53, -1022, 1023) 248 | static inline int cpf_fdim(double *X, const double *A, const double *B, 249 | const size_t numelem, optstruct *fpopts); 250 | doc_cpf_bivariate(element-wise maximum, \f$ X_i = \mathrm{max}(A_i, B_i) \f$, 251 | 53, -1022, 1023) 252 | static inline int cpf_fmax(double *X, const double *A, const double *B, 253 | const size_t numelem, optstruct *fpopts); 254 | doc_cpf_bivariate(element-wise minimum, \f$ X_i = \mathrm{min}(A_i, B_i) \f$, 255 | 53, -1022, 1023) 256 | static inline int cpf_fmin(double *X, const double *A, const double *B, 257 | const size_t numelem, optstruct *fpopts); 258 | 259 | /* Classification. 
*/ 260 | doc_cpf_fpclassify(53, -1022, 1023) 261 | static inline int cpf_fpclassify(int *r, const double *A, 262 | const size_t numelem, optstruct *fpopts); 263 | doc_cpf_isfun(finite, 53, -1022, 1023) 264 | static inline int cpf_isfinite(int *r, const double *A, 265 | const size_t numelem, optstruct *fpopts); 266 | doc_cpf_isfun(infinite, 53, -1022, 1023) 267 | static inline int cpf_isinf(int *r, const double *A, 268 | const size_t numelem, optstruct *fpopts); 269 | doc_cpf_isfun(not a number, 53, -1022, 1023) 270 | static inline int cpf_isnan(int *r, const double *A, 271 | const size_t numelem, optstruct *fpopts); 272 | doc_cpf_isfun(normal, 53, -1022, 1023) 273 | static inline int cpf_isnormal(int *r, const double *A, 274 | const size_t numelem, optstruct *fpopts); 275 | 276 | /* Other functions. */ 277 | doc_cpf_univariate(absolute value, \f$ X_i = \lvert A_i \rvert \f$, 53, -1022, 1023) 278 | static inline int cpf_fabs(double *X, const double *A, 279 | const size_t numelem, optstruct *fpopts); 280 | doc_cpf_trivariate(fused multiply-add , \f$ X_i = A_i \times B_i + C_i \f$, 281 | 53, -1022, 1023) 282 | static inline int cpf_fma(double *X, const double *A, const double *B, 283 | const double *C, const size_t numelem, 284 | optstruct *fpopts); 285 | 286 | /** @cond */ 287 | #define FUNSUFFIX 288 | #define FPTYPE double 289 | #define INTTYPE uint64_t 290 | #define INTSUFFIX ULL 291 | #define DEFPREC 53 292 | #define DEFEMIN -1022 293 | #define DEFEMAX 1023 294 | #define NLEADBITS 12 295 | #define NBITS 64 296 | #define FULLMASK 0xFFFFFFFFFFFFFFFFULL 297 | #define ABSMASK 0x7FFFFFFFFFFFFFFFULL 298 | #define SIGNMASK 0x8000000000000000ULL 299 | #define EXPMASK 0x7FF0000000000000ULL 300 | #define FRACMASK 0x000FFFFFFFFFFFFFULL 301 | 302 | #ifdef PCG_VARIANTS_H_INCLUDED 303 | #define MAXRAND 0xFFFFFFFFFFFFFFFFULL 304 | #define INITRAND(seed) pcg64_srandom_r(seed, time(NULL), (intptr_t)seed); 305 | #define ADVANCERAND(seed, thread, nloc) pcg64_advance_r(seed, thread 
*nloc - 1); 306 | #define GENRAND(seed) pcg64_random_r(seed) 307 | #else /* #ifdef PCG_VARIANTS_H_INCLUDED */ 308 | #warning "The default C random number generator is being used." 309 | #warning "Please compile with -include " 310 | #warning "and link with -L -lpcg_random." 311 | #define MAXRAND 0x3FFFFFFFFFFFFFFFULL 312 | #ifdef _OPENMP 313 | #define INITRAND(seed) *seed = time(NULL); 314 | #define GEN_SINGLE_RAND(seed) \ 315 | ((INTTYPE)rand_r((unsigned int *)seed) + \ 316 | ((INTTYPE)rand_r((unsigned int *)seed) << 31)) 317 | #else /*# ifdef _OPENMP */ 318 | #define INITRAND(seed) srand(time(NULL)); 319 | #define GEN_SINGLE_RAND(seed) ((INTTYPE)rand() + ((INTTYPE)rand() << 31)) 320 | #endif /*# ifdef _OPENMP */ 321 | #endif /* #ifdef PCG_VARIANTS_H_INCLUDED */ 322 | 323 | #include "cpfloat_threshold_binary64.h" 324 | #include "cpfloat_template.h" 325 | /** @endcond */ 326 | 327 | #endif /* #ifndef _CPFLOAT_BINARY64_ */ 328 | 329 | /* 330 | * CPFloat - Custom Precision Floating-point numbers. 331 | * 332 | * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 333 | * 334 | * This library is free software; you can redistribute it and/or modify it under 335 | * the terms of the GNU Lesser General Public License as published by the Free 336 | * Software Foundation; either version 2.1 of the License, or (at your option) 337 | * any later version. 338 | * 339 | * This library is distributed in the hope that it will be useful, but WITHOUT 340 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 341 | * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 342 | * details. 
343 | * 344 | * You should have received a copy of the GNU Lesser General Public License along 345 | * with this library; if not, write to the Free Software Foundation, Inc., 51 346 | * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 347 | */ 348 | -------------------------------------------------------------------------------- /src/cpfloat_definitions.h: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | /** 5 | * @file cpfloat_definitions.h 6 | * @brief Definition of CPFloat data types. 7 | * 8 | * @details This file includes all the external header files used by CPFloat, 9 | * defines the enumerated types 10 | * 11 | * + @ref cpfloat_explim_t, 12 | * + @ref cpfloat_infinity_t, 13 | * + @ref cpfloat_rounding_t, 14 | * + @ref cpfloat_saturation_t, 15 | * + @ref cpfloat_softerr_t, 16 | * + @ref cpfloat_subnormal_t, 17 | * 18 | * and the structured data type @ref optstruct. It is not necessary to include 19 | * this file in order to use CPFloat, as it is already included by @ref 20 | * cpfloat_binary32.h and by @ref cpfloat_binary64.h. 21 | */ 22 | 23 | #ifndef _CHOPFAST_DEFINITIONS_ 24 | #define _CHOPFAST_DEFINITIONS_ 25 | 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | /* #include "pcg_variants.h" */ 37 | 38 | #if defined(_OPENMP) 39 | #include 40 | #endif /* #if defined(_OPENMP) */ 41 | 42 | /** 43 | * @brief Prefix for all library function names. 44 | */ 45 | #define MAINFUNNAME cpf 46 | 47 | /** 48 | * @brief Data type for specifying number of precision bits in target format. 49 | */ 50 | typedef unsigned int cpfloat_precision_t; 51 | 52 | /** 53 | * @brief Data type for specifying exponents in target format. 
54 | */ 55 | typedef int cpfloat_exponent_t; 56 | 57 | /** 58 | * @brief Extended exponent range modes available in CPFloat. 59 | */ 60 | typedef enum { 61 | /** Use exponent range of storage format. */ 62 | CPFLOAT_EXPRANGE_STOR = 0, 63 | /** Use exponent range of target format. */ 64 | CPFLOAT_EXPRANGE_TARG = 1 65 | } cpfloat_explim_t; 66 | 67 | /** 68 | * @brief Infinity support modes available in CPFloat. 69 | */ 70 | typedef enum { 71 | /** Replace infinities with NaNs in target format. */ 72 | CPFLOAT_INF_NO = 0, 73 | /** Use infinities in target format. */ 74 | CPFLOAT_INF_USE = 1, 75 | } cpfloat_infinity_t; 76 | 77 | /** 78 | * @brief Rounding modes available in CPFloat. 79 | */ 80 | typedef enum { 81 | /** Use round-to-nearest with ties-to-away. */ 82 | CPFLOAT_RND_NA = -1, 83 | /** Use round-to-nearest with ties-to-zero. */ 84 | CPFLOAT_RND_NZ = 0, 85 | /** Use round-to-nearest with ties-to-even. */ 86 | CPFLOAT_RND_NE = 1, 87 | /** Use round-toward-+∞. */ 88 | CPFLOAT_RND_TP = 2, 89 | /** Use round-toward-−∞. */ 90 | CPFLOAT_RND_TN = 3, 91 | /** Use round-toward-zero. */ 92 | CPFLOAT_RND_TZ = 4, 93 | /** Stochastic rounding with proportional probabilities. */ 94 | CPFLOAT_RND_SP = 5, 95 | /** Stochastic rounding with equal probabilities. */ 96 | CPFLOAT_RND_SE = 6, 97 | /** Use round-to-odd. */ 98 | CPFLOAT_RND_OD = 7, 99 | /** Do not perform rounding. */ 100 | CPFLOAT_NO_RND = 8, 101 | } cpfloat_rounding_t; 102 | 103 | /** 104 | * @brief Saturation modes available in CPFloat. 105 | */ 106 | typedef enum { 107 | /** Use standard arithmetic. */ 108 | CPFLOAT_SAT_NO = 0, 109 | /** Use saturation arithmetic. */ 110 | CPFLOAT_SAT_USE = 1, 111 | } cpfloat_saturation_t; 112 | 113 | /** 114 | * @brief Soft fault simulation modes available in CPFloat. 115 | */ 116 | typedef enum { 117 | /** Do not introduce soft errors. 
*/ 118 | CPFLOAT_SOFTERR_NO = 0, 119 | /** Soft errors in fraction of target-format floating-point representation.*/ 120 | CPFLOAT_SOFTERR_FRAC = 1, 121 | /** Soft errors anywhere in target-format floating-point representation. */ 122 | CPFLOAT_SOFTERR_FP = 2 123 | } cpfloat_softerr_t; 124 | 125 | /** 126 | * @brief Subnormal support modes available in CPFloat. 127 | */ 128 | typedef enum { 129 | /** Round subnormal numbers according to the current rounding mode. */ 130 | CPFLOAT_SUBN_RND = 0, 131 | /** Support storage of subnormal numbers. */ 132 | CPFLOAT_SUBN_USE = 1 133 | } cpfloat_subnormal_t; 134 | 135 | /** @cond */ 136 | #ifdef PCG_VARIANTS_H_INCLUDED 137 | #define CPFLOAT_BITSEEDTYPE pcg32_random_t 138 | #define CPFLOAT_RANDSEEDTYPEF pcg32_random_t 139 | #define CPFLOAT_RANDSEEDTYPE pcg64_random_t 140 | #else /* #ifdef PCG_VARIANTS_H_INCLUDED */ 141 | #define CPFLOAT_BITSEEDTYPE unsigned int 142 | #define CPFLOAT_RANDSEEDTYPEF size_t 143 | #define CPFLOAT_RANDSEEDTYPE size_t 144 | #endif /* #ifdef PCG_VARIANTS_H_INCLUDED */ 145 | /** @endcond */ 146 | 147 | /** 148 | * @brief Internal state of the pseudo-random bit generator. 149 | */ 150 | typedef CPFLOAT_BITSEEDTYPE cpfloat_bitseed_t; 151 | 152 | /** 153 | * @brief Internal state of the pseudo-random `float` generator. 154 | */ 155 | typedef CPFLOAT_RANDSEEDTYPEF cpfloat_randseedf_t; 156 | 157 | /** 158 | * @brief Internal state of the pseudo-random `double` generator. 159 | */ 160 | typedef CPFLOAT_RANDSEEDTYPE cpfloat_randseed_t; 161 | 162 | /** 163 | * @brief Specify target format, rounding mode, and occurrence of soft faults. 164 | * 165 | * @details The fields of this structure determine the parameters of the 166 | * floating-point format to be simulated, the rounding mode to be used during 167 | * the conversion process, and whether soft faults striking the rounded numbers 168 | * should be simulated. 169 | */ 170 | typedef struct { 171 | /** 172 | * @brief String specifying target format. 
173 | * 174 | * @details This field is defined only for compatibility with the MATLAB 175 | * function `chop`, and its value is used by the MEX interface but ignored by 176 | * the pure C implementation. 177 | * 178 | * Possible values are: 179 | * + `q43`, `e4m3`, `E4M3` for E4M3 (4-bit exponent, 4-bit significand); 180 | * + `q52`, `e5m2`, `E5M2` for E5M2 (5-bit exponent, 3-bit significand); 181 | * + `b`, `bf16`, `bfloat16` for bfloat16; 182 | * + `h`, `fp16`, `binary16`, `half` for binary16; 183 | * + `t`, `tf32`, `TensorFloat-32` for TensorFloat-32; 184 | * + `s`, `fp32`, `binary32`, `single` for binary32; 185 | * + `d`, `fp64`, `binary64`, `double` for binary64; and 186 | * + `custom`, `c` for a format specifying `precision`, `emin`, and `emax`. 187 | * 188 | * The validation functions cpfloatf_validate_optstruct() and 189 | * cpfloat_validate_optstruct() return a warning code if this field is not set 190 | * to either the empty string or one of the strings above. 191 | */ 192 | char format [15]; 193 | /** 194 | * @brief Bits of precision of target format. 195 | * 196 | * @details The maximum values allowed are 24 and 53 if the storage format is 197 | * `float` or `double`, respectively. 198 | * 199 | * For compatibility with the MATLAB function `chop`, in the MEX interface the 200 | * number of digits of precision for `float` and `double` cannot exceed 11 and 201 | * 25, respectively, when using stochastic rounding, and cannot exceed 23 and 202 | * 52, respectively, for other rounding modes. The C implementation does not 203 | * have any such restrictions, but using larger values can cause double 204 | * rounding. 
205 | * 206 | * The validation functions cpfloatf_validate_optstruct() and 207 | * cpfloat_validate_optstruct() return an error code if the required number of 208 | * digits is larger than the maximum allowed by the storage format, and a 209 | * warning code if the required number of digits is above the maximum allowed 210 | * by the MEX interface. 211 | */ 212 | cpfloat_precision_t precision; 213 | /** 214 | * @brief Minimum exponent of target format. 215 | * 216 | * @details The minimum values allowed are -126 and -1022 if the storage 217 | * format is `float` or `double`, respectively. If a smaller value is chosen, 218 | * it is changed to the minimum allowed value without warning. This field is 219 | * ignored unless `explim` is set to `CPFLOAT_EXPRANGE_TARG`. 220 | * 221 | * The validation functions cpfloatf_validate_optstruct() and 222 | * cpfloat_validate_optstruct() return an error code if the required minimum 223 | * exponent is smaller than the minimum allowed by the storage format. 224 | */ 225 | cpfloat_exponent_t emin; 226 | /** 227 | * @brief Maximum exponent of target format. 228 | * 229 | * @details The maximum values allowed are 127 and 1023 if the storage format 230 | * is `float` or `double`, respectively. If a larger value is chosen, it is 231 | * changed to the maximum allowed value without warning. This field is ignored 232 | * unless `explim` is set to `CPFLOAT_EXPRANGE_TARG`. 233 | * 234 | * The validation functions cpfloatf_validate_optstruct() and 235 | * cpfloat_validate_optstruct() return an error code if the required maximum 236 | * exponent is larger than the maximum allowed by the storage format. 237 | */ 238 | cpfloat_exponent_t emax; 239 | /** 240 | * @brief Support for extended exponents in target format. 
241 | * 242 | * @details The upper limit of the exponent range is set to `emax` if this 243 | * field is set to `CPFLOAT_EXPRANGE_TARG`, and to the upper limit of the 244 | * exponent range of the storage format if it is set to 245 | * `CPFLOAT_EXPRANGE_STOR`. 246 | */ 247 | cpfloat_explim_t explim; 248 | /** 249 | * @brief Support for infinities in target format. 250 | * 251 | * @details If this field is set to `CPFLOAT_INF_USE`, the target format 252 | * supports signed infinities. If the field is set to `CPFLOAT_INF_NO`, 253 | * infinities are replaced with a quiet NaN. 254 | */ 255 | cpfloat_infinity_t infinity; 256 | /** 257 | * @brief Rounding mode to be used for the conversion. 258 | * 259 | * @details The values of this field are consistent with those of the MATLAB 260 | *function `chop`. 261 | * 262 | * Possible values are: 263 | * + CPFLOAT_RND_NA for round-to-nearest with ties-to-away; 264 | * + CPFLOAT_RND_NZ for round-to-nearest with ties-to-zero; 265 | * + CPFLOAT_RND_NE for round-to-nearest with ties-to-even; 266 | * + CPFLOAT_RND_TP for round-to-+∞ 267 | * + CPFLOAT_RND_TN for round-to-−∞ 268 | * + CPFLOAT_RND_TZ for round-to-zero; 269 | * + CPFLOAT_RND_SP for stochastic rounding with proportional probabilities; 270 | * + CPFLOAT_RND_SE for stochastic rounding with equal probabilities; 271 | * + CPFLOAT_RND_OD for round-to-odd; and 272 | * + CPFLOAT_NO_RND for no rounding. 273 | * 274 | * No rounding is performed if this field is set to any other value. 275 | * 276 | * The validation functions cpfloatf_validate_optstruct() and 277 | * cpfloat_validate_optstruct() return a warning code if a value other than 278 | * those in the list above is specified. 279 | */ 280 | cpfloat_rounding_t round; 281 | /** 282 | * @brief Support for saturation arithmetic in target format. 
283 | * 284 | * @details If this field is set to `CPFLOAT_SAT_USE`, numbers too large to be 285 | * represented in the target format are clamped to the largest floating-point 286 | * number of appropriate sign. If this field is set to `CPFLOAT_SAT_NO`, 287 | * numbers that are too large to be represented are rounded to either the 288 | * largest normal value of appropriate sign or the closest infinity according 289 | * to the current rounding mode. 290 | */ 291 | cpfloat_saturation_t saturation; 292 | /** 293 | * @brief Support for subnormal numbers in target format. 294 | * 295 | * @details Subnormal numbers are supported if this field is set to 296 | * `CPFLOAT_SUBN_USE` and rounded to a normal number according to the current 297 | * rounding mode if it is set to `CPFLOAT_SUBN_RND`. 298 | */ 299 | cpfloat_subnormal_t subnormal; 300 | 301 | /* Bit flips. */ 302 | /** 303 | * @brief Support for soft errors. 304 | * 305 | * @details If this field is not set to `CPFLOAT_SOFTERR_NO`, a single bit 306 | * flip is introduced in the binary floating-point representation of the 307 | * rounded result with probability `p`. The bit flip can strike only the 308 | * target-format fraction (significand without the implicit bit) if this field 309 | * is set to `CPFLOAT_SOFTERR_FRAC` and any bit in the target-format 310 | * representation if it is set to `CPFLOAT_SOFTERR_FP`. 311 | */ 312 | cpfloat_softerr_t flip; 313 | /** 314 | * @brief Probability of bit flips. 315 | * 316 | * @details The probability of flipping a single bit in the binary 317 | * floating-point representation or in the fraction (significand without the 318 | * implicit bit) of a number after rounding. This field is ignored if `flip` 319 | * is set to `CPFLOAT_SOFTERR_NO`. 
320 | * 321 | * The validation functions cpfloatf_validate_optstruct() and 322 | * cpfloat_validate_optstruct() return an error code if `flip` is set to 323 | * `CPFLOAT_SOFTERR_FP` or `CPFLOAT_SOFTERR_FRAC` and this field does not 324 | * contain a number in the interval [0,1]. 325 | */ 326 | double p; 327 | 328 | /* Internal: state of pseudo-random number generator. */ 329 | /** 330 | * @brief Internal state of pseudo-random number generator for single bits. 331 | * 332 | * @details This field is used to store the internal state of the random 333 | * number generator used when @ref round is set to `CPFLOAT_RND_SE`. This 334 | * value should be initialized to `NULL`. 335 | */ 336 | cpfloat_bitseed_t *bitseed; 337 | /** 338 | * @brief Internal state of pseudo-random number generator for `float`s. 339 | * 340 | * @details This field is used to store the internal state of the random 341 | * number generator used when @ref round is set to `CPFLOAT_RND_SP` and 342 | * `float` arrays are used. This value should be initialized to `NULL`. 343 | */ 344 | cpfloat_randseedf_t *randseedf; 345 | /** 346 | * @brief Internal state of pseudo-random number generator for `double`s. 347 | * 348 | * @details This field is used to store the internal state of the random 349 | * number generator used when @ref round is set to `CPFLOAT_RND_SP` and 350 | * `double` arrays are used. This value should be initialized to `NULL`. 351 | */ 352 | cpfloat_randseed_t *randseed; 353 | } optstruct; 354 | 355 | /** 356 | @brief Allocate @ref optstruct struct to store parameters of target format. 357 | 358 | @details This function allocates and initializes an @ref optstruct struct. 359 | 360 | @return The function returns a pointer to the allocated memory if the 361 | execution was successful, and @b NULL otherwise.

362 | */ 363 | optstruct *init_optstruct(); 364 | 365 | /** 366 | @brief Free the memory underlying an @ref optstruct struct. 367 | 368 | @details This function attempts to free all the memory used by @p fpopts. 369 | 370 | @param[in] fpopts Pointer to @ref optstruct struct to be deallocated. 371 | 372 | @return The function returns @p 0 unless @p fpopts is set to @p NULL, 373 | in which case it returns @p -1.

374 | */ 375 | int free_optstruct(optstruct *fpopts); 376 | 377 | #endif /* #ifndef _CHOPFAST_DEFINITIONS_ */ 378 | 379 | /* 380 | * CPFloat - Custom Precision Floating-point numbers. 381 | * 382 | * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 383 | * 384 | * This library is free software; you can redistribute it and/or modify it under 385 | * the terms of the GNU Lesser General Public License as published by the Free 386 | * Software Foundation; either version 2.1 of the License, or (at your option) 387 | * any later version. 388 | * 389 | * This library is distributed in the hope that it will be useful, but WITHOUT 390 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 391 | * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 392 | * details. 393 | * 394 | * You should have received a copy of the GNU Lesser General Public License along 395 | * with this library; if not, write to the Free Software Foundation, Inc., 51 396 | * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 397 | */ 398 | -------------------------------------------------------------------------------- /src/cpfloat_docmacros.h: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | #ifndef _CPFLOAT_DOCMACROS_ 5 | #define _CPFLOAT_DOCMACROS_ 6 | 7 | #define doc_cpfloat_validate_optstruct(FPTYPE, PMIN, PMAX, EMIN, EMAX) \ 8 | /** \ 9 | @brief Validate fields of @ref optstruct struct for `FPTYPE` storage format. \ 10 | \ 11 | @details This function checks whether the parameters stored in @p fpopts are \ 12 | valid when `FPTYPE` is used as storage format. \ 13 | \ 14 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 15 | and the probability of soft errors striking the rounded values. 
\ 16 | \ 17 | @return The function returns @b 0 if all the parameters are valid, and a \ 18 | positive number if at least one of them is not. A negative number should be \ 19 | understood as a warning, and indicates that a CPFloat function @em will \ 20 | return @p 0 if @p fpopts is used as fourth argument, but might not perform as \ 21 | intended.

\ 22 | \ 23 | Possible return values are: \ 24 | \li @b -4 The rounding mode specified in @p fpopts->round does not correspond \ 25 | to a valid choice, thus no rounding will be performed. \ 26 | \li @b -2 The required number of digits in @p fpopts->precision is between \ 27 | PMIN and PMAX inclusive, which might cause double rounding if round-to-\ 28 | nearest is used. \ 29 | \li @b -1 The string in @p fpopts->format is not valid. This is not an error \ 30 | as this value is not used by the C functions, but only by the MEX interface. \ 31 | \li @b 0 All the parameters in @p fpopts are valid. \ 32 | \li @b 2 The required number of digits in @p fpopts->precision is larger \ 33 | than PMAX, the number of significant digits in a variable of type `FPTYPE`. \ 34 | \li @b 3 The required minimum exponent in @p fpopts->emin is smaller than \ 35 | EMIN, the smallest possible exponent for a variable of type `FPTYPE`, or \ 36 | the required maximum exponent in @p fpopts->emax is larger than \ 37 | EMAX, the largest possible exponent for a variable of type `FPTYPE`. \ 38 | \li @b 5 The value of @p fpopts->flip indicates that soft errors should be \ 39 | introduced, but @p fpopts->p is not a real number between 0 and 1 and thus \ 40 | does not represent a valid probability.

\ 41 | \ 42 | Errors take precedence over warnings, thus a nonpositive return value \ 43 | implies no errors. In case of multiple issues, the return value is that of \ 44 | the first error (or warning, if no error is present) encountered in the order \ 45 | given in the list above. \ 46 | */ 47 | 48 | #define doc_cpfloat(FPTYPE, PMAX, EMIN, EMAX) \ 49 | /** \ 50 | @brief Round `FPTYPE` array to lower precision. \ 51 | \ 52 | @details If the function executes without errors, then the array @p X \ 53 | contains the @p numelem entries of the array @p A rounded to a \ 54 | lower-precision target format. The parameters of the target format and the \ 55 | rounding mode to be used are encoded in @p fpopts. If required, the function \ 56 | flips one bit in some of the entries of @p X.

\ 57 | \ 58 | If OpenMP support is specified at compile time, several OpenMP threads are \ 59 | used if @p numelem is large enough. This parameter is machine-dependent.

\ 60 | \ 61 | @param[out] X Array of rounded values. \ 62 | @param[in] A Input array. \ 63 | @param[in] numelem Number of elements in @p X and @p A. \ 64 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 65 | and the probability of soft errors striking the rounded values. \ 66 | \ 67 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 68 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 69 | than EMAX, and @b 0 otherwise. \ 70 | */ 71 | 72 | #define doc_cpf_univariate(MATHFUN, FUNSTRING, PMAX, EMIN, EMAX) \ 73 | /** \ 74 | @brief Compute MATHFUN rounded to lower precision. \ 75 | \ 76 | @details If the function executes without errors, then \ 77 | FUNSTRING \ 78 | rounded to a lower-precision target format. The parameters of the \ 79 | target format and the rounding mode to be used are encoded in @p fpopts. If \ 80 | required, the function flips one bit in some of the entries of @p X.

\ 81 | \ 82 | If OpenMP support is specified at compile time, several OpenMP threads are \ 83 | used if @p numelem is large enough. This parameter is machine dependent.\ 84 | \ 85 | @param[out] X Array of rounded values. \ 86 | @param[in] A Input array. \ 87 | @param[in] numelem Number of elements in @p X and @p A. \ 88 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 89 | and the probability of soft errors striking the rounded values. \ 90 | \ 91 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 92 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 93 | than EMAX, and @b 0 otherwise. \ 94 | */ 95 | 96 | #define doc_cpf_univariate_nobitflip(MATHFUN, FUNSTRING, PMAX, EMIN, EMAX) \ 97 | /** \ 98 | @brief Compute MATHFUN in lower precision. \ 99 | \ 100 | @details If the function executes without errors, then \ 101 | FUNSTRING \ 102 | rounded to a lower-precision target format. The parameters of the \ 103 | target format and the rounding mode to be used are encoded in @p fpopts.

\ 104 | \ 105 | If OpenMP support is specified at compile time, several OpenMP threads are \ 106 | used if @p numelem is large enough. This parameter is machine dependent.\ 107 | \ 108 | @param[out] X Array of rounded values. \ 109 | @param[in] A Input array. \ 110 | @param[in] numelem Number of elements in @p X and @p A. \ 111 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 112 | and the probability of soft errors striking the rounded values. \ 113 | \ 114 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 115 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 116 | than EMAX, and @b 0 otherwise. \ 117 | */ 118 | 119 | #define doc_cpf_bivariate(MATHFUN, FUNSTRING, PMAX, EMIN, EMAX) \ 120 | /** \ 121 | @brief Compute MATHFUN in lower precision. \ 122 | \ 123 | @details If the function executes without errors, then \ 124 | FUNSTRING \ 125 | rounded to a lower-precision target format. The parameters of the \ 126 | target format and the rounding mode to be used are encoded in @p fpopts. If \ 127 | required, the function flips one bit in some of the entries of @p X.

\ 128 | \ 129 | If OpenMP support is specified at compile time, several OpenMP threads are \ 130 | used if @p numelem is large enough. This parameter is machine dependent.\ 131 | \ 132 | @param[out] X Array of rounded values. \ 133 | @param[in] A Input array. \ 134 | @param[in] B Input array. \ 135 | @param[in] numelem Number of elements in @p X, @p A, and @p B. \ 136 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 137 | and the probability of soft errors striking the rounded values. \ 138 | \ 139 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 140 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 141 | than EMAX, and @b 0 otherwise. \ 142 | */ 143 | 144 | #define doc_cpf_trivariate(MATHFUN, FUNSTRING, PMAX, EMIN, EMAX) \ 145 | /** \ 146 | @brief Compute MATHFUN in lower precision. \ 147 | \ 148 | @details If the function executes without errors, then \ 149 | FUNSTRING \ 150 | rounded to a lower-precision target format. The parameters of the \ 151 | target format and the rounding mode to be used are encoded in @p fpopts. If \ 152 | required, the function flips one bit in some of the entries of @p X.

\ 153 | \ 154 | If OpenMP support is specified at compile time, several OpenMP threads are \ 155 | used if @p numelem is large enough. This parameter is machine dependent.\ 156 | \ 157 | @param[out] X Array of rounded values. \ 158 | @param[in] A Input array. \ 159 | @param[in] B Input array. \ 160 | @param[in] C Input array. \ 161 | @param[in] numelem Number of elements in @p X, @p A, @p B, and @p C. \ 162 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 163 | and the probability of soft errors striking the rounded values. \ 164 | \ 165 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 166 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 167 | than EMAX, and @b 0 otherwise. \ 168 | */ 169 | 170 | #define doc_cpf_frexp(PMAX, EMIN, EMAX) \ 171 | /** \ 172 | @brief Exponent and normalized fraction of rounded floating-point number. \ 173 | \ 174 | @details If the function executes without errors, then: \ 175 | \li if \f$ A_i \f$ is 0, then \f$ X_i \f$ and \f$ \exp_i \f$ are both set to \ 176 | zero;\ 177 | \li otherwise, \f$ X_i \f$ is a value in the range \f$ (-1;-0.5] \cup \ 178 | [0.5; 1) \f$ and \f$ \exp_i \f$ is an integer such that \f$ 2^{\exp_i} \ 179 | \times X_i \f$ is equal to \f$ A_i \f$ rounded to a lower-precision target \ 180 | format.

\ 181 | \ 182 | The parameters of the target format and the rounding mode to be used are \ 183 | encoded in @p fpopts. If required, the function flips one bit in some of \ 184 | the entries of @p X.

\ 185 | \ 186 | If OpenMP support is specified at compile time, several OpenMP threads are \ 187 | used if @p numelem is large enough. This parameter is machine dependent.\ 188 | \ 189 | @param[out] X Array of floating-point values in \ 190 | \f$ (-1;-0.5] \f$, \f$ [0.5; 1) \f$. \ 191 | @param[out] exp Array of integer exponents. \ 192 | @param[in] A Input array. \ 193 | @param[in] numelem Number of elements in @p X, @p exp, and @p A. \ 194 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 195 | and the probability of soft errors striking the rounded values. \ 196 | \ 197 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 198 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 199 | than EMAX, and @b 0 otherwise. \ 200 | */ 201 | 202 | #define doc_cpf_scaling(BASE, PMAX, EMIN, EMAX) \ 203 | /** \ 204 | @brief Scale number by power of BASE in lower precision. \ 205 | \ 206 | @details If the function executes without errors, then \f$ X_i = A_i \times \ 207 | \mathrm{BASE}^{\exp_i} \f$ rounded to a lower-precision target format. \ 208 | The parameters of the target format and the rounding mode to be used are \ 209 | encoded in @p fpopts. If required, the function flips one bit in some of the \ 210 | entries of @p X.

\ 211 | \ 212 | If OpenMP support is specified at compile time, several OpenMP threads are \ 213 | used if @p numelem is large enough. This parameter is machine dependent.\ 214 | \ 215 | @param[out] X Array of rounded values. \ 216 | @param[in] A Input array. \ 217 | @param[in] exp Array of integer exponents. \ 218 | @param[in] numelem Number of elements in @p X, @p A, and @p exp. \ 219 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 220 | and the probability of soft errors striking the rounded values. \ 221 | \ 222 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 223 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 224 | than EMAX, and @b 0 otherwise. \ 225 | */ 226 | 227 | #define doc_cpf_modf(PMAX, EMIN, EMAX) \ 228 | /** \ 229 | @brief Compute integral and fractional part. \ 230 | \ 231 | @details If the function executes without errors, then \f$ X_i \f$ is a value \ 232 | in the range \f$ (-1,1) \f$ and \f$ \mathrm{intpart}_i \f$ is an integer such \ 233 | that \f$ X_i + \mathrm{intpart}_i \f$ is equal to \f$ A_i \f$ rounded to a \ 234 | lower-precision target format. The parameters of the target format and the \ 235 | rounding mode to be used are encoded in @p fpopts. If required, the function \ 236 | flips one bit in some of the entries of @p X.

\ 237 | \ 238 | If OpenMP support is specified at compile time, several OpenMP threads are \ 239 | used if @p numelem is large enough. This parameter is machine dependent.\ 240 | \ 241 | @param[out] X Array of floating-point values in (-1, 1). \ 242 | @param[out] intpart Array of integer parts. \ 243 | @param[in] A Input array. \ 244 | @param[in] numelem Number of elements in @p X, @p intpart, and @p A. \ 245 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 246 | and the probability of soft errors striking the rounded values. \ 247 | \ 248 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 249 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 250 | than EMAX, and @b 0 otherwise. \ 251 | */ 252 | 253 | #define doc_cpf_ilogb(PMAX, EMIN, EMAX) \ 254 | /** \ 255 | @brief Compute integral part of the logarithm of the absolute value. \ 256 | \ 257 | @details If the function executes without errors, the integer \f$ \exp_i \f$ \ 258 | is the exponent used internally to express the floating-point value \ 259 | \f$ A_i \f$ rounded to a lower-precision target format. In other words, \ 260 | \f$ \exp_i \f$ is equal to \f$ \lfloor \log_b \lvert A_i \rvert \rfloor \f$ \ 261 | where \f$ b = \mathrm{FLT\_RADIX} \f$ is typically 2. The parameters of the \ 262 | target format and the rounding mode to be used are encoded in @p fpopts.

\ 263 | \ 264 | If OpenMP support is specified at compile time, several OpenMP threads are \ 265 | used if @p numelem is large enough. This parameter is machine dependent.\ 266 | \ 267 | @param[out] exp Array of integer exponents. \ 268 | @param[in] A Input array. \ 269 | @param[in] numelem Number of elements in @p exp and @p A. \ 270 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 271 | and the probability of soft errors striking the rounded values. \ 272 | \ 273 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 274 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 275 | than EMAX, and @b 0 otherwise. \ 276 | */ 277 | 278 | #define doc_cpf_rint(PMAX, EMIN, EMAX) \ 279 | /** \ 280 | @brief Compute the closest integer with specified rounding mode. \ 281 | \ 282 | @details If the function executes without errors, then \f$ X_i \f$ is the \ 283 | integral part of \f$ A_i \f$ rounded to a lower-precision target format and \ 284 | \f$ \mathrm{exception}_i \f$ is set to 0 if \f$ X_i \f$ is equal to \ 285 | \f$ A_i \f$ and to FE_INEXACT otherwise. The parameters of the target format \ 286 | and the rounding mode to be used are encoded in @p fpopts. If required, the \ 287 | function flips one bit in some of the entries of @p X.

\ 288 | \ 289 | If OpenMP support is specified at compile time, several OpenMP threads are \ 290 | used if @p numelem is large enough. This parameter is machine dependent.\ 291 | \ 292 | @param[out] X Array of rounded values. \ 293 | @param[out] exception Array of floating-point exceptions. \ 294 | @param[in] A Input array. \ 295 | @param[in] numelem Number of elements in @p X, @p exception, and @p A. \ 296 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 297 | and the probability of soft errors striking the rounded values. \ 298 | \ 299 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 300 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 301 | than EMAX, and @b 0 otherwise. \ 302 | */ 303 | 304 | #define doc_cpf_nearbyint(PMAX, EMIN, EMAX) \ 305 | /** \ 306 | @brief Compute the closest integer with specified rounding mode. \ 307 | \ 308 | @details If the function executes without errors, then \f$ X_i \f$ is the \ 309 | integral part of \f$ A_i \f$ rounded to a lower-precision target format. \ 310 | The parameters of the target format and the rounding mode to be used are \ 311 | encoded in @p fpopts. If required, the function flips one bit in some of the \ 312 | entries of @p X.

\ 313 | \ 314 | If OpenMP support is specified at compile time, several OpenMP threads are \ 315 | used if @p numelem is large enough. This parameter is machine dependent.\ 316 | \ 317 | @param[out] X Array of rounded values. \ 318 | @param[in] A Input array. \ 319 | @param[in] numelem Number of elements in @p X and @p A. \ 320 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 321 | and the probability of soft errors striking the rounded values. \ 322 | \ 323 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 324 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 325 | than EMAX, and @b 0 otherwise. \ 326 | */ 327 | 328 | 329 | #define doc_cpf_remquo(PMAX, EMIN, EMAX) \ 330 | /** \ 331 | @brief Compute remainder and quotient of rounded numbers. \ 332 | \ 333 | @details If the function executes without errors, then \f$ \mathrm{quot}_i \f$ \ 334 | and \f$ X_i \f$ are the (integral) quotient and the remainder of the division \ 335 | \f$ A_i / B_i \f$ with \f$ A_i \f$ and \f$ B_i \f$ rounded to a \ 336 | lower-precision target format. The parameters of the target format and the \ 337 | rounding mode to be used are encoded in @p fpopts. If required, the function \ 338 | flips one bit in some of the entries of @p X.

\ 339 | \ 340 | If OpenMP support is specified at compile time, several OpenMP threads are \ 341 | used if @p numelem is large enough. This parameter is machine dependent.\ 342 | \ 343 | @param[out] X Array of remainders. \ 344 | @param[out] quot Array of quotients. \ 345 | @param[in] A Input array. \ 346 | @param[in] B Input array. \ 347 | @param[in] numelem Number of elements in @p X, @p quot, @p A, and @p B. \ 348 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 349 | and the probability of soft errors striking the rounded values. \ 350 | \ 351 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 352 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 353 | than EMAX, and @b 0 otherwise. \ 354 | */ 355 | 356 | #define doc_cpf_fpclassify(PMAX, EMIN, EMAX) \ 357 | /** \ 358 | @brief Categorize floating-point values. \ 359 | \ 360 | @details If the function executes without errors, then \f$ r_i \f$ has value: \ 361 | \li FP_INFINITE, if \f$ A_i \f$ is infinite in the lower-precision target format; \ 362 | \li FP_NAN, if \f$ A_i \f$ is a NaN in the lower-precision target format; \ 363 | \li FP_NORMAL, if \f$ A_i \f$ is normal in the lower-precision target format; \ 364 | \li FP_SUBNORMAL, if \f$ A_i \f$ is subnormal in the lower-precision target format; and \ 365 | \li FP_ZERO, if \f$ A_i \f$ is zero in the lower-precision target format.

\ 366 | The parameters of the target format and the rounding mode to be used are \ 367 | encoded in @p fpopts.

\ 368 | \ 369 | If OpenMP support is specified at compile time, several OpenMP threads are \ 370 | used if @p numelem is large enough. This parameter is machine dependent.\ 371 | \ 372 | @param[out] r Array of classes. \ 373 | @param[in] A Input array. \ 374 | @param[in] numelem Number of elements in @p r and @p A. \ 375 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 376 | and the probability of soft errors striking the rounded values. \ 377 | \ 378 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 379 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 380 | than EMAX, and @b 0 otherwise. \ 381 | */ 382 | 383 | #define doc_cpf_isfun(STRING, PMAX, EMIN, EMAX) \ 384 | /** \ 385 | @brief Check whether value is STRING in lower precision target format. \ 386 | \ 387 | @details If the function executes without errors, then \f$ r_i \f$ is a \ 388 | nonzero integral value if \f$ A_i \f$ is STRING in a lower-precision target \ 389 | format, and zero otherwise. The parameters of the target format and the \ 390 | rounding mode to be used are encoded in @p fpopts.

\ 391 | \ 392 | If OpenMP support is specified at compile time, several OpenMP threads are \ 393 | used if @p numelem is large enough. This parameter is machine dependent.\ 394 | \ 395 | @param[out] r Array of Boolean values. \ 396 | @param[in] A Input array. \ 397 | @param[in] numelem Number of elements in @p r and @p A. \ 398 | @param[in] fpopts Parameters describing the target format, the rounding mode, \ 399 | and the probability of soft errors striking the rounded values. \ 400 | \ 401 | @return The function returns @b 1 if @p fpopts->precision is larger than \ 402 | PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or fpopts->emax is larger \ 403 | than EMAX, and @b 0 otherwise. \ 404 | */ 405 | 406 | #endif /* #ifndef _CPFLOAT_DOCMACROS_ */ 407 | 408 | /* 409 | * CPFloat - Custom Precision Floating-point numbers. 410 | * 411 | * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis 412 | * 413 | * This library is free software; you can redistribute it and/or modify it under 414 | * the terms of the GNU Lesser General Public License as published by the Free 415 | * Software Foundation; either version 2.1 of the License, or (at your option) 416 | * any later version. 417 | * 418 | * This library is distributed in the hope that it will be useful, but WITHOUT 419 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 420 | * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 421 | * details.
422 | * 423 | * You should have received a copy of the GNU Lesser General Public License along 424 | * with this library; if not, write to the Free Software Foundation, Inc., 51 425 | * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 426 | */ 427 | -------------------------------------------------------------------------------- /src/cpfloat_threshold_binary32.h: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | /** 5 | * @file cpfloat_threshold_binary32.h 6 | * @brief Size of smallest `float` array on which to use multiple OpenMP threads. 7 | */ 8 | 9 | /** 10 | * @brief Size of smallest array on which cpfloatf() uses multiple threads. 11 | * 12 | * @details Threshold for switching between cpfloatf_sequential() and 13 | * cpfloatf_parallel() in cpfloatf(). The value of this constant is ignored 14 | * if the file that includes cpfloat_binary32.h is compiled without OpenMP 15 | * support. 16 | */ 17 | #define OPENMP_THRESHOLD_float 1 18 | -------------------------------------------------------------------------------- /src/cpfloat_threshold_binary64.h: -------------------------------------------------------------------------------- 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */ 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ 3 | 4 | /** 5 | * @file cpfloat_threshold_binary64.h 6 | * @brief Size of smallest `double` array on which to use multiple OpenMP threads. 7 | */ 8 | 9 | /** 10 | * @brief Size of smallest array on which cpfloat() uses multiple threads. 11 | * 12 | * @details Threshold for switching between cpfloat_sequential() and 13 | * cpfloat_parallel() in cpfloat(). The value of this constant is ignored 14 | * if the file that includes cpfloat_binary64.h is compiled without OpenMP 15 | * support. 
16 | */ 17 | #define OPENMP_THRESHOLD_double 1 18 | -------------------------------------------------------------------------------- /test/cpfloat_test.m: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis 2 | % SPDX-License-Identifier: LGPL-2.1-or-later 3 | 4 | function cpfloat_test 5 | %TEST_CPFLOAT Test the cpfloat function. 6 | % The tests are for single precision and fp16. 7 | 8 | clear cpfloat fp options options2 assert_eq 9 | 10 | usingoctave = exist('OCTAVE_VERSION', 'builtin'); 11 | 12 | if usingoctave 13 | rand('seed', 1); 14 | else 15 | rng(1); 16 | end 17 | 18 | n = 0; 19 | uh = 2^(-11); % Unit roundoff for fp16. 20 | pi_h = 6432*uh; % fp16(pi) 21 | 22 | % Check handling of defaults and persistent variable. 23 | fp.format = 'bfloat16'; 24 | [~,options] = cpfloat(pi,fp); 25 | assert_eq(fp.format,options.format) 26 | assert_eq(options.subnormal,0) 27 | 28 | fp.format = []; 29 | [~,options] = cpfloat(pi,fp); 30 | assert_eq(options.format,'h') % Check default; 31 | 32 | fp.explim = []; 33 | [~,options] = cpfloat(pi,fp); 34 | assert_eq(options.explim,1) % Check default. 35 | 36 | fp.explim = 0; 37 | [~,options] = cpfloat(pi,fp); 38 | assert_eq(options.explim,0) % Check no default. 39 | 40 | fp.round = []; 41 | [~,options] = cpfloat(pi,fp); 42 | assert_eq(options.round,1) % Check default. 43 | 44 | fp.saturation = 1; 45 | [~,options] = cpfloat(pi,fp); 46 | assert_eq(options.saturation,1) 47 | 48 | fp.saturation = []; 49 | [~,options] = cpfloat(pi,fp); 50 | assert_eq(options.saturation,0) % Check default; 51 | 52 | fp.subnormal = 0; 53 | [~,options] = cpfloat(pi,fp); 54 | assert_eq(options.subnormal,0) 55 | 56 | fp.subnormal = []; 57 | [~,options] = cpfloat(pi,fp); 58 | assert_eq(options.subnormal,1) % Check default; 59 | 60 | fp.flip = []; 61 | [~,options] = cpfloat(pi,fp); 62 | assert_eq(options.flip,0) % Check no default. 
63 | 64 | clear cpfloat fp options 65 | fp.flip = 1; 66 | [~,options] = cpfloat([],fp); 67 | assert_eq(options.format,'h') 68 | assert_eq(options.round,1) 69 | assert_eq(options.saturation,0) 70 | assert_eq(options.subnormal,1) 71 | 72 | clear cpfloat fp options 73 | % check all default options 74 | fp.format = []; 75 | fp.round = []; 76 | fp.saturation = []; 77 | fp.subnormal = []; 78 | fp.flip = []; 79 | fp.p = []; 80 | [~,options] = cpfloat(pi,fp); 81 | assert_eq(options.format,'h') 82 | assert_eq(options.round,1) 83 | assert_eq(options.saturation,0) 84 | assert_eq(options.subnormal,1) 85 | assert_eq(options.flip,0) 86 | assert_eq(options.p,0.5) 87 | % % Takes different path from previous test since fpopts exists. 88 | % fp.subnormal = 0; 89 | % fp.format = []; [c,options] = cpfloat(pi,fp); 90 | % assert_eq(options.format,'h') 91 | 92 | % Check flip output. 93 | clear cpfloat fp 94 | fp.flip = 1; fp.format = 'd'; 95 | c = ones(8,1); 96 | d = cpfloat(c,fp); 97 | assert_eq(norm(d-c,1)>0,true); 98 | d = cpfloat(c',fp); 99 | assert_eq(norm(d-c',1)>0,true); 100 | fp.p = 0; % No bits flipped. 101 | d = cpfloat(c,fp); 102 | assert_eq(d,d); 103 | fp.p = 1; % All bits flipped. 
104 | d = cpfloat(c,fp); 105 | assert_eq(all(d ~= c),true); 106 | 107 | clear cpfloat 108 | [~,fp] = cpfloat; 109 | assert_eq(fp.subnormal,1) 110 | assert_eq(fp.format,'h') 111 | [~,options] = cpfloat(pi); 112 | assert_eq(options.format,'h') 113 | assert_eq(options.subnormal,1) 114 | assert_eq(options.round,1) 115 | assert_eq(options.flip,0) 116 | assert_eq(options.p,0.5) 117 | 118 | clear fp 119 | fp.format = 'd'; 120 | [~,options] = cpfloat(pi,fp); 121 | assert_eq(options.format,'d') 122 | assert_eq(options.subnormal,1) 123 | assert_eq(options.params, [53 -1022 1023]) 124 | [~,fp] = cpfloat; 125 | assert_eq(fp.format,'d') 126 | assert_eq(fp.subnormal,1) 127 | assert_eq(fp.params, [53 -1022 1023]) 128 | 129 | clear fp 130 | fp.format = 'E4M3'; 131 | [~,options] = cpfloat(pi,fp); 132 | assert_eq(options.format,'E4M3') 133 | assert_eq(options.infinity,0) 134 | assert_eq(options.params, [4 -6 8]) 135 | [~,fp] = cpfloat; 136 | assert_eq(fp.format,'E4M3') 137 | assert_eq(fp.infinity,0) 138 | assert_eq(fp.params, [4 -6 8]) 139 | 140 | 141 | clear fp 142 | fp.format = 'bfloat16'; 143 | [~,options] = cpfloat(pi,fp); 144 | assert_eq(options.format,'bfloat16') 145 | assert_eq(options.subnormal,0) 146 | assert_eq(options.params, [8 -126 127]) 147 | [~,fp] = cpfloat; 148 | assert_eq(fp.format,'bfloat16') 149 | assert_eq(fp.subnormal,0) 150 | assert_eq(fp.params, [8 -126 127]) 151 | 152 | clear cpfloat 153 | [~,fp] = cpfloat; 154 | fp.format = 'b'; 155 | fp = rmfield(fp, 'params'); 156 | [~,options] = cpfloat(pi,fp); 157 | assert_eq(options.saturation,0) % No saturation if that field was empty. 158 | assert_eq(options.subnormal,1) % No subnormals only if that field was empty. 159 | 160 | % Check these usages do not give an error. 161 | c = cpfloat([]); 162 | cpfloat([]); 163 | cpfloat([],fp); 164 | cpfloat(1,[]); 165 | cpfloat(1,fp); 166 | c = cpfloat(1,fp); 167 | 168 | % Test matrix. 
% Matrix input. The options structure returned by the previous call is reused
% with format 'b' and without the params field. The entries of magic(4) are
% expected to be unchanged by the rounding, and a perturbation of order 1e-12
% is expected to be rounded away.
options.format = 'b';
options = rmfield(options, 'params');
A = magic(4);
C = cpfloat(A,options);
assert_eq(A,C);
B = A + randn(size(A))*1e-12;
C = cpfloat(B,options);
assert_eq(A,C);
A2 = hilb(6);
C = cpfloat(A2);

% The custom format 'c' with the bfloat16 parameters must reproduce the
% results obtained above with format 'b'.
options.format = 'c';
options.params = [8 -126 127]; % bfloat16
C1 = cpfloat(A,options);
assert_eq(A,C1);
C2 = cpfloat(B,options);
assert_eq(A,C2);
assert_eq(C,cpfloat(A2));

% The custom format 'c' with the half-precision parameters must agree with
% format 'h', also when the returned option structures are fed back in.
clear options
options.format = 'c';
options.params = [11 -14 15]; % h
options2.format = 'h';
A = hilb(6);
[X1,opt] = cpfloat(A,options);
[X2,opt2] = cpfloat(A,options2);
assert_eq(X1,X2)
assert_eq(cpfloat(A,opt),cpfloat(A,opt2));

% Row vector
clear options
options.format = 'h';
A = -10:10;
C = cpfloat(A,options);
assert_eq(A,C);
B = A + randn(size(A))*1e-12;
C = cpfloat(B,options);
assert_eq(A,C);

% Column vector
options.format = 's';
A = (-10:10)';
C = cpfloat(A,options);
assert_eq(A,C);
B = A + A.*rand(size(A))*1e-14; % Keep 0 as 0.
C = cpfloat(B,options);
assert_eq(A,C);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Main loop: run the same battery of tests for four target formats:
% i = 1 single ('s'), i = 2 half ('h'), i = 3 'E4M3', i = 4 'E5M2'.
% pi_h, uh and usingoctave are defined earlier in this file.
for i = 1:4
  clear cpfloat fp options

  if i == 1
    % Single precision tests.
    [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('single');
    options.format = 's';
  elseif i == 2
    % Half precision tests.
    [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('half');
    options.format = 'h';
  elseif i == 3
    % Quarter precision tests.
    [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('q43');
    options.format = 'E4M3';
    % Modification for OCP compliant q43.
    emin = -6; % Previously thought to be 1-emax=-7.
    emax = 8; % Previously thought to be 7
    emins = emin + 1 - p; % Exponent of smallest subnormal number.
    xmins = 2^emins;
    xmin = 2^emin;
    xmax = 2^emax * (2-2^(1-p));
  elseif i == 4
    % Quarter precision tests.
    [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('q52');
    options.format = 'E5M2';
  end
  options.subnormal = 0;

  % Rounding of +pi and -pi to the target format.
  x = pi;
  if i == 1
    y = double(single(x));
  elseif i == 2
    y = pi_h; % double(fp16(x));
  elseif i == 3
    y = 3.25;
  elseif i == 4
    y = 3.0;
  end
  c = cpfloat(x,options);
  assert_eq(c,y);
  x = -pi;
  c = cpfloat(x,options);
  assert_eq(c,-y);

  % Next number power of 2.
  y = 2^10;
  if i == 1
    dy = double(eps(single(y)));
  elseif i == 2
    dy = 2*y*uh; % double(eps(fp16(y)));
  elseif i == 3
    y = 2^4;
    dy = 2*y*u;
  elseif i == 4
    y = 2^4;
    dy = 2*y*u;
  end
  x = y + dy;
  c = cpfloat(x,options);
  assert_eq(c,x)

  % Number just before a power of 2.
  x = y - dy;
  c = cpfloat(x,options);
  assert_eq(c,x)

  % Next number power of 2.
  y = 2^(-4);
  if i == 1
    dy = double(eps(single(y)));
  elseif i == 2
    dy = 2*y*uh; % double(eps(fp16(y)));
  elseif i == 3
    dy = 2*y*u;
  elseif i == 4
    dy = 2*y*u;
  end
  x = y + dy;
  c = cpfloat(x,options);
  assert_eq(c,x)

  % Check other rounding options
  for rmode = 1:6
    options.round = rmode;
    x = y + (dy*10^(-3));
    c = cpfloat(x,options);
    if options.round == 2
      assert_eq(c,y+dy) % Rounding up.
    elseif options.round >= 5
      % Check rounded either up or down.
      if c ~= y+dy
        assert_eq(c,y);
      end
    else
      assert_eq(c,y);
    end
  end

  % Overflow tests.
  for j = 1:6
    options.round = j;
    x = xmax;
    c = cpfloat(x,options);
    assert_eq(c,x)
  end

  % Saturation tests.
  options.saturation = 1;
  for j = 1:6
    options.round = j;
    x = inf;
    c = cpfloat(x,options);
    assert_eq(c,xmax)
    c = cpfloat(-x,options);
    assert_eq(c,-xmax)
  end

  % Infinities tests.
  % The current value of the infinity option is saved here and written back
  % to options.infinity at the end of the underflow tests.
  [~,fpopts] = cpfloat;
  prev_infinity = fpopts.infinity;
  options.infinity = 1;
  options.saturation = 0;
  for j = 1:6
    options.round = j;
    x = inf;
    c = cpfloat(x,options);
    assert_eq(c,x)
    c = cpfloat(-x,options);
    assert_eq(c,-x)
  end

  % IEEE 754-2019, page 27: rule for rounding to infinity.
  % Round to nearest
  options.round = 1; % reset the rounding mode to default
  x = 2^emax * (2-(1/2)*2^(1-p)); % Round to inf.
  c = cpfloat(x,options);
  assert_eq(c,inf)
  c = cpfloat(-x,options);
  assert_eq(c,-inf)

  x = 2^emax * (2-(3/4)*2^(1-p)); % Round to realmax.
  c = cpfloat(x,options);
  assert_eq(c,xmax)
  c = cpfloat(-x,options);
  assert_eq(c,-xmax)

  % Round toward plus infinity
  options.round = 2;
  x = 2^emax * (2-(1/2)*2^(1-p));
  c = cpfloat(x,options);
  assert_eq(c,inf)
  c = cpfloat(-x,options);
  assert_eq(c,-xmax)

  % Round toward minus infinity
  options.round = 3;
  c = cpfloat(x,options);
  assert_eq(c,xmax)
  c = cpfloat(-x,options);
  assert_eq(c,-inf)

  % Round toward zero
  options.round = 4;
  c = cpfloat(x,options);
  assert_eq(c,xmax)
  c = cpfloat(-x,options);
  assert_eq(c,-xmax)

  % Round to nearest.
  options.round = 1; % reset the rounding mode to default
  if i == 2
    x = 1 + 2^(-11);
    c = cpfloat(x,options);
    assert_eq(c,1)
  end

  % Underflow tests.
  % NOTE(review): the half-precision value 2*uh is also used for the two
  % quarter-precision formats (i = 3, 4); 2*u may have been intended there.
  % Left unchanged here.
  if i == 1
    delta = double(eps(single(1)));
  else
    delta = 2*uh; % double(eps(fp16(1)));
  end

  options.subnormal = 1;
  c = cpfloat(xmin,options);
  assert_eq(c,xmin)
  x = [xmins xmin/2 xmin 0 xmax 2*xmax 1-delta/5 1+delta/4];
  c = cpfloat(x,options);
  c_expected = [x(1:5) inf 1 1];
  assert_eq(c,c_expected)

  options.subnormal = 0;
  c = cpfloat(xmin,options);
  assert_eq(c,xmin)
  x = [xmins xmin/2 xmin 0 xmax 2*xmax 1-delta/5 1+delta/4];
  c = cpfloat(x,options);
  c_expected = [0 0 x(3:5) inf 1 1];
  assert_eq(c,c_expected)
  options.infinity = prev_infinity;

  % Smallest normal number and spacing between the subnormal numbers.
  y = xmin; delta = xmin*2^(1-p);
  x = y - delta; % The largest subnormal number.
  options.subnormal = 1;
  c = cpfloat(x,options);
  assert_eq(c,x)
  % Round up if subnormals are not supported.
  options.subnormal = 0;
  c = cpfloat(x,options);
  assert_eq(c,xmin)
  % Flush subnormals to zero if subnormals are not supported.
  options.subnormal = 0;
  c = cpfloat(xmins,options);
  assert_eq(c,0)

  options.subnormal = 1;
  x = xmins*8; % A subnormal number.
  c = cpfloat(x,options);
  assert_eq(c,x)

  % Numbers smaller than smallest representable number.
  options.subnormal = 0;
  x = xmin / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmin / 2;
  c = cpfloat(x,options);
  assert_eq(c,-0)
  x = xmin / 4;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmin / 4;
  c = cpfloat(x,options);
  assert_eq(c,0)

  options.subnormal = 1;
  x = xmins / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmins / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = xmins / 4;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmins / 4;
  c = cpfloat(x,options);
  assert_eq(c,0)

  % Do not limit exponent.
  options.explim = 0;
  x = xmin/2;
  c = cpfloat(x,options);
  assert_eq(c,x)
  x = -xmin/2;
  c = cpfloat(x,options);
  assert_eq(c,x)
  x = xmax*2;
  c = cpfloat(x,options);
  assert_eq(c,x)
  x = -xmax*2;
  c = cpfloat(x,options);
  assert_eq(c,x)
  x = xmins/2;
  c = cpfloat(x,options);
  assert_eq(c,x)
  x = -xmins/2;
  c = cpfloat(x,options);
  assert_eq(c,x)
  A = [pi -pi; pi -pi];
  C = cpfloat(A,options);
  options.explim = 1;
  assert_eq(C,cpfloat(A,options));

  % Round toward plus infinity
  options.round = 2;
  options.subnormal = 0;
  x = xmin / 2;
  c = cpfloat(x,options);
  assert_eq(c,xmin)
  x = -xmin / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)

  options.subnormal = 1;
  x = xmins / 2;
  c = cpfloat(x,options);
  assert_eq(c,xmins)
  x = -xmins / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = xmins / 4;
  c = cpfloat(x,options);
  assert_eq(c,xmins)
  x = -xmins / 4;
  c = cpfloat(x,options);
  assert_eq(c,0)

  % Round toward minus infinity
  options.round = 3;
  options.subnormal = 0;
  x = xmin / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmin / 2;
  c = cpfloat(x,options);
  assert_eq(c,-xmin)

  options.subnormal = 1;
  x = xmins / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmins / 2;
  c = cpfloat(x,options);
  assert_eq(c,-xmins)
  x = xmins / 4;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmins / 4;
  c = cpfloat(x,options);
  assert_eq(c,-xmins)

  % Round toward zero.
  options.round = 4;
  options.subnormal = 0;
  x = xmin / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmin / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)

  options.subnormal = 1;
  x = xmins / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmins / 2;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = xmins / 4;
  c = cpfloat(x,options);
  assert_eq(c,0)
  x = -xmins / 4;
  c = cpfloat(x,options);
  assert_eq(c,0)

end % for i
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear options

% Test rounding with CHOPFAST versus native rounding.
% Row i holds the results for the i-th random sample; columns 1, 2 and 3 hold
% the results for round = 2, 3 and 4, which are compared with the conversion
% to single performed under the corresponding native rounding mode.
options.format = 's';
m = 100;
% Preallocate exactly the m-by-3 array filled below. The previous size,
% zeros(3,n), used n, which assert_eq increments on every call, and only
% worked because the arrays grow on assignment.
y = zeros(m,3);
z = y;
for i = 1:m
  x = randn;
  options.round = 2;
  y(i,1) = cpfloat(x,options);
  options.round = 3;
  y(i,2) = cpfloat(x,options);
  options.round = 4;
  y(i,3) = cpfloat(x,options);
  if usingoctave
    fesetround(inf);
    z(i,1) = single(x);
    fesetround(-inf);
    z(i,2) = single(x);
    fesetround(0);
    z(i,3) = single(x);
  else
    % Use undocumented function to set rounding mode in MATLAB.
    feature('setround',inf);
    z(i,1) = single(x);
    feature('setround',-inf);
    z(i,2) = single(x);
    feature('setround',0);
    z(i,3) = single(x);
  end
end
assert_eq(y,z)
% Switch back to round to nearest.
% Restore the native rounding mode changed by the comparison loop above.
if usingoctave
  fesetround(0.5);
else
  feature('setround',0.5)
end

% Double precision tests.
[u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('d');
options.format = 'd';
x = [1e-309 1e-320 1 1e306]; % First two entries are subnormal.
c = cpfloat(x,options);
assert_eq(c,x)
% Without subnormal support the two subnormal entries must become zero.
options.subnormal = 0;
c = cpfloat(x,options);
assert_eq(c,[0 0 x(3:4)])

% Set the options with an empty data argument, then round with a call that
% passes data only; pi must be returned unchanged.
options.format = 'd';
options.subnormal = 0;
cpfloat([],options);
a = cpfloat(pi);
assert_eq(a,pi)

options.format = 'd';
options.subnormal = 1;
cpfloat([],options);
a = cpfloat(pi); assert_eq(a,pi)

% With format 'd' a double input must be returned unchanged, both without a
% round field and with round = 2, 3 and 4.
x = pi^2;
clear options
options.format = 'd';
y = cpfloat(x,options); % Should not change x.
assert_eq(x,y);
options.round = 2;
y = cpfloat(x,options); % Should not change x.
assert_eq(x,y);
options.round = 3;
y = cpfloat(x,options); % Should not change x.
assert_eq(x,y);
options.round = 4;
y = cpfloat(x,options); % Should not change x.
assert_eq(x,y);

% Test on single inputs.
% A single input must give a single output whose value equals the result for
% the same number stored as a double.
clear options
ps = single(pi);
pd = double(ps);
options.format = 'b';
ys = cpfloat(ps,options);
assert_eq(isa(ys,'single'),true)
yd = cpfloat(pd);
assert_eq(double(ys),yd)

% Same comparison on random vectors of length n.
% NOTE(review): n is not set in this part of the file; assert_eq increments
% it on every call, so it is presumably the running test counter -- confirm.
options.format = 'h';
options.round = 2;
as = single(rand(n,1));
ad = double(as);
delta = single(rand(n,1));
cd = cpfloat(ad + 1e-5*double(delta),options);
cs = cpfloat(as + 1e-5*delta,options);
assert_eq(cd,double(cs));

% A custom format with 11 digits of precision and a narrower exponent range
% must round single(pi) to the same value as format 'h'.
options.format = 'c';
options.params = [11 -4 5];
temp1 = cpfloat(single(pi),options);
options.format = 'h';
options = rmfield(options, 'params');
temp2 = cpfloat(single(pi),options);
assert_eq(temp1,temp2)

% Test base 2 logarithm
% With round = 4 the inputs below, whose significands consist of 24 or 53
% consecutive ones, must keep only their leading 11 ones in format 'h'.
options.format = 'h';
options.round = 4;
x = single(2^-3 * (sum(2.^(-(0:23)))));
assert_eq(cpfloat(x,options), single(2^-3 * (sum(2.^(-(0:10))))))

x = 2^-3 * (sum(2.^(-(0:52))));
assert_eq(cpfloat(x,options), 2^-3 * (sum(2.^(-(0:10)))))

% In format 's' the single input is unchanged and the double input keeps its
% leading 24 ones.
options.format = 's';
x = single(2^-3 * (sum(2.^(-(0:23)))));
assert_eq(cpfloat(x,options), x)

x = 2^-3 * (sum(2.^(-(0:52))));
assert_eq(cpfloat(x,options), 2^-3 * (sum(2.^(-(0:23)))))

% In format 'd' the double input is unchanged.
options.format = 'd';
x = 2^-3 * (sum(2.^(-(0:52))));
assert_eq(cpfloat(x,options), x)

% Error paths: each call below is expected to raise an error, in which case
% the assignment does not happen and temp keeps the value 0.
options.round = 1;
temp = 0;
try
  options.format = 'c';
  options.params = [12 -4 5];
  temp = cpfloat(single(pi),options); % Error - double rounding!
catch
end
assert_eq(temp,0)
try
  options.format = 'c';
  options.params = [26 -8 9];
  temp = cpfloat(pi,options); % Error - double rounding!
catch
end
assert_eq(temp,0)
try
  temp = cpfloat(complex(1,1)); % Error - complex data!
catch
end
assert_eq(temp,0)

fprintf('All tests successful!\n')

clear cpfloat fp options options2 assert_eq

%%%%%%%%%%%%%%%%%%%%%%%
% Assert that a and b are equal according to isequal. Raises the error
% 'Failure' on a mismatch; otherwise prints the number of the test that
% succeeded. The counter n is incremented on every call and is not initialised
% here, so it must already exist in the enclosing function.
function assert_eq(a,b)
% if isempty(n), n = 0; end % First call.
n = n+1;
if ~isequal(a,b)
  error('Failure')
end
fprintf('Test %g succeeded.\n', n )
end

end

% CPFloat - Custom Precision Floating-point numbers.
%
% Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
%
% This library is free software; you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation; either version 2.1 of the License, or (at your option)
% any later version.
%
% This library is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
% details.
%
% You should have received a copy of the GNU Lesser General Public License along
% with this library; if not, write to the Free Software Foundation, Inc., 51
% Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

--------------------------------------------------------------------------------
/util/generate_spdx.sh:
--------------------------------------------------------------------------------
#! /bin/zsh
#
# Print an SPDX 2.2 tag-value description of the cpfloat package to standard
# output: document and package information first, followed by one entry for
# every file found below the current directory, except for git files, the
# contents of ./deps and *.spdx files.
#
# Requires curl, java, awk, shasum and md5sum. The spdx-tools jar is
# downloaded into ./deps on first use.

VERSION='0.5.0'

# Compute package hash using spdx-tools.
CURL=curl
DEPSDIR=./deps/
SPDX_TOOLS_JAR='spdx-tools.jar'
SPDX_TOOLS_URL='https://github.com/spdx/tools/releases/download/v2.2.4/spdx-tools-2.2.4-jar-with-dependencies.jar'
if [ ! -f "$DEPSDIR$SPDX_TOOLS_JAR" ]; then
    # The download fails if the target directory is missing, and without -f
    # curl would store an HTTP error page under the name of the jar.
    mkdir -p "$DEPSDIR"
    "$CURL" -fsSL -o "$DEPSDIR$SPDX_TOOLS_JAR" "$SPDX_TOOLS_URL"
fi

# The verification code is the fourth field of the first line printed by the
# tool. The regular expression lists the files left out of the computation.
OUTPUT=$(java -jar "$DEPSDIR$SPDX_TOOLS_JAR" \
              GenerateVerificationCode . ".*\.spdx|.*/deps/.*|.*\.sh" \
             | awk 'NR==1{print $4}')


# Add document and package information.
echo "##
## Document Creation Information
##

SPDXVersion: SPDX-2.2
DataLicense: CC0-1.0
SPDXID: SPDXRef-DOCUMENT
DocumentName: cpfloat-$VERSION
DocumentNamespace: https://raw.githubusercontent.com/north-numerical-computing/cpfloat/master/license.spdx
Creator: Person: Massimiliano Fasi (massimiliano.fasi@durham.ac.uk)
Creator: Person: Mantas Mikaitis (mantas.mikaitis@manchester.ac.uk)
Created: $(date -u +%Y-%m-%dT%H:%M:%SZ)



##
## Package Information
##

PackageName: cpfloat
SPDXID: SPDXRef-1
PackageVersion: $VERSION
PackageDownloadLocation: git://github.com/north-numerical-computing/cpfloat
PackageVerificationCode: $OUTPUT (excludes: ./license.spdx)
PackageHomePage: https://github.com/north-numerical-computing/cpfloat
PackageLicenseConcluded: LGPL-2.1-or-later
PackageLicenseInfoFromFiles: LGPL-2.1-or-later
PackageLicenseDeclared: LGPL-2.1-or-later
PackageCopyrightText: Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
PackageSummary: Custom Precision Floating-point numbers.



##
## File Information
##"

# Add file information.
# Paths are read one per line, so names containing blanks stay in one piece,
# and they are sorted so that the numbering of the entries does not depend on
# the order in which find happens to list the directory contents.
counter=1
find . | LC_ALL=C sort | while IFS= read -r file; do
    if [[ ! -d $file && $file != (./.git*|./deps/*|*.spdx) ]]; then
        echo ""
        echo "FileName: $file"
        echo "SPDXID: SPDXRef-1-$counter"
        counter=$((counter+1))
        # Every path printed by "find ." starts with "./", so patterns that
        # name a file need a leading "*/" in order to match.
        case $file in
            *.sh|*/.git*|*/deps/*|*/license.spdx)
                # Ignore:
                #   * housekeeping scripts;
                #   * git files;
                #   * third-party files;
                #   * license.spdx file.
                ;;
            *doc*|*/Doxyfile|*/cpfloat.m)
                echo "FileType: DOCUMENTATION"
                ;;
            */Makefile|*.c|*.h|*.ts|*.m|*.cpp)
                echo "FileType: SOURCE"
                ;;
            *.md|*.txt)
                echo "FileType: TEXT"
                ;;
            *.spdx)
                echo "FileType: SPDX"
                ;;
            *)
                echo "FileType: OTHER"
                ;;
        esac
        echo "FileChecksum: SHA1: $(shasum -a 1 "$file" | awk '{print $1}')"
        echo "FileChecksum: MD5: $(md5sum "$file" | awk '{print $1}')"
        echo "LicenseConcluded: LGPL-2.1-or-later"
        # Third field of the first line carrying the SPDX license tag. The
        # bracket in the pattern keeps this line of the script from matching
        # itself, and print (not printf) keeps file content from being used as
        # a format string.
        LICENSE=$(awk '/SPDX-License-[I]dentifier/ {print $3; exit}' "$file")
        echo "LicenseInfoInFile: ${LICENSE:-NONE}"
    fi
done
--------------------------------------------------------------------------------