├── .github
    └── workflows
    │   ├── deploy_docs.yml
    │   ├── run_c_tests.yml
    │   └── run_octave_tests.yml
├── .gitmodules
├── LICENSE.md
├── Makefile
├── README.md
├── docs
    ├── Doxyfile-project
    └── source
    │   ├── conf.py
    │   ├── index.rst
    │   └── readme.rst
├── examples
    └── example_manuscript.c
├── license.spdx
├── mex
    ├── cpfloat.c
    ├── cpfloat.m
    ├── cpfloat_autotune.m
    ├── cpfloat_compile.m
    └── cpfloat_compile_nomake.m
├── src
    ├── cpfloat_autotune.c
    ├── cpfloat_binary32.h
    ├── cpfloat_binary64.h
    ├── cpfloat_definitions.h
    ├── cpfloat_docmacros.h
    ├── cpfloat_template.h
    ├── cpfloat_threshold_binary32.h
    └── cpfloat_threshold_binary64.h
├── test
    ├── cpfloat_test.m
    └── cpfloat_test.ts
└── util
    └── generate_spdx.sh


/.github/workflows/deploy_docs.yml:
--------------------------------------------------------------------------------
 1 | name: documentation
 2 | on:
 3 |   push:
 4 | jobs:
 5 |   build-and-deploy:
 6 |     runs-on: ubuntu-latest
 7 |     permissions:
 8 |       contents: write  # The deploy step pushes the built site to the gh-pages branch.
 9 |     steps:
10 |       - uses: actions/setup-python@v5
11 |       - uses: actions/checkout@v4  # Pinned release tag instead of the moving master branch.
12 | 
13 |       - name: Install and Build
14 |         run: |
15 |           sudo apt update
16 |           sudo apt install -y doxygen graphviz python3-sphinx python3-breathe python3-sphinx-rtd-theme python3-exhale python3-myst-parser
17 |           make docs
18 | 
19 |       - name: Deploy
20 |         uses: JamesIves/github-pages-deploy-action@v4
21 |         with:
22 |           branch: gh-pages # The branch the action should deploy to.
23 |           folder: docs/html


--------------------------------------------------------------------------------
/.github/workflows/run_c_tests.yml:
--------------------------------------------------------------------------------
 1 | name: run-c-tests  # CI workflow: installs the "check" package, then runs "make ctest" and "make libtest".
 2 | on:
 3 |   push:  # No branch or path filters are given for this trigger.
 4 | jobs:
 5 |   build-and-run-c-tests:
 6 |     runs-on: ubuntu-latest
 7 |     steps:
 8 |       - name: Check out repository
 9 |         uses: actions/checkout@v4
10 |       - name: Install check library  # Refreshes the apt index before installing.
11 |         run: |
12 |           sudo apt update
13 |           sudo apt install -y check
14 |       - name: Build and run C tests
15 |         run: make ctest  # Target is defined in the repository Makefile (not shown in this workflow).
16 |       - name: Build and run library tests
17 |         run: make libtest  # Target is defined in the repository Makefile (not shown in this workflow).


--------------------------------------------------------------------------------
/.github/workflows/run_octave_tests.yml:
--------------------------------------------------------------------------------
 1 | name: run-octave-tests
 2 | on:
 3 |   push:
 4 | jobs:
 5 |   build-and-run-octave-tests:
 6 |     runs-on: ubuntu-22.04
 7 |     steps:
 8 |       - name: Check out repository
 9 |         uses: actions/checkout@v4
10 |       - name: Install Octave and corresponding packages and libraries
11 |         run: |
12 |           sudo apt update
13 |           sudo apt install -y octave-parallel liboctave-dev
14 |           # Fail on HTTP errors (-f) and follow redirects (-L) so that a broken
15 |           # download is reported here rather than by "pkg install" below.
16 |           # The URL is quoted because it contains a '?' character.
17 |           curl -fsSL "https://master.dl.sourceforge.net/project/octave/Octave%20Forge%20Packages/Individual%20Package%20Releases/fenv-0.1.0.tar.gz?viasf=1" -o fenv.tar.gz
18 |           octave --eval "pkg install fenv.tar.gz"
19 |       - name: Build MEX interface for Octave
20 |         run: make mexoct
21 |       - name: Run tests for MEX interface in Octave
22 |         run: make otest


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "include/pcg-c"]
2 | 	path = deps/pcg-c
3 | 	url = https://github.com/imneme/pcg-c.git
4 | [submodule "include/float_params"]
5 | 	path = deps/float_params
6 | 	url = https://github.com/higham/float_params.git
7 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 | GNU Lesser General Public License
  2 | =================================
  3 | 
  4 | _Version 2.1, February 1999_
  5 | _Copyright © 1991, 1999 Free Software Foundation, Inc._
  6 | _51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA_
  7 | 
  8 | Everyone is permitted to copy and distribute verbatim copies
  9 | of this license document, but changing it is not allowed.
 10 | 
 11 | _This is the first released version of the Lesser GPL.  It also counts
 12 | as the successor of the GNU Library Public License, version 2, hence
 13 | the version number 2.1._
 14 | 
 15 | ### Preamble
 16 | 
 17 | The licenses for most software are designed to take away your
 18 | freedom to share and change it.  By contrast, the GNU General Public
 19 | Licenses are intended to guarantee your freedom to share and change
 20 | free software--to make sure the software is free for all its users.
 21 | 
 22 | This license, the Lesser General Public License, applies to some
 23 | specially designated software packages--typically libraries--of the
 24 | Free Software Foundation and other authors who decide to use it.  You
 25 | can use it too, but we suggest you first think carefully about whether
 26 | this license or the ordinary General Public License is the better
 27 | strategy to use in any particular case, based on the explanations below.
 28 | 
 29 | When we speak of free software, we are referring to freedom of use,
 30 | not price.  Our General Public Licenses are designed to make sure that
 31 | you have the freedom to distribute copies of free software (and charge
 32 | for this service if you wish); that you receive source code or can get
 33 | it if you want it; that you can change the software and use pieces of
 34 | it in new free programs; and that you are informed that you can do
 35 | these things.
 36 | 
 37 | To protect your rights, we need to make restrictions that forbid
 38 | distributors to deny you these rights or to ask you to surrender these
 39 | rights.  These restrictions translate to certain responsibilities for
 40 | you if you distribute copies of the library or if you modify it.
 41 | 
 42 | For example, if you distribute copies of the library, whether gratis
 43 | or for a fee, you must give the recipients all the rights that we gave
 44 | you.  You must make sure that they, too, receive or can get the source
 45 | code.  If you link other code with the library, you must provide
 46 | complete object files to the recipients, so that they can relink them
 47 | with the library after making changes to the library and recompiling
 48 | it.  And you must show them these terms so they know their rights.
 49 | 
 50 | We protect your rights with a two-step method: **(1)** we copyright the
 51 | library, and **(2)** we offer you this license, which gives you legal
 52 | permission to copy, distribute and/or modify the library.
 53 | 
 54 | To protect each distributor, we want to make it very clear that
 55 | there is no warranty for the free library.  Also, if the library is
 56 | modified by someone else and passed on, the recipients should know
 57 | that what they have is not the original version, so that the original
 58 | author's reputation will not be affected by problems that might be
 59 | introduced by others.
 60 | 
 61 | Finally, software patents pose a constant threat to the existence of
 62 | any free program.  We wish to make sure that a company cannot
 63 | effectively restrict the users of a free program by obtaining a
 64 | restrictive license from a patent holder.  Therefore, we insist that
 65 | any patent license obtained for a version of the library must be
 66 | consistent with the full freedom of use specified in this license.
 67 | 
 68 | Most GNU software, including some libraries, is covered by the
 69 | ordinary GNU General Public License.  This license, the GNU Lesser
 70 | General Public License, applies to certain designated libraries, and
 71 | is quite different from the ordinary General Public License.  We use
 72 | this license for certain libraries in order to permit linking those
 73 | libraries into non-free programs.
 74 | 
 75 | When a program is linked with a library, whether statically or using
 76 | a shared library, the combination of the two is legally speaking a
 77 | combined work, a derivative of the original library.  The ordinary
 78 | General Public License therefore permits such linking only if the
 79 | entire combination fits its criteria of freedom.  The Lesser General
 80 | Public License permits more lax criteria for linking other code with
 81 | the library.
 82 | 
 83 | We call this license the “Lesser” General Public License because it
 84 | does Less to protect the user's freedom than the ordinary General
 85 | Public License.  It also provides other free software developers Less
 86 | of an advantage over competing non-free programs.  These disadvantages
 87 | are the reason we use the ordinary General Public License for many
 88 | libraries.  However, the Lesser license provides advantages in certain
 89 | special circumstances.
 90 | 
 91 | For example, on rare occasions, there may be a special need to
 92 | encourage the widest possible use of a certain library, so that it becomes
 93 | a de-facto standard.  To achieve this, non-free programs must be
 94 | allowed to use the library.  A more frequent case is that a free
 95 | library does the same job as widely used non-free libraries.  In this
 96 | case, there is little to gain by limiting the free library to free
 97 | software only, so we use the Lesser General Public License.
 98 | 
 99 | In other cases, permission to use a particular library in non-free
100 | programs enables a greater number of people to use a large body of
101 | free software.  For example, permission to use the GNU C Library in
102 | non-free programs enables many more people to use the whole GNU
103 | operating system, as well as its variant, the GNU/Linux operating
104 | system.
105 | 
106 | Although the Lesser General Public License is Less protective of the
107 | users' freedom, it does ensure that the user of a program that is
108 | linked with the Library has the freedom and the wherewithal to run
109 | that program using a modified version of the Library.
110 | 
111 | The precise terms and conditions for copying, distribution and
112 | modification follow.  Pay close attention to the difference between a
113 | “work based on the library” and a “work that uses the library”.  The
114 | former contains code derived from the library, whereas the latter must
115 | be combined with the library in order to run.
116 | 
117 | ### TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
118 | 
119 | **0.** This License Agreement applies to any software library or other
120 | program which contains a notice placed by the copyright holder or
121 | other authorized party saying it may be distributed under the terms of
122 | this Lesser General Public License (also called “this License”).
123 | Each licensee is addressed as “you”.
124 | 
125 | A “library” means a collection of software functions and/or data
126 | prepared so as to be conveniently linked with application programs
127 | (which use some of those functions and data) to form executables.
128 | 
129 | The “Library”, below, refers to any such software library or work
130 | which has been distributed under these terms.  A “work based on the
131 | Library” means either the Library or any derivative work under
132 | copyright law: that is to say, a work containing the Library or a
133 | portion of it, either verbatim or with modifications and/or translated
134 | straightforwardly into another language.  (Hereinafter, translation is
135 | included without limitation in the term “modification”.)
136 | 
137 | “Source code” for a work means the preferred form of the work for
138 | making modifications to it.  For a library, complete source code means
139 | all the source code for all modules it contains, plus any associated
140 | interface definition files, plus the scripts used to control compilation
141 | and installation of the library.
142 | 
143 | Activities other than copying, distribution and modification are not
144 | covered by this License; they are outside its scope.  The act of
145 | running a program using the Library is not restricted, and output from
146 | such a program is covered only if its contents constitute a work based
147 | on the Library (independent of the use of the Library in a tool for
148 | writing it).  Whether that is true depends on what the Library does
149 | and what the program that uses the Library does.
150 | 
151 | **1.** You may copy and distribute verbatim copies of the Library's
152 | complete source code as you receive it, in any medium, provided that
153 | you conspicuously and appropriately publish on each copy an
154 | appropriate copyright notice and disclaimer of warranty; keep intact
155 | all the notices that refer to this License and to the absence of any
156 | warranty; and distribute a copy of this License along with the
157 | Library.
158 | 
159 | You may charge a fee for the physical act of transferring a copy,
160 | and you may at your option offer warranty protection in exchange for a
161 | fee.
162 | 
163 | **2.** You may modify your copy or copies of the Library or any portion
164 | of it, thus forming a work based on the Library, and copy and
165 | distribute such modifications or work under the terms of Section 1
166 | above, provided that you also meet all of these conditions:
167 | 
168 | * **a)** The modified work must itself be a software library.
169 | * **b)** You must cause the files modified to carry prominent notices
170 | stating that you changed the files and the date of any change.
171 | * **c)** You must cause the whole of the work to be licensed at no
172 | charge to all third parties under the terms of this License.
173 | * **d)** If a facility in the modified Library refers to a function or a
174 | table of data to be supplied by an application program that uses
175 | the facility, other than as an argument passed when the facility
176 | is invoked, then you must make a good faith effort to ensure that,
177 | in the event an application does not supply such function or
178 | table, the facility still operates, and performs whatever part of
179 | its purpose remains meaningful.
180 | (For example, a function in a library to compute square roots has
181 | a purpose that is entirely well-defined independent of the
182 | application.  Therefore, Subsection 2d requires that any
183 | application-supplied function or table used by this function must
184 | be optional: if the application does not supply it, the square
185 | root function must still compute square roots.)
186 | 
187 | These requirements apply to the modified work as a whole.  If
188 | identifiable sections of that work are not derived from the Library,
189 | and can be reasonably considered independent and separate works in
190 | themselves, then this License, and its terms, do not apply to those
191 | sections when you distribute them as separate works.  But when you
192 | distribute the same sections as part of a whole which is a work based
193 | on the Library, the distribution of the whole must be on the terms of
194 | this License, whose permissions for other licensees extend to the
195 | entire whole, and thus to each and every part regardless of who wrote
196 | it.
197 | 
198 | Thus, it is not the intent of this section to claim rights or contest
199 | your rights to work written entirely by you; rather, the intent is to
200 | exercise the right to control the distribution of derivative or
201 | collective works based on the Library.
202 | 
203 | In addition, mere aggregation of another work not based on the Library
204 | with the Library (or with a work based on the Library) on a volume of
205 | a storage or distribution medium does not bring the other work under
206 | the scope of this License.
207 | 
208 | **3.** You may opt to apply the terms of the ordinary GNU General Public
209 | License instead of this License to a given copy of the Library.  To do
210 | this, you must alter all the notices that refer to this License, so
211 | that they refer to the ordinary GNU General Public License, version 2,
212 | instead of to this License.  (If a newer version than version 2 of the
213 | ordinary GNU General Public License has appeared, then you can specify
214 | that version instead if you wish.)  Do not make any other change in
215 | these notices.
216 | 
217 | Once this change is made in a given copy, it is irreversible for
218 | that copy, so the ordinary GNU General Public License applies to all
219 | subsequent copies and derivative works made from that copy.
220 | 
221 | This option is useful when you wish to copy part of the code of
222 | the Library into a program that is not a library.
223 | 
224 | **4.** You may copy and distribute the Library (or a portion or
225 | derivative of it, under Section 2) in object code or executable form
226 | under the terms of Sections 1 and 2 above provided that you accompany
227 | it with the complete corresponding machine-readable source code, which
228 | must be distributed under the terms of Sections 1 and 2 above on a
229 | medium customarily used for software interchange.
230 | 
231 | If distribution of object code is made by offering access to copy
232 | from a designated place, then offering equivalent access to copy the
233 | source code from the same place satisfies the requirement to
234 | distribute the source code, even though third parties are not
235 | compelled to copy the source along with the object code.
236 | 
237 | **5.** A program that contains no derivative of any portion of the
238 | Library, but is designed to work with the Library by being compiled or
239 | linked with it, is called a “work that uses the Library”.  Such a
240 | work, in isolation, is not a derivative work of the Library, and
241 | therefore falls outside the scope of this License.
242 | 
243 | However, linking a “work that uses the Library” with the Library
244 | creates an executable that is a derivative of the Library (because it
245 | contains portions of the Library), rather than a “work that uses the
246 | library”.  The executable is therefore covered by this License.
247 | Section 6 states terms for distribution of such executables.
248 | 
249 | When a “work that uses the Library” uses material from a header file
250 | that is part of the Library, the object code for the work may be a
251 | derivative work of the Library even though the source code is not.
252 | Whether this is true is especially significant if the work can be
253 | linked without the Library, or if the work is itself a library.  The
254 | threshold for this to be true is not precisely defined by law.
255 | 
256 | If such an object file uses only numerical parameters, data
257 | structure layouts and accessors, and small macros and small inline
258 | functions (ten lines or less in length), then the use of the object
259 | file is unrestricted, regardless of whether it is legally a derivative
260 | work.  (Executables containing this object code plus portions of the
261 | Library will still fall under Section 6.)
262 | 
263 | Otherwise, if the work is a derivative of the Library, you may
264 | distribute the object code for the work under the terms of Section 6.
265 | Any executables containing that work also fall under Section 6,
266 | whether or not they are linked directly with the Library itself.
267 | 
268 | **6.** As an exception to the Sections above, you may also combine or
269 | link a “work that uses the Library” with the Library to produce a
270 | work containing portions of the Library, and distribute that work
271 | under terms of your choice, provided that the terms permit
272 | modification of the work for the customer's own use and reverse
273 | engineering for debugging such modifications.
274 | 
275 | You must give prominent notice with each copy of the work that the
276 | Library is used in it and that the Library and its use are covered by
277 | this License.  You must supply a copy of this License.  If the work
278 | during execution displays copyright notices, you must include the
279 | copyright notice for the Library among them, as well as a reference
280 | directing the user to the copy of this License.  Also, you must do one
281 | of these things:
282 | 
283 | * **a)** Accompany the work with the complete corresponding
284 | machine-readable source code for the Library including whatever
285 | changes were used in the work (which must be distributed under
286 | Sections 1 and 2 above); and, if the work is an executable linked
287 | with the Library, with the complete machine-readable “work that
288 | uses the Library”, as object code and/or source code, so that the
289 | user can modify the Library and then relink to produce a modified
290 | executable containing the modified Library.  (It is understood
291 | that the user who changes the contents of definitions files in the
292 | Library will not necessarily be able to recompile the application
293 | to use the modified definitions.)
294 | * **b)** Use a suitable shared library mechanism for linking with the
295 | Library.  A suitable mechanism is one that (1) uses at run time a
296 | copy of the library already present on the user's computer system,
297 | rather than copying library functions into the executable, and (2)
298 | will operate properly with a modified version of the library, if
299 | the user installs one, as long as the modified version is
300 | interface-compatible with the version that the work was made with.
301 | * **c)** Accompany the work with a written offer, valid for at
302 | least three years, to give the same user the materials
303 | specified in Subsection 6a, above, for a charge no more
304 | than the cost of performing this distribution.
305 | * **d)** If distribution of the work is made by offering access to copy
306 | from a designated place, offer equivalent access to copy the above
307 | specified materials from the same place.
308 | * **e)** Verify that the user has already received a copy of these
309 | materials or that you have already sent this user a copy.
310 | 
311 | For an executable, the required form of the “work that uses the
312 | Library” must include any data and utility programs needed for
313 | reproducing the executable from it.  However, as a special exception,
314 | the materials to be distributed need not include anything that is
315 | normally distributed (in either source or binary form) with the major
316 | components (compiler, kernel, and so on) of the operating system on
317 | which the executable runs, unless that component itself accompanies
318 | the executable.
319 | 
320 | It may happen that this requirement contradicts the license
321 | restrictions of other proprietary libraries that do not normally
322 | accompany the operating system.  Such a contradiction means you cannot
323 | use both them and the Library together in an executable that you
324 | distribute.
325 | 
326 | **7.** You may place library facilities that are a work based on the
327 | Library side-by-side in a single library together with other library
328 | facilities not covered by this License, and distribute such a combined
329 | library, provided that the separate distribution of the work based on
330 | the Library and of the other library facilities is otherwise
331 | permitted, and provided that you do these two things:
332 | 
333 | * **a)** Accompany the combined library with a copy of the same work
334 | based on the Library, uncombined with any other library
335 | facilities.  This must be distributed under the terms of the
336 | Sections above.
337 | * **b)** Give prominent notice with the combined library of the fact
338 | that part of it is a work based on the Library, and explaining
339 | where to find the accompanying uncombined form of the same work.
340 | 
341 | **8.** You may not copy, modify, sublicense, link with, or distribute
342 | the Library except as expressly provided under this License.  Any
343 | attempt otherwise to copy, modify, sublicense, link with, or
344 | distribute the Library is void, and will automatically terminate your
345 | rights under this License.  However, parties who have received copies,
346 | or rights, from you under this License will not have their licenses
347 | terminated so long as such parties remain in full compliance.
348 | 
349 | **9.** You are not required to accept this License, since you have not
350 | signed it.  However, nothing else grants you permission to modify or
351 | distribute the Library or its derivative works.  These actions are
352 | prohibited by law if you do not accept this License.  Therefore, by
353 | modifying or distributing the Library (or any work based on the
354 | Library), you indicate your acceptance of this License to do so, and
355 | all its terms and conditions for copying, distributing or modifying
356 | the Library or works based on it.
357 | 
358 | **10.** Each time you redistribute the Library (or any work based on the
359 | Library), the recipient automatically receives a license from the
360 | original licensor to copy, distribute, link with or modify the Library
361 | subject to these terms and conditions.  You may not impose any further
362 | restrictions on the recipients' exercise of the rights granted herein.
363 | You are not responsible for enforcing compliance by third parties with
364 | this License.
365 | 
366 | **11.** If, as a consequence of a court judgment or allegation of patent
367 | infringement or for any other reason (not limited to patent issues),
368 | conditions are imposed on you (whether by court order, agreement or
369 | otherwise) that contradict the conditions of this License, they do not
370 | excuse you from the conditions of this License.  If you cannot
371 | distribute so as to satisfy simultaneously your obligations under this
372 | License and any other pertinent obligations, then as a consequence you
373 | may not distribute the Library at all.  For example, if a patent
374 | license would not permit royalty-free redistribution of the Library by
375 | all those who receive copies directly or indirectly through you, then
376 | the only way you could satisfy both it and this License would be to
377 | refrain entirely from distribution of the Library.
378 | 
379 | If any portion of this section is held invalid or unenforceable under any
380 | particular circumstance, the balance of the section is intended to apply,
381 | and the section as a whole is intended to apply in other circumstances.
382 | 
383 | It is not the purpose of this section to induce you to infringe any
384 | patents or other property right claims or to contest validity of any
385 | such claims; this section has the sole purpose of protecting the
386 | integrity of the free software distribution system which is
387 | implemented by public license practices.  Many people have made
388 | generous contributions to the wide range of software distributed
389 | through that system in reliance on consistent application of that
390 | system; it is up to the author/donor to decide if he or she is willing
391 | to distribute software through any other system and a licensee cannot
392 | impose that choice.
393 | 
394 | This section is intended to make thoroughly clear what is believed to
395 | be a consequence of the rest of this License.
396 | 
397 | **12.** If the distribution and/or use of the Library is restricted in
398 | certain countries either by patents or by copyrighted interfaces, the
399 | original copyright holder who places the Library under this License may add
400 | an explicit geographical distribution limitation excluding those countries,
401 | so that distribution is permitted only in or among countries not thus
402 | excluded.  In such case, this License incorporates the limitation as if
403 | written in the body of this License.
404 | 
405 | **13.** The Free Software Foundation may publish revised and/or new
406 | versions of the Lesser General Public License from time to time.
407 | Such new versions will be similar in spirit to the present version,
408 | but may differ in detail to address new problems or concerns.
409 | 
410 | Each version is given a distinguishing version number.  If the Library
411 | specifies a version number of this License which applies to it and
412 | “any later version”, you have the option of following the terms and
413 | conditions either of that version or of any later version published by
414 | the Free Software Foundation.  If the Library does not specify a
415 | license version number, you may choose any version ever published by
416 | the Free Software Foundation.
417 | 
418 | **14.** If you wish to incorporate parts of the Library into other free
419 | programs whose distribution conditions are incompatible with these,
420 | write to the author to ask for permission.  For software which is
421 | copyrighted by the Free Software Foundation, write to the Free
422 | Software Foundation; we sometimes make exceptions for this.  Our
423 | decision will be guided by the two goals of preserving the free status
424 | of all derivatives of our free software and of promoting the sharing
425 | and reuse of software generally.
426 | 
427 | ### NO WARRANTY
428 | 
429 | **15.** BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
430 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
431 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
432 | OTHER PARTIES PROVIDE THE LIBRARY “AS IS” WITHOUT WARRANTY OF ANY
433 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
434 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
435 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
436 | LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
437 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
438 | 
439 | **16.** IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
440 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
441 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
442 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
443 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
444 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
445 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
446 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
447 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
448 | DAMAGES.
449 | 
450 | _END OF TERMS AND CONDITIONS_
451 | 
452 | ### How to Apply These Terms to Your New Libraries
453 | 
454 | If you develop a new library, and you want it to be of the greatest
455 | possible use to the public, we recommend making it free software that
456 | everyone can redistribute and change.  You can do so by permitting
457 | redistribution under these terms (or, alternatively, under the terms of the
458 | ordinary General Public License).
459 | 
460 | To apply these terms, attach the following notices to the library.  It is
461 | safest to attach them to the start of each source file to most effectively
462 | convey the exclusion of warranty; and each file should have at least the
463 | “copyright” line and a pointer to where the full notice is found.
464 | 
465 |     <one line to give the library's name and a brief idea of what it does.>
466 |     Copyright (C) <year>  <name of author>
467 | 
468 |     This library is free software; you can redistribute it and/or
469 |     modify it under the terms of the GNU Lesser General Public
470 |     License as published by the Free Software Foundation; either
471 |     version 2.1 of the License, or (at your option) any later version.
472 | 
473 |     This library is distributed in the hope that it will be useful,
474 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
475 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
476 |     Lesser General Public License for more details.
477 | 
478 |     You should have received a copy of the GNU Lesser General Public
479 |     License along with this library; if not, write to the Free Software
480 |     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
481 | 
482 | Also add information on how to contact you by electronic and paper mail.
483 | 
484 | You should also get your employer (if you work as a programmer) or your
485 | school, if any, to sign a “copyright disclaimer” for the library, if
486 | necessary.  Here is a sample; alter the names:
487 | 
488 |     Yoyodyne, Inc., hereby disclaims all copyright interest in the
489 |     library `Frob' (a library for tweaking knobs) written by James Random Hacker.
490 | 
491 |     <signature of Ty Coon>, 1 April 1990
492 |     Ty Coon, President of Vice
493 | 
494 | That's all there is to it!
495 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis
  2 | # SPDX-License-Identifier: LGPL-2.1-or-later
  3 | 
  4 | ROOTDIR=$(shell pwd)
  5 | DEPSDIR=$(ROOTDIR)/deps/
  6 | SRCDIR=$(ROOTDIR)/src/
  7 | DOCDIR=$(ROOTDIR)/docs/
  8 | MEXDIR=$(ROOTDIR)/mex/
  9 | EXAMPLEDIR=$(ROOTDIR)/examples/
 10 | TESTDIR=$(ROOTDIR)/test/
 11 | UTILDIR=$(ROOTDIR)/util/
 12 | BINDIR=$(ROOTDIR)/bin/
 13 | BUILDDIR=$(ROOTDIR)/build/
 14 | PREFIX?=/usr/local/
 15 | LIBDIR=$(BUILDDIR)lib/
 16 | INCDIR=$(BUILDDIR)include/
 17 | DATDIR=$(ROOTDIR)/datfiles/
 18 | 
 19 | PCG_HEADER=$(DEPSDIR)pcg-c/include/pcg_variants.h
 20 | 
 21 | SHELL:=/bin/sh
 22 | CP:=cp
 23 | MKDIR:=mkdir
 24 | MV:=mv
 25 | RM:=rm -f
 26 | 
 27 | CHECKMK:=checkmk
 28 | CC:=gcc
 29 | CCOV:=gcov
 30 | 
 31 | DOXYGEN:=doxygen
 32 | SPHINXBUILD:=sphinx-build
 33 | GIT:=git
 34 | MATLAB:=$(shell which matlab) -nodesktop -nosplash
 35 | MEXEXT:=$(shell which mexext)
 36 | OCTAVE:=octave
 37 | 
 38 | WFLAGS=-Wall -Wextra -pedantic
 39 | ARCHFLAGS=-march=native
 40 | CFLAGS=$(WFLAGS) $(ARCHFLAGS) -std=gnu99 -I $(SRCDIR) \
 41 | 	-I $(PREFIX)include -L $(PREFIX)lib
 42 | COPTIM=-O3
 43 | CCOVFLAGS=-Og -g --coverage
 44 | CLIBS=-lm -fopenmp
 45 | CHECKLIBS=-lcheck -lpthread -lsubunit
 46 | PCG_INCLUDE=-include $(PCG_HEADER)
 47 | PCG_LIB=-L $(DEPSDIR)pcg-c/src -lpcg_random
 48 | PCG_FLAGS=$(PCG_INCLUDE) $(PCG_LIB)
 49 | 
 50 | .PRECIOUS: %.o
 51 | 
 52 | 
 53 | 
 54 | 
 55 | 
 56 | .PHONY: all
 57 | all: autotune lib mexmat mexoct
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | init(%):
 64 | 	$(GIT) submodule update --init deps/$%
 65 | 
 66 | $(DEPSDIR)pcg-c/src/libpcg_random.a: init(pcg-c)
 67 | 	cd $(DEPSDIR)pcg-c; make
 68 | 
 69 | .PHONY: libpcg
 70 | libpcg: $(DEPSDIR)pcg-c/src/libpcg_random.a
 71 | 
 72 | $(ROOTDIR)%:
 73 | 	$(MKDIR) -p $@
 74 | 
 75 |  $(BINDIR)cpfloat_autotune: $(SRCDIR)cpfloat_autotune.c $(BINDIR) libpcg
 76 | 	$(CC) $(CFLAGS) $(COPTIM) -o $@ $< $(CLIBS) $(PCG_FLAGS)
 77 | 
 78 | .PHONY: autotune
 79 | autotune: $(BINDIR)cpfloat_autotune
 80 | 	$<
 81 | 	$(MV) cpfloat_threshold_*.h $(SRCDIR)
 82 | 
 83 | 
 84 | 
 85 | 
 86 | .PHONY: install
 87 | install: lib  # Copy the built headers and libraries into $(PREFIX)include/ and $(PREFIX)lib/.
 88 | 	$(MKDIR) -p $(PREFIX)include/ $(PREFIX)lib/
 89 | 	$(CP) $(INCDIR)*.h $(PREFIX)include/ && $(CP) $(LIBDIR)libcpfloat.* $(PREFIX)lib/
 90 | 
 91 | lib: autotune $(INCDIR)cpfloat_definitions.h $(INCDIR)cpfloat_docmacros.h \
 92 | 	$(INCDIR)cpfloat.h \
 93 | 	$(INCDIR)cpfloat_threshold_binary32.h \
 94 | 	$(INCDIR)cpfloat_threshold_binary64.h \
 95 | 	$(LIBDIR)libcpfloat.so $(LIBDIR)libcpfloat.a
 96 | 
 97 | HEADERS=$(INCDIR)cpfloat_definitions.h $(INCDIR)cpfloat_docmacros.h \
 98 | 	$(INCDIR)cpfloat_threshold_binary32.h $(INCDIR)cpfloat_threshold_binary64.h
 99 | 
100 | $(HEADERS):$(INCDIR)cpfloat_%.h:$(SRCDIR)cpfloat_%.h $(INCDIR)
101 | 	$(CP) $< $@
102 | 
103 | $(BUILDDIR)cpfloat.tmp: $(SRCDIR)cpfloat_binary32.h $(SRCDIR)cpfloat_binary64.h | $(BUILDDIR)  # Order-only: the output directory must exist.
104 | 	sed '/CPFLOAT_BINARY\|^#include "cpfloat_\(doc\|def\)/d' \
105 | 		$(SRCDIR)cpfloat_binary32.h > $(BUILDDIR)cpfloat.tmp
106 | 	sed '/CPFLOAT_BINARY\|^#include "cpfloat_\(doc\|def\)/d' \
107 | 		$(SRCDIR)cpfloat_binary64.h >> $(BUILDDIR)cpfloat.tmp
108 | 	sed 's/static inline //g' $(BUILDDIR)cpfloat.tmp > $(BUILDDIR)cpfloat.tmpfinal
109 | 	$(MV) $(BUILDDIR)cpfloat.tmpfinal $(BUILDDIR)cpfloat.tmp
110 | 
111 | $(BUILDDIR)cpfloat_template.c: $(SRCDIR)cpfloat_template.h | $(BUILDDIR)  # Order-only: the output directory must exist.
112 | 	sed 's/static inline//g' $< > $@
113 | 
114 | $(BUILDDIR)cpfloat.c: $(BUILDDIR)cpfloat.tmp $(BUILDDIR)cpfloat_template.c
115 | 	printf "#include \"cpfloat_docmacros.h\"\n\
116 | 	#include \"cpfloat_definitions.h\"\n" > $@
117 | 	sed 's/template.h/template.c/' $(BUILDDIR)cpfloat.tmp >> $@
118 | 
119 | $(INCDIR)cpfloat.h: $(BUILDDIR)cpfloat.tmp $(BUILDDIR) $(INCDIR)
120 | 	sed '/^\/\*\* @/,/^\/\*\* @/d' $< > $(BUILDDIR)cpfloat-h.tmp
121 | 	sed '/^ \*\|\/\*/d' $(BUILDDIR)cpfloat-h.tmp >  $(BUILDDIR)cpfloat-h.tmpfinal
122 | 	$(MV)  $(BUILDDIR)cpfloat-h.tmpfinal $(BUILDDIR)cpfloat-h.tmp
123 | 	printf "/* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */\n\
124 | 	/* SPDX-License-Identifier: LGPL-2.1-or-later                         */\n\
125 | 	\n\
126 | 	/**\n\
127 | 	 * @file cpfloat.h\n\
128 | 	 * @brief CPFloat header file.\n\
129 | 	 */\n\
130 | 	\n\
131 | 	#ifndef _CPFLOAT_\n\
132 | 	#define _CPFLOAT_\n\
133 | 	\n\
134 | 	#include \"cpfloat_docmacros.h\"\n\
135 | 	#include \"cpfloat_definitions.h\"\n\
136 | 	\n" > $@
137 | 	cat $(BUILDDIR)cpfloat-h.tmp >> $@
138 | 	printf "#endif /* #ifndef _CPFLOAT_ */" >> $@
139 | 
140 | HEADER_DEPS=$(INCDIR)cpfloat_threshold_binary32.h \
141 | 	$(INCDIR)cpfloat_threshold_binary64.h \
142 | 	$(DEPSDIR)pcg-c/include/pcg_variants.h
143 | 
144 | $(BUILDDIR)cpfloat-shared.o: $(BUILDDIR)cpfloat.c $(HEADER_DEPS)
145 | 	$(CC) $(CFLAGS) $(COPTIM) -fPIC -c $< $(PCG_INCLUDE) -o $@
146 | 
147 | $(BUILDDIR)cpfloat-static.o: $(BUILDDIR)cpfloat.c $(HEADER_DEPS)
148 | 	$(CC) $(CFLAGS) $(COPTIM) -c $< $(PCG_INCLUDE) -o $@
149 | 
150 | LIBPCG_OBJ=$(DEPSDIR)pcg-c/src/pcg-global-32.o \
151 | 	$(DEPSDIR)pcg-c/src/pcg-advance-64.o \
152 | 	$(DEPSDIR)pcg-c/src/pcg-global-64.o \
153 | 	$(DEPSDIR)pcg-c/src/pcg-advance-128.o
154 | 
155 | $(LIBDIR)libcpfloat.so: $(BUILDDIR)cpfloat-shared.o libpcg $(LIBDIR)
156 | 	$(CC) -shared -o $@ $< $(LIBPCG_OBJ) $(CLIBS) $(PCG_LIB)
157 | 
158 | $(LIBDIR)libcpfloat.a: $(BUILDDIR)cpfloat-static.o libpcg $(LIBDIR)
159 | 	ar -cr $@ $< $(LIBPCG_OBJ)
160 | 
161 | 
162 | 
163 | 
164 | 
165 | MEXEXTENSION:=`$(MEXEXT)`
166 | 
167 | .PHONY: mexmat
168 | mexmat: $(BINDIR)cpfloat.m $(BINDIR)cpfloat.$(MEXEXTENSION)
169 | 
170 | .PHONY: mexoct
171 | mexoct: $(BINDIR)cpfloat.m $(BINDIR)cpfloat.mex
172 | 
173 | $(BINDIR)cpfloat.m: $(MEXDIR)cpfloat.m $(BINDIR)
174 | 	$(CP) $< $@
175 | 
176 | MEXSTRING="cd $(MEXDIR); \
177 | 	retval = cpfloat_compile('cpfloatdir', '$(SRCDIR)', \
178 | 		'pcgpath', '$(DEPSDIR)pcg-c/', \
179 | 		'compilerpath', '$(CC)'); \
180 | 	if retval \
181 | 		rehash(); \
182 | 		cpfloat_autotune('cpfloatdir', '$(SRCDIR)'); \
183 | 		cpfloat_compile('cpfloatdir', '$(SRCDIR)', \
184 | 			'pcgpath', '$(DEPSDIR)pcg-c/', \
185 | 			'compilerpath', '$(CC)'); \
186 | 	end; \
187 | 	exit;"
188 | 
189 | MEXEXTENSION:=`$(MEXEXT)`
190 | 
191 | $(BINDIR)cpfloat.$(MEXEXTENSION): $(MEXDIR)cpfloat.c libpcg $(BINDIR)
192 | 	$(MATLAB) -r $(MEXSTRING)
193 | 	$(MV) $(MEXDIR)cpfloat.$(MEXEXTENSION) $@
194 | 
195 | $(BINDIR)cpfloat.mex: $(MEXDIR)cpfloat.c libpcg $(BINDIR)
196 | 	$(OCTAVE) --eval $(MEXSTRING)
197 | 	$(MV) $(MEXDIR)cpfloat.mex $@
198 | 
199 | 
200 | 
201 | 
202 | 
203 | .PHONY: test
204 | test: ctest libtest mtest otest
205 | 
206 | $(TESTDIR)cpfloat_test.c: $(TESTDIR)cpfloat_test.ts
207 | 	$(CHECKMK) clean_mode=1 $< > $@
208 | 
209 | $(BINDIR)cpfloat_test: $(TESTDIR)cpfloat_test.c libpcg $(BINDIR)
210 | 	$(CC) $(CFLAGS) $(COPTIM) -fsanitize=undefined -o $@ $< \
211 | 		$(CHECKLIBS) $(CLIBS) $(PCG_FLAGS)
212 | 
213 | .PHONY: ctest
214 | ctest: $(BINDIR)cpfloat_test
215 | 	$<
216 | 	$(MV) cpfloat_test.log $(TESTDIR)
217 | 
218 | $(TESTDIR)libcpfloat_test.c: $(TESTDIR)cpfloat_test.c
219 | 	sed '/#include "cpfloat_binary32.h"/d' $< > $@
220 | 	sed 's/#include "cpfloat_binary64.h"/#include "cpfloat.h"/g' $@ > cpfloath.temp
221 | 	$(MV) cpfloath.temp $@
222 | 
223 | $(BINDIR)libcpfloat_static_test: $(TESTDIR)libcpfloat_test.c lib
224 | 	$(CC) $(CFLAGS) $(COPTIM) -fsanitize=undefined -static -o $@ $< \
225 | 		-I$(INCDIR) -L$(LIBDIR) -lcpfloat $(CHECKLIBS)
226 | 
227 | $(BINDIR)libcpfloat_shared_test: $(TESTDIR)libcpfloat_test.c lib
228 | 	$(CC) $(CFLAGS) $(COPTIM) -fsanitize=undefined -o $@ $< \
229 | 		-I$(INCDIR) -L$(LIBDIR) -lcpfloat $(CHECKLIBS) -lm
230 | 
231 | .PHONY: libtest
232 | libtest: libtest-shared libtest-static
233 | 
234 | .PHONY: libtest-shared
235 | libtest-shared: $(BINDIR)libcpfloat_shared_test  # Run the unit tests against the freshly built libcpfloat.so.
236 | 	LD_LIBRARY_PATH="$(LIBDIR)$${LD_LIBRARY_PATH:+:$$LD_LIBRARY_PATH}" $<
237 | 	$(MV) cpfloat_test.log $(TESTDIR)libcpfloat_dinamic_test.log
238 | 
239 | .PHONY: libtest-static
240 | libtest-static: $(BINDIR)libcpfloat_static_test
241 | 	$<
242 | 	$(MV) cpfloat_test.log $(TESTDIR)libcpfloat_static_test.log
243 | 
244 | .PHONY: mtest
245 | mtest: MTESTSTRING="addpath('$(DEPSDIR)float_params'); \
246 | 		addpath('$(BINDIR)'); \
247 | 		cd $(TESTDIR); \
248 | 		cpfloat_test; \
249 | 		exit;"
250 | 
251 | mtest: $(BINDIR)cpfloat.$(MEXEXTENSION) $(BINDIR)cpfloat.m init(float_params)
252 | 	$(MATLAB) -r $(MTESTSTRING)
253 | 
254 | .PHONY: otest
255 | otest: OTESTSTRING="pkglist=pkg('list'); \
256 | 		no_fenv=true; \
257 | 		for i=1:length(pkglist); \
258 | 			if strcmp(pkglist{i}.name, 'fenv'); \
259 | 				no_fenv = false; \
260 | 				break; \
261 | 			end; \
262 | 		end; \
263 | 		if no_fenv; \
264 | 			pkg install -forge fenv; \
265 | 		end; \
266 | 		pkg load fenv; \
267 | 		addpath('$(DEPSDIR)float_params'); \
268 | 		addpath('$(BINDIR)'); \
269 | 		cd $(TESTDIR); \
270 | 		cpfloat_test; \
271 | 		exit;"
272 | 
273 | otest: $(BINDIR)cpfloat.mex $(BINDIR)cpfloat.m init(float_params)
274 | 	$(OCTAVE) --eval $(OTESTSTRING)
275 | 
276 | 
277 | 
278 | 
279 | 
280 | .PHONY: docs
281 | docs: $(DOCDIR)html
282 | 
283 | $(DOCDIR)Doxyfile:
284 | 	$(DOXYGEN) -g $(DOCDIR)Doxyfile
285 | 
286 | $(DOCDIR)xml: $(DOCDIR)Doxyfile $(DOCDIR)Doxyfile-project
287 | 	$(DOXYGEN) $(DOCDIR)Doxyfile-project
288 | 
289 | $(DOCDIR)html: $(DOCDIR)xml
290 | 	$(SPHINXBUILD) -M html "$(DOCDIR)source" "$(DOCDIR)"
291 | 
292 | .PHONY: coverage
293 | coverage: $(TESTDIR)cpfloat_test.c libpcg
294 | 	$(CC) $(CFLAGS) $(CCOVFLAGS) -o $(TESTDIR)cpfloat_test $< \
295 | 		$(CHECKLIBS) $(CLIBS) $(PCG_FLAGS)
296 | 	$(TESTDIR)cpfloat_test
297 | 	$(CP) $(TESTDIR)cpfloat_test.c .
298 | 	$(CCOV) cpfloat_test.c
299 | 
300 | .PHONY: example
301 | example: $(BINDIR)example_manuscript
302 | 
303 | $(BINDIR)example_manuscript: $(EXAMPLEDIR)example_manuscript.c libpcg $(BINDIR)
304 | 	$(CC) $(CFLAGS) $(COPTIM) -o $@ $< $(CLIBS) $(PCG_FLAGS)
305 | 
306 | 
307 | 
308 | 
309 | 
310 | .PHONY: cleanall
311 | cleanall: clean cleanlib cleandeps cleantest cleancoverage cleandocs
312 | 
313 | .PHONY: clean
314 | clean:
315 | 	$(RM) $(BINDIR)*
316 | 
317 | .PHONY: cleanlib
318 | cleanlib:
319 | 	$(RM) -r $(BUILDDIR)*
320 | 
321 | .PHONY: cleandeps
322 | cleandeps:  # Name must match the .PHONY entry above and the cleanall prerequisite.
323 | 	cd $(DEPSDIR)pcg-c; make clean
324 | 
325 | .PHONY: cleantest
326 | cleantest:
327 | 	$(RM) $(TESTDIR)cpfloat_test $(TESTDIR)*.c $(TESTDIR)*.log
328 | 
329 | .PHONY: cleancoverage
330 | cleancoverage:
331 | 	$(RM) cpfloat_test.c cpfloat_test.log *.gcno *.gcda *.gcov
332 | 
333 | .PHONY: cleandocs
334 | cleandocs:
335 | 	$(RM) -r $(DOCDIR)Doxyfile $(DOCDIR)xml
336 | 	$(RM) -r $(DOCDIR)html $(DOCDIR)source/cpfloat
337 | 
338 | 
339 | 
340 | 
341 | 
342 | license.spdx:
343 | 	$(UTILDIR)generate_spdx.sh > $@
344 | 
345 | # CPFloat - Custom Precision Floating-point numbers.
346 | #
347 | # Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
348 | #
349 | # This library is free software; you can redistribute it and/or modify it under
350 | # the terms of the GNU Lesser General Public License as published by the Free
351 | # Software Foundation; either version 2.1 of the License, or (at your option)
352 | # any later version.
353 | #
354 | # This library is distributed in the hope that it will be useful, but WITHOUT
355 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
356 | # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
357 | # details.
358 | #
359 | # You should have received a copy of the GNU Lesser General Public License along
360 | # with this library; if not, write to the Free Software Foundation, Inc., 51
361 | # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
362 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Version](https://img.shields.io/github/v/tag/north-numerical-computing/cpfloat?label=version)](https://github.com/north-numerical-computing/cpfloat/tags)
  2 | [![C tests](https://img.shields.io/github/actions/workflow/status/north-numerical-computing/cpfloat/run_c_tests.yml?branch=main&label=c_tests)](https://github.com/north-numerical-computing/cpfloat/actions/workflows/run_c_tests.yml)
  3 | [![Octave tests](https://img.shields.io/github/actions/workflow/status/north-numerical-computing/cpfloat/run_octave_tests.yml?branch=main&label=octave_tests)](https://github.com/north-numerical-computing/cpfloat/actions/workflows/run_octave_tests.yml)
  4 | [![GitHub](https://img.shields.io/github/license/north-numerical-computing/cpfloat)](LICENSE.md)
  5 | 
  6 | # CPFloat: Custom-Precision Floating-Point numbers
  7 | 
  8 | CPFloat is a C library for simulating low-precision floating-point arithmetics. CPFloat provides efficient routines for rounding, performing arithmetic operations, evaluating  mathematical functions, and querying properties of the simulated low-precision format. Internally, numbers are stored in `float` or `double` arrays. The low-precision format (target format) follows an extension of the formats defined in the IEEE 754 standard [[5]](#ref5) and is entirely specified by four parameters:
  9 | * a positive integer *p*, which represents the number of digits of precision;
 10 | * an integer *e*<sub>min</sub> (typically negative), which represents the minimum supported exponent;
 11 | * a positive integer *e*<sub>max</sub>, which represents the maximum supported exponent; and
 12 | * a Boolean variable σ, set to **true** if subnormals are supported and to **false** otherwise.
 13 | 
 14 | Valid choices of *p*, *e*<sub>min</sub>, and *e*<sub>max</sub> depend on the format in which the converted numbers are to be stored (storage format). A more extensive description of the characteristics of the low-precision formats that can be used, together with more details on admissible values for *p*, *e*<sub>min</sub>, *e*<sub>max</sub>, and *σ* can be found in [[1]](#ref1).
 15 | 
 16 | The library was originally intended as a faster version of the MATLAB function `chop` [[2]](#ref2), which is [available on GitHub](https://github.com/higham/chop).
 17 | The latest versions of the library have a variety of subtle differences compared with `chop`.
 18 | * Since [14 June 2022](https://github.com/higham/chop/commit/1d37238067042416a3554a1f5e6cdd248b613999), `chop` supports specifying the function for generating random numbers. The MEX interface of CPFloat does not offer this capability, as the pseudo-random numbers used are generated in C and not in MATLAB.
 19 | * Since v0.6.0, CPFloat allows users to specify *e*<sub>min</sub> and *e*<sub>max</sub> separately. In earlier versions, users can only specify *e*<sub>max</sub>, while *e*<sub>min</sub> is set to 1 – *e*<sub>max</sub>.
 20 | * Since v0.6.0, the default 8-bit format `E4M3` has *e*<sub>max</sub> = 8 and *e*<sub>min</sub> = –6, which is consistent with the homonymous format in the December 2023 revision of the OCP 8-bit Floating Point Specification (OFP8) [[3]](#ref3). In `chop`, *e*<sub>max</sub> = 7 and *e*<sub>min</sub> = –6.
 21 | 
 22 | The code to reproduce the results of the tests in [[1]](#ref1) is [available on GitHub](https://github.com/north-numerical-computing/cpfloat_experiments).
 23 | 
 24 | 
 25 | # Dependencies
 26 | 
 27 | The only (optional) dependency of CPFloat is the [C implementation](https://github.com/imneme/pcg-c) of the [PCG Library](https://www.pcg-random.org), which provides a variety of high-quality pseudo-random number generators. For an in-depth discussion of the algorithms underlying the PCG Library, we recommend the [paper](https://www.pcg-random.org/paper.html) by [Melissa O'Neill](https://www.cs.hmc.edu/~oneill) [[4]](#ref4). If the header file `pcg_variants.h` in `deps/pcg-c/include/` is not included at compile time with the `--include` option, then CPFloat relies on the default C pseudo-random number generator.
 28 | 
 29 | The PCG Library is free software (see the [Licensing information](#licensing-information) below), and its generators are more efficient, reliable, and flexible than any combination of the functions `srand`, `rand`, and `rand_r` from the C standard library. A warning is issued at compile time if the location of `pcg_variants.h` is not specified correctly.
 30 | 
 31 | Compiling the MEX interface requires a reasonably recent version of MATLAB or Octave.
 32 | 
 33 | # Developer dependencies
 34 | 
 35 | Testing the MEX interface requires the function `float_params`, which is [available on GitHub](https://github.com/higham/float_params). The unit tests for the C implementation in `test/cpfloat_test.ts` require the [check unit testing framework for C](https://libcheck.github.io/check), including the [`checkmk`](https://github.com/libcheck/check/tree/master/checkmk) script, and the [subunit protocol](https://github.com/testing-cabal/subunit).
 36 | 
 37 | # Installation
 38 | 
 39 | No installation is needed in order to use CPFloat as a header-only library. The shared and static libraries can be built with
 40 | ```console
 41 | make lib
 42 | ```
 43 | If the compilation is successful, the header and library files of CPFloat will be located in the `build/include` and `build/lib` directories, respectively.
 44 | The library can be installed in `<path>` with
 45 | ```console
 46 | make install PREFIX=<path>/
 47 | ```
 48 | which copies the header and library files in `<path>/include` and `<path>/lib`, respectively.
 49 | The default value of `PREFIX`, which is used if the variable is not set on the command line, is `/usr/local/`. The value must end with a slash, because the `Makefile` appends `include/` and `lib/` to it directly.
 50 | 
 51 | ## MEX interface
 52 | 
 53 | The MEX interface can be compiled automatically with either
 54 | ```console
 55 | make mexmat # Compile MEX interface for MATLAB.
 56 | ```
 57 | or
 58 | ```console
 59 | make mexoct # Compile MEX interface for Octave.
 60 | 
 61 | ```
 62 | These two commands compile and autotune the MEX interface in MATLAB and Octave, respectively, by using the functions `mex/cpfloat_compile.m` and `mex/cpfloat_autotune.m`. To use the interface, the `bin/` folder must be in MATLAB's search path.
 63 | 
 64 | On a system where the `make` build automation tool is not available, we recommend building the MEX interface by running the script `cpfloat_compile_nomake.m` in the `mex/` folder. The script attempts to compile and autotune the MEX interface using the default C compiler. The following code will download the repository as a ZIP file, inflate it, and try to compile it:
 65 | 
 66 | ```matlab
 67 | zip_url = 'https://codeload.github.com/north-numerical-computing/cpfloat/zip/refs/heads/main';
 68 | unzip(zip_url);
 69 | movefile('cpfloat-main', 'cpfloat')
 70 | cd('cpfloat/mex');
 71 | cpfloat_compile_nomake
 72 | ```
 73 | 
 74 | A different compiler can be used by setting the value of the variable `compilerpath` appropriately.
 75 | If the chosen compiler does not support OpenMP, only the sequential version of the algorithm will be produced and no autotuning will take place.
 76 | 
 77 | On Windows, we have not been able to compile the PCG Library using the C compiler recommended by MATLAB. Therefore, the script uses the pseudo-random number generator in the C standard library by default.
 78 | 
 79 | ## Autotuning
 80 | 
 81 | CPFloat provides a sequential and a parallel implementation of the rounding functions. OpenMP introduces some overhead, and using a single thread is typically faster for arrays with few elements. Therefore, the library provides a facility to switch between the single-threaded and the multi-threaded variants automatically, depending on the size of the input. The threshold is machine-dependent, and the best value for a given system can be found by invoking
 82 | ```console
 83 | make autotune
 84 | ```
 85 | which compiles the file `src/cpfloat_autotune.c`, runs it, and updates the files `src/cpfloat_threshold_binary32.h` and `src/cpfloat_threshold_binary64.h`. This procedure is run automatically when building the shared and static libraries.
 86 | 
 87 | ## Documentation
 88 | 
 89 | The documentation of CPFloat can be generated with the command
 90 | ```console
 91 | make docs
 92 | ```
 93 | which relies on [Doxygen](https://www.doxygen.nl) to format the Javadoc-style comments in the source files, and on [Sphinx](https://www.sphinx-doc.org), with the [Breathe](https://breathe.readthedocs.io) and [Exhale](https://exhale.readthedocs.io) extensions, to generate the HTML version of the documentation that can be found in the `docs/html/` directory.
 94 | 
 95 | # Using CPFloat
 96 | 
 97 | CPFloat can be used as a header-only, shared, or static library. Examples for these three scenarios can be found in the `Makefile` (cf. targets `$(BINDIR)cpfloat_test`, `$(BINDIR)libcpfloat_shared_test`, and `$(BINDIR)libcpfloat_static_test`, respectively). Here we provide a brief summary.
 98 | 
 99 | * **Header-only library.** The only requirement is that the files in the `src/` directory be in the include path of the compiler. In order to use the PCG Library, one can either:
100 |     - specify the path of the file `pcg_variants.h` using the preprocessor option `--include` (see the variable `PCG_INCLUDE` in the `Makefile` for an example); or
101 |     - make sure that `pcg_variants.h` is in the include path and uncomment the preprocessor instruction on line 34 of `src/cpfloat_definitions.h`, that is, `/* #include "pcg_variants.h" */`. In either case, it is necessary to link the executable against the `pcg_random` library, which can be done by passing the option `-lpcg_random` to the linker. The library `libpcg_random.a` must be in the load path.
102 | 
103 | * **Shared library.** The five header files in the `build/include` directory must be in the include path of the compiler. The options `-lcpfloat` and `-lm` must be passed to the linker, and the libraries `libcpfloat.so` and `libm.so` must be in the load path.
104 | 
105 | * **Static library.** The static library uses the same five header files as the shared library, which are located in the `build/include` directory and must be in the include path of the compiler. Executables must be linked with the `-static` and `-lcpfloat` options, and the library file `libcpfloat.a` must be in the load path. Linking against the math library is not needed in this case.
106 | 
107 | # Code validation
108 | 
109 | The `test/` directory contains two sets of tests, one for the C library and one for the MEX interface. The unit tests for the C implementation require the `check` library, and can be run with
110 | ```console
111 | make ctest
112 | ```
113 | for the header-only library or with
114 | ```console
115 | make libtest
116 | ```
117 | for the shared and static libraries. The two commands use the same batch of unit tests, which is generated from the file `test/cpfloat_test.ts` using the `checkmk` script.
118 | The Makefile target `coverage` measures the code coverage using GNU `gcov` on the same set of tests.
119 | 
120 | The MEX interface can be tested by using either
121 | ```console
122 | make mtest # Test MEX interface using MATLAB.
123 | ```
124 | or
125 | ```console
126 | make otest # Test MEX interface using Octave.
127 | ```
128 | These two commands run, in MATLAB and Octave respectively, the function `test/cpfloat_test.m`. This set of tests is based on the MATLAB script `test_chop.m`, [available on GitHub](https://github.com/higham/chop/blob/master/test_chop.m): some changes were necessary in order to make it compatible with Octave.
129 | 
130 | 
131 | # References
132 | 
133 | <a name="ref1">[1]</a> Massimiliano Fasi and Mantas Mikaitis. [CPFloat: A C library for simulating low-precision arithmetic](https://doi.org/10.1145/3585515). ACM Trans. Math. Softw., 49(2), Article No.: 18, June 2023.
134 | 
135 | <a name="ref2">[2]</a> Nicholas J. Higham and Srikara Pranesh, [Simulating Low Precision Floating-Point Arithmetic](https://doi.org/10.1137/19M1251308), SIAM J. Sci. Comput., 41, C585-C602, 2019.
136 | 
137 | <a name="ref3">[3]</a> Paulius Micikevicius, Stuart Oberman, Pradeep Dubey, Marius Cornea, Andres Rodriguez, Ian Bratt, Richard Grisenthwaite, Norm Jouppi, Chiachen Chou, Amber Huffman, Michael Schulte, Ralph Wittig, Dharmesh Jani, Summer Deng. [OCP 8-bit Floating Point Specification (OFP8)](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-12-01-pdf-1), pp. 1–16, Revision 1.0, Open Compute Project, June 2023. Revised December 2023.
138 | 
139 | <a name="ref4">[4]</a> Melissa E. O'Neill, [PCG: A family of simple fast space-efficient statistically good algorithms for random number generation](https://www.pcg-random.org/paper.html), Technical report HMC-CS-2014-0905, Harvey Mudd College, Claremont, CA, September 2014.
140 | 
141 | <a name="ref5">[5]</a> [754-2019 IEEE Standard for Floating-Point Arithmetic](https://doi.org/10.1109/IEEESTD.2019.8766229), pp. 1–84, Institute of Electrical and Electronics Engineers, July 2019. Revision of IEEE Std 754-2008.
142 | 
143 | # Acknowledgements
144 | 
145 | The library was written by Massimiliano Fasi and Mantas Mikaitis. We thank Nicolas Louvet, Theo Mary, Ian McInerney, and Siegfried Rump for reporting bugs and suggesting improvements.
146 | 
147 | # Licensing information
148 | 
149 | CPFloat is distributed under the GNU Lesser General Public License, Version 2.1
150 | or later (see [LICENSE.md](LICENSE.md)). Please contact us if you would like to use CPFloat in an open source project distributed under the terms of a license that is incompatible with the GNU LGPL. We might be able to relicense the software for you.
151 | 
152 | The PCG Library is distributed under the terms of either the [Apache License, Version 2.0](https://raw.githubusercontent.com/imneme/pcg-c/master/LICENSE-APACHE.txt) or the [Expat License](https://raw.githubusercontent.com/imneme/pcg-c/master/LICENSE-MIT.txt), at the option of the user.
153 | 
154 | The MATLAB function `float_params` is distributed under the terms of the [BSD 2-Clause "Simplified" License](https://raw.githubusercontent.com/higham/float_params/master/license.txt).
155 | 
156 | The MATLAB function `chop` is distributed under the terms of the [BSD 2-Clause "Simplified" License](https://raw.githubusercontent.com/higham/chop/master/license.txt).
157 | 


--------------------------------------------------------------------------------
/docs/Doxyfile-project:
--------------------------------------------------------------------------------
 1 | @INCLUDE                = "./docs/Doxyfile"
 2 | GENERATE_HTML           = NO
 3 | GENERATE_LATEX          = NO
 4 | GENERATE_XML            = YES
 5 | XML_PROGRAMLISTING      = NO
 6 | 
 7 | # Project configuration.
 8 | PROJECT_NAME           = "CPFloat"
 9 | PROJECT_NUMBER         = "0.5.0"
10 | PROJECT_BRIEF          = "Custom precision floating-point numbers"
11 | OUTPUT_DIRECTORY       = "./docs/"
12 | 
13 | # Inputs
14 | INPUT                  = ./src/cpfloat_definitions.h \
15 |                          ./src/cpfloat_binary32.h \
16 |                          ./src/cpfloat_binary64.h
17 | RECURSIVE              = NO
18 | 
19 | # Options
20 | EXTENSION_MAPPING      = h=C
21 | MACRO_EXPANSION        = YES
22 | OPTIMIZE_OUTPUT_FOR_C  = YES
23 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'CPFloat'
21 | copyright = '2020, Massimiliano Fasi and Mantas Mikaitis'
22 | author = 'Massimiliano Fasi and Mantas Mikaitis'
23 | 
24 | # The full version, including alpha/beta/rc tags
25 | release = 'latest'
26 | 
27 | 
28 | # -- General configuration ---------------------------------------------------
29 | 
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 |     'sphinx.ext.githubpages',
35 |     'breathe',
36 |     'exhale',
37 |     'myst_parser',
38 | ]
39 | 
40 | # Setup the breathe extension
41 | breathe_projects = {
42 |     "CPFloat": "../xml"
43 | }
44 | breathe_default_project = "CPFloat"
45 | 
46 |  # Setup the exhale extension
47 | exhale_args = {
48 |     # These arguments are required
49 |     "containmentFolder":     "./cpfloat",
50 |     "rootFileName":          "cpfloat_root.rst",
51 |     "rootFileTitle":         "CPFloat API",
52 |     "doxygenStripFromPath":  "..",
53 |     "createTreeView":        True,
54 |     # TIP: if using the sphinx-bootstrap-theme, you need
55 |     # "treeViewIsBootstrap": True,
56 | }
57 | primary_domain = 'c'
58 | highlight_language = 'c'
59 | 
60 | 
61 | # Add any paths that contain templates here, relative to this directory.
62 | templates_path = ['_templates']
63 | 
64 | # The language for content autogenerated by Sphinx. Refer to documentation
65 | # for a list of supported languages.
66 | #
67 | # This is also used if you do content translation via gettext catalogs.
68 | # Usually you set "language" from the command line for these cases.
69 | language = 'en'
70 | 
71 | # List of patterns, relative to source directory, that match files and
72 | # directories to ignore when looking for source files.
73 | # This pattern also affects html_static_path and html_extra_path.
74 | exclude_patterns = []
75 | 
76 | # -- Options for HTML output -------------------------------------------------
77 | 
78 | # The theme to use for HTML and HTML Help pages.  See the documentation for
79 | # a list of builtin themes.
80 | #
81 | html_theme = 'sphinx_rtd_theme'
82 | 
83 | # Add any paths that contain custom static files (such as style sheets) here,
84 | # relative to this directory. They are copied after the builtin static files,
85 | # so a file named "default.css" will overwrite the builtin "default.css".
86 | html_static_path = ['_static']
87 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. CPFloat documentation master file.
 2 | 
 3 | CPFloat documentation
 4 | =====================
 5 | 
 6 | .. toctree::
 7 |    :hidden:
 8 | 
 9 |    self
10 | 
11 | .. toctree::
12 |    :maxdepth: 1
13 | 
14 |    ./readme
15 | 
16 | .. toctree::
17 |    :hidden:
18 | 
19 |    ./cpfloat/cpfloat_root
20 | 
21 | .. toctree::
22 |    :hidden:
23 | 
24 | * :ref:`genindex`
25 | 


--------------------------------------------------------------------------------
/docs/source/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../README.md
2 |    :parser: myst_parser.docutils_
3 | 


--------------------------------------------------------------------------------
/examples/example_manuscript.c:
--------------------------------------------------------------------------------
 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
 3 | 
 4 | #include <stdio.h>
 5 | #include "cpfloat_binary64.h"
 6 | 
 7 | #define N 3
 8 | 
 9 | int main () { // Demo: round an array, and results of operations, to binary16.
10 |   // Allocate the data structure for target formats and rounding parameters.
11 |   optstruct *fpopts = init_optstruct();
12 | 
13 |   // Set up the parameters for binary16 target format.
14 |   fpopts->precision = 11;                 // Bits in the fraction + 1.
15 |   fpopts->emin = -14;                     // The minimum exponent value.
16 |   fpopts->emax = 15;                      // The maximum exponent value.
17 |   fpopts->subnormal = CPFLOAT_SUBN_USE;   // Support for subnormals is on.
18 |   fpopts->round = CPFLOAT_RND_TP;         // Round toward +infinity.
19 |   fpopts->flip = CPFLOAT_SOFTERR_NO;      // Bit flips are off.
20 |   fpopts->p = 0;                          // Bit flip probability (not used).
21 |   fpopts->explim = CPFLOAT_EXPRANGE_TARG; // Limited exponent in target format.
22 | 
23 |   // Validate the parameters in fpopts and print the returned status code.
24 |   int retval = cpfloat_validate_optstruct(fpopts);
25 |   printf("The validation function returned %d.\n", retval);
26 | 
27 |   // Initialize the inputs X and Y; Z will receive the rounded results.
28 |   double X[N] = { (double)5/3, M_PI, M_E };
29 |   double Y[N] = { 1.5, 1.5, 1.5 };
30 |   double Z[N];
31 |   printf("X in binary64:\n  %.15e %.15e %.15e\n", X[0], X[1], X[2]);
32 | 
33 |   // Round the values of X to the binary16 format and store in Z.
34 |   cpfloat(Z, X, N, fpopts);
35 |   printf("X rounded to binary16:\n  %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]);
36 | 
37 |   // Round the elementwise sum of X and Y.
38 |   cpf_add(Z, X, Y, N, fpopts);
39 |   printf("Sum rounded to binary16:\n  %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]);
40 | 
41 |   // Round the elementwise product of X and Y.
42 |   cpf_mul(Z, X, Y, N, fpopts);
43 |   printf("Product rounded to binary16:\n  %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]);
44 | 
45 |   // Round the logarithm of X.
46 |   cpf_log(Z, X, N, fpopts);
47 |   printf("Log rounded to binary16:\n  %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]);
48 | 
49 |   // Round the 2-argument arctangent of X and Y.
50 |   cpf_atan2(Z, X, Y, N, fpopts);
51 |   printf("Angle rounded to binary16:\n  %.15e %.15e %.15e\n", Z[0], Z[1], Z[2]);
52 | 
53 |   free_optstruct(fpopts); // Release the structure obtained from init_optstruct().
54 | }
55 | 
56 | /*
57 |  * CPFloat - Custom Precision Floating-point numbers.
58 |  *
59 |  * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
60 |  *
61 |  * This library is free software; you can redistribute it and/or modify it under
62 |  * the terms of the GNU Lesser General Public License as published by the Free
63 |  * Software Foundation; either version 2.1 of the License, or (at your option)
64 |  * any later version.
65 |  *
66 |  * This library is distributed in the hope that it will be useful, but WITHOUT
67 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
68 |  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
69 |  * details.
70 |  *
71 |  * You should have received a copy of the GNU Lesser General Public License along
72 |  * with this library; if not, write to the Free Software Foundation, Inc., 51
73 |  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
74 |  */
75 | 


--------------------------------------------------------------------------------
/license.spdx:
--------------------------------------------------------------------------------
  1 | ##
  2 | ## Document Creation Information
  3 | ##
  4 | 
  5 | SPDXVersion: SPDX-2.2
  6 | DataLicense: CC0-1.0
  7 | SPDXID: SPDXRef-DOCUMENT
  8 | DocumentName: cpfloat-0.5.0
  9 | DocumentNamespace: https://raw.githubusercontent.com/north-numerical-computing/cpfloat/master/license.spdx
 10 | Creator: Person: Massimiliano Fasi (massimiliano.fasi@durham.ac.uk)
 11 | Creator: Person: Mantas Mikaitis (mantas.mikaitis@manchester.ac.uk)
 12 | Created: 2022-05-13T07:37:31Z
 13 | 
 14 | 
 15 | 
 16 | ##
 17 | ## Package Information
 18 | ##
 19 | 
 20 | PackageName: cpfloat
 21 | SPDXID: SPDXRef-1
 22 | PackageVersion: 0.5.0
 23 | PackageDownloadLocation: git://github.com/north-numerical-computing/cpfloat
 24 | PackageVerificationCode: e7abe3759c76a48cf70348f35121079126b5846e (excludes: ./license.spdx)
 25 | PackageHomePage: https://github.com/north-numerical-computing/cpfloat
 26 | PackageLicenseConcluded: LGPL-2.1-or-later
 27 | PackageLicenseInfoFromFiles: LGPL-2.1-or-later
 28 | PackageLicenseDeclared: LGPL-2.1-or-later
 29 | PackageCopyrightText: <text>Copyright 2020 Massimiliano Fasi and Mantas Mikaitis</text>
 30 | PackageSummary: <text>Custom Precision Floating-point numbers.</text>
 31 | 
 32 | 
 33 | 
 34 | ##
 35 | ## File Information
 36 | ##
 37 | 
 38 | FileName: ./src/cpfloat_binary64.h
 39 | SPDXID: SPDXRef-1-1
 40 | FileType: SOURCE
 41 | FileChecksum: SHA1: a12904f3e5531acf30bcd087c18d3d728adc26ef
 42 | FileChecksum: MD5: a048b24c8c72370e134004985b956b51
 43 | LicenseConcluded: LGPL-2.1-or-later
 44 | LicenseInfoInFile: LGPL-2.1-or-later
 45 | 
 46 | FileName: ./src/cpfloat_binary32.h
 47 | SPDXID: SPDXRef-1-2
 48 | FileType: SOURCE
 49 | FileChecksum: SHA1: 0659f31bfaa75fb0bee0af49916b29e87481b902
 50 | FileChecksum: MD5: ace80b75d287d8c86459430c24608cba
 51 | LicenseConcluded: LGPL-2.1-or-later
 52 | LicenseInfoInFile: LGPL-2.1-or-later
 53 | 
 54 | FileName: ./src/cpfloat_threshold_binary64.h
 55 | SPDXID: SPDXRef-1-3
 56 | FileType: SOURCE
 57 | FileChecksum: SHA1: 84d787a6e3dd3bc8d8615575e0d18272e2378b9d
 58 | FileChecksum: MD5: 68ac8be2a018baae453d325084221bdd
 59 | LicenseConcluded: LGPL-2.1-or-later
 60 | LicenseInfoInFile: LGPL-2.1-or-later
 61 | 
 62 | FileName: ./src/cpfloat_template.h
 63 | SPDXID: SPDXRef-1-4
 64 | FileType: SOURCE
 65 | FileChecksum: SHA1: 35d1080ced6bff3dc57d96e2d01af65aa4fec0ab
 66 | FileChecksum: MD5: a024bf10be172b3e9e42acae6bd84c40
 67 | LicenseConcluded: LGPL-2.1-or-later
 68 | LicenseInfoInFile: LGPL-2.1-or-later
 69 | 
 70 | FileName: ./src/cpfloat_definitions.h
 71 | SPDXID: SPDXRef-1-5
 72 | FileType: SOURCE
 73 | FileChecksum: SHA1: 44a75ba26b33d3b08771eafef03b62e15a419cea
 74 | FileChecksum: MD5: 3c7e1ee37e5f04399dabf6862cff2062
 75 | LicenseConcluded: LGPL-2.1-or-later
 76 | LicenseInfoInFile: LGPL-2.1-or-later
 77 | 
 78 | FileName: ./src/cpfloat_autotune.c
 79 | SPDXID: SPDXRef-1-6
 80 | FileType: SOURCE
 81 | FileChecksum: SHA1: 6790f95b7876857496fc2ab77650edbeefedb8e5
 82 | FileChecksum: MD5: 96bf1b5da96174fa8cbf51f33586389d
 83 | LicenseConcluded: LGPL-2.1-or-later
 84 | LicenseInfoInFile: LGPL-2.1-or-later
 85 | 
 86 | FileName: ./src/cpfloat_docmacros.h
 87 | SPDXID: SPDXRef-1-7
 88 | FileType: DOCUMENTATION
 89 | FileChecksum: SHA1: f34e8ed8a5205320f6c1401f0ed45e5f068f59e2
 90 | FileChecksum: MD5: 41b2b3faaad6061c9f5f1aa6bed6f634
 91 | LicenseConcluded: LGPL-2.1-or-later
 92 | LicenseInfoInFile: LGPL-2.1-or-later
 93 | 
 94 | FileName: ./src/cpfloat_threshold_binary32.h
 95 | SPDXID: SPDXRef-1-8
 96 | FileType: SOURCE
 97 | FileChecksum: SHA1: abf295420daa865fc903e28d6b60e05d7e569b80
 98 | FileChecksum: MD5: bfca1b34e8098b5f0e667983e2ac5813
 99 | LicenseConcluded: LGPL-2.1-or-later
100 | LicenseInfoInFile: LGPL-2.1-or-later
101 | 
102 | FileName: ./LICENSE.md
103 | SPDXID: SPDXRef-1-9
104 | FileType: TEXT
105 | FileChecksum: SHA1: b386b371ce94933e63ced1052aa72a60da5485ff
106 | FileChecksum: MD5: 1803fa9c2c3ce8cb06b4861d75310742
107 | LicenseConcluded: LGPL-2.1-or-later
108 | LicenseInfoInFile: NONE
109 | 
110 | FileName: ./examples/example_manuscript.c
111 | SPDXID: SPDXRef-1-10
112 | FileType: SOURCE
113 | FileChecksum: SHA1: 933d2b2eeac0f9b4e41c539d358c2f747ae11f40
114 | FileChecksum: MD5: 3c68422c91c8ca3fca3e3bf165d0a094
115 | LicenseConcluded: LGPL-2.1-or-later
116 | LicenseInfoInFile: LGPL-2.1-or-later
117 | 
118 | FileName: ./Makefile
119 | SPDXID: SPDXRef-1-11
120 | FileType: OTHER
121 | FileChecksum: SHA1: 53accae589b9c06617915432474ffe91ecc72921
122 | FileChecksum: MD5: 26cebbb7f04a8cece8f6af36ad8557fb
123 | LicenseConcluded: LGPL-2.1-or-later
124 | LicenseInfoInFile: LGPL-2.1-or-later
125 | 
126 | FileName: ./.circleci/config.yml
127 | SPDXID: SPDXRef-1-12
128 | FileType: OTHER
129 | FileChecksum: SHA1: 0a0b634f2a0eda8c23f4107e3c7066a741593b2b
130 | FileChecksum: MD5: 24d7ea179d64d1a084aab198ddfc2569
131 | LicenseConcluded: LGPL-2.1-or-later
132 | LicenseInfoInFile: NONE
133 | 
134 | FileName: ./test/cpfloat_test.m
135 | SPDXID: SPDXRef-1-13
136 | FileType: SOURCE
137 | FileChecksum: SHA1: fb9b197d5f5e6da78ae851a3e59ec70d7bccf056
138 | FileChecksum: MD5: 19db4e188c505d7c4931a57d4eb5b7f6
139 | LicenseConcluded: LGPL-2.1-or-later
140 | LicenseInfoInFile: LGPL-2.1-or-later
141 | 
142 | FileName: ./test/cpfloat_test.ts
143 | SPDXID: SPDXRef-1-14
144 | FileType: SOURCE
145 | FileChecksum: SHA1: 7fafef4423e408ff0b96569248e2fe6386c7cffe
146 | FileChecksum: MD5: 8deece3b4537745bbaa9abfbddbc1957
147 | LicenseConcluded: LGPL-2.1-or-later
148 | LicenseInfoInFile: LGPL-2.1-or-later
149 | 
150 | FileName: ./README.md
151 | SPDXID: SPDXRef-1-15
152 | FileType: TEXT
153 | FileChecksum: SHA1: 42d03ec1e4c3a135169cd9d5391b22b7ab64cd39
154 | FileChecksum: MD5: 30b0c4e7d8454b5af00c78f1cddad335
155 | LicenseConcluded: LGPL-2.1-or-later
156 | LicenseInfoInFile: NONE
157 | 
158 | FileName: ./docs/source/conf.py
159 | SPDXID: SPDXRef-1-16
160 | FileType: DOCUMENTATION
161 | FileChecksum: SHA1: 0f5066f0d287b33573181ff889ed443a081713e1
162 | FileChecksum: MD5: 6b70370507cd57c2dd530f89e7513bc2
163 | LicenseConcluded: LGPL-2.1-or-later
164 | LicenseInfoInFile: NONE
165 | 
166 | FileName: ./docs/source/readme.rst
167 | SPDXID: SPDXRef-1-17
168 | FileType: DOCUMENTATION
169 | FileChecksum: SHA1: 49d43efb093c16b067c2aee7caaaf0533e44e797
170 | FileChecksum: MD5: e251e18defff7c8f718661967ee61f9d
171 | LicenseConcluded: LGPL-2.1-or-later
172 | LicenseInfoInFile: NONE
173 | 
174 | FileName: ./docs/source/index.rst
175 | SPDXID: SPDXRef-1-18
176 | FileType: DOCUMENTATION
177 | FileChecksum: SHA1: d35eb28b1379ca0e7a2de0de003fe7a72f2eb47f
178 | FileChecksum: MD5: 2e1701ca9f6ea0d59e669d0a4fe031fd
179 | LicenseConcluded: LGPL-2.1-or-later
180 | LicenseInfoInFile: NONE
181 | 
182 | FileName: ./docs/Doxyfile-project
183 | SPDXID: SPDXRef-1-19
184 | FileType: DOCUMENTATION
185 | FileChecksum: SHA1: c86f08fbca4704f813dbe94b02699e8a74859337
186 | FileChecksum: MD5: ac6eb56dbf3895c4ea6e2ca1cf1899d3
187 | LicenseConcluded: LGPL-2.1-or-later
188 | LicenseInfoInFile: NONE
189 | 
190 | FileName: ./mex/cpfloat_compile_nomake.m
191 | SPDXID: SPDXRef-1-20
192 | FileType: SOURCE
193 | FileChecksum: SHA1: bda8544e616b10b9062187facb703db1ea507ae6
194 | FileChecksum: MD5: 222dd89700f55d7e2e472edff1ef9f51
195 | LicenseConcluded: LGPL-2.1-or-later
196 | LicenseInfoInFile: LGPL-2.1-or-later
197 | 
198 | FileName: ./mex/cpfloat_autotune.m
199 | SPDXID: SPDXRef-1-21
200 | FileType: SOURCE
201 | FileChecksum: SHA1: bb687d02ee15d94776b7d381fc906ced380df785
202 | FileChecksum: MD5: c05a7f8d328b78ac514f1098c0f62d3d
203 | LicenseConcluded: LGPL-2.1-or-later
204 | LicenseInfoInFile: LGPL-2.1-or-later
205 | 
206 | FileName: ./mex/cpfloat_compile.m
207 | SPDXID: SPDXRef-1-22
208 | FileType: SOURCE
209 | FileChecksum: SHA1: c95b613db79fd5cbf87ea400940487bdee3ba618
210 | FileChecksum: MD5: 426237158bd953e9cb686686e3c18129
211 | LicenseConcluded: LGPL-2.1-or-later
212 | LicenseInfoInFile: LGPL-2.1-or-later
213 | 
214 | FileName: ./mex/cpfloat.m
215 | SPDXID: SPDXRef-1-23
216 | FileType: SOURCE
217 | FileChecksum: SHA1: fff373eccbb247b46122a789ce322866040471e9
218 | FileChecksum: MD5: ac5180f0c0af963a7943d852303e9d39
219 | LicenseConcluded: LGPL-2.1-or-later
220 | LicenseInfoInFile: LGPL-2.1-or-later
221 | 
222 | FileName: ./mex/cpfloat.c
223 | SPDXID: SPDXRef-1-24
224 | FileType: SOURCE
225 | FileChecksum: SHA1: dbece5f24974ceeb3bfeb81ebbd975b469665b16
226 | FileChecksum: MD5: f99f0653546b1e3d5c00ea0321a13049
227 | LicenseConcluded: LGPL-2.1-or-later
228 | LicenseInfoInFile: LGPL-2.1-or-later
229 | 
230 | FileName: ./util/generate_spdx.sh
231 | SPDXID: SPDXRef-1-25
232 | FileChecksum: SHA1: 021746898641abec17280530c3f26434038d2fd7
233 | FileChecksum: MD5: 605929ccf1b08e259e29dff8fbd1f067
234 | LicenseConcluded: LGPL-2.1-or-later
235 | LicenseInfoInFile: NOASSERTION
236 | 


--------------------------------------------------------------------------------
/mex/cpfloat.c:
--------------------------------------------------------------------------------
  1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
  2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
  3 | 
  4 | #include <stddef.h>
  5 | #include <stdint.h>
  6 | #include <string.h>
  7 | #include <math.h>
  8 | #include <float.h>
  9 | 
 10 | #include "mex.h"
 11 | #include "cpfloat_binary32.h"
 12 | #include "cpfloat_binary64.h"
 13 | 
 14 | static optstruct *fpopts; /* Options kept across calls (made persistent). */
 15 | void clearfpopts() { /* Registered with mexAtExit by mexFunction. */
 16 |   if (fpopts != NULL) /* NULL until mexFunction first allocates it. */
 17 |     mxFree(fpopts);
 18 | }
 19 | 
 20 | /*
 21 |  * GATEWAY FUNCTION: rounds the real single/double matrix prhs[0] into plhs[0]
 22 |  * using the persistent options fpopts, which the optional struct prhs[1] can
 23 |  * update; prhs[2] is optional and plhs[1] returns the options in use.
 24 |  */
 25 | void mexFunction(int nlhs,
 26 |                  mxArray *plhs[],
 27 |                  int nrhs,
 28 |                  const mxArray *prhs[]) {
 29 | 
 30 |   /* Check for correct number of arguments. */
 31 |   if(nrhs > 3) {
 32 |     mexErrMsgIdAndTxt("cpfloat:nrhs",
 33 |                       "cpfloat requires at most three input arguments");
 34 |   }
 35 |   if (nlhs > 2)
 36 |     mexErrMsgIdAndTxt("cpfloat:invalidnargout",
 37 |                       "This function returns at most two values.");
 38 | 
 39 |   /* Allocate fpopts struct and set fields to default. */
 40 |   if (fpopts == NULL) {
 41 |     fpopts = mxCalloc(1, sizeof(optstruct));
 42 |     mexMakeMemoryPersistent(fpopts);
 43 |     mexAtExit(clearfpopts);
 44 | 
 45 |     strcpy(fpopts->format, "h");
 46 |     fpopts->precision = 11;
 47 |     fpopts->emin = -14;
 48 |     fpopts->emax = 15;
 49 |     fpopts->explim = CPFLOAT_EXPRANGE_TARG;
 50 |     fpopts->infinity = CPFLOAT_INF_USE;
 51 |     fpopts->round = CPFLOAT_RND_NE;
 52 |     fpopts->saturation = CPFLOAT_SAT_NO;
 53 |     fpopts->subnormal = CPFLOAT_SUBN_USE;
 54 | 
 55 |     fpopts->flip = CPFLOAT_SOFTERR_NO;
 56 |     fpopts->p = 0.5;
 57 | 
 58 |     fpopts->bitseed = NULL;
 59 |     fpopts->randseedf = NULL;
 60 |     fpopts->randseed = NULL;
 61 |   }
 62 | 
 63 |   /* Parse second argument and populate fpopts structure. */
 64 |   if (nrhs > 1) {
 65 |     bool is_subn_rnd_default = false;
 66 |     bool is_inf_no_default = false;
 67 |     if(!mxIsEmpty(prhs[1]) && !mxIsStruct(prhs[1])) {
 68 |       mexErrMsgIdAndTxt("cpfloat:invalidstruct",
 69 |                         "Second argument must be a struct.");
 70 |     } else if (!mxIsEmpty(prhs[1])) {
 71 |       mxArray *tmp = mxGetField(prhs[1], 0, "format");
 72 | 
 73 |       if (tmp != NULL) {
 74 |         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
 75 |           /* Set default format, for compatibility with chop. */
 76 |           strcpy(fpopts->format, "h");
 77 |         else if (mxGetClassID(tmp) == mxCHAR_CLASS) {
 78 |           /* Copy only strings that fit in the fixed-size format buffer. */
 79 |           char *fmt = mxArrayToString(tmp);
 80 |           if (fmt == NULL || strlen(fmt) >= sizeof(fpopts->format))
 81 |             mexErrMsgIdAndTxt("cpfloat:invalidformat",
 82 |                               "Invalid floating-point format specified.");
 83 |           strcpy(fpopts->format, fmt);
 84 |           mxFree(fmt);
 85 |         }
 86 |       }
 87 |       tmp = mxGetField(prhs[1], 0, "params");
 88 |       if ((tmp != NULL) &&
 89 |           (strcmp(fpopts->format, "c")
 90 |            && strcmp(fpopts->format, "custom")))
 91 |         mexWarnMsgIdAndTxt("cpfloat:ignoredparams",
 92 |                            "Floating-point parameters ignored.");
 93 |       /* Populate fpopts->params according to fpopts->format. */
 94 |       if (!strcmp(fpopts->format, "q43") ||
 95 |           !strcmp(fpopts->format, "fp8-e4m3") ||
 96 |           !strcmp(fpopts->format, "E4M3")) {
 97 |         fpopts->precision = 4;
 98 |         fpopts->emin = -6;
 99 |         fpopts->emax = 8;
100 |         is_inf_no_default = true;
101 |       } else if (!strcmp(fpopts->format, "q52") ||
102 |                  !strcmp(fpopts->format, "fp8-e5m2") ||
103 |                  !strcmp(fpopts->format, "E5M2")) {
104 |         fpopts->precision = 3;
105 |         fpopts->emin = -14;
106 |         fpopts->emax = 15;
107 |       } else if (!strcmp(fpopts->format, "b") ||
108 |                  !strcmp(fpopts->format, "bfloat16") ||
109 |                  !strcmp(fpopts->format, "bf16")) {
110 |         fpopts->precision = 8;
111 |         fpopts->emin = -126;
112 |         fpopts->emax = 127;
113 |         is_subn_rnd_default = true;
114 |       } else if (!strcmp(fpopts->format, "h") ||
115 |                  !strcmp(fpopts->format, "half") ||
116 |                  !strcmp(fpopts->format, "binary16") ||
117 |                  !strcmp(fpopts->format, "fp16")) {
118 |         fpopts->precision = 11;
119 |         fpopts->emin = -14;
120 |         fpopts->emax = 15;
121 |       } else if (!strcmp(fpopts->format, "t") ||
122 |                  !strcmp(fpopts->format, "TensorFloat-32") ||
123 |                  !strcmp(fpopts->format, "tf32")) {
124 |         fpopts->precision = 11;
125 |         fpopts->emin = -126;
126 |         fpopts->emax = 127;
127 |       } else if (!strcmp(fpopts->format, "s") ||
128 |                  !strcmp(fpopts->format, "single") ||
129 |                  !strcmp(fpopts->format, "binary32") ||
130 |                  !strcmp(fpopts->format, "fp32")) {
131 |         fpopts->precision =  24;
132 |         fpopts->emin = -126;
133 |         fpopts->emax = 127;
134 |       } else if (!strcmp(fpopts->format, "d") ||
135 |                  !strcmp(fpopts->format, "double") ||
136 |                  !strcmp(fpopts->format, "binary64") ||
137 |                  !strcmp(fpopts->format, "fp64")) {
138 |         fpopts->precision =   53;
139 |         fpopts->emin = -1022;
140 |         fpopts->emax = 1023;
141 |       } else if (!strcmp(fpopts->format, "c") ||
142 |                  !strcmp(fpopts->format, "custom")) {
143 |         if ((tmp != NULL) && (mxGetClassID(tmp) == mxDOUBLE_CLASS)
144 |             && (mxGetNumberOfElements(tmp) >= 3)) {
145 |           const double *params = (const double *)mxGetData(tmp);
146 |           fpopts->precision = params[0];
147 |           fpopts->emin = params[1];
148 |           fpopts->emax = params[2];
149 |         } else {
150 |           mexErrMsgIdAndTxt("cpfloat:invalidparams",
151 |                             "Invalid floating-point parameters specified.");
152 |         }
153 |       } else {
154 |         mexErrMsgIdAndTxt("cpfloat:invalidformat",
155 |                           "Invalid floating-point format specified.");
156 |       }
157 | 
158 |       /* Set default values to be compatible with MATLAB chop. */
159 |       tmp = mxGetField(prhs[1], 0, "explim");
160 |       if (tmp != NULL) {
161 |         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
162 |           fpopts->explim = 1;
163 |         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
164 |           fpopts->explim = *((double *)mxGetData(tmp));
165 |       }
166 | 
167 |       tmp = mxGetField(prhs[1], 0, "infinity");
168 |       if (tmp != NULL) {
169 |         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
170 |           fpopts->infinity = CPFLOAT_INF_USE;
171 |         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
172 |           fpopts->infinity = *((double *)mxGetData(tmp));
173 |       } else {
174 |         if (is_inf_no_default)
175 |           fpopts->infinity = CPFLOAT_INF_NO; /* Default for E4M3. */
176 |       }
177 | 
178 |       tmp = mxGetField(prhs[1], 0, "round");
179 |       if (tmp != NULL) {
180 |         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
181 |           fpopts->round = CPFLOAT_RND_NE;
182 |         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
183 |           fpopts->round = *((double *)mxGetData(tmp));
184 |       }
185 | 
186 |       tmp = mxGetField(prhs[1], 0, "saturation");
187 |       if (tmp != NULL) {
188 |         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
189 |           fpopts->saturation = CPFLOAT_SAT_NO;
190 |         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
191 |           fpopts->saturation = *((double *)mxGetData(tmp));
192 |       }
193 | 
194 |       tmp = mxGetField(prhs[1], 0, "subnormal");
195 |       if (tmp != NULL) {
196 |         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
197 |           fpopts->subnormal = CPFLOAT_SUBN_USE;
198 |         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
199 |           fpopts->subnormal = *((double *)mxGetData(tmp));
200 |       } else {
201 |         if (is_subn_rnd_default)
202 |           fpopts->subnormal = CPFLOAT_SUBN_RND; /* Default for bfloat16. */
203 |         else
204 |           fpopts->subnormal = CPFLOAT_SUBN_USE;
205 |       }
206 | 
207 |       tmp = mxGetField(prhs[1], 0, "flip");
208 |       if (tmp != NULL) {
209 |         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
210 |           fpopts->flip = CPFLOAT_SOFTERR_NO;
211 |         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
212 |           fpopts->flip = *((double *)mxGetData(tmp));
213 |       }
214 |       tmp = mxGetField(prhs[1], 0, "p");
215 |       if (tmp != NULL) {
216 |         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
217 |           fpopts->p = 0.5;
218 |         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
219 |           fpopts->p = *((double *)mxGetData(tmp));
220 |       }
221 |     }
222 |   }
223 | 
224 |   /* UNDOCUMENTED FEATURE: force number of OpenMP threads.
225 |    * If algorithm = 0, do not specify how many threads to use.
226 |    * If algorithm = 1, use cpfloat_sequential().
227 |    * If algorithm > 1, use cpfloat() with specified number of threads.
228 |    * If algorithm < 0, use cpfloat_parallel() with specified number of threads.
229 |    */
230 |   int algorithm;
231 |   if (nrhs > 2) {
232 |     double *tmp = (double *)mxGetData(prhs[2]);
233 |     /* Check class and size first: tmp must not be read for empty arrays. */
234 |     if (!mxIsDouble(prhs[2]) || mxIsComplex(prhs[2])
235 |         || mxGetM(prhs[2]) != 1 || mxGetN(prhs[2]) != 1
236 |         || !isfinite(*tmp) || *tmp != round(*tmp))
237 |       mexErrMsgIdAndTxt("cpfloat:invalidalgorithm",
238 |                         "Third parameter must be an integer.");
239 |     algorithm = (int)(*tmp);
240 |   } else
241 |     algorithm = 0;
242 | 
243 |   /* Parse first argument. */
244 |   if (nrhs > 0) {
245 |     if (!mxIsNumeric(prhs[0])
246 |         || (!mxIsDouble(prhs[0]) && !mxIsSingle(prhs[0]))
247 |         || mxIsComplex(prhs[0])
248 |         || (mxGetNumberOfDimensions(prhs[0]) != 2)) {
249 |       mexErrMsgIdAndTxt("cpfloat:invalidmatrix",
250 |                         "First argument must be a 2D real numeric array.");
251 |     }
252 | 
253 |     int maxfbits, minexp, maxexp; /* Signed, as minexp is negative. */
254 |     if (mxIsSingle(prhs[0])) {
255 |       if (!strcmp(fpopts->format, "d") ||
256 |           !strcmp(fpopts->format, "double") ||
257 |           !strcmp(fpopts->format, "binary64") ||
258 |           !strcmp(fpopts->format, "fp64")) {
259 |         mexErrMsgIdAndTxt("cpfloat:invalidformat",
260 |                           "Target format is too large.");
261 |       } else {
262 |         maxfbits = fpopts->round<=1 ? 11 : 23;
263 |         minexp = -126;
264 |         maxexp = 127;
265 |       }
266 |     } else if(mxIsDouble(prhs[0])) {
267 |       maxfbits = fpopts->round<=1 ? 25 : 52;
268 |       minexp = -1022;
269 |       maxexp = 1023;
270 |     }
271 |     if (fpopts->precision > maxfbits || fpopts->emin < minexp
272 |         ||fpopts->emax > maxexp)
273 |       if (!strcmp(fpopts->format, "c") || !strcmp(fpopts->format, "custom"))
274 |         mexErrMsgIdAndTxt("cpfloat:invalidparams",
275 |                           "Invalid floating-point parameters selected.");
276 | 
277 |     /* Allocate and compute first output. */
278 |     mwSize m, n;
279 |     m = mxGetM(prhs[0]);
280 |     n = mxGetN(prhs[0]);
281 |     mwSize dims[2];
282 |     dims[0] = m;
283 |     dims[1] = n;
284 | 
285 |     if (mxGetClassID(prhs[0]) == mxDOUBLE_CLASS) {
286 |       double *A = (double *)mxGetData(prhs[0]);
287 |       plhs[0] = mxCreateNumericArray(2, dims,mxDOUBLE_CLASS, mxREAL);
288 |       double *X = (double *)mxGetData(plhs[0]);
289 |       #ifdef _OPENMP
290 |       if (algorithm == 0) {
291 |         cpfloat(X, A, m*n, fpopts);
292 |       } else if (algorithm == 1){
293 |         cpfloat_sequential(X, A, m*n, fpopts);
294 |       } else if (algorithm > 0) {
295 |         omp_set_num_threads(algorithm);
296 |         cpfloat(X, A, m*n, fpopts);
297 |       } else {
298 |         omp_set_num_threads(-algorithm);
299 |         cpfloat_parallel(X, A, m*n, fpopts);
300 |       }
301 |       #else
302 |       cpfloat(X, A, m*n, fpopts);
303 |       #endif
304 |     } else if (mxGetClassID(prhs[0]) == mxSINGLE_CLASS) {
305 |       float *A = (float *)mxGetData(prhs[0]);
306 |       plhs[0] = mxCreateNumericArray(2, dims, mxSINGLE_CLASS,mxREAL);
307 |       float *X = (float *)mxGetData(plhs[0]);
308 |       #ifdef _OPENMP
309 |       if (algorithm == 0) {
310 |         cpfloatf(X, A, m*n, fpopts);
311 |       } else if (algorithm == 1){
312 |         cpfloatf_sequential(X, A, m*n, fpopts);
313 |       } else if (algorithm > 0) {
314 |         omp_set_num_threads(algorithm);
315 |         cpfloatf(X, A, m*n, fpopts);
316 |       } else {
317 |         omp_set_num_threads(-algorithm);
318 |         cpfloatf_parallel(X, A, m*n, fpopts);
319 |       }
320 |       #else
321 |       cpfloatf(X, A, m*n, fpopts);
322 |       #endif
323 |     } else {
324 |       mexErrMsgIdAndTxt("cpfloat:invalidmatrix",
325 |                         "First argument must be a numeric array.");
326 |     }
327 |   } else {
328 |     mwSize dims[2];
329 |     dims[0] = 0;
330 |     dims[1] = 0;
331 |     plhs[0] = mxCreateNumericArray(2, dims,mxDOUBLE_CLASS, mxREAL);
332 |   }
333 | 
334 |   /* Allocate and return second output. */
335 |   if (nlhs > 1) {
336 |     const char* field_names[] = {"format", "params", "explim", "infinity",
337 |                                  "round", "saturation", "subnormal",
338 |                                  "flip", "p"};
339 |     mwSize dims[2] = {1, 1};
340 |     plhs[1] = mxCreateStructArray(2, dims, 9, field_names);
341 |     mxSetFieldByNumber(plhs[1], 0, 0, mxCreateString(fpopts->format));
342 | 
343 |     mxArray *outparams = mxCreateDoubleMatrix(1,3,mxREAL);
344 |     double *outparamsptr = mxGetData(outparams);
345 |     outparamsptr[0] = fpopts->precision;
346 |     outparamsptr[1] = fpopts->emin;
347 |     outparamsptr[2] = fpopts->emax;
348 |     mxSetFieldByNumber(plhs[1], 0, 1, outparams);
349 | 
350 |     mxArray *outexplim = mxCreateDoubleMatrix(1, 1, mxREAL);
351 |     double *outexplimptr = mxGetData(outexplim);
352 |     outexplimptr[0] = fpopts->explim;
353 |     mxSetFieldByNumber(plhs[1], 0, 2, outexplim);
354 | 
355 |     mxArray *outinfinity = mxCreateDoubleMatrix(1, 1, mxREAL);
356 |     double *outinfinityptr = mxGetData(outinfinity);
357 |     outinfinityptr[0] = fpopts->infinity;
358 |     mxSetFieldByNumber(plhs[1], 0, 3, outinfinity);
359 | 
360 |     mxArray *outround = mxCreateDoubleMatrix(1,1,mxREAL);
361 |     double *outroundptr = mxGetData(outround);
362 |     outroundptr[0] = fpopts->round;
363 |     mxSetFieldByNumber(plhs[1], 0, 4, outround);
364 | 
365 |     mxArray *outsaturation = mxCreateDoubleMatrix(1,1,mxREAL);
366 |     double *outsaturationptr = mxGetData(outsaturation);
367 |     outsaturationptr[0] = fpopts->saturation;
368 |     mxSetFieldByNumber(plhs[1], 0, 5, outsaturation);
369 | 
370 |     mxArray *outsubnormal = mxCreateDoubleMatrix(1,1,mxREAL);
371 |     double *outsubnormalptr = mxGetData(outsubnormal);
372 |     outsubnormalptr[0] = fpopts->subnormal;
373 |     mxSetFieldByNumber(plhs[1], 0, 6, outsubnormal);
374 | 
375 |     mxArray *outflip = mxCreateDoubleMatrix(1,1,mxREAL);
376 |     double *outflipptr = mxGetData(outflip);
377 |     outflipptr[0] = fpopts->flip;
378 |     mxSetFieldByNumber(plhs[1], 0, 7, outflip);
379 | 
380 |     mxArray *outp = mxCreateDoubleMatrix(1,1,mxREAL);
381 |     double *outpptr = mxGetData(outp);
382 |     outpptr[0] = fpopts->p;
383 |     mxSetFieldByNumber(plhs[1], 0, 8, outp);
384 |   }
385 | }
386 | 
387 | /*
388 |  * CPFloat - Custom Precision Floating-point numbers.
389 |  *
390 |  * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
391 |  *
392 |  * This library is free software; you can redistribute it and/or modify it under
393 |  * the terms of the GNU Lesser General Public License as published by the Free
394 |  * Software Foundation; either version 2.1 of the License, or (at your option)
395 |  * any later version.
396 |  *
397 |  * This library is distributed in the hope that it will be useful, but WITHOUT
398 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
399 |  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
400 |  * details.
401 |  *
402 |  * You should have received a copy of the GNU Lesser General Public License along
403 |  * with this library; if not, write to the Free Software Foundation, Inc., 51
404 |  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
405 |  */
406 | 


--------------------------------------------------------------------------------
/mex/cpfloat.m:
--------------------------------------------------------------------------------
  1 | %CPFLOAT    Round floating point numbers to lower precision.
  2 | %   [Y,OPTIONS] = CPFLOAT(X,FPOPTS) returns a matrix Y containing the elements
  3 | %   of X rounded to a lower-precision floating-point format (the target format).
  4 | %   The function can be used to simulate the occurrence of soft errors in the
  5 | %   rounded values. X must be a real matrix with entries of class 'single' or
  6 | %   'double' (the storage format), and the output matrix Y will be a real matrix
  7 | %   of the same size with entries of the same class. The parameters that
  8 | %   describe the target format, the rounding mode, and the likelihood of soft
  9 | %   errors are stored by the function in persistent memory, and are preserved
 10 | %   across multiple calls to CPFLOAT. The internal configuration can be modified
 11 | %   by means of the structure FPOPTS, whose fields are discussed in detail
 12 | %   below. The parameters for which a new configuration value is not specified
 13 | %   take the default value on the first invocation of CPFLOAT, and keep their
 14 | %   previous values on subsequent calls. The parameters of the current
 15 | %   configuration are returned in the second output argument OPTIONS, a
 16 | %   structure with the same fields as FPOPTS.
 17 | %
 18 | %   The fields of FPOPTS are interpreted as follows.
 19 | %
 20 | %   * The string FPOPTS.format specifies the target floating-point format.
 21 | %     Possible values are:
 22 | %       'q43', 'fp8-e4m3', 'E4M3'          for OCP specification E4M3;
 23 | %       'q52', 'fp8-e5m2', 'E5M2'          for OCP specification E5M2;
 24 | %       'b', 'bf16', 'bfloat16'            for Intel bfloat16;
 25 | %       'h', 'fp16', 'binary16', 'half'    for IEEE binary16 (half precision);
 26 | %       't', 'tf32', 'TensorFloat-32'      for NVIDIA TensorFloat-32;
 27 | %       's', 'fp32', 'binary32', 'single'  for IEEE binary32 (single precision);
 28 | %       'd', 'fp64', 'binary64', 'double'  for IEEE binary64 (double precision);
 29 | %       'c', 'custom'                      for a custom-precision format.
 30 | %     In order to use a custom format, the parameters of the floating-point
 31 | %     format must be supplied using the FPOPTS.params field. The default value
 32 | %     for FPOPTS.format is 'h'.
 33 | %
 34 | %   * The three-element vector FPOPTS.params specifies the parameters of the
 35 | %     target floating-point format, and is ignored unless FPOPTS.format is set
 36 | %     to either 'c' or 'custom'. The vector has the form [PRECISION,EMIN,EMAX],
 37 | %     where PRECISION is the number of binary digits in the significand
 38 | %     (implicit bit included), and EMIN and EMAX are the minimum and maximum
 39 | %     exponent of the target format, respectively. The default value of this
 40 | %     field is the vector [11,-14,15].
 41 | %
 42 | %   * The scalar FPOPTS.explim specifies the support for an extended exponent
 43 | %     range. The target floating-point format will have the exponent range of
 44 | %     the storage format ('single' or 'double', depending on the class of X) if
 45 | %     this field is set to 0, and the exponent range of the format specified in
 46 | %     FPOPTS.format otherwise. The default value for this field is 1.
 47 | %
 48 | %   * The scalar FPOPTS.infinity specifies whether infinities are supported. The
 49 | %     target floating-point format will support infinities if this field is set
 50 | %     to 1, and they will be replaced by NaNs otherwise. The default value for
 51 | %     this field is 0 if the target format is 'E4M3' and 1 otherwise.
 52 | %
 53 | %   * The scalar FPOPTS.round specifies the rounding mode. Possible values are:
 54 | %       -1 for round-to-nearest with ties-to-away;
 55 | %        0 for round-to-nearest with ties-to-zero;
 56 | %        1 for round-to-nearest with ties-to-even;
 57 | %        2 for round-toward-plus-infinity;
 58 | %        3 for round-toward-minus-infinity;
 59 | %        4 for round-toward-zero;
 60 | %        5 for round-stochastic with proportional probabilities;
 61 | %        6 for round-stochastic with equal probabilities; and
 62 | %        7 for round-to-odd.
 63 | %      Any other value results in no rounding. The default value for this field
 64 | %      is 1.
 65 | %
 66 | %   * The scalar FPOPTS.saturation specifies whether saturation arithmetic is in
 67 | %     use. On overflow, the target floating-point format will use the largest
 68 | %     representable floating-point number if this field is set to 0, and
 69 | %     infinity otherwise. The default value for this field is 0.
 70 | %
 71 | %   * The scalar FPOPTS.subnormal specifies the support for subnormal numbers.
 72 | %     The target floating-point format will not support subnormal numbers if
 73 | %     this field is set to 0, and will support them otherwise. The default value
 74 | %     for this field is 0 if the target format is 'bfloat16' and 1 otherwise.
 75 | %
 76 | %   * The scalar FPOPTS.flip specifies whether the function should simulate the
 77 | %     occurrence of a single bit flip striking the floating-point representation
 78 | %     of elements of Y. Possible values are:
 79 | %        0    no bit flips
 80 | %        1    bit flips can occur in fraction of target-format representation
 81 | %        2    bit flips can occur in any bit of target-format representation
 82 | %     The probability of a bit flip occurring in any element of Y is FPOPTS.p.
 83 | %     If the exponent range of the storage format is larger than that of the
 84 | %     target format, then subnormal numbers might be stored as normal numbers,
 85 | %     in which case the bit flip cannot strike the leading bit of the
 86 | %     representation. The default value for this field is 0.
 87 | %
 88 | %   * The scalar FPOPTS.p specifies the probability of bit flips. If FPOPTS.flip
 89 | %     is not set to zero, then the value of this field must be a valid
 90 | %     probability, that is, a real number in the interval [0,1]. The default
 91 | %     value for this field is 0.5.
 92 | %
 93 | %   The interface of CPFLOAT is mostly compatible with that of the MATLAB
 94 | %   function CHOP available at https://github.com/higham/chop. See
 95 | %   https://github.com/north-numerical-computing/cpfloat/blob/main/README.md
 96 | %   for an up-to-date list of differences.
 97 | 
 98 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis
 99 | % SPDX-License-Identifier: LGPL-2.1-or-later
100 | 
101 | % CPFloat - Custom Precision Floating-point numbers.
102 | %
103 | % Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
104 | %
105 | % This library is free software; you can redistribute it and/or modify it under
106 | % the terms of the GNU Lesser General Public License as published by the Free
107 | % Software Foundation; either version 2.1 of the License, or (at your option)
108 | % any later version.
109 | %
110 | % This library is distributed in the hope that it will be useful, but WITHOUT
111 | % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
112 | % FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
113 | % details.
114 | %
115 | % You should have received a copy of the GNU Lesser General Public License along
116 | % with this library; if not, write to the Free Software Foundation, Inc., 51
117 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
118 | 


--------------------------------------------------------------------------------
/mex/cpfloat_autotune.m:
--------------------------------------------------------------------------------
  1 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis
  2 | % SPDX-License-Identifier: LGPL-2.1-or-later
  3 | 
  4 | function cpfloat_autotune(varargin)
  5 | %CPFLOAT_AUTOTUNE    Autotune MEX interface to the CPFloat Library.
  6 | %   CPFLOAT_AUTOTUNE() runs the function CPFLOAT with inputs of class 'single'
  7 | %   and 'double' and computes the size at which switching from the sequential to
  8 | %   the parallel implementation becomes beneficial. The function generates the
  9 | %   two files cpfloat_threshold_binary32.h and cpfloat_threshold_binary64.h in
 10 | %   the current working directory.
 11 | %
 12 | %   CPFLOAT_AUTOTUNE('cpfloatdir',CPFLOATDIR) places the output files in the
 13 | %   folder CPFLOATDIR instead of the current working directory. CPFLOATDIR must
 14 | %   be an existing folder.
 15 | 
 16 |   fpopts.format = 'h';
 17 |   fpopts.subnormal = 1;
 18 |   fpopts.round = 1;
 19 |   fpopts.flip = 0;
 20 |   fpopts.p = 0.5;
 21 |   fpopts.explim = 1;
 22 | 
 23 |   p = inputParser;
 24 |   addParameter(p, 'cpfloatdir', './', @ischar);
 25 |   if exist('maxNumCompThreads', 'builtin')
 26 |     addParameter(p, 'nthreads', maxNumCompThreads(), ...
 27 |                  @(x)(isscalar(x) && round(x) == x));
 28 |   else
 29 |     pkg load parallel
 30 |     addParameter(p, 'nthreads', parcellfun_set_nproc(Inf), ...
 31 |                  @(x)(isscalar(x) && round(x) == x));
 32 |   end
 33 |   parse(p,varargin{:});
 34 |   cpfloatdir = p.Results.cpfloatdir;
 35 |   nthreads = p.Results.nthreads;
 36 |   % Timed repetitions per size (only the tic/toc fallback uses this value).
 37 |   ntests = 100;
 38 | 
 39 |   fprintf('Test using %d OpenMP threads.\n', nthreads);
 40 |   if exist('timeit', 'builtin')
 41 |     parfaster = @(n, fpopts, ntests, fpclass)...
 42 |         parfaster_timeit(n, fpopts, ntests, nthreads, fpclass);
 43 |   else
 44 |     parfaster = @(n, fpopts, ntests, fpclass)...
 45 |         parfaster_tictoc(n, fpopts, ntests, nthreads, fpclass);
 46 |   end
 47 | 
 48 |   docstring =[
 49 |       '/* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */\n',...
 50 |       '/* SPDX-License-Identifier: LGPL-2.1-or-later                         */\n',...
 51 |       '\n',...
 52 |       '/**\n',...
 53 |       ' * @file %s_threshold_%s.h\n',...
 54 |       ' * @brief Size of smallest `%s` array on which to use',...
 55 |       ' multiple OpenMP threads.\n',...
 56 |       ' */\n',...
 57 |       '\n',...
 58 |       '/**\n',...
 59 |       ' * @brief Size of smallest array on which %s() uses multiple threads.\n',...
 60 |       ' *\n',...
 61 |       ' * @details Threshold for switching between %s_sequential() and\n',...
 62 |       ' * %s_parallel() in %s(). The value of this constant is ignored\n',...
 63 |       ' * if the file that includes cpfloat_%s.h is compiled without OpenMP\n',...
 64 |       ' * support.\n',...
 65 |       ' */\n'];
 66 | 
 67 |   % Binary32: double n until the parallel run is faster, then bisect on
 68 |   % [nmin, nmax] to locate the threshold, which is left in nmax.
 69 |   fpclass = 'single';
 70 |   nmin = 1; nmax = 1;
 71 |   while(~parfaster(nmax, fpopts, ntests, fpclass))
 72 |     nmax = nmax * 2;
 73 |   end
 74 |   nmid = round((nmax + nmin) / 2);
 75 |   while(nmid ~= nmin && nmid ~= nmax)
 76 |     if(parfaster(nmid, fpopts, ntests, fpclass))
 77 |       nmax = nmid;
 78 |     else
 79 |       nmin = nmid;
 80 |     end
 81 |     nmid = round((nmax + nmin) / 2);
 82 |   end
 83 |   filename = sprintf('%s/cpfloat_threshold_binary32.h', cpfloatdir);
 84 |   fid = fopen(filename, 'w');
 85 |   fprintf(fid, docstring, 'cpfloat', 'binary32', 'float',...
 86 |           'cpfloatf', 'cpfloatf', 'cpfloatf', 'cpfloatf', 'binary32');
 87 |   fprintf(fid, "#define OPENMP_THRESHOLD_float %d", nmax);
 88 |   fclose(fid);
 89 | 
 90 |   % Binary64: same search; fpclass is reset so that 'single' is not reused.
 91 |   fpclass = 'double';
 92 |   nmin = 1; nmax = 1;
 93 |   while(~parfaster(nmax, fpopts, ntests, fpclass))
 94 |     nmax = nmax * 2;
 95 |   end
 96 |   nmid = round((nmax + nmin) / 2);
 97 |   while(nmid ~= nmin && nmid ~= nmax)
 98 |     if(parfaster(nmid, fpopts, ntests, fpclass))
 99 |       nmax = nmid;
100 |     else
101 |       nmin = nmid;
102 |     end
103 |     nmid = round((nmax + nmin) / 2);
104 |   end
105 |   filename = sprintf('%s/cpfloat_threshold_binary64.h', cpfloatdir);
106 |   fid = fopen(filename, 'w');
107 |   fprintf(fid, docstring, 'cpfloat', 'binary64', 'double',...
108 |           'cpfloat', 'cpfloat', 'cpfloat', 'cpfloat', 'binary64');
109 |   fprintf(fid, "#define OPENMP_THRESHOLD_double %d", nmax);
110 |   fclose(fid);
111 | 
112 |   function res = parfaster_timeit(n, fpopts, ~, nthreads, fpclass)
113 |     X = rand(n, 1, fpclass);
114 |     funseq = @()(cpfloat(X, fpopts, 1));
115 |     seqtime = timeit(funseq);
116 |     funseq = @()(cpfloat(X, fpopts, -nthreads));
117 |     partime = timeit(funseq);
118 |     res = partime < seqtime;
119 |     fprintf('[%7d]   %.5e   %.5e\n', n, seqtime, partime);
120 |   end
121 | 
122 |   function res = parfaster_tictoc(n, fpopts, ntests, nthreads, fpclass)
123 |     X = rand(n, 1, fpclass);
124 |     seqtimings = zeros(1, ntests);
125 |     partimings = zeros(1, ntests);
126 |     for i = 1:ntests
127 |       tic;
128 |       Y = cpfloat(X, fpopts, 1);
129 |       seqtimings(i) = toc();
130 |       tic;
131 |       Y = cpfloat(X, fpopts, -nthreads);
132 |       partimings(i) = toc();
133 |     end
134 |     seqtime = median(seqtimings);
135 |     partime = median(partimings);
136 |     res = partime < seqtime;
137 |     fprintf('[%7d]   %.5e   %.5e\n', n, seqtime, partime);
138 |   end
139 | end
140 | 
141 | % CPFloat - Custom Precision Floating-point numbers.
142 | %
143 | % Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
144 | %
145 | % This library is free software; you can redistribute it and/or modify it under
146 | % the terms of the GNU Lesser General Public License as published by the Free
147 | % Software Foundation; either version 2.1 of the License, or (at your option)
148 | % any later version.
149 | %
150 | % This library is distributed in the hope that it will be useful, but WITHOUT
151 | % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
152 | % FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
153 | % details.
154 | %
155 | % You should have received a copy of the GNU Lesser General Public License along
156 | % with this library; if not, write to the Free Software Foundation, Inc., 51
157 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
158 | 


--------------------------------------------------------------------------------
/mex/cpfloat_compile.m:
--------------------------------------------------------------------------------
  1 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis
  2 | % SPDX-License-Identifier: LGPL-2.1-or-later
  3 | 
  4 | function retval = cpfloat_compile(varargin)
  5 | %CPFLOAT_COMPILE    Compile MEX interface to the CPFloat Library.
  6 | %   RETVAL = CPFLOAT_COMPILE() compiles the MEX function cpfloat using the
  7 | %   default C compiler. The function expects the header files of the CPFloat
  8 | %   Library to be in the current working directory. It first tries to build
  9 | %   with OpenMP; if that fails it builds without OpenMP and sets RETVAL to
 10 | %   false. Otherwise RETVAL is true.
 11 | %
 12 | %   CPFLOAT_COMPILE('cpfloatdir',CPFLOATDIR) looks for the header files of the
 13 | %   CPFloat Library in CPFLOATDIR rather than in the current working directory.
 14 | %
 15 | %   CPFLOAT_COMPILE('pcgpath',PCGPATH) sets the root directory of the PCG
 16 | %   random number generator to PCGPATH instead of ./pcg-c/.
 17 | %
 18 | %   CPFLOAT_COMPILE('pcgvariants',PCGVARIANTS) specifies that the path of the
 19 | %   header file pcg_variants.h is PCGVARIANTS. The default value is
 20 | %   PCGPATH/include/pcg_variants.h.
 21 | %
 22 | %   CPFLOAT_COMPILE('pcglib',PCGLIB) specifies that the path of the library
 23 | %   libpcg_random.a is PCGLIB. The default value is PCGPATH/src/libpcg_random.a.
 24 | %
 25 | %   CPFLOAT_COMPILE('compilerpath',COMPILERPATH) uses the compiler COMPILERPATH
 26 | %   instead of the default C compiler.
 27 | 
 28 |   retval = true;  % set to false below if the OpenMP build fails
 29 | 
 30 |   p = inputParser;
 31 |   addParameter(p, 'cpfloatdir', '', @ischar);
 32 |   addParameter(p, 'pcgpath', './pcg-c/', @ischar);
 33 |   addParameter(p, 'pcgvariants', '', @ischar);
 34 |   addParameter(p, 'pcglib', '', @ischar);
 35 |   addParameter(p, 'compilerpath', '', @ischar);
 36 |   parse(p,varargin{:});
 37 |   cpfloatdir = p.Results.cpfloatdir;
 38 |   pcgpath = p.Results.pcgpath;
 39 |   pcgvariants = p.Results.pcgvariants;
 40 |   pcglib = p.Results.pcglib;
 41 |   compilerpath = p.Results.compilerpath;
 42 | 
 43 |   coptions = '-std=gnu99 -O3 -march=native';
 44 | 
 45 |   % Try to find the PCG library; it is used only if both files exist.
 46 |   if (isempty(pcgvariants))
 47 |     pcgvariants = sprintf('%s/include/pcg_variants.h', pcgpath);
 48 |   end
 49 |   if (isempty(pcglib))
 50 |     pcglib = sprintf('%s/src/libpcg_random.a', pcgpath);
 51 |   end
 52 |   if exist(pcgvariants, 'file') && exist(pcglib, 'file')
 53 |     coptions = sprintf('%s -include%s', coptions, pcgvariants);
 54 |     clibs = sprintf('-L%s/', fileparts(pcglib));
 55 |   else
 56 |     pcglib = '';
 57 |     clibs = '';
 58 |   end
 59 | 
 60 |   usingoctave = exist('OCTAVE_VERSION', 'builtin');
 61 |   if usingoctave
 62 |     if ~isempty(compilerpath)
 63 |       setenv("CC", compilerpath);
 64 |       setenv("CXX", compilerpath);
 65 |       setenv("DL_LD", compilerpath);
 66 |     end
 67 |     if ~isempty(cpfloatdir)  % the include path depends on cpfloatdir only
 68 |       coptions = sprintf('%s -I%s', coptions, cpfloatdir);
 69 |     end
 70 |     setenv("CFLAGS", sprintf("-fopenmp %s", coptions));
 71 |     libpath = deblank(evalc('mkoctfile --print OCTLIBDIR'));
 72 |     setenv("LDFLAGS", sprintf("-fopenmp %s -L%s", clibs, libpath));
 73 |     if isempty(pcglib)
 74 |       [output, status] = mkoctfile('cpfloat.c', '--mex', '--verbose');
 75 |     else
 76 |       [output, status] = mkoctfile('cpfloat.c', pcglib, '--mex', '--verbose');
 77 |     end
 78 |     if status ~= 0
 79 |       warning('Compilation error, trying to compile without OpenMP.');
 80 |       retval = false;
 81 |       setenv("CFLAGS", coptions);
 82 |       setenv("LDFLAGS", sprintf("%s -L%s", clibs, libpath));
 83 |       if isempty(pcglib)
 84 |         [output, status] = mkoctfile('cpfloat.c', '--mex', '--verbose');
 85 |       else
 86 |         [output, status] = mkoctfile('cpfloat.c', pcglib, '--mex', '--verbose');
 87 |       end
 88 |     end
 89 |   else
 90 |     if ~isempty(cpfloatdir)
 91 |       include_dir = sprintf('-I%s', cpfloatdir);
 92 |     else
 93 |       include_dir = '';
 94 |     end
 95 |     if isempty(compilerpath)
 96 |       compiler_string = '';
 97 |     else
 98 |       compiler_string = ['CC="' compilerpath '"'];
 99 |     end
100 |     try
101 |       mex('cpfloat.c', pcglib, '-silent',...
102 |           compiler_string, include_dir,...
103 |           [sprintf('CFLAGS=$CFLAGS %s -fopenmp ', coptions)],...
104 |           [sprintf('LDFLAGS=$LDFLAGS %s -fopenmp ', clibs)]);
105 |     catch
106 |       warning('Compilation error, trying to compile without OpenMP.');
107 |       retval = false;
108 |       mex('cpfloat.c', pcglib, '-silent',...
109 |           compiler_string, include_dir,...
110 |           [sprintf('CFLAGS=$CFLAGS %s ', coptions)],...
111 |           [sprintf('LDFLAGS=$LDFLAGS %s ', clibs)]);
112 |     end
113 |   end
114 | end
115 | 
116 | % CPFloat - Custom Precision Floating-point numbers.
117 | %
118 | % Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
119 | %
120 | % This library is free software; you can redistribute it and/or modify it under
121 | % the terms of the GNU Lesser General Public License as published by the Free
122 | % Software Foundation; either version 2.1 of the License, or (at your option)
123 | % any later version.
124 | %
125 | % This library is distributed in the hope that it will be useful, but WITHOUT
126 | % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
127 | % FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
128 | % details.
129 | %
130 | % You should have received a copy of the GNU Lesser General Public License along
131 | % with this library; if not, write to the Free Software Foundation, Inc., 51
132 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
133 | 


--------------------------------------------------------------------------------
/mex/cpfloat_compile_nomake.m:
--------------------------------------------------------------------------------
 1 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis
 2 | % SPDX-License-Identifier: LGPL-2.1-or-later
 3 | 
 4 | % This MATLAB/Octave script attempts to build the MEX interface to CPFloat on
 5 | % systems where the make tool is not available. The code provides only minimal
 6 | % functionality, but should produce a MEX file on machines where the C
 7 | % building environment is configured correctly.
 8 | 
 9 | % Absolute path of the C compiler to be used to build the MEX interface.
10 | % If the string is left empty, the default C compiler will be used.
11 | compilerpath = '';
12 | 
13 | % Root folder of cpfloat, taken as the parent of the current folder: the
14 | % script therefore assumes that it is being run from the cpfloat/mex/ folder.
15 | cpfloat_dir = fileparts(pwd);
16 | 
17 | % Compile the MEX interface, looking for the headers in the src/ subfolder.
18 | cpfloat_srcdir = fullfile(cpfloat_dir, 'src');
19 | retval = cpfloat_compile('cpfloatdir', cpfloat_srcdir,...
20 |                          'compilerpath', compilerpath);
21 | 
22 | % If the OpenMP build succeeded, auto-tune the thresholds and recompile.
23 | if retval
24 |   cpfloat_autotune('cpfloatdir', cpfloat_srcdir);
25 |   cpfloat_compile('cpfloatdir', cpfloat_srcdir,...
26 |                   'compilerpath', compilerpath);
27 | end
28 | 
29 | % CPFloat - Custom Precision Floating-point numbers.
30 | %
31 | % Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
32 | %
33 | % This library is free software; you can redistribute it and/or modify it under
34 | % the terms of the GNU Lesser General Public License as published by the Free
35 | % Software Foundation; either version 2.1 of the License, or (at your option)
36 | % any later version.
37 | %
38 | % This library is distributed in the hope that it will be useful, but WITHOUT
39 | % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
40 | % FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
41 | % details.
42 | %
43 | % You should have received a copy of the GNU Lesser General Public License along
44 | % with this library; if not, write to the Free Software Foundation, Inc., 51
45 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
46 | 


--------------------------------------------------------------------------------
/src/cpfloat_autotune.c:
--------------------------------------------------------------------------------
  1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
  2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
  3 | 
  4 | /*
  5 |  * This file is part of CPFloat.
  6 |  *
  7 |  * Running this program will update the threshold values in
  8 |  * cpfloat_threshold_binary32.h and cpfloat_threshold_binary64.h.
  9 |  */
 10 | #include <time.h>
 11 | #include <stdio.h>
 12 | #include <string.h>
 13 | #include <stdbool.h>
 14 | 
 15 | #include "cpfloat_binary32.h"
 16 | #include "cpfloat_binary64.h"
 17 | 
 18 | /* qsort() comparator: ascending order of the doubles x and y point to. */
 19 | int cmpfun(const void *x, const void *y) {
 20 |   const double lhs = *(const double *)x;
 21 |   const double rhs = *(const double *)y;
 22 |   if (lhs < rhs)
 23 |     return -1;
 24 |   return lhs > rhs ? 1 : 0;
 25 | }
 26 | 
 27 | /* Time elapsed between *start and *end, in seconds. */
 28 | double timedifference(struct timespec *start, struct timespec *end) {
 29 |   const double nanoseconds = (double)(end->tv_nsec - start->tv_nsec);
 30 |   return (end->tv_sec - start->tv_sec) + nanoseconds * 1e-9;
 31 | }
 32 | 
 33 | 
 34 | bool parfaster_double(size_t n, optstruct *fpopts,
 35 |                       struct timespec *start, struct timespec *end,
 36 |                       double *seqtimings, double *partimings, size_t ntests) {
 37 |   /* Returns true iff the middle parallel timing beats the sequential one. */
 38 |   double *Xd = malloc(n * sizeof(*Xd));
 39 |   double *Yd = malloc(n * sizeof(*Yd));
 40 |   for (size_t i = 0; i < n; i++)
 41 |     Xd[i] = rand() / (double)RAND_MAX;
 42 |   /* Time both variants ntests times on the same n pseudo-random inputs. */
 43 |   for (size_t i = 0; i < ntests; i++) {
 44 |     clock_gettime(CLOCK_MONOTONIC, start);
 45 |     cpfloat_sequential(Yd, Xd, n, fpopts);
 46 |     clock_gettime(CLOCK_MONOTONIC, end);
 47 |     seqtimings[i] = timedifference(start, end);
 48 |     clock_gettime(CLOCK_MONOTONIC, start);
 49 |     cpfloat_parallel(Yd, Xd, n, fpopts);
 50 |     clock_gettime(CLOCK_MONOTONIC, end);
 51 |     partimings[i] = timedifference(start, end);
 52 |   }
 53 |   free(Xd);
 54 |   free(Yd);
 55 |   qsort(seqtimings, ntests, sizeof(*seqtimings), cmpfun);
 56 |   double seqtime = seqtimings[ntests/2]; /* middle of the sorted timings */
 57 |   qsort(partimings, ntests, sizeof(*partimings), cmpfun);
 58 |   double partime = partimings[ntests/2];
 59 |   printf("[%7zu]   [%.2e, %.2e, %.2e]   [%.2e, %.2e, %.2e]\n", /* n: size_t */
 60 |          n, seqtimings[0], seqtime, seqtimings[ntests-1],
 61 |          partimings[0], partime, partimings[ntests-1]);
 62 |   return partime < seqtime;
 63 | }
 64 | 
 65 | bool parfaster_float(size_t n, optstruct *fpopts,
 66 |                      struct timespec *start, struct timespec *end,
 67 |                      double *seqtimings, double *partimings, size_t ntests) {
 68 |   /* Returns true iff the middle parallel timing beats the sequential one. */
 69 |   float *Xd = malloc(n * sizeof(*Xd));
 70 |   float *Yd = malloc(n * sizeof(*Yd));
 71 |   for (size_t i = 0; i < n; i++)
 72 |     Xd[i] = rand() / (float)RAND_MAX;
 73 |   for (size_t i = 0; i < ntests; i++) {
 74 |     clock_gettime(CLOCK_MONOTONIC, start);
 75 |     cpfloatf_sequential(Yd, Xd, n, fpopts);
 76 |     clock_gettime(CLOCK_MONOTONIC, end);
 77 |     seqtimings[i] = timedifference(start, end);
 78 |     clock_gettime(CLOCK_MONOTONIC, start);
 79 |     cpfloatf_parallel(Yd, Xd, n, fpopts);
 80 |     clock_gettime(CLOCK_MONOTONIC, end);
 81 |     partimings[i] = timedifference(start, end);
 82 |   }
 83 |   free(Xd);
 84 |   free(Yd);
 85 |   qsort(seqtimings, ntests, sizeof(*seqtimings), cmpfun);
 86 |   double seqtime = seqtimings[ntests/2]; /* timings are double: no narrowing */
 87 |   qsort(partimings, ntests, sizeof(*partimings), cmpfun);
 88 |   double partime = partimings[ntests/2];
 89 |   printf("[%7zu]   [%.2e, %.2e, %.2e]   [%.2e, %.2e, %.2e]\n", /* n: size_t */
 90 |          n, seqtimings[0], seqtime, seqtimings[ntests-1],
 91 |          partimings[0], partime, partimings[ntests-1]);
 92 |   return partime < seqtime;
 93 | }
 94 | 
 95 | int main(void) {
 96 |   /* Allocate a zero-filled fpopts struct, so that any field not assigned below
 97 |    * holds 0 rather than an indeterminate value; then set the tested format. */
 98 |   optstruct *fpopts = calloc(1, sizeof(*fpopts));
 99 |   if (fpopts == NULL) return 1;
100 |   strcpy(fpopts->format,"s");
101 |   fpopts->precision = 24;
102 |   fpopts->emax = 127;
103 |   fpopts->emin = -126;
104 |   fpopts->subnormal = 0;
105 |   fpopts->round = 1;
106 |   fpopts->flip = 0;
107 |   fpopts->p = 0.5;
108 |   fpopts->explim = 1;
109 | 
110 |   size_t nmin, nmax, nmid;
111 |   size_t ntests = 1000;
112 |   struct timespec *start = malloc(sizeof(struct timespec));
113 |   struct timespec *end = malloc(sizeof(struct timespec));
114 | 
115 |   double *seqtimings = malloc(ntests * sizeof(*seqtimings));
116 |   double *partimings = malloc(ntests * sizeof(*partimings));
117 |   if (!start || !end || !seqtimings || !partimings) return 1;
118 |   int maxnumthreads = omp_get_max_threads();
119 |   printf("Test using %d OpenMP threads.\n", maxnumthreads);
120 | 
121 |   char docstring [] =
122 |     "/* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */\n"
123 |     "/* SPDX-License-Identifier: LGPL-2.1-or-later                         */\n"
124 |     "\n/**\n"
125 |     " * @file %s_threshold_%s.h\n"
126 |     " * @brief Size of smallest `%s` array on which to use"
127 |     " multiple OpenMP threads.\n"
128 |     " */\n"
129 |     "\n"
130 |     "/**\n"
131 |     " * @brief Size of smallest array on which %s() uses multiple threads.\n"
132 |     " *\n"
133 |     " * @details Threshold for switching between %s_sequential() and\n"
134 |     " * %s_parallel() in %s(). The value of this constant is ignored\n"
135 |     " * if the file that includes cpfloat_%s.h is compiled without OpenMP\n"
136 |     " * support.\n"
137 |     " */\n";
138 | 
139 |   /* Binary32: double n until the parallel code is faster, then bisect. */
140 |   nmin = nmax = 1;
141 |   while(!parfaster_float(nmax, fpopts, start, end,
142 |                          seqtimings, partimings, ntests))
143 |     nmax *= 2;
144 |   nmid = (nmax + nmin) / 2;
145 |   while(nmid != nmin && nmid != nmax) {
146 |     if(parfaster_float(nmid, fpopts, start, end,
147 |                        seqtimings, partimings, ntests))
148 |       nmax = nmid;
149 |     else
150 |       nmin = nmid;
151 |     nmid = (nmax + nmin) / 2;
152 |   }
153 |   printf("Optimal threshold for single is %zu.\n", nmax);
154 |   const char filenamef [] = "./cpfloat_threshold_binary32.h";
155 |   FILE *fidf = fopen(filenamef, "w");
156 |   if (fidf == NULL) { perror(filenamef); return 1; }
157 |   fprintf(fidf, docstring, "cpfloat", "binary32", "float",
158 |           "cpfloatf", "cpfloatf", "cpfloatf", "cpfloatf", "binary32");
159 |   fprintf(fidf, "#define OPENMP_THRESHOLD_float %zu", nmax);
160 |   fclose(fidf);
161 | 
162 |   /* Binary64: same search on double inputs. */
163 |   nmin = nmax = 1;
164 |   while(!parfaster_double(nmax, fpopts, start, end,
165 |                           seqtimings, partimings, ntests))
166 |     nmax *= 2;
167 |   nmid = (nmax + nmin) / 2;
168 |   while(nmid != nmin && nmid != nmax) {
169 |     if(parfaster_double(nmid, fpopts, start, end,
170 |                         seqtimings, partimings, ntests))
171 |       nmax = nmid;
172 |     else
173 |       nmin = nmid;
174 |     nmid = (nmax + nmin) / 2;
175 |   }
176 |   printf("Optimal threshold for double is %zu.\n", nmax);
177 |   const char filenamed [] = "./cpfloat_threshold_binary64.h";
178 |   FILE *fidd = fopen(filenamed, "w");
179 |   if (fidd == NULL) { perror(filenamed); return 1; }
180 |   fprintf(fidd, docstring, "cpfloat", "binary64", "double",
181 |           "cpfloat", "cpfloat", "cpfloat", "cpfloat", "binary64");
182 |   fprintf(fidd, "#define OPENMP_THRESHOLD_double %zu", nmax);
183 |   fclose(fidd);
184 | 
185 |   free(fpopts); free(start); free(end); free(seqtimings); free(partimings);
186 |   return 0;
187 | }
188 | 
189 | /*
190 |  * CPFloat - Custom Precision Floating-point numbers.
191 |  *
192 |  * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
193 |  *
194 |  * This library is free software; you can redistribute it and/or modify it under
195 |  * the terms of the GNU Lesser General Public License as published by the Free
196 |  * Software Foundation; either version 2.1 of the License, or (at your option)
197 |  * any later version.
198 |  *
199 |  * This library is distributed in the hope that it will be useful, but WITHOUT
200 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
201 |  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
202 |  * details.
203 |  *
204 |  * You should have received a copy of the GNU Lesser General Public License along
205 |  * with this library; if not, write to the Free Software Foundation, Inc., 51
206 |  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
207 |  */
208 | 


--------------------------------------------------------------------------------
/src/cpfloat_binary32.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
  2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
  3 | 
  4 | /**
  5 |  * @file cpfloat_binary32.h
  6 |  * @brief CPFloat functions for `float` arrays.
  7 |  */
  8 | 
  9 | #ifndef _CPFLOAT_BINARY32_
 10 | #define _CPFLOAT_BINARY32_
 11 | 
 12 | #include "cpfloat_definitions.h"
 13 | #include "cpfloat_docmacros.h"
 14 | 
 15 | /* Validation of floating-point parameters. */
 16 | doc_cpfloat_validate_optstruct(double, 12, 24, -126, 127)
 17 | static inline int cpfloat_validate_optstructf(const optstruct *fpopts);
 18 | 
 19 | /* Rounding functions. */
 20 | doc_cpfloat(float, 24, -126, 127)
 21 | static inline int cpfloatf(float *X, const float *A, const size_t numelem,
 22 |                            optstruct *fpopts);
 23 | doc_cpfloat(float, 24, -126, 127)
 24 | static inline int cpf_fproundf(float *X, const float *A,
 25 |                                const size_t numelem, optstruct *fpopts);
 26 | 
 27 | /* Elementary arithmetic operations. */
 28 | doc_cpf_bivariate(sum, \f$ X_i = A_i + B_i \f$, 24, -126, 127)
 29 | static inline int cpf_addf(float *X, const float *A, const float *B,
 30 |                            const size_t numelem, optstruct *fpopts);
 31 | doc_cpf_bivariate(difference, \f$ X_i = A_i - B_i \f$, 24, -126, 127)
 32 | static inline int cpf_subf(float *X, const float *A, const float *B,
 33 |                            const size_t numelem, optstruct *fpopts);
 34 | doc_cpf_bivariate(product, \f$ X_i = A_i \times B_i \f$, 24, -126, 127)
 35 | static inline int cpf_mulf(float *X, const float *A, const float *B,
 36 |                            const size_t numelem, optstruct *fpopts);
 37 | doc_cpf_bivariate(ratio, \f$ X_i = A_i / B_i \f$, 24, -126, 127)
 38 | static inline int cpf_divf(float *X, const float *A, const float *B,
 39 |                            const size_t numelem, optstruct *fpopts);
 40 | 
 41 | /* Trigonometric functions. */
 42 | doc_cpf_univariate(trigonometric cosine, \f$ X_i = \cos(A_i) \f$, 24, -126, 127)
 43 | static inline int cpf_cosf(float *X, const float *A,
 44 |                            const size_t numelem, optstruct *fpopts);
 45 | doc_cpf_univariate(trigonometric sine, \f$ X_i = \sin(A_i) \f$, 24, -126, 127)
 46 | static inline int cpf_sinf(float *X, const float *A,
 47 |                            const size_t numelem, optstruct *fpopts);
 48 | doc_cpf_univariate(trigonometric tangent, \f$ X_i = \tan(A_i) \f$, 24, -126, 127)
 49 | static inline int cpf_tanf(float *X, const float *A,
 50 |                            const size_t numelem, optstruct *fpopts);
 51 | 
 52 | doc_cpf_univariate(inverse trigonometric cosine,
 53 |                    \f$ X_i = \mathrm{acos}(A_i) \f$, 24, -126, 127)
 54 | static inline int cpf_acosf(float *X, const float *A,
 55 |                             const size_t numelem, optstruct *fpopts);
 56 | doc_cpf_univariate(inverse trigonometric sine,
 57 |                    \f$ X_i = \mathrm{asin}(A_i) \f$, 24, -126, 127)
 58 | static inline int cpf_asinf(float *X, const float *A,
 59 |                             const size_t numelem, optstruct *fpopts);
 60 | doc_cpf_univariate(inverse trigonometric tangent,
 61 |                    \f$ X_i = \mathrm{atan}(A_i) \f$, 24, -126, 127)
 62 | static inline int cpf_atanf(float *X, const float *A,
 63 |                             const size_t numelem, optstruct *fpopts);
 64 | doc_cpf_bivariate(2-argument arctangent,
 65 |                   \f$ X_i = \mathrm{atan} (B_i / A_i) \f$, 24, -126, 127)
 66 | static inline int cpf_atan2f(float *X, const float *A, const float *B,
 67 |                              const size_t numelem, optstruct *fpopts);
 68 | 
 69 | /* Hyperbolic functions. */
 70 | doc_cpf_univariate(hyperbolic cosine, \f$ X_i = \mathrm{cosh}(A_i) \f$, 24, -126, 127)
 71 | static inline int cpf_coshf(float *X, const float *A,
 72 |                             const size_t numelem, optstruct *fpopts);
 73 | doc_cpf_univariate(hyperbolic sine, \f$ X_i = \mathrm{sinh}(A_i) \f$, 24, -126, 127)
 74 | static inline int cpf_sinhf(float *X, const float *A,
 75 |                             const size_t numelem, optstruct *fpopts);
 76 | doc_cpf_univariate(hyperbolic tangent , \f$ X_i = \mathrm{tanh}(A_i) \f$, 24, -126, 127)
 77 | static inline int cpf_tanhf(float *X, const float *A,
 78 |                             const size_t numelem, optstruct *fpopts);
 79 | 
 80 | doc_cpf_univariate(inverse hyperbolic cosine,
 81 |                    \f$ X_i = \mathrm{arcosh}(A_i) \f$, 24, -126, 127)
 82 | static inline int cpf_acoshf(float *X, const float *A,
 83 |                              const size_t numelem, optstruct *fpopts);
 84 | doc_cpf_univariate(inverse hyperbolic sine,
 85 |                    \f$ X_i = \mathrm{arsinh}(A_i) \f$, 24, -126, 127)
 86 | static inline int cpf_asinhf(float *X, const float *A,
 87 |                              const size_t numelem, optstruct *fpopts);
 88 | doc_cpf_univariate(inverse hyperbolic tangent,
 89 |                    \f$ X_i = \mathrm{artanh}(A_i) \f$, 24, -126, 127)
 90 | static inline int cpf_atanhf(float *X, const float *A,
 91 |                              const size_t numelem, optstruct *fpopts);
 92 | 
 93 | /* Exponentiation and logarithmic functions. */
 94 | doc_cpf_univariate(exponential, \f$ X_i = \exp(A_i) \f$, 24, -126, 127)
 95 | static inline int cpf_expf(float *X, const float *A,
 96 |                            const size_t numelem, optstruct *fpopts);
 97 | 
 98 | doc_cpf_frexp(24, -126, 127)
 99 | static inline int cpf_frexpf(float *X, int *exp, const float *A,
100 |                              const size_t numelem, optstruct *fpopts);
101 | doc_cpf_scaling(2, 24, -126, 127)
102 | static inline int cpf_ldexpf(float *X, const float *A, const int *exp,
103 |                              const size_t numelem, optstruct *fpopts);
104 | doc_cpf_univariate(natural logarithm, \f$ X_i = \log(A_i) \f$, 24, -126, 127)
105 | static inline int cpf_logf(float *X, const float *A,
106 |                            const size_t numelem, optstruct *fpopts);
107 | doc_cpf_univariate(base-10 logarithm, \f$ X_i = \log_{10}(A_i) \f$, 24, -126, 127)
108 | static inline int cpf_log10f(float *X, const float *A,
109 |                              const size_t numelem, optstruct *fpopts);
110 | doc_cpf_modf(24, -126, 127)
111 | static inline int cpf_modff(float *X, float *intpart, const float *A,
112 |                             const size_t numelem, optstruct *fpopts);
113 | doc_cpf_univariate(base-2 exponential, \f$ X_i = 2^{A_i} \f$, 24, -126, 127)
114 | static inline int cpf_exp2f(float *X, const float *A,
115 |                             const size_t numelem, optstruct *fpopts);
116 | doc_cpf_univariate(exp(x) - 1, \f$ X_i = \exp(A_i) - 1 \f$, 24, -126, 127)
117 | static inline int cpf_expm1f(float *X, const float *A,
118 |                              const size_t numelem, optstruct *fpopts);
119 | doc_cpf_ilogb(24, -126, 127)
120 | static inline int cpf_ilogbf(int *exp, const float *A,
121 |                              const size_t numelem, optstruct *fpopts);
122 | 
123 | doc_cpf_univariate(natural logarithm of number shifted by one,
124 |                    \f$ X_i = \log(1+A_i) \f$, 24, -126, 127)
125 | static inline int cpf_log1pf(float *X, const float *A,
126 |                              const size_t numelem, optstruct *fpopts);
127 | doc_cpf_univariate(base-2 logarithm, \f$ X_i = \log_2(A_i) \f$, 24, -126, 127)
128 | static inline int cpf_log2f(float *X, const float *A,
129 |                             const size_t numelem, optstruct *fpopts);
130 | doc_cpf_univariate(base-FLT_RADIX logarithm of absolute value,
131 |                    \f$ X_i = \log(\lvert A_i \rvert) \f$, 24, -126, 127)
132 | static inline int cpf_logbf(float *X, const float *A,
133 |                             const size_t numelem, optstruct *fpopts);
134 | doc_cpf_scaling(FLT\_RADIX, 24, -126, 127)
135 | static inline int cpf_scalbnf(float *X, const float *A, const int *exp,
136 |                               const size_t numelem, optstruct *fpopts);
137 | doc_cpf_scaling(FLT\_RADIX, 24, -126, 127)
138 | static inline int cpf_scalblnf(float *X, const float *A,
139 |                                const long int *exp, const size_t numelem,
140 |                                optstruct *fpopts);
141 | 
142 | /* Power functions. */
143 | doc_cpf_bivariate(real powers, \f$ X_i = A_i^{B_i} \f$, 24, -126, 127)
144 | static inline int cpf_powf(float *X, const float *A, const float *B,
145 |                            const size_t numelem, optstruct *fpopts);
146 | doc_cpf_univariate(square root, \f$ X_i = \sqrt{A_i} \f$, 24, -126, 127)
147 | static inline int cpf_sqrtf(float *X, const float *A,
148 |                             const size_t numelem, optstruct *fpopts);
149 | doc_cpf_univariate(cube root, \f$ X_i = \sqrt[3]{A_i} \f$, 24, -126, 127)
150 | static inline int cpf_cbrtf(float *X, const float *A,
151 |                             const size_t numelem, optstruct *fpopts);
152 | doc_cpf_bivariate(hypotenuse of a right-angle triangle,
153 |                   \f$ X_i = \sqrt{A_i^2 + B_i^2} \f$, 24, -126, 127)
154 | static inline int cpf_hypotf(float *X, const float *A, const float *B,
155 |                              const size_t numelem, optstruct *fpopts);
156 | 
157 | /* Error and gamma functions. */
158 | doc_cpf_univariate(error function, \f$ X_i = \mathrm{erf}(A_i) \f$, 24, -126, 127)
159 | static inline int cpf_erff(float *X, const float *A,
160 |                            const size_t numelem, optstruct *fpopts);
161 | doc_cpf_univariate(complementary error function,
162 |                    \f$ X_i = \mathrm{erfc}(A_i) \f$, 24, -126, 127)
163 | static inline int cpf_erfcf(float *X, const float *A,
164 |                             const size_t numelem, optstruct *fpopts);
165 | doc_cpf_univariate(gamma function, \f$ X_i = \Gamma(A_i) \f$, 24, -126, 127)
166 | static inline int cpf_tgammaf(float *X, const float *A,
167 |                               const size_t numelem, optstruct *fpopts);
168 | doc_cpf_univariate(natural logarithm of absolute value of gamma function,
169 |                    \f$ X_i = \log(\lvert \Gamma(A_i) \rvert) \f$, 24, -126, 127)
170 | static inline int cpf_lgammaf(float *X, const float *A,
171 |                               const size_t numelem, optstruct *fpopts);
172 | 
173 | /* Rounding and remainder functions. */
174 | doc_cpf_univariate(ceiling function, \f$ X_i = \lceil A_i \rceil \f$, 24, -126, 127)
175 | static inline int cpf_ceilf(float *X, const float *A,
176 |                             const size_t numelem, optstruct *fpopts);
177 | doc_cpf_univariate(floor function, \f$ X_i = \lfloor A_i \rfloor \f$, 24, -126, 127)
178 | static inline int cpf_floorf(float *X, const float *A,
179 |                              const size_t numelem, optstruct *fpopts);
180 | doc_cpf_bivariate(floating-point remainder of division,
181 |                   \f$ X_i = A_i \;\mathrm{mod}\; B_i \f$, 24, -126, 127)
182 | static inline int cpf_fmodf(float *X, const float *A, const float *B,
183 |                             const size_t numelem, optstruct *fpopts);
184 | doc_cpf_univariate(integer truncation, \f$ X_i = \mathrm{trunc}(A_i) \f$, 24, -126, 127)
185 | static inline int cpf_truncf(float *X, const float *A,
186 |                              const size_t numelem, optstruct *fpopts);
187 | 
188 | doc_cpf_univariate(closest integer (with round-to-nearest),
189 |                    \f$ X_i = \mathrm{round}(A_i) \f$, 24, -126, 127)
190 | static inline int cpf_roundf(float *X, const float *A,
191 |                              const size_t numelem, optstruct *fpopts);
192 | doc_cpf_univariate(closest integer (with round-to-nearest),
193 |                    \f$ X_i = \mathrm{round}(A_i) \f$, 24, -126, 127)
194 | static inline int cpf_lroundf(long *X, const float *A,
195 |                               const size_t numelem, optstruct *fpopts);
196 | doc_cpf_univariate_nobitflip(closest integer (with round-to-nearest),
197 |                              \f$ X_i = \mathrm{round}(A_i) \f$, 24, -126, 127)
198 | static inline int cpf_llroundf(long long *X, const float *A,
199 |                                const size_t numelem, optstruct *fpopts);
200 | 
201 | doc_cpf_rint(24, -126, 127)
202 | static inline int cpf_rintf(float *X, int *exception, const float *A,
203 |                             const size_t numelem, optstruct *fpopts);
204 | doc_cpf_rint(24, -126, 127)
205 | static inline int cpf_lrintf(long *X, int *exception, const float *A,
206 |                              const size_t numelem, optstruct *fpopts);
207 | doc_cpf_rint(24, -126, 127)
208 | static inline int cpf_llrintf(long long *X, int *exception, const float *A,
209 |                              const size_t numelem, optstruct *fpopts);
210 | doc_cpf_nearbyint(24, -126, 127)
211 | static inline int cpf_nearbyintf(float *X, const float *A,
212 |                                  const size_t numelem, optstruct *fpopts);
213 | doc_cpf_bivariate(remainder of the floating point division,
214 |                   \f$ X_i = A_i - k \times B_i \f$
215 |                   for largest \f$ k \f$ such that \f$ k \times B_i < A_i \f$,
216 |                   24, -126, 127)
217 | static inline int cpf_remainderf(float *X, const float *A, const float *B,
218 |                                  const size_t numelem, optstruct *fpopts);
219 | 
220 | doc_cpf_remquo(24, -126, 127)
221 | static inline int cpf_remquof(float *X, int *quot,
222 |                               const float *A, const float *B,
223 |                               const size_t numelem, optstruct *fpopts);
224 | 
225 | /* Floating-point manipulation functions. */
226 | doc_cpf_bivariate(number from magnitude and sign,
227 |                   \f$ X_i = \mathrm{sign}(A_i) \times \lvert B_i \rvert \f$,
228 |                   24, -126, 127)
229 | static inline int cpf_copysignf(float *X, const float *A, const float *B,
230 |                                 const size_t numelem, optstruct *fpopts);
231 | doc_cpf_bivariate(next floating-point number in specified direction,
232 |                   the floating-point number closest to \f$ A_i \f$ in the
233 |                   direction of \f$ B_i \f$, 24, -126, 127)
234 | static inline int cpf_nextafterf(float *X, const float *A, const float *B,
235 |                                  const size_t numelem, optstruct *fpopts);
236 | doc_cpf_bivariate(next floating-point number in specified direction,
237 |                   the floating-point number closest to \f$ A_i \f$ in the
238 |                   direction of \f$ B_i \f$, 24, -126, 127)
239 | static inline int cpf_nexttowardf(float *X, const float *A,
240 |                                   const long double *B,
241 |                                   const size_t numelem,
242 |                                   optstruct *fpopts);
243 | 
244 | /* Minimum, maximum, difference functions. */
245 | doc_cpf_bivariate(positive difference, \f$ X_i = \lvert A_i - B_i \rvert \f$,
246 |                   24, -126, 127)
247 | static inline int cpf_fdimf(float *X, const float *A, const float *B,
248 |                             const size_t numelem, optstruct *fpopts);
249 | doc_cpf_bivariate(element-wise maximum, \f$ X_i = \mathrm{max}(A_i, B_i) \f$,
250 |                   24, -126, 127)
251 | static inline int cpf_fmaxf(float *X, const float *A, const float *B,
252 |                             const size_t numelem, optstruct *fpopts);
253 | doc_cpf_bivariate(element-wise minimum, \f$ X_i = \mathrm{min}(A_i, B_i) \f$,
254 |                   24, -126, 127)
255 | static inline int cpf_fminf(float *X, const float *A, const float *B,
256 |                             const size_t numelem, optstruct *fpopts);
257 | 
258 | /* Classification. */
259 | doc_cpf_fpclassify(24, -126, 127)
260 | static inline int cpf_fpclassifyf(int *r, const float *A,
261 |                                   const size_t numelem, optstruct *fpopts);
262 | doc_cpf_isfun(finite, 24, -126, 127)
263 | static inline int cpf_isfinitef(int *r, const float *A,
264 |                                 const size_t numelem, optstruct *fpopts);
265 | doc_cpf_isfun(infinite, 24, -126, 127)
266 | static inline int cpf_isinff(int *r, const float *A,
267 |                              const size_t numelem, optstruct *fpopts);
268 | doc_cpf_isfun(not a number, 24, -126, 127)
269 | static inline int cpf_isnanf(int *r, const float *A,
270 |                              const size_t numelem, optstruct *fpopts);
271 | doc_cpf_isfun(normal, 24, -126, 127)
272 | static inline int cpf_isnormalf(int *r, const float *A,
273 |                                 const size_t numelem, optstruct *fpopts);
274 | 
275 | /* Other functions. */
276 | doc_cpf_univariate(absolute value, \f$ X_i = \lvert A_i \rvert \f$, 24, -126, 127)
277 | static inline int cpf_fabsf(float *X, const float *A,
278 |                             const size_t numelem, optstruct *fpopts);
279 | doc_cpf_trivariate(fused multiply-add , \f$ X_i = A_i \times B_i + C_i \f$,
280 |                    24, -126, 127)
281 | static inline int cpf_fmaf(float *X, const float *A, const float *B,
282 |                            const float *C, const size_t numelem,
283 |                            optstruct *fpopts);
284 | 
285 | /** @cond */
286 | #define FUNSUFFIX f /* suffix shared by the function names declared above */
287 | #define FPTYPE float /* element type of the arrays in the prototypes above */
288 | #define INTTYPE uint32_t /* unsigned integer type as wide as NBITS */
289 | #define INTSUFFIX  U
290 | 
291 | #define DEFPREC   24
292 | #define DEFEMIN -126
293 | #define DEFEMAX  127
294 | #define NLEADBITS  9 /* NOTE(review): presumably 1 sign + 8 exponent bits */
295 | #define NBITS     32
296 | #define FULLMASK 0xFFFFFFFFU /* bits 0-31 */
297 | #define ABSMASK  0x7FFFFFFFU /* bits 0-30: everything but SIGNMASK */
298 | #define SIGNMASK 0x80000000U /* bit 31 */
299 | #define EXPMASK  0x7F800000U /* bits 23-30 */
300 | #define FRACMASK 0x007FFFFFU /* bits 0-22 */
301 | 
302 | #ifdef PCG_VARIANTS_H_INCLUDED
303 | #define MAXRAND 0xFFFFFFFFU
304 | #define INITRAND(seed) pcg32_srandom_r(seed, time(NULL), (intptr_t)seed);
305 | #define ADVANCERAND(seed, thread, nloc)                                        \
306 |   pcg32_advance_r(seed, thread * nloc - 1);
307 | #define GENRAND(seed) pcg32_random_r(seed)
308 | #else /* #ifdef PCG_VARIANTS_H_INCLUDED */
309 | #warning "The default C random number generator is being used."
310 | #warning "Please compile with -include <path-to-pcg_variants.h>"
311 | #warning "and link with -L <path-to-libpcg_random.a> -lpcg_random."
312 | #define MAXRAND 0x7FFFFFFFU /* NOTE(review): assumes RAND_MAX is 2^31-1 */
313 | #ifdef _OPENMP
314 | #define INITRAND(seed) *seed = time(NULL);
315 | #define GEN_SINGLE_RAND(seed) ((INTTYPE)rand_r((unsigned int *)seed))
316 | #else /* #ifdef _OPENMP */
317 | #define INITRAND(seed) srand(time(NULL));
318 | #define GEN_SINGLE_RAND(seed) ((INTTYPE)rand())
319 | #endif  /* #ifdef _OPENMP */
320 | #endif /* #ifdef PCG_VARIANTS_H_INCLUDED */
321 | 
322 | #include "cpfloat_threshold_binary32.h"
323 | #include "cpfloat_template.h"
324 | /** @endcond */
325 | 
326 | #endif /* #ifndef _CPFLOAT_BINARY32_ */
327 | 
328 | /*
329 |  * CPFloat - Custom Precision Floating-point numbers.
330 |  *
331 |  * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
332 |  *
333 |  * This library is free software; you can redistribute it and/or modify it under
334 |  * the terms of the GNU Lesser General Public License as published by the Free
335 |  * Software Foundation; either version 2.1 of the License, or (at your option)
336 |  * any later version.
337 |  *
338 |  * This library is distributed in the hope that it will be useful, but WITHOUT
339 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
340 |  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
341 |  * details.
342 |  *
343 |  * You should have received a copy of the GNU Lesser General Public License along
344 |  * with this library; if not, write to the Free Software Foundation, Inc., 51
345 |  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
346 |  */
347 | 


--------------------------------------------------------------------------------
/src/cpfloat_binary64.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
  2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
  3 | 
  4 | /**
  5 |  * @file cpfloat_binary64.h
  6 |  * @brief CPFloat functions for `double` arrays.
  7 |  */
  8 | 
  9 | #ifndef _CPFLOAT_BINARY64_
 10 | #define _CPFLOAT_BINARY64_
 11 | 
 12 | #include "cpfloat_definitions.h"
 13 | #include "cpfloat_docmacros.h"
 14 | 
 15 | /* Validation of floating-point parameters. */
 16 | doc_cpfloat_validate_optstruct(double, 26, 53, -1022, 1023)
 17 | static inline int cpfloat_validate_optstruct(const optstruct *fpopts);
 18 | 
 19 | /* Rounding functions. */
 20 | doc_cpfloat(double, 53, -1022, 1023)
 21 | static inline int cpfloat(double *X, const double *A, const size_t numelem,
 22 |                           optstruct *fpopts);
 23 | doc_cpfloat(double, 53, -1022, 1023)
 24 | static inline int cpf_fpround(double *X, const double *A,
 25 |                               const size_t numelem, optstruct *fpopts);
 26 | 
 27 | /* Elementary arithmetic operations. */
 28 | doc_cpf_bivariate(sum, \f$ X_i = A_i + B_i \f$, 53, -1022, 1023)
 29 | static inline int cpf_add(double *X, const double *A, const double *B,
 30 |                           const size_t numelem, optstruct *fpopts);
 31 | doc_cpf_bivariate(difference, \f$ X_i = A_i - B_i \f$, 53, -1022, 1023)
 32 | static inline int cpf_sub(double *X, const double *A, const double *B,
 33 |                           const size_t numelem, optstruct *fpopts);
 34 | doc_cpf_bivariate(product, \f$ X_i = A_i \times B_i \f$, 53, -1022, 1023)
 35 | static inline int cpf_mul(double *X, const double *A, const double *B,
 36 |                           const size_t numelem, optstruct *fpopts);
 37 | doc_cpf_bivariate(ratio, \f$ X_i = A_i / B_i \f$, 53, -1022, 1023)
 38 | static inline int cpf_div(double *X, const double *A, const double *B,
 39 |                           const size_t numelem, optstruct *fpopts);
 40 | 
 41 | /* Trigonometric functions. */
 42 | doc_cpf_univariate(trigonometric cosine, \f$ X_i = \cos(A_i) \f$, 53, -1022, 1023)
 43 | static inline int cpf_cos(double *X, const double *A,
 44 |                           const size_t numelem, optstruct *fpopts);
 45 | doc_cpf_univariate(trigonometric sine, \f$ X_i = \sin(A_i) \f$, 53, -1022, 1023)
 46 | static inline int cpf_sin(double *X, const double *A,
 47 |                           const size_t numelem, optstruct *fpopts);
 48 | doc_cpf_univariate(trigonometric tangent, \f$ X_i = \tan(A_i) \f$, 53, -1022, 1023)
 49 | static inline int cpf_tan(double *X, const double *A,
 50 |                           const size_t numelem, optstruct *fpopts);
 51 | 
 52 | doc_cpf_univariate(inverse trigonometric cosine,
 53 |                    \f$ X_i = \mathrm{acos(A_i)} \f$, 53, -1022, 1023)
 54 | static inline int cpf_acos(double *X, const double *A,
 55 |                            const size_t numelem, optstruct *fpopts);
 56 | doc_cpf_univariate(inverse trigonometric sine,
 57 |                    \f$ X_i = \mathrm{asin}(A_i) \f$, 53, -1022, 1023)
 58 | static inline int cpf_asin(double *X, const double *A,
 59 |                            const size_t numelem, optstruct *fpopts);
 60 | doc_cpf_univariate(inverse trigonometric tangent,
 61 |                    \f$ X_i = \mathrm{atan}(A_i) \f$, 53, -1022, 1023)
 62 | static inline int cpf_atan(double *X, const double *A,
 63 |                            const size_t numelem, optstruct *fpopts);
 64 | doc_cpf_bivariate(2-argument arctangent,
 65 |                   \f$ X_i = \mathrm{atan}(B_i / A_i) \f$, 53, -1022, 1023)
 66 | static inline int cpf_atan2(double *X, const double *A, const double *B,
 67 |                             const size_t numelem, optstruct *fpopts);
 68 | 
 69 | /* Hyperbolic functions. */
 70 | doc_cpf_univariate(hyperbolic cosine, \f$ X_i = \mathrm{cosh}(A_i) \f$,
 71 |                    53, -1022, 1023)
 72 | static inline int cpf_cosh(double *X, const double *A,
 73 |                            const size_t numelem, optstruct *fpopts);
 74 | doc_cpf_univariate(hyperbolic sine, \f$ X_i = \mathrm{sinh}(A_i) \f$, 53, -1022, 1023)
 75 | static inline int cpf_sinh(double *X, const double *A,
 76 |                            const size_t numelem, optstruct *fpopts);
 77 | doc_cpf_univariate(hyperbolic tangent , \f$ X_i = \mathrm{tanh}(A_i) \f$,
 78 |                    53, -1022, 1023)
 79 | static inline int cpf_tanh(double *X, const double *A,
 80 |                            const size_t numelem, optstruct *fpopts);
 81 | 
 82 | doc_cpf_univariate(inverse hyperbolic cosine,
 83 |                    \f$ X_i = \mathrm{arcosh}(A_i) \f$, 53, -1022, 1023)
 84 | static inline int cpf_acosh(double *X, const double *A,
 85 |                             const size_t numelem, optstruct *fpopts);
 86 | doc_cpf_univariate(inverse hyperbolic sine,
 87 |                    \f$ X_i = \mathrm{arsinh}(A_i) \f$, 53, -1022, 1023)
 88 | static inline int cpf_asinh(double *X, const double *A,
 89 |                             const size_t numelem, optstruct *fpopts);
 90 | doc_cpf_univariate(inverse hyperbolic tangent,
 91 |                    \f$ X_i = \mathrm{artanh}(A_i) \f$, 53, -1022, 1023)
 92 | static inline int cpf_atanh(double *X, const double *A,
 93 |                             const size_t numelem, optstruct *fpopts);
 94 | 
 95 | /* Exponentiation and logarithmic functions. */
 96 | doc_cpf_univariate(exponential, \f$ X_i = \exp(A_i) \f$, 53, -1022, 1023)
 97 | static inline int cpf_exp(double *X, const double *A,
 98 |                           const size_t numelem, optstruct *fpopts);
 99 | 
100 | doc_cpf_frexp(53, -1022, 1023)
101 | static inline int cpf_frexp(double *X, int *exp, const double *A,
102 |                             const size_t numelem, optstruct *fpopts);
103 | doc_cpf_scaling(2, 53, -1022, 1023)
104 | static inline int cpf_ldexp(double *X, const double *A, const int *exp,
105 |                             const size_t numelem, optstruct *fpopts);
106 | doc_cpf_univariate(natural logarithm, \f$ X_i = \log(A_i) \f$, 53, -1022, 1023)
107 | static inline int cpf_log(double *X, const double *A,
108 |                           const size_t numelem, optstruct *fpopts);
109 | doc_cpf_univariate(base-10 logarithm, \f$ X_i = \log_{10}(A_i) \f$, 53, -1022, 1023)
110 | static inline int cpf_log10(double *X, const double *A,
111 |                             const size_t numelem, optstruct *fpopts);
112 | doc_cpf_modf(53, -1022, 1023)
113 | static inline int cpf_modf(double *X, double *intpart, const double *A,
114 |                            const size_t numelem, optstruct *fpopts);
115 | doc_cpf_univariate(base-2 exponential, \f$ X_i = 2^{A_i} \f$, 53, -1022, 1023)
116 | static inline int cpf_exp2(double *X, const double *A,
117 |                            const size_t numelem, optstruct *fpopts);
118 | doc_cpf_univariate(exp(x) - 1, \f$ X_i = \exp(A_i) - 1 \f$, 53, -1022, 1023)
119 | static inline int cpf_expm1(double *X, const double *A,
120 |                             const size_t numelem, optstruct *fpopts);
121 | doc_cpf_ilogb(53, -1022, 1023)
122 | static inline int cpf_ilogb(int *exp, const double *A,
123 |                             const size_t numelem, optstruct *fpopts);
124 | 
125 | doc_cpf_univariate(natural logarithm of number shifted by one,
126 |                    \f$ X_i = \log(1+A_i) \f$, 53, -1022, 1023)
127 | static inline int cpf_log1p(double *X, const double *A,
128 |                             const size_t numelem, optstruct *fpopts);
129 | doc_cpf_univariate(base-2 logarithm, \f$ X_i = \log_2(A_i) \f$, 53, -1022, 1023)
130 | static inline int cpf_log2(double *X, const double *A,
131 |                            const size_t numelem, optstruct *fpopts);
132 | doc_cpf_univariate(base-FLT_RADIX logarithm of absolute value,
133 |                    \f$ X_i = \log(\lvert A_i \rvert) \f$, 53, -1022, 1023)
134 | static inline int cpf_logb(double *X, const double *A,
135 |                            const size_t numelem, optstruct *fpopts);
136 | doc_cpf_scaling(FLT\_RADIX, 53, -1022, 1023)
137 | static inline int cpf_scalbn(double *X, const double *A, const int *exp,
138 |                              const size_t numelem, optstruct *fpopts);
139 | doc_cpf_scaling(FLT\_RADIX, 53, -1022, 1023)
140 | static inline int cpf_scalbln(double *X, const double *A,
141 |                               const long int *exp, const size_t numelem,
142 |                               optstruct *fpopts);
143 | 
144 | /* Power functions. */
145 | doc_cpf_bivariate(real powers, \f$ X_i = A_i^{B_i} \f$, 53, -1022, 1023)
146 | static inline int cpf_pow(double *X, const double *A, const double *B,
147 |                           const size_t numelem, optstruct *fpopts);
148 | doc_cpf_univariate(square root, \f$ X_i = \sqrt{A_i} \f$, 53, -1022, 1023)
149 | static inline int cpf_sqrt(double *X, const double *A,
150 |                            const size_t numelem, optstruct *fpopts);
151 | doc_cpf_univariate(cube root, \f$ X_i = \sqrt[3]{A_i} \f$, 53, -1022, 1023)
152 | static inline int cpf_cbrt(double *X, const double *A,
153 |                            const size_t numelem, optstruct *fpopts);
154 | doc_cpf_bivariate(hypotenuse of a right-angle triangle,
155 |                   \f$ X_i = \sqrt{A_i^2 + B_i^2} \f$, 53, -1022, 1023)
156 | static inline int cpf_hypot(double *X, const double *A, const double *B,
157 |                             const size_t numelem, optstruct *fpopts);
158 | 
159 | /* Error and gamma functions. */
160 | doc_cpf_univariate(error function, \f$ X_i = \mathrm{erf}(A_i) \f$, 53, -1022, 1023)
161 | static inline int cpf_erf(double *X, const double *A,
162 |                           const size_t numelem, optstruct *fpopts);
163 | doc_cpf_univariate(complementary error function,
164 |                    \f$ X_i = \mathrm{erfc}(A_i) \f$, 53, -1022, 1023)
165 | static inline int cpf_erfc(double *X, const double *A,
166 |                            const size_t numelem, optstruct *fpopts);
167 | doc_cpf_univariate(gamma function, \f$ X_i = \Gamma(A_i) \f$, 53, -1022, 1023)
168 | static inline int cpf_tgamma(double *X, const double *A,
169 |                              const size_t numelem, optstruct *fpopts);
170 | doc_cpf_univariate(natural logarithm of absolute value of gamma function,
171 |                    \f$ X_i = \log(\lvert \Gamma(A_i) \rvert) \f$, 53, -1022, 1023)
172 | static inline int cpf_lgamma(double *X, const double *A,
173 |                              const size_t numelem, optstruct *fpopts);
174 | 
175 | /* Rounding and remainder functions. */
176 | doc_cpf_univariate(ceiling function, \f$ X_i = \lceil A_i \rceil \f$, 53, -1022, 1023)
177 | static inline int cpf_ceil(double *X, const double *A,
178 |                            const size_t numelem, optstruct *fpopts);
179 | doc_cpf_univariate(floor function, \f$ X_i = \lfloor A_i \rfloor \f$, 53, -1022, 1023)
180 | static inline int cpf_floor(double *X, const double *A,
181 |                             const size_t numelem, optstruct *fpopts);
182 | doc_cpf_bivariate(floating-point remainder of division,
183 |                   \f$ X_i = A_i \;\mathrm{mod}\; B_i \f$, 53, -1022, 1023)
184 | static inline int cpf_fmod(double *X, const double *A, const double *B,
185 |                            const size_t numelem, optstruct *fpopts);
186 | doc_cpf_univariate(integer truncation, \f$ X_i = \mathrm{trunc}(A_i) \f$,
187 |                    53, -1022, 1023)
188 | static inline int cpf_trunc(double *X, const double *A,
189 |                             const size_t numelem, optstruct *fpopts);
190 | 
191 | doc_cpf_univariate(closest integer (with round-to-nearest),
192 |                    \f$ X_i = \mathrm{round}(A_i) \f$, 53, -1022, 1023)
193 | static inline int cpf_round(double *X, const double *A,
194 |                             const size_t numelem, optstruct *fpopts);
195 | doc_cpf_univariate(closest integer (with round-to-nearest),
196 |                    \f$ X_i = \mathrm{round}(A_i) \f$, 53, -1022, 1023)
197 | static inline int cpf_lround(long *X, const double *A,
198 |                              const size_t numelem, optstruct *fpopts);
199 | doc_cpf_univariate_nobitflip(closest integer (with round-to-nearest),
200 |                              \f$ X_i = \mathrm{round}(A_i) \f$, 53, -1022, 1023)
201 | static inline int cpf_llround(long long *X, const double *A,
202 |                               const size_t numelem, optstruct *fpopts);
203 | 
204 | doc_cpf_rint(53, -1022, 1023)
205 | static inline int cpf_rint(double *X, int *exception, const double *A,
206 |                            const size_t numelem, optstruct *fpopts);
207 | doc_cpf_rint(53, -1022, 1023)
208 | static inline int cpf_lrint(long *X, int *exception, const double *A,
209 |                             const size_t numelem, optstruct *fpopts);
210 | doc_cpf_rint(53, -1022, 1023)
211 | static inline int cpf_llrint(long long *X, int *exception, const double *A,
212 |                              const size_t numelem, optstruct *fpopts);
213 | doc_cpf_nearbyint(53, -1022, 1023)
214 | static inline int cpf_nearbyint(double *X, const double *A,
215 |                                 const size_t numelem, optstruct *fpopts);
216 | doc_cpf_bivariate(remainder of the floating point division,
217 |                   \f$ X_i = A_i - k \times B_i \f$
218 |                   for largest \f$ k \f$ such that \f$ k \times B_i < A_i \f$,
219 |                   53, -1022, 1023)
220 | static inline int cpf_remainder(double *X, const double *A, const double *B,
221 |                                 const size_t numelem, optstruct *fpopts);
222 | 
223 | doc_cpf_remquo(53, -1022, 1023)
224 | static inline int cpf_remquo(double *X, int *quot,
225 |                              const double *A, const double *B,
226 |                              const size_t numelem, optstruct *fpopts);
227 | 
228 | /* Floating-point manipulation functions. */
229 | doc_cpf_bivariate(number from magnitude and sign,
230 |                   \f$ X_i = \mathrm{sign}(B_i) * abs(A_i) \f$, 53, -1022, 1023)
231 | static inline int cpf_copysign(double *X, const double *A, const double *B,
232 |                                const size_t numelem, optstruct *fpopts);
233 | doc_cpf_bivariate(next floating-point number in specified direction,
234 |                   the floating-point number closest to \f$ A_i \f$ in the
235 |                   direction of \f$ B_i \f$, 53, -1022, 1023)
236 | static inline int cpf_nextafter(double *X, const double *A, const double *B,
237 |                                 const size_t numelem, optstruct *fpopts);
238 | doc_cpf_bivariate(next floating-point number in specified direction,
239 |                   the floating-point number closest to \f$ A_i \f$ in the
240 |                   direction of \f$ B_i \f$, 53, -1022, 1023)
241 | static inline int cpf_nexttoward(double *X, const double *A,
242 |                                  const long double *B, const size_t numelem,
243 |                                  optstruct *fpopts);
244 | 
245 | /* Minimum, maximum, difference functions. */
246 | doc_cpf_bivariate(positive difference,
247 |                   \f$ X_i = \mathrm{max}(A_i - B_i, 0) \f$, 53, -1022, 1023)
248 | static inline int cpf_fdim(double *X, const double *A, const double *B,
249 |                            const size_t numelem, optstruct *fpopts);
250 | doc_cpf_bivariate(element-wise maximum, \f$ X_i = \mathrm{max}(A_i, B_i) \f$,
251 |                   53, -1022, 1023)
252 | static inline int cpf_fmax(double *X, const double *A, const double *B,
253 |                            const size_t numelem, optstruct *fpopts);
254 | doc_cpf_bivariate(element-wise minimum, \f$ X_i = \mathrm{min}(A_i, B_i) \f$,
255 |                   53, -1022, 1023)
256 | static inline int cpf_fmin(double *X, const double *A, const double *B,
257 |                            const size_t numelem, optstruct *fpopts);
258 | 
259 | /* Classification. */
260 | doc_cpf_fpclassify(53, -1022, 1023)
261 | static inline int cpf_fpclassify(int *r, const double *A,
262 |                                  const size_t numelem, optstruct *fpopts);
263 | doc_cpf_isfun(finite, 53, -1022, 1023)
264 | static inline int cpf_isfinite(int *r, const double *A,
265 |                                const size_t numelem, optstruct *fpopts);
266 | doc_cpf_isfun(infinite, 53, -1022, 1023)
267 | static inline int cpf_isinf(int *r, const double *A,
268 |                             const size_t numelem, optstruct *fpopts);
269 | doc_cpf_isfun(not a number, 53, -1022, 1023)
270 | static inline int cpf_isnan(int *r, const double *A,
271 |                             const size_t numelem, optstruct *fpopts);
272 | doc_cpf_isfun(normal, 53, -1022, 1023)
273 | static inline int cpf_isnormal(int *r, const double *A,
274 |                                const size_t numelem, optstruct *fpopts);
275 | 
276 | /* Other functions. */
277 | doc_cpf_univariate(absolute value, \f$ X_i = \lvert A_i \rvert \f$, 53, -1022, 1023)
278 | static inline int cpf_fabs(double *X, const double *A,
279 |                            const size_t numelem, optstruct *fpopts);
280 | doc_cpf_trivariate(fused multiply-add , \f$ X_i = A_i \times B_i + C_i \f$,
281 |                    53, -1022, 1023)
282 | static inline int cpf_fma(double *X, const double *A, const double *B,
283 |                           const double *C, const size_t numelem,
284 |                           optstruct *fpopts);
285 | 
286 | /** @cond */
287 | #define FUNSUFFIX
288 | #define FPTYPE double
289 | #define INTTYPE uint64_t
290 | #define INTSUFFIX ULL
291 | #define DEFPREC 53
292 | #define DEFEMIN -1022
293 | #define DEFEMAX 1023
294 | #define NLEADBITS 12
295 | #define NBITS 64
296 | #define FULLMASK 0xFFFFFFFFFFFFFFFFULL
297 | #define ABSMASK  0x7FFFFFFFFFFFFFFFULL
298 | #define SIGNMASK 0x8000000000000000ULL
299 | #define EXPMASK  0x7FF0000000000000ULL
300 | #define FRACMASK 0x000FFFFFFFFFFFFFULL
301 | 
302 | #ifdef PCG_VARIANTS_H_INCLUDED
303 | #define MAXRAND 0xFFFFFFFFFFFFFFFFULL
304 | #define INITRAND(seed) pcg64_srandom_r(seed, time(NULL), (intptr_t)seed);
305 | #define ADVANCERAND(seed, thread, nloc) pcg64_advance_r(seed, thread *nloc - 1);
306 | #define GENRAND(seed) pcg64_random_r(seed)
307 | #else /* #ifdef PCG_VARIANTS_H_INCLUDED */
308 | #warning "The default C random number generator is being used."
309 | #warning "Please compile with -include <path-to-pcg_variants.h>"
310 | #warning "and link with -L <path-to-libpcg_random.a> -lpcg_random."
311 | #define MAXRAND 0x3FFFFFFFFFFFFFFFULL
312 | #ifdef _OPENMP
313 | #define INITRAND(seed) *seed = time(NULL);
314 | #define GEN_SINGLE_RAND(seed)                                                  \
315 |   ((INTTYPE)rand_r((unsigned int *)seed) +                                     \
316 |    ((INTTYPE)rand_r((unsigned int *)seed) << 31))
317 | #else /*# ifdef _OPENMP */
318 | #define INITRAND(seed) srand(time(NULL));
319 | #define GEN_SINGLE_RAND(seed) ((INTTYPE)rand() + ((INTTYPE)rand() << 31))
320 | #endif  /*# ifdef _OPENMP */
321 | #endif /* #ifdef PCG_VARIANTS_H_INCLUDED */
322 | 
323 | #include "cpfloat_threshold_binary64.h"
324 | #include "cpfloat_template.h"
325 | /** @endcond */
326 | 
327 | #endif  /* #ifndef _CPFLOAT_BINARY64_ */
328 | 
329 | /*
330 |  * CPFloat - Custom Precision Floating-point numbers.
331 |  *
332 |  * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
333 |  *
334 |  * This library is free software; you can redistribute it and/or modify it under
335 |  * the terms of the GNU Lesser General Public License as published by the Free
336 |  * Software Foundation; either version 2.1 of the License, or (at your option)
337 |  * any later version.
338 |  *
339 |  * This library is distributed in the hope that it will be useful, but WITHOUT
340 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
341 |  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
342 |  * details.
343 |  *
344 |  * You should have received a copy of the GNU Lesser General Public License along
345 |  * with this library; if not, write to the Free Software Foundation, Inc., 51
346 |  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
347 |  */
348 | 


--------------------------------------------------------------------------------
/src/cpfloat_definitions.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
  2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
  3 | 
  4 | /**
  5 |  * @file cpfloat_definitions.h
  6 |  * @brief Definition of CPFloat data types.
  7 |  *
  8 |  * @details This file includes all the external header files used by CPFloat,
  9 |  * defines the enumerated types
 10 |  *
 11 |  * + @ref cpfloat_explim_t,
 12 |  * + @ref cpfloat_infinity_t,
 13 |  * + @ref cpfloat_rounding_t,
 14 |  * + @ref cpfloat_saturation_t,
 15 |  * + @ref cpfloat_softerr_t,
 16 |  * + @ref cpfloat_subnormal_t,
 17 |  *
 18 |  * and the structured data type @ref optstruct. It is not necessary to include
 19 |  * this file in order to use CPFloat, as it is already included by @ref
 20 |  * cpfloat_binary32.h and by @ref cpfloat_binary64.h.
 21 |  */
 22 | 
 23 | #ifndef _CHOPFAST_DEFINITIONS_
 24 | #define _CHOPFAST_DEFINITIONS_
 25 | 
 26 | #include <stdlib.h>
 27 | #include <stdint.h>
 28 | 
 29 | #include <time.h>
 30 | #include <math.h>
 31 | #include <fenv.h>
 32 | #include <float.h>
 33 | #include <limits.h>
 34 | #include <string.h>
 35 | 
 36 | /* #include "pcg_variants.h" */
 37 | 
 38 | #if defined(_OPENMP)
 39 | #include <omp.h>
 40 | #endif /* #if defined(_OPENMP) */
 41 | 
 42 | /**
 43 |  * @brief Prefix for all library function names.
 44 |  */
 45 | #define MAINFUNNAME cpf
 46 | 
 47 | /**
 48 |  * @brief Data type for specifying number of precision bits in target format.
 49 |  */
 50 | typedef unsigned int cpfloat_precision_t;
 51 | 
 52 | /**
 53 |  * @brief Data type for specifying exponents in target format.
 54 |  */
 55 | typedef int cpfloat_exponent_t;
 56 | 
 57 | /**
 58 |  * @brief Extended exponent range modes available in CPFloat.
 59 |  */
 60 | typedef enum {
 61 |   /** Use exponent range of storage format. */
 62 |   CPFLOAT_EXPRANGE_STOR = 0,
 63 |   /** Use exponent range of target format. */
 64 |   CPFLOAT_EXPRANGE_TARG = 1
 65 | } cpfloat_explim_t;
 66 | 
 67 | /**
 68 |  * @brief Infinity support modes available in CPFloat.
 69 |  */
 70 | typedef enum {
 71 |   /** Replace infinities with NaNs in target format. */
 72 |   CPFLOAT_INF_NO = 0,
 73 |   /** Use infinities in target format. */
 74 |   CPFLOAT_INF_USE = 1,
 75 | } cpfloat_infinity_t;
 76 | 
 77 | /**
 78 |  * @brief Rounding modes available in CPFloat.
 79 |  */
 80 | typedef enum {
 81 |   /** Use round-to-nearest with ties-to-away. */
 82 |   CPFLOAT_RND_NA = -1,
 83 |   /** Use round-to-nearest with ties-to-zero. */
 84 |   CPFLOAT_RND_NZ =  0,
 85 |   /** Use round-to-nearest with ties-to-even. */
 86 |   CPFLOAT_RND_NE =  1,
 87 |   /** Use round-toward-+&infin;. */
 88 |   CPFLOAT_RND_TP =  2,
 89 |   /** Use round-toward-&minus;&infin;. */
 90 |   CPFLOAT_RND_TN =  3,
 91 |   /** Use round toward zero */
 92 |   CPFLOAT_RND_TZ =  4,
 93 |   /** Stochastic rounding with proportional probabilities. */
 94 |   CPFLOAT_RND_SP =  5,
 95 |   /** Stochastic rounding with equal probabilities. */
 96 |   CPFLOAT_RND_SE =  6,
 97 |   /** Use round-to-odd. */
 98 |   CPFLOAT_RND_OD =  7,
 99 |   /** Do not perform rounding. */
100 |   CPFLOAT_NO_RND =  8,
101 | } cpfloat_rounding_t;
102 | 
103 | /**
104 |  * @brief Saturation modes available in CPFloat.
105 |  */
106 | typedef enum {
107 |   /** Use standard arithmetic. */
108 |   CPFLOAT_SAT_NO = 0,
109 |   /** Use saturation arithmetic. */
110 |   CPFLOAT_SAT_USE = 1,
111 | } cpfloat_saturation_t;
112 | 
113 | /**
114 |  * @brief Soft fault simulation modes available in CPFloat.
115 |  */
116 | typedef enum {
117 |   /** Do not introduce soft errors. */
118 |   CPFLOAT_SOFTERR_NO = 0,
119 |   /** Soft errors in fraction of target-format floating-point representation.*/
120 |   CPFLOAT_SOFTERR_FRAC = 1,
121 |   /** Soft errors anywhere in target-format floating-point representation. */
122 |   CPFLOAT_SOFTERR_FP = 2
123 | } cpfloat_softerr_t;
124 | 
125 | /**
126 |  * @brief Subnormal support modes available in CPFloat.
127 |  */
128 | typedef enum {
129 |   /** Round subnormal numbers according to the current rounding mode. */
130 |   CPFLOAT_SUBN_RND = 0,
131 |   /** Support storage of subnormal numbers. */
132 |   CPFLOAT_SUBN_USE = 1
133 | } cpfloat_subnormal_t;
134 | 
135 | /** @cond */
136 | #ifdef PCG_VARIANTS_H_INCLUDED
137 | #define CPFLOAT_BITSEEDTYPE pcg32_random_t
138 | #define CPFLOAT_RANDSEEDTYPEF pcg32_random_t
139 | #define CPFLOAT_RANDSEEDTYPE pcg64_random_t
140 | #else /* #ifdef PCG_VARIANTS_H_INCLUDED */
141 | #define CPFLOAT_BITSEEDTYPE unsigned int
142 | #define CPFLOAT_RANDSEEDTYPEF size_t
143 | #define CPFLOAT_RANDSEEDTYPE size_t
144 | #endif /* #ifdef PCG_VARIANTS_H_INCLUDED */
145 | /** @endcond */
146 | 
147 | /**
148 |  * @brief Internal state of the pseudo-random bit generator.
149 |  */
150 | typedef CPFLOAT_BITSEEDTYPE cpfloat_bitseed_t;
151 | 
152 | /**
153 |  * @brief Internal state of the pseudo-random `float` generator.
154 |  */
155 | typedef CPFLOAT_RANDSEEDTYPEF cpfloat_randseedf_t;
156 | 
157 | /**
158 |  * @brief Internal state of the pseudo-random `double` generator.
159 |  */
160 | typedef CPFLOAT_RANDSEEDTYPE cpfloat_randseed_t;
161 | 
162 | /**
163 |  * @brief Specify target format, rounding mode, and occurrence of soft faults.
164 |  *
165 |  * @details The fields of this structure determine the parameters of the
166 |  * floating-point format to be simulated, the rounding mode to be used during
167 |  * the conversion process, and whether soft faults striking the rounded numbers
168 |  * should be simulated.
169 |  */
170 | typedef struct {
171 |   /**
172 |    * @brief String specifying target format.
173 |    *
174 |    * @details This field is defined only for compatibility with the MATLAB
175 |    * function `chop`, and its value is used by the MEX interface but ignored by
176 |    * the pure C implementation.
177 |    *
178 |    * Possible values are:
179 |    * + `q43`, `e4m3`, `E4M3` for E4M3 (4-bit exponent, 4-bit significand);
180 |    * + `q52`, `e5m2`, `E5M2` for E5M2 (5-bit exponent, 3-bit significand);
181 |    * + `b`, `bf16`, `bfloat16` for bfloat16;
182 |    * + `h`, `fp16`, `binary16`, `half` for binary16;
183 |    * + `t`, `tf32`, `TensorFloat-32`, for TensorFloat-32;
184 |    * + `s`, `fp32`, `binary32`, `single` for binary32;
185 |    * + `d`, `fp64`, `binary64`, `double` for binary64; and
186 |    * + `custom`, `c` for a format specifying `precision`, `emin`, and `emax`.
187 |    *
188 |    * The validation functions cpfloatf_validate_optstruct() and
189 |    * cpfloat_validate_optstruct() return a warning code if this field is not set
190 |    * to either the empty string or one of the strings above.
191 |    */
192 |   char format [15];
193 |   /**
194 |    * @brief Bits of precision of target format.
195 |    *
196 |    * @details The maximum values allowed are 24 and 53 if the storage format is
197 |    * `float` or `double`, respectively.
198 |    *
199 |    * For compatibility with the MATLAB function `chop`, in the MEX interface the
200 |    * number of digits of precision for `float` and `double` cannot exceed 11 and
201 |    * 25, respectively, when using round-to-nearest, and cannot exceed 23 and
202 |    * 52, respectively, for other rounding modes. The C implementation does not
203 |    * have any such restrictions, but using larger values can cause double
204 |    * rounding.
205 |    *
206 |    * The validation functions cpfloatf_validate_optstruct() and
207 |    * cpfloat_validate_optstruct() return an error code if the required number of
208 |    * digits is larger than the maximum allowed by the storage format, and a
209 |    * warning code if the required number of digits is above the maximum allowed
210 |    * by the MEX interface.
211 |    */
212 |   cpfloat_precision_t precision;
213 |   /**
214 |    * @brief Minimum exponent of target format.
215 |    *
216 |    * @details The minimum values allowed are -126 and -1022 if the storage
217 |    * format is `float` or `double`, respectively. If a smaller value is chosen,
218 |    * it is changed to the minimum allowed value without warning. This field is
219 |    * ignored unless `explim` is set to `CPFLOAT_EXPRANGE_TARG`.
220 |    *
221 |    * The validation functions cpfloatf_validate_optstruct() and
222 |    * cpfloat_validate_optstruct() return an error code if the required minimum
223 |    * exponent is smaller than the minimum allowed by the storage format.
224 |    */
225 |   cpfloat_exponent_t emin;
226 |   /**
227 |    * @brief Maximum exponent of target format.
228 |    *
229 |    * @details The maximum values allowed are 127 and 1023 if the storage format
230 |    * is `float` or `double`, respectively. If a larger value is chosen, it is
231 |    * changed to the maximum allowed value without warning. This field is ignored
232 |    * unless `explim` is set to `CPFLOAT_EXPRANGE_TARG`.
233 |    *
234 |    * The validation functions cpfloatf_validate_optstruct() and
235 |    * cpfloat_validate_optstruct() return an error code if the required maximum
236 |    * exponent is larger than the maximum allowed by the storage format.
237 |    */
238 |   cpfloat_exponent_t emax;
239 |   /**
240 |    * @brief Support for extended exponents in target format.
241 |    *
242 |    * @details The upper limit of the exponent range is set to `emax` if this
243 |    * field is set to `CPFLOAT_EXPRANGE_TARG`, and to the upper limit of the
244 |    * exponent range of the storage format if it is set to
245 |    * `CPFLOAT_EXPRANGE_STOR`.
246 |    */
247 |   cpfloat_explim_t explim;
248 |   /**
249 |    * @brief Support for infinities in target format.
250 |    *
251 |    * @details If this field is set to `CPFLOAT_INF_USE`, the target format
252 |    * supports signed infinities. If the field is set to `CPFLOAT_INF_NO`,
253 |    * infinities are replaced with a quiet NaN.
254 |    */
255 |   cpfloat_infinity_t infinity;
256 |   /**
257 |    * @brief Rounding mode to be used for the conversion.
258 |    *
259 |    * @details The values of this field are consistent with those of the MATLAB
260 |    *function `chop`.
261 |    *
262 |    * Possible values are:
263 |    * + CPFLOAT_RND_NA for round-to-nearest with ties-to-away;
264 |    * + CPFLOAT_RND_NZ for round-to-nearest with ties-to-zero;
265 |    * + CPFLOAT_RND_NE for round-to-nearest with ties-to-even;
266 |    * + CPFLOAT_RND_TP for round-to-+&infin;
267 |    * + CPFLOAT_RND_TN for round-to-&minus;&infin;
268 |    * + CPFLOAT_RND_TZ for round-to-zero;
269 |    * + CPFLOAT_RND_SP for stochastic rounding with proportional probabilities;
270 |    * + CPFLOAT_RND_SE for stochastic rounding with equal probabilities;
271 |    * + CPFLOAT_RND_OD for round-to-odd; and
272 |    * + CPFLOAT_NO_RND for no rounding.
273 |    *
274 |    * No rounding is performed if this field is set to any other value.
275 |    *
276 |    * The validation functions cpfloatf_validate_optstruct() and
277 |    * cpfloat_validate_optstruct() return a warning code if a value other than
278 |    * those in the list above is specified.
279 |    */
280 |   cpfloat_rounding_t round;
281 |   /**
282 |    * @brief Support for saturation arithmetic in target format.
283 |    *
284 |    * @details If this field is set to `CPFLOAT_SAT_USE`, numbers too large to be
285 |    * represented in the target format are clamped to the largest floating-point
286 |    * number of appropriate sign. If this field is set to `CPFLOAT_SAT_NO`,
287 |    * numbers that are too large to be represented are rounded to either the
288 |    * largest normal value of appropriate sign or the closest infinity according
289 |    * to the current rounding mode.
290 |    */
291 |   cpfloat_saturation_t saturation;
292 |   /**
293 |    * @brief Support for subnormal numbers in target format.
294 |    *
295 |    * @details Subnormal numbers are supported if this field is set to
296 |    * `CPFLOAT_SUBN_USE` and rounded to a normal number according to the current
297 |    * rounding mode if it is set to `CPFLOAT_SUBN_RND`.
298 |    */
299 |   cpfloat_subnormal_t subnormal;
300 | 
301 |   /* Bit flips. */
302 |   /**
303 |    * @brief Support for soft errors.
304 |    *
305 |    * @details If this field is not set to `CPFLOAT_SOFTERR_NO`, a single bit
306 |    * flip is introduced in the binary floating-point representation of the
307 |    * rounded result with probability `p`. The bit flip can strike only the
308 |    * target-format fraction (significand without the implicit bit) if this field
309 |    * is set to `CPFLOAT_SOFTERR_FRAC` and any bit in the target-format
310 |    * representation if it is set to `CPFLOAT_SOFTERR_FP`.
311 |    */
312 |   cpfloat_softerr_t flip;
313 |   /**
314 |    * @brief Probability of bit flips.
315 |    *
316 |    * @details The probability of flipping a single bit in the binary
317 |    * floating-point representation or in the fraction (significand without the
318 |    * implicit bit) of a number after rounding. This field is ignored if `flip`
319 |    * is set to `CPFLOAT_SOFTERR_NO`.
320 |    *
321 |    * The validation functions cpfloatf_validate_optstruct() and
322 |    * cpfloat_validate_optstruct() return an error code if `flip` is set to
323 |    * `CPFLOAT_SOFTERR_FP` or `CPFLOAT_SOFTERR_FRAC` and this field does not
324 |    * contain a number in the interval [0,1].
325 |    */
326 |   double p;
327 | 
328 |   /* Internal: state of pseudo-random number generator. */
329 |   /**
330 |    * @brief Internal state of pseudo-random number generator for single bits.
331 |    *
332 |    * @details This field is used to store the internal state of the random
333 |    *  number generator used when @ref round is set to `CPFLOAT_RND_SE`. This
334 |    *  value should be initialized to `NULL`.
335 |    */
336 |   cpfloat_bitseed_t *bitseed;
337 |   /**
338 |    * @brief Internal state of pseudo-random number generator for `float`s.
339 |    *
340 |    * @details This field is used to store the internal state of the random
341 |    *  number generator used when @ref round is set to `CPFLOAT_RND_SP` and
342 |    *  `float` arrays are used. This value should be initialized to `NULL`.
343 |    */
344 |   cpfloat_randseedf_t *randseedf;
345 |   /**
346 |    * @brief Internal state of pseudo-random number generator for `double`s.
347 |    *
348 |    * @details This field is used to store the internal state of the random
349 |    *  number generator used when @ref round is set to `CPFLOAT_RND_SP` and
350 |    *  `double` arrays are used. This value should be initialized to `NULL`.
351 |    */
352 |   cpfloat_randseed_t *randseed;
353 | } optstruct;
354 | 
355 | /**
356 |  @brief Allocate @ref optstruct struct to store parameters of target format.
357 | 
358 |  @details This function allocates and initializes an @ref optstruct struct.
359 | 
360 |  @return The function returns a pointer to the allocated memory if the
361 |  execution was successful, and @b NULL otherwise.<p/>
362 |  */
363 | optstruct *init_optstruct();
364 | 
365 | /**
366 |  @brief Free the memory underlying an @ref optstruct struct.
367 | 
368 |  @details This function attempts to free all the memory used by @p fpopts.
369 | 
370 |  @param[in] fpopts Pointer to @ref optstruct struct to be deallocated.
371 | 
372 |  @return The function returns @p 0 unless @p fpopts is set to @p NULL,
373 |  in which case it returns @p -1.<p/>
374 |  */
375 | int free_optstruct(optstruct *fpopts);
376 | 
377 | #endif /* #ifndef _CHOPFAST_DEFINITIONS_ */
378 | 
379 | /*
380 |  * CPFloat - Custom Precision Floating-point numbers.
381 |  *
382 |  * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
383 |  *
384 |  * This library is free software; you can redistribute it and/or modify it under
385 |  * the terms of the GNU Lesser General Public License as published by the Free
386 |  * Software Foundation; either version 2.1 of the License, or (at your option)
387 |  * any later version.
388 |  *
389 |  * This library is distributed in the hope that it will be useful, but WITHOUT
390 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
391 |  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
392 |  * details.
393 |  *
394 |  * You should have received a copy of the GNU Lesser General Public License along
395 |  * with this library; if not, write to the Free Software Foundation, Inc., 51
396 |  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
397 |  */
398 | 


--------------------------------------------------------------------------------
/src/cpfloat_docmacros.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
  2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
  3 | 
  4 | #ifndef _CPFLOAT_DOCMACROS_
  5 | #define _CPFLOAT_DOCMACROS_
  6 | 
  7 | #define doc_cpfloat_validate_optstruct(FPTYPE, PMIN, PMAX, EMIN, EMAX) \
  8 | /** \
  9 |  @brief Validate fields of @ref optstruct struct for `FPTYPE` storage format. \
 10 |  \
 11 |  @details This function checks whether the parameters stored in @p fpopts are \
 12 |  valid when `FPTYPE` is used as storage format. \
 13 |  \
 14 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
 15 |  and the probability of soft errors striking the rounded values. \
 16 |  \
 17 |  @return The function returns @b 0 if all the parameters are valid, and a \
 18 |  positive number if at least one of them is not. A negative number should be \
 19 |  understood as a warning, and indicates that a CPFloat function @em will \
 20 |  return @p 0 if @p fpopts is used as fourth argument, but might not perform as \
 21 |  intended.<p/>\
 22 |  \
 23 |  Possible return values are: \
 24 |  \li @b -4 The rounding mode specified in @p fpopts->round does not correspond \
 25 |  to a valid choice, thus no rounding will be performed. \
 26 |  \li @b -2 The required number of digits in @p fpopts->precision is between \
 27 |  PMIN and PMAX inclusive, which might cause double rounding if round-to-\
 28 |  nearest is used. \
 29 |  \li @b -1 The string in @p fpopts->format is not valid. This is not an error \
 30 |  as this value is not used by the C functions, but only by the MEX interface. \
 31 |  \li @b  0 All the parameters in @p fpopts are valid. \
 32 |  \li @b  2 The required number of digits in @p fpopts->precision is larger \
 33 |  than PMAX, the number of significant digits in a variable of type `FPTYPE`. \
 34 |  \li @b  3 The required minimum exponent in @p fpopts->emin is smaller than \
 35 |  EMIN, the smallest possible exponent for a variable of type `FPTYPE`, or \
 36 |  the required maximum exponent in @p fpopts->emax is larger than        \
 37 |  EMAX, the largest possible exponent for a variable of type `FPTYPE`. \
 38 |  \li @b  5 The value of @p fpopts->flip indicates that soft errors should be \
 39 |  introduced, but @p fpopts->p is not a real number between 0 and 1 and thus \
 40 |  does not represent a valid probability.<p/>\
 41 |  \
 42 |  Errors take precedence over warnings, thus a nonpositive return value \
 43 |  implies no errors. In case of multiple issues, the return value is that of \
 44 |  the first error (or warning, if no error is present) encountered in the order \
 45 |  given in the list above. \
 46 |  */
 47 | 
 48 | #define doc_cpfloat(FPTYPE, PMAX, EMIN, EMAX) \
 49 | /** \
 50 |  @brief Round `FPTYPE` array to lower precision. \
 51 |  \
 52 |  @details If the function executes without errors, then the array @p X \
 53 |  contains the @p numelem entries of the array @p A rounded to a \
 54 |  lower-precision target format. The parameters of the target format and the \
 55 |  rounding mode to be used are encoded in @p fpopts. If required, the function \
 56 |  flips one bit in some of the entries of @p X.<p/>\
 57 |  \
 58 |  If OpenMP support is specified at compile time, several OpenMP threads are \
 59 |  used if @p numelem is large enough. This parameter is machine-dependent. <p/>\
 60 |  \
 61 |  @param[out] X Array of rounded values. \
 62 |  @param[in] A Input array. \
 63 |  @param[in] numelem Number of elements in @p X and @p A. \
 64 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
 65 |  and the probability of soft errors striking the rounded values. \
 66 |  \
 67 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
 68 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is larger \
 69 |  than EMAX, and @b 0 otherwise.                                    \
 70 |  */
 71 | 
 72 | #define doc_cpf_univariate(MATHFUN, FUNSTRING, PMAX, EMIN, EMAX) \
 73 | /** \
 74 |  @brief Compute MATHFUN rounded to lower precision. \
 75 |  \
 76 |  @details If the function executes without errors, then
 77 |  FUNSTRING \
 78 |  rounded to a lower-precision target format. The parameters of the \
 79 |  target format and the rounding mode to be used are encoded in @p fpopts. If \
 80 |  required, the function flips one bit in some of the entries of @p X.<p/>\
 81 |  \
 82 |  If OpenMP support is specified at compile time, several OpenMP threads are \
 83 |  used if @p numelem is large enough. This parameter is machine dependent.\
 84 |  \
 85 |  @param[out] X Array of rounded values. \
 86 |  @param[in] A Input array. \
 87 |  @param[in] numelem Number of elements in @p X and @p A. \
 88 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
 89 |  and the probability of soft errors striking the rounded values. \
 90 |  \
 91 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
 92 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is larger \
 93 |  than EMAX, and @b 0 otherwise.                                    \
 94 | */
 95 | 
 96 | #define doc_cpf_univariate_nobitflip(MATHFUN, FUNSTRING, PMAX, EMIN, EMAX) \
 97 | /** \
 98 |  @brief Compute MATHFUN in lower precision. \
 99 |  \
100 |  @details If the function executes without errors, then
101 |  FUNSTRING \
102 |  rounded to a lower-precision target format. The parameters of the \
103 |  target format and the rounding mode to be used are encoded in @p fpopts.<p/>\
104 |  \
105 |  If OpenMP support is specified at compile time, several OpenMP threads are \
106 |  used if @p numelem is large enough. This parameter is machine dependent.\
107 |  \
108 |  @param[out] X Array of rounded values. \
109 |  @param[in] A Input array. \
110 |  @param[in] numelem Number of elements in @p X and @p A. \
111 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
112 |  and the probability of soft errors striking the rounded values. \
113 |  \
114 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
115 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is larger \
116 |  than EMAX, and @b 0 otherwise.                                    \
117 | */
118 | 
119 | #define doc_cpf_bivariate(MATHFUN, FUNSTRING, PMAX, EMIN, EMAX) \
120 | /** \
121 |  @brief Compute MATHFUN in lower precision. \
122 |  \
123 |  @details If the function executes without errors, then \
124 |   FUNSTRING \
125 |  rounded to a lower-precision target format. The parameters of the \
126 |  target format and the rounding mode to be used are encoded in @p fpopts. If \
127 |  required, the function flips one bit in some of the entries of @p X.<p/>\
128 |  \
129 |  If OpenMP support is specified at compile time, several OpenMP threads are \
130 |  used if @p numelem is large enough. This parameter is machine dependent.\
131 |  \
132 |  @param[out] X Array of rounded values. \
133 |  @param[in] A Input array. \
134 |  @param[in] B Input array. \
135 |  @param[in] numelem Number of elements in @p X, @p A, and @p B. \
136 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
137 |  and the probability of soft errors striking the rounded values. \
138 |  \
139 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
140 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is larger \
141 |  than EMAX, and @b 0 otherwise.                                    \
142 | */
143 | 
144 | #define doc_cpf_trivariate(MATHFUN, FUNSTRING, PMAX, EMIN, EMAX) \
145 | /** \
146 |  @brief Compute MATHFUN in lower precision. \
147 |  \
148 |  @details If the function executes without errors, then \
149 |  FUNSTRING \
150 |  rounded to a lower-precision target format. The parameters of the \
151 |  target format and the rounding mode to be used are encoded in @p fpopts. If \
152 |  required, the function flips one bit in some of the entries of @p X.<p/>\
153 |  \
154 |  If OpenMP support is specified at compile time, several OpenMP threads are \
155 |  used if @p numelem is large enough. The size threshold is machine dependent. \
156 |  \
157 |  @param[out] X Array of rounded values. \
158 |  @param[in] A Input array. \
159 |  @param[in] B Input array. \
160 |  @param[in] C Input array. \
161 |  @param[in] numelem Number of elements in @p X, @p A, @p B, and @p C. \
162 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
163 |  and the probability of soft errors striking the rounded values. \
164 |  \
165 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
166 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
167 |  larger than EMAX, and @b 0 otherwise. \
168 | */
169 | 
170 | #define doc_cpf_frexp(PMAX, EMIN, EMAX) \
171 | /** \
172 |  @brief Exponent and normalized fraction of rounded floating-point number. \
173 |  \
174 |  @details If the function executes without errors, then: \
175 |  \li if \f$ A_i \f$ is 0, then \f$ X_i \f$ and \f$ \exp_i \f$ are both set to \
176 |  zero; \
177 |  \li otherwise, \f$ X_i \f$ is a value in the range \f$ (-1;-0.5] \cup \
178 |  [0.5; 1) \f$ and \f$ \exp_i \f$ is an integer such that \f$ 2^{\exp_i} \
179 |  \times X_i \f$ is equal to \f$ A_i \f$ rounded to a lower-precision target \
180 |  format.<p/>\
181 |  \
182 |  The parameters of the target format and the rounding mode to be used are \
183 |  encoded in @p fpopts. If required, the function flips one bit in some of \
184 |  the entries of @p X.<p/>\
185 |  \
186 |  If OpenMP support is specified at compile time, several OpenMP threads are \
187 |  used if @p numelem is large enough. The size threshold is machine dependent. \
188 |  \
189 |  @param[out] X Array of floating-point values that are zero or lie in \
190 |  \f$ (-1;-0.5] \cup [0.5; 1) \f$. \
191 |  @param[out] exp Array of integer exponents. \
192 |  @param[in] A Input array. \
193 |  @param[in] numelem Number of elements in @p X, @p exp, and @p A. \
194 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
195 |  and the probability of soft errors striking the rounded values. \
196 |  \
197 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
198 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
199 |  larger than EMAX, and @b 0 otherwise. \
200 | */
201 | 
202 | #define doc_cpf_scaling(BASE, PMAX, EMIN, EMAX) \
203 | /** \
204 |  @brief Scale number by power of BASE in lower precision. \
205 |  \
206 |  @details If the function executes without errors, then \f$ X_i = A_i \times \
207 |  \mathrm{BASE}^{\exp_i} \f$ rounded to a lower-precision target format. \
208 |  The parameters of the target format and the rounding mode to be used are \
209 |  encoded in @p fpopts. If required, the function flips one bit in some of the \
210 |  entries of @p X.<p/>\
211 |  \
212 |  If OpenMP support is specified at compile time, several OpenMP threads are \
213 |  used if @p numelem is large enough. The size threshold is machine dependent. \
214 |  \
215 |  @param[out] X Array of rounded values. \
216 |  @param[in] A Input array. \
217 |  @param[in] exp Array of integer exponents. \
218 |  @param[in] numelem Number of elements in @p X, @p A, and @p exp. \
219 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
220 |  and the probability of soft errors striking the rounded values. \
221 |  \
222 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
223 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
224 |  larger than EMAX, and @b 0 otherwise. \
225 | */
226 | 
227 | #define doc_cpf_modf(PMAX, EMIN, EMAX) \
228 | /** \
229 |  @brief Compute integral and fractional part. \
230 |  \
231 |  @details If the function executes without errors, then \f$ X_i \f$ is a value \
232 |  in the range \f$ (-1,1) \f$ and \f$ \mathrm{intpart}_i \f$ is an integer such \
233 |  that \f$ X_i + \mathrm{intpart}_i \f$ is equal to \f$ A_i \f$ rounded to a \
234 |  lower-precision target format. The parameters of the target format and the \
235 |  rounding mode to be used are encoded in @p fpopts. If required, the function \
236 |  flips one bit in some of the entries of @p X.<p/>\
237 |  \
238 |  If OpenMP support is specified at compile time, several OpenMP threads are \
239 |  used if @p numelem is large enough. The size threshold is machine dependent. \
240 |  \
241 |  @param[out] X Array of floating-point values in \f$ (-1, 1) \f$. \
242 |  @param[out] intpart Array of integer parts. \
243 |  @param[in] A Input array. \
244 |  @param[in] numelem Number of elements in @p X, @p intpart, and @p A. \
245 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
246 |  and the probability of soft errors striking the rounded values. \
247 |  \
248 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
249 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
250 |  larger than EMAX, and @b 0 otherwise. \
251 | */
252 | 
253 | #define doc_cpf_ilogb(PMAX, EMIN, EMAX) \
254 | /** \
255 |  @brief Compute integral part of the logarithm of the absolute value. \
256 |  \
257 |  @details If the function executes without errors, the integer \f$ \exp_i \f$ \
258 |  is the exponent used internally to express the floating-point value \
259 |  \f$ A_i \f$ rounded to a lower-precision target format. In other words, \
260 |  \f$ \exp_i \f$ is equal to \f$ \mathrm{trunc}(\log_b \lvert A_i \rvert) \f$ \
261 |  where \f$ b = \mathrm{FLT\_RADIX} \f$ is typically 2. The parameters of the \
262 |  target format and the rounding mode to be used are encoded in @p fpopts.<p/>\
263 |  \
264 |  If OpenMP support is specified at compile time, several OpenMP threads are \
265 |  used if @p numelem is large enough. The size threshold is machine dependent. \
266 |  \
267 |  @param[out] exp Array of integer exponents. \
268 |  @param[in] A Input array. \
269 |  @param[in] numelem Number of elements in @p exp and @p A. \
270 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
271 |  and the probability of soft errors striking the rounded values. \
272 |  \
273 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
274 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
275 |  larger than EMAX, and @b 0 otherwise. \
276 | */
277 | 
278 | #define doc_cpf_rint(PMAX, EMIN, EMAX) \
279 | /** \
280 |  @brief Compute the closest integer with specified rounding mode. \
281 |  \
282 |  @details If the function executes without errors, then \f$ X_i \f$ is the \
283 |  integral part of \f$ A_i \f$ rounded to a lower-precision target format and \
284 |  \f$ \mathrm{exception}_i \f$ is set to 0 if \f$ X_i \f$ is equal to \
285 |  \f$ A_i \f$ and to FE_INEXACT otherwise. The parameters of the target format \
286 |  and the rounding mode to be used are encoded in @p fpopts. If required, the \
287 |  function flips one bit in some of the entries of @p X.<p/>\
288 |  \
289 |  If OpenMP support is specified at compile time, several OpenMP threads are \
290 |  used if @p numelem is large enough. The size threshold is machine dependent. \
291 |  \
292 |  @param[out] X Array of rounded values. \
293 |  @param[out] exception Array of floating-point exceptions. \
294 |  @param[in] A Input array. \
295 |  @param[in] numelem Number of elements in @p X, @p exception, and @p A. \
296 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
297 |  and the probability of soft errors striking the rounded values. \
298 |  \
299 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
300 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
301 |  larger than EMAX, and @b 0 otherwise. \
302 | */
303 | 
304 | #define doc_cpf_nearbyint(PMAX, EMIN, EMAX) \
305 | /** \
306 |  @brief Compute the closest integer with specified rounding mode. \
307 |  \
308 |  @details If the function executes without errors, then \f$ X_i \f$ is the \
309 |  integral part of \f$ A_i \f$ rounded to a lower-precision target format. \
310 |  The parameters of the target format and the rounding mode to be used are \
311 |  encoded in @p fpopts. If required, the function flips one bit in some of the \
312 |  entries of @p X.<p/>\
313 |  \
314 |  If OpenMP support is specified at compile time, several OpenMP threads are \
315 |  used if @p numelem is large enough. The size threshold is machine dependent. \
316 |  \
317 |  @param[out] X Array of rounded values. \
318 |  @param[in] A Input array. \
319 |  @param[in] numelem Number of elements in @p X and @p A. \
320 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
321 |  and the probability of soft errors striking the rounded values. \
322 |  \
323 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
324 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
325 |  larger than EMAX, and @b 0 otherwise. \
326 | */
327 | 
328 | 
329 | #define doc_cpf_remquo(PMAX, EMIN, EMAX) \
330 | /** \
331 |  @brief Compute remainder and quotient of rounded numbers. \
332 |  \
333 |  @details If the function executes without errors, then \f$ \mathrm{quot}_i \f$ \
334 |  and \f$ X_i \f$ are the (integral) quotient and the remainder of the division \
335 |  \f$ A_i / B_i \f$  with \f$ A_i \f$ and \f$ B_i \f$ rounded to a \
336 |  lower-precision target format. The parameters of the target format and the \
337 |  rounding mode to be used are encoded in @p fpopts. If required, the function \
338 |  flips one bit in some of the entries of @p X.<p/>\
339 |  \
340 |  If OpenMP support is specified at compile time, several OpenMP threads are \
341 |  used if @p numelem is large enough. The size threshold is machine dependent. \
342 |  \
343 |  @param[out] X Array of remainders. \
344 |  @param[out] quot Array of quotients. \
345 |  @param[in] A Input array. \
346 |  @param[in] B Input array. \
347 |  @param[in] numelem Number of elements in @p X, @p quot, @p A, and @p B. \
348 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
349 |  and the probability of soft errors striking the rounded values. \
350 |  \
351 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
352 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
353 |  larger than EMAX, and @b 0 otherwise. \
354 | */
355 | 
356 | #define doc_cpf_fpclassify(PMAX, EMIN, EMAX) \
357 | /** \
358 |  @brief Categorize floating-point values. \
359 |  \
360 |  @details If the function executes without errors, then \f$ r_i \f$ has value: \
361 |  \li FP_INFINITE, if \f$ A_i \f$ is infinite in the lower-precision target format; \
362 |  \li FP_NAN, if \f$ A_i \f$ is a NaN in the lower-precision target format; \
363 |  \li FP_NORMAL, if \f$ A_i \f$ is normal in the lower-precision target format; \
364 |  \li FP_SUBNORMAL, if \f$ A_i \f$ is subnormal in the lower-precision target format; and \
365 |  \li FP_ZERO, if \f$ A_i \f$ is zero in the lower-precision target format. <p/> \
366 |  The parameters of the target format and the rounding mode to be used are \
367 |  encoded in @p fpopts.<p/>\
368 |  \
369 |  If OpenMP support is specified at compile time, several OpenMP threads are \
370 |  used if @p numelem is large enough. The size threshold is machine dependent. \
371 |  \
372 |  @param[out] r Array of classes. \
373 |  @param[in] A Input array. \
374 |  @param[in] numelem Number of elements in @p r and @p A. \
375 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
376 |  and the probability of soft errors striking the rounded values. \
377 |  \
378 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
379 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
380 |  larger than EMAX, and @b 0 otherwise. \
381 | */
382 | 
383 | #define doc_cpf_isfun(STRING, PMAX, EMIN, EMAX) \
384 | /** \
385 |  @brief Check whether value is STRING in lower-precision target format. \
386 |  \
387 |  @details If the function executes without errors, then \f$ r_i \f$ is a \
388 |  nonzero integral value if \f$ A_i \f$ is STRING in a lower-precision target \
389 |  format, and zero otherwise. The parameters of the target format and the \
390 |  rounding mode to be used are encoded in @p fpopts.<p/>\
391 |  \
392 |  If OpenMP support is specified at compile time, several OpenMP threads are \
393 |  used if @p numelem is large enough. The size threshold is machine dependent. \
394 |  \
395 |  @param[out] r Array of Boolean values. \
396 |  @param[in] A Input array. \
397 |  @param[in] numelem Number of elements in @p r and @p A. \
398 |  @param[in] fpopts Parameters describing the target format, the rounding mode, \
399 |  and the probability of soft errors striking the rounded values. \
400 |  \
401 |  @return The function returns @b 1 if @p fpopts->precision is larger than \
402 |  PMAX, @b 2 if @p fpopts->emin is smaller than EMIN or @p fpopts->emax is \
403 |  larger than EMAX, and @b 0 otherwise. \
404 | */
405 | 
406 | #endif  /* #ifndef _CPFLOAT_DOCMACROS_ */
407 | 
408 | /*
409 |  * CPFloat - Custom Precision Floating-point numbers.
410 |  *
411 |  * Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
412 |  *
413 |  * This library is free software; you can redistribute it and/or modify it under
414 |  * the terms of the GNU Lesser General Public License as published by the Free
415 |  * Software Foundation; either version 2.1 of the License, or (at your option)
416 |  * any later version.
417 |  *
418 |  * This library is distributed in the hope that it will be useful, but WITHOUT
419 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
420 |  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
421 |  * details.
422 |  *
423 |  * You should have received a copy of the GNU Lesser General Public License along
424 |  * with this library; if not, write to the Free Software Foundation, Inc., 51
425 |  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
426 |  */
427 | 


--------------------------------------------------------------------------------
/src/cpfloat_threshold_binary32.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
 3 | 
 4 | /**
 5 |  * @file cpfloat_threshold_binary32.h
 6 |  * @brief Size of smallest `float` array on which to use multiple OpenMP threads.
 7 |  */
 8 | 
 9 | /**
10 |  * @brief Size of smallest array on which cpfloatf() uses multiple threads.
11 |  *
12 |  * @details Threshold for switching between cpfloatf_sequential() and
13 |  * cpfloatf_parallel() in cpfloatf(). The value of this constant is ignored
14 |  * if the file that includes cpfloat_binary32.h is compiled without OpenMP
15 |  * support.
16 |  */
17 | #define OPENMP_THRESHOLD_float 1
18 | 


--------------------------------------------------------------------------------
/src/cpfloat_threshold_binary64.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis */
 2 | /* SPDX-License-Identifier: LGPL-2.1-or-later                         */
 3 | 
 4 | /**
 5 |  * @file cpfloat_threshold_binary64.h
 6 |  * @brief Size of smallest `double` array on which to use multiple OpenMP threads.
 7 |  */
 8 | 
 9 | /**
10 |  * @brief Size of smallest array on which cpfloat() uses multiple threads.
11 |  *
12 |  * @details Threshold for switching between cpfloat_sequential() and
13 |  * cpfloat_parallel() in cpfloat(). The value of this constant is ignored
14 |  * if the file that includes cpfloat_binary64.h is compiled without OpenMP
15 |  * support.
16 |  */
17 | #define OPENMP_THRESHOLD_double 1
18 | 


--------------------------------------------------------------------------------
/test/cpfloat_test.m:
--------------------------------------------------------------------------------
  1 | % SPDX-FileCopyrightText: 2020 Massimiliano Fasi and Mantas Mikaitis
  2 | % SPDX-License-Identifier: LGPL-2.1-or-later
  3 | 
  4 | function cpfloat_test
  5 | %TEST_CPFLOAT Test the cpfloat function.
  6 | %   The tests are for single precision and fp16.
  7 | 
  8 |   clear cpfloat fp options options2 assert_eq
  9 | 
 10 |   usingoctave = exist('OCTAVE_VERSION', 'builtin');
 11 | 
 12 |   if usingoctave
 13 |     rand('seed', 1);
 14 |   else
 15 |     rng(1);
 16 |   end
 17 | 
 18 |   n = 0;
 19 |   uh = 2^(-11);  % Unit roundoff for fp16.
 20 |   pi_h = 6432*uh; % fp16(pi)
 21 | 
 22 |   % Check handling of defaults and persistent variable.
 23 |   fp.format = 'bfloat16';
 24 |   [~,options] = cpfloat(pi,fp);
 25 |   assert_eq(fp.format,options.format)
 26 |   assert_eq(options.subnormal,0)
 27 | 
 28 |   fp.format = [];
 29 |   [~,options] = cpfloat(pi,fp);
 30 |   assert_eq(options.format,'h')  % Check default;
 31 | 
 32 |   fp.explim = [];
 33 |   [~,options] = cpfloat(pi,fp);
 34 |   assert_eq(options.explim,1)  % Check default.
 35 | 
 36 |   fp.explim = 0;
 37 |   [~,options] = cpfloat(pi,fp);
 38 |   assert_eq(options.explim,0)  % Check no default.
 39 | 
 40 |   fp.round = [];
 41 |   [~,options] = cpfloat(pi,fp);
 42 |   assert_eq(options.round,1)  % Check default.
 43 | 
 44 |   fp.saturation = 1;
 45 |   [~,options] = cpfloat(pi,fp);
 46 |   assert_eq(options.saturation,1)
 47 | 
 48 |   fp.saturation = [];
 49 |   [~,options] = cpfloat(pi,fp);
 50 |   assert_eq(options.saturation,0)  % Check default;
 51 | 
 52 |   fp.subnormal = 0;
 53 |   [~,options] = cpfloat(pi,fp);
 54 |   assert_eq(options.subnormal,0)
 55 | 
 56 |   fp.subnormal = [];
 57 |   [~,options] = cpfloat(pi,fp);
 58 |   assert_eq(options.subnormal,1)  % Check default;
 59 | 
 60 |   fp.flip = [];
 61 |   [~,options] = cpfloat(pi,fp);
 62 |   assert_eq(options.flip,0)  % Check no default.
 63 | 
 64 |   clear cpfloat fp options
 65 |   fp.flip = 1;
 66 |   [~,options] = cpfloat([],fp);
 67 |   assert_eq(options.format,'h')
 68 |   assert_eq(options.round,1)
 69 |   assert_eq(options.saturation,0)
 70 |   assert_eq(options.subnormal,1)
 71 | 
 72 |   clear cpfloat fp options
 73 |   % check all default options
 74 |   fp.format = [];
 75 |   fp.round = [];
 76 |   fp.saturation = [];
 77 |   fp.subnormal = [];
 78 |   fp.flip = [];
 79 |   fp.p = [];
 80 |   [~,options] = cpfloat(pi,fp);
 81 |   assert_eq(options.format,'h')
 82 |   assert_eq(options.round,1)
 83 |   assert_eq(options.saturation,0)
 84 |   assert_eq(options.subnormal,1)
 85 |   assert_eq(options.flip,0)
 86 |   assert_eq(options.p,0.5)
 87 |   % % Takes different path from previous test since fpopts exists.
 88 |   % fp.subnormal = 0;
 89 |   % fp.format = []; [c,options] = cpfloat(pi,fp);
 90 |   % assert_eq(options.format,'h')
 91 | 
 92 |   % Check flip output.
 93 |   clear cpfloat fp
 94 |   fp.flip = 1; fp.format = 'd';
 95 |   c = ones(8,1);
 96 |   d = cpfloat(c,fp);
 97 |   assert_eq(norm(d-c,1)>0,true);
 98 |   d = cpfloat(c',fp);
 99 |   assert_eq(norm(d-c',1)>0,true);
100 |   fp.p = 0; % No bits flipped.
101 |   d = cpfloat(c,fp);
102 |   assert_eq(d,d);
103 |   fp.p = 1; % All bits flipped.
104 |   d = cpfloat(c,fp);
105 |   assert_eq(all(d ~= c),true);
106 | 
107 |   clear cpfloat
108 |   [~,fp] = cpfloat;
109 |   assert_eq(fp.subnormal,1)
110 |   assert_eq(fp.format,'h')
111 |   [~,options] = cpfloat(pi);
112 |   assert_eq(options.format,'h')
113 |   assert_eq(options.subnormal,1)
114 |   assert_eq(options.round,1)
115 |   assert_eq(options.flip,0)
116 |   assert_eq(options.p,0.5)
117 | 
118 |   clear fp
119 |   fp.format = 'd';
120 |   [~,options] = cpfloat(pi,fp);
121 |   assert_eq(options.format,'d')
122 |   assert_eq(options.subnormal,1)
123 |   assert_eq(options.params, [53 -1022 1023])
124 |   [~,fp] = cpfloat;
125 |   assert_eq(fp.format,'d')
126 |   assert_eq(fp.subnormal,1)
127 |   assert_eq(fp.params, [53 -1022 1023])
128 | 
129 |   clear fp
130 |   fp.format = 'E4M3';
131 |   [~,options] = cpfloat(pi,fp);
132 |   assert_eq(options.format,'E4M3')
133 |   assert_eq(options.infinity,0)
134 |   assert_eq(options.params, [4 -6 8])
135 |   [~,fp] = cpfloat;
136 |   assert_eq(fp.format,'E4M3')
137 |   assert_eq(fp.infinity,0)
138 |   assert_eq(fp.params, [4 -6 8])
139 | 
140 | 
141 |   clear fp
142 |   fp.format = 'bfloat16';
143 |   [~,options] = cpfloat(pi,fp);
144 |   assert_eq(options.format,'bfloat16')
145 |   assert_eq(options.subnormal,0)
146 |   assert_eq(options.params, [8 -126 127])
147 |   [~,fp] = cpfloat;
148 |   assert_eq(fp.format,'bfloat16')
149 |   assert_eq(fp.subnormal,0)
150 |   assert_eq(fp.params, [8 -126 127])
151 | 
152 |   clear cpfloat
153 |   [~,fp] = cpfloat;
154 |   fp.format = 'b';
155 |   fp = rmfield(fp, 'params');
156 |   [~,options] = cpfloat(pi,fp);
157 |   assert_eq(options.saturation,0) % No saturation if that field was empty.
158 |   assert_eq(options.subnormal,1) % No subnormals only if that field was empty.
159 | 
160 |   % Check these usages do not give an error.
161 |   c = cpfloat([]);
162 |   cpfloat([]);
163 |   cpfloat([],fp);
164 |   cpfloat(1,[]);
165 |   cpfloat(1,fp);
166 |   c = cpfloat(1,fp);
167 | 
168 |   % Test matrix.
169 |   options.format = 'b';
170 |   options = rmfield(options, 'params');
171 |   A = magic(4);
172 |   C = cpfloat(A,options);
173 |   assert_eq(A,C);
174 |   B = A + randn(size(A))*1e-12;
175 |   C = cpfloat(B,options);
176 |   assert_eq(A,C);
177 |   A2 = hilb(6);
178 |   C = cpfloat(A2);
179 | 
180 |   options.format = 'c';
181 |   options.params = [8 -126 127];  % bfloat16
182 |   C1 = cpfloat(A,options);
183 |   assert_eq(A,C1);
184 |   C2 = cpfloat(B,options);
185 |   assert_eq(A,C2);
186 |   assert_eq(C,cpfloat(A2));
187 | 
188 |   clear options
189 |   options.format = 'c';
190 |   options.params = [11 -14 15];  % h
191 |   options2.format = 'h';
192 |   A = hilb(6);
193 |   [X1,opt] = cpfloat(A,options);
194 |   [X2,opt2] = cpfloat(A,options2);
195 |   assert_eq(X1,X2)
196 |   assert_eq(cpfloat(A,opt),cpfloat(A,opt2));
197 | 
198 |   % Row vector
199 |   clear options
200 |   options.format = 'h';
201 |   A = -10:10;
202 |   C = cpfloat(A,options);
203 |   assert_eq(A,C);
204 |   B = A + randn(size(A))*1e-12;
205 |   C = cpfloat(B,options);
206 |   assert_eq(A,C);
207 | 
208 |   % Column vector
209 |   options.format = 's';
210 |   A = (-10:10)';
211 |   C = cpfloat(A,options);
212 |   assert_eq(A,C);
213 |   B = A + A.*rand(size(A))*1e-14;  % Keep 0 as 0.
214 |   C = cpfloat(B,options);
215 |   assert_eq(A,C);
216 | 
217 |   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
218 |   % Main loop: test single and half formats.
219 |   for i = 1:4
220 |     clear cpfloat fp options
221 | 
222 |     if i == 1
223 |       % Single precision tests.
224 |       [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('single');
225 |       options.format = 's';
226 |     elseif i == 2
227 |       % Half precision tests.
228 |       [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('half');
229 |       options.format = 'h';
230 |     elseif i == 3
231 |       % Quarter precision tests.
232 |       [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('q43');
233 |       options.format = 'E4M3';
234 |       % Modification for OCP compliant q43.
235 |       emin = -6; % Previously thought to be 1-emax=-7.
236 |       emax = 8;  % Previously thought to be 7
237 |       emins = emin + 1 - p; % Exponent of smallest subnormal number.
238 |       xmins = 2^emins;
239 |       xmin = 2^emin;
240 |       xmax = 2^emax * (2-2^(1-p));
241 |     elseif i == 4
242 |       % Quarter precision tests.
243 |       [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('q52');
244 |       options.format = 'E5M2';
245 |     end
246 |     options.subnormal = 0;
247 | 
248 |     x = pi;
249 |     if i == 1
250 |       y = double(single(x));
251 |     elseif i == 2
252 |       y = pi_h; % double(fp16(x));
253 |     elseif i == 3
254 |       y = 3.25;
255 |     elseif i == 4
256 |       y = 3.0;
257 |     end
258 |     c = cpfloat(x,options);
259 |     assert_eq(c,y);
260 |     x = -pi;
261 |     c = cpfloat(x,options);
262 |     assert_eq(c,-y);
263 | 
264 |     % Next number power of 2.
265 |     y = 2^10;
266 |     if i == 1
267 |       dy = double(eps(single(y)));
268 |     elseif i == 2
269 |       dy = 2*y*uh; % double(eps(fp16(y)));
270 |     elseif i == 3
271 |       y = 2^4;
272 |       dy = 2*y*u;
273 |     elseif i == 4
274 |       y = 2^4;
275 |       dy = 2*y*u;
276 |     end
277 |     x = y + dy;
278 |     c = cpfloat(x,options);
279 |     assert_eq(c,x)
280 | 
281 |     % Number just before a power of 2.
282 |     x = y - dy;
283 |     c = cpfloat(x,options);
284 |     assert_eq(c,x)
285 | 
286 |     % Next number power of 2.
287 |     y = 2^(-4);
288 |     if i == 1
289 |       dy = double(eps(single(y)));
290 |     elseif i == 2
291 |       dy = 2*y*uh; % double(eps(fp16(y)));
292 |     elseif i == 3
293 |       dy = 2*y*u;
294 |     elseif i == 4
295 |       dy = 2*y*u;
296 |     end
297 |     x = y + dy;
298 |     c = cpfloat(x,options);
299 |     assert_eq(c,x)
300 | 
301 |     % Check other rounding options
302 |     for rmode = 1:6
303 |       options.round = rmode;
304 |       x = y + (dy*10^(-3));
305 |       c = cpfloat(x,options);
306 |       if options.round == 2
307 |         assert_eq(c,y+dy) % Rounding up.
308 |       elseif options.round >= 5
309 |         % Check rounded either up or down.
310 |         if c ~= y+dy
311 |           assert_eq(c,y);
312 |         end
313 |       else
314 |         assert_eq(c,y);
315 |       end
316 |     end
317 | 
318 |     % Overflow tests.
319 |     for j = 1:6
320 |       options.round = j;
321 |       x = xmax;
322 |       c = cpfloat(x,options);
323 |       assert_eq(c,x)
324 |     end
325 | 
326 |     % Saturation tests.
327 |     options.saturation = 1;
328 |     for j = 1:6
329 |       options.round = j;
330 |       x = inf;
331 |       c = cpfloat(x,options);
332 |       assert_eq(c,xmax)
333 |       c = cpfloat(-x,options);
334 |       assert_eq(c,-xmax)
335 |     end
336 | 
337 |     % Infinities tests.
338 |     [~,fpopts] = cpfloat;
339 |     prev_infinity = fpopts.infinity;
340 |     options.infinity = 1;
341 |     options.saturation = 0;
342 |     for j = 1:6
343 |       options.round = j;
344 |       x = inf;
345 |       c = cpfloat(x,options);
346 |       assert_eq(c,x)
347 |       c = cpfloat(-x,options);
348 |       assert_eq(c,-x)
349 |     end
350 | 
351 |     % IEEE 754-2019, page 27: rule for rounding to infinity.
352 |     % Round to nearest
353 |     options.round = 1; % reset the rounding mode to default
354 |     x = 2^emax * (2-(1/2)*2^(1-p));  % Round to inf.
355 |     c = cpfloat(x,options);
356 |     assert_eq(c,inf)
357 |     c = cpfloat(-x,options);
358 |     assert_eq(c,-inf)
359 | 
360 |     x = 2^emax * (2-(3/4)*2^(1-p));  % Round to realmax.
361 |     c = cpfloat(x,options);
362 |     assert_eq(c,xmax)
363 |     c = cpfloat(-x,options);
364 |     assert_eq(c,-xmax)
365 | 
366 |     % Round toward plus infinity
367 |     options.round = 2;
368 |     x = 2^emax * (2-(1/2)*2^(1-p));
369 |     c = cpfloat(x,options);
370 |     assert_eq(c,inf)
371 |     c = cpfloat(-x,options);
372 |     assert_eq(c,-xmax)
373 | 
374 |     % Round toward minus infinity
375 |     options.round = 3;
376 |     c = cpfloat(x,options);
377 |     assert_eq(c,xmax)
378 |     c = cpfloat(-x,options);
379 |     assert_eq(c,-inf)
380 | 
381 |     % Round toward zero
382 |     options.round = 4;
383 |     c = cpfloat(x,options);
384 |     assert_eq(c,xmax)
385 |     c = cpfloat(-x,options);
386 |     assert_eq(c,-xmax)
387 | 
388 |     % Round to nearest.
389 |     options.round = 1; % reset the rounding mode to default
390 |     if i == 2
391 |       x = 1 + 2^(-11);
392 |       c = cpfloat(x,options);
393 |       assert_eq(c,1)
394 |     end
395 | 
396 |     % Underflow tests.
397 |     if i == 1
398 |       delta = double(eps(single(1)));
399 |     else
400 |       delta = 2*uh; % double(eps(fp16(1)));
401 |     end
402 | 
403 |     options.subnormal = 1;
404 |     c = cpfloat(xmin,options);
405 |     assert_eq(c,xmin)
406 |     x = [xmins xmin/2 xmin 0 xmax 2*xmax 1-delta/5 1+delta/4];
407 |     c = cpfloat(x,options);
408 |     c_expected = [x(1:5) inf 1 1];
409 |     assert_eq(c,c_expected)
410 | 
411 |     options.subnormal = 0;
412 |     c = cpfloat(xmin,options);
413 |     assert_eq(c,xmin)
414 |     x = [xmins xmin/2 xmin 0 xmax 2*xmax 1-delta/5 1+delta/4];
415 |     c = cpfloat(x,options);
416 |     c_expected = [0 0 x(3:5) inf 1 1];
417 |     assert_eq(c,c_expected)
418 |     options.infinity = prev_infinity;
419 | 
420 |     % Smallest normal number and spacing between the subnormal numbers.
421 |     y = xmin; delta = xmin*2^(1-p);
422 |     x = y - delta; % The largest subnormal number.
423 |     options.subnormal = 1;
424 |     c = cpfloat(x,options);
425 |     assert_eq(c,x)
426 |     % Round up if subnormals are not supported.
427 |     options.subnormal = 0;
428 |     c = cpfloat(x,options);
429 |     assert_eq(c,xmin)
430 |     % Flush subnormals to zero if subnormals are not supported.
431 |     options.subnormal = 0;
432 |     c = cpfloat(xmins,options);
433 |     assert_eq(c,0)
434 | 
435 |     options.subnormal = 1;
436 |     x = xmins*8;  % A subnormal number.
437 |     c = cpfloat(x,options);
438 |     assert_eq(c,x)
439 | 
440 |     % Numbers smaller than smallest representable number.
441 |     options.subnormal = 0;
442 |     x = xmin / 2;
443 |     c = cpfloat(x,options);
444 |     assert_eq(c,0)
445 |     x = -xmin / 2;
446 |     c = cpfloat(x,options);
447 |     assert_eq(c,-0)
448 |     x = xmin / 4;
449 |     c = cpfloat(x,options);
450 |     assert_eq(c,0)
451 |     x = -xmin / 4;
452 |     c = cpfloat(x,options);
453 |     assert_eq(c,0)
454 | 
455 |     options.subnormal = 1;
456 |     x = xmins / 2;
457 |     c = cpfloat(x,options);
458 |     assert_eq(c,0)
459 |     x = -xmins / 2;
460 |     c = cpfloat(x,options);
461 |     assert_eq(c,0)
462 |     x = xmins / 4;
463 |     c = cpfloat(x,options);
464 |     assert_eq(c,0)
465 |     x = -xmins / 4;
466 |     c = cpfloat(x,options);
467 |     assert_eq(c,0)
468 | 
469 |     % Do not limit exponent.
470 |     options.explim = 0;
471 |     x = xmin/2;
472 |     c = cpfloat(x,options);
473 |     assert_eq(c,x)
474 |     x = -xmin/2;
475 |     c = cpfloat(x,options);
476 |     assert_eq(c,x)
477 |     x = xmax*2;
478 |     c = cpfloat(x,options);
479 |     assert_eq(c,x)
480 |     x = -xmax*2;
481 |     c = cpfloat(x,options);
482 |     assert_eq(c,x)
483 |     x = xmins/2;
484 |     c = cpfloat(x,options);
485 |     assert_eq(c,x)
486 |     x = -xmins/2;
487 |     c = cpfloat(x,options);
488 |     assert_eq(c,x)
489 |     A = [pi -pi; pi -pi];
490 |     C = cpfloat(A,options);
491 |     options.explim = 1;
492 |     assert_eq(C,cpfloat(A,options));
493 | 
494 |     % Round toward plus infinity
495 |     options.round = 2;
496 |     options.subnormal = 0;
497 |     x = xmin / 2;
498 |     c = cpfloat(x,options);
499 |     assert_eq(c,xmin)
500 |     x = -xmin / 2;
501 |     c = cpfloat(x,options);
502 |     assert_eq(c,0)
503 | 
504 |     options.subnormal = 1;
505 |     x = xmins / 2;
506 |     c = cpfloat(x,options);
507 |     assert_eq(c,xmins)
508 |     x = -xmins / 2;
509 |     c = cpfloat(x,options);
510 |     assert_eq(c,0)
511 |     x = xmins / 4;
512 |     c = cpfloat(x,options);
513 |     assert_eq(c,xmins)
514 |     x = -xmins / 4;
515 |     c = cpfloat(x,options);
516 |     assert_eq(c,0)
517 | 
518 |     % Round toward minus infinity
519 |     options.round = 3;
520 |     options.subnormal = 0;
521 |     x = xmin / 2;
522 |     c = cpfloat(x,options);
523 |     assert_eq(c,0)
524 |     x = -xmin / 2;
525 |     c = cpfloat(x,options);
526 |     assert_eq(c,-xmin)
527 | 
528 |     options.subnormal = 1;
529 |     x = xmins / 2;
530 |     c = cpfloat(x,options);
531 |     assert_eq(c,0)
532 |     x = -xmins / 2;
533 |     c = cpfloat(x,options);
534 |     assert_eq(c,-xmins)
535 |     x = xmins / 4;
536 |     c = cpfloat(x,options);
537 |     assert_eq(c,0)
538 |     x = -xmins / 4;
539 |     c = cpfloat(x,options);
540 |     assert_eq(c,-xmins)
541 | 
542 |     % Round toward zero.
543 |     options.round = 4;
544 |     options.subnormal = 0;
545 |     x = xmin / 2;
546 |     c = cpfloat(x,options);
547 |     assert_eq(c,0)
548 |     x = -xmin / 2;
549 |     c = cpfloat(x,options);
550 |     assert_eq(c,0)
551 | 
552 |     options.subnormal = 1;
553 |     x = xmins / 2;
554 |     c = cpfloat(x,options);
555 |     assert_eq(c,0)
556 |     x = -xmins / 2;
557 |     c = cpfloat(x,options);
558 |     assert_eq(c,0)
559 |     x = xmins / 4;
560 |     c = cpfloat(x,options);
561 |     assert_eq(c,0)
562 |     x = -xmins / 4;
563 |     c = cpfloat(x,options);
564 |     assert_eq(c,0)
565 | 
566 |   end % for i
567 |   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
568 |   clear options
569 | 
570 |   % Test rounding with CHOPFAST versus native rounding.
571 |   options.format = 's';
572 |   m = 100;
573 |   y = zeros(3,n);
574 |   z = y;
575 |   for i = 1:m
576 |     x = randn;
577 |     options.round = 2;
578 |     y(i,1) = cpfloat(x,options);
579 |     options.round = 3;
580 |     y(i,2) = cpfloat(x,options);
581 |     options.round = 4;
582 |     y(i,3) = cpfloat(x,options);
583 |     if usingoctave
584 |       fesetround(inf);
585 |       z(i,1) = single(x);
586 |       fesetround(-inf);
587 |       z(i,2) = single(x);
588 |       fesetround(0);
589 |       z(i,3) = single(x);
590 |     else
591 |       % Use undocumented function to set rounding mode in MATLAB.
592 |       feature('setround',inf);
593 |       z(i,1) = single(x);
594 |       feature('setround',-inf);
595 |       z(i,2) = single(x);
596 |       feature('setround',0);
597 |       z(i,3) = single(x);
598 |     end
599 |   end
600 |   assert_eq(y,z)
601 |   % Switch back to round to nearest.
602 |   if usingoctave
603 |     fesetround(0.5);
604 |   else
605 |     feature('setround',0.5)
606 |   end
607 | 
608 |   % Double precision tests.
609 |   [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('d');
610 |   options.format = 'd';
611 |   x = [1e-309 1e-320 1 1e306];  % First two entries are subnormal.
612 |   c = cpfloat(x,options);
613 |   assert_eq(c,x)
614 |   options.subnormal = 0;
615 |   c = cpfloat(x,options);
616 |   assert_eq(c,[0 0 x(3:4)])
617 | 
618 |   options.format = 'd';
619 |   options.subnormal = 0;
620 |   cpfloat([],options);
621 |   a = cpfloat(pi);
622 |   assert_eq(a,pi)
623 | 
624 |   options.format = 'd';
625 |   options.subnormal = 1;
626 |   cpfloat([],options);
627 |   a = cpfloat(pi); assert_eq(a,pi)
628 | 
629 |   x = pi^2;
630 |   clear options
631 |   options.format = 'd';
632 |   y = cpfloat(x,options);  % Should not change x.
633 |   assert_eq(x,y);
634 |   options.round = 2;
635 |   y = cpfloat(x,options);  % Should not change x.
636 |   assert_eq(x,y);
637 |   options.round = 3;
638 |   y = cpfloat(x,options);  % Should not change x.
639 |   assert_eq(x,y);
640 |   options.round = 4;
641 |   y = cpfloat(x,options);  % Should not change x.
642 |   assert_eq(x,y);
643 | 
644 |   % Test on single inputs.
645 |   clear options
646 |   ps = single(pi);
647 |   pd = double(ps);
648 |   options.format = 'b';
649 |   ys = cpfloat(ps,options);
650 |   assert_eq(isa(ys,'single'),true)
651 |   yd = cpfloat(pd);
652 |   assert_eq(double(ys),yd)
653 | 
654 |   options.format = 'h';
655 |   options.round = 2;
656 |   as = single(rand(n,1));
657 |   ad = double(as);
658 |   delta = single(rand(n,1));
659 |   cd = cpfloat(ad + 1e-5*double(delta),options);
660 |   cs = cpfloat(as + 1e-5*delta,options);
661 |   assert_eq(cd,double(cs));
662 | 
663 |   options.format = 'c';
664 |   options.params = [11 -4 5];
665 |   temp1 = cpfloat(single(pi),options);
666 |   options.format = 'h';
667 |   options = rmfield(options, 'params');
668 |   temp2 = cpfloat(single(pi),options);
669 |   assert_eq(temp1,temp2)
670 | 
671 |   % Test base 2 logarithm
672 |   options.format = 'h';
673 |   options.round = 4;
674 |   x = single(2^-3 * (sum(2.^(-(0:23)))));
675 |   assert_eq(cpfloat(x,options), single(2^-3 * (sum(2.^(-(0:10))))))
676 | 
677 |   x = 2^-3 * (sum(2.^(-(0:52))));
678 |   assert_eq(cpfloat(x,options), 2^-3 * (sum(2.^(-(0:10)))))
679 | 
680 |   options.format = 's';
681 |   x = single(2^-3 * (sum(2.^(-(0:23)))));
682 |   assert_eq(cpfloat(x,options), x)
683 | 
684 |   x = 2^-3 * (sum(2.^(-(0:52))));
685 |   assert_eq(cpfloat(x,options), 2^-3 * (sum(2.^(-(0:23)))))
686 | 
687 |   options.format = 'd';
688 |   x = 2^-3 * (sum(2.^(-(0:52))));
689 |   assert_eq(cpfloat(x,options), x)
690 | 
691 |   options.round = 1;
692 |   temp = 0;
693 |   try
694 |     options.format = 'c';
695 |     options.params = [12 -4 5];
696 |     temp = cpfloat(single(pi),options); % Error - double rounding!
697 |   catch
698 |   end
699 |   assert_eq(temp,0)
700 |   try
701 |     options.format = 'c';
702 |     options.params = [26 -8 9];
703 |     temp = cpfloat(pi,options); % Error - double rounding!
704 |   catch
705 |   end
706 |   assert_eq(temp,0)
707 |   try
708 |     temp = cpfloat(complex(1,1)); % Error - complex data!
709 |   catch
710 |   end
711 |   assert_eq(temp,0)
712 | 
713 |   fprintf('All tests successful!\n')
714 | 
715 |   clear cpfloat fp options options2 assert_eq
716 | 
717 |   %%%%%%%%%%%%%%%%%%%%%%%
718 |   function assert_eq(a,b)
719 |   % ASSERT_EQ(A,B) counts one test in N and errors, naming it, unless isequal(A,B).
720 |     n = n+1;  % N comes from outside this block (presumably the enclosing function).
721 |     if ~isequal(a,b)
722 |       error('Test %g failed.', n)  % Say which test broke, not only that one did.
723 |     end
724 |     fprintf('Test %g succeeded.\n', n )
725 |   end
726 | 
727 | end
728 | 
729 | % CPFloat - Custom Precision Floating-point numbers.
730 | %
731 | % Copyright 2020 Massimiliano Fasi and Mantas Mikaitis
732 | %
733 | % This library is free software; you can redistribute it and/or modify it under
734 | % the terms of the GNU Lesser General Public License as published by the Free
735 | % Software Foundation; either version 2.1 of the License, or (at your option)
736 | % any later version.
737 | %
738 | % This library is distributed in the hope that it will be useful, but WITHOUT
739 | % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
740 | % FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
741 | % details.
742 | %
743 | % You should have received a copy of the GNU Lesser General Public License along
744 | % with this library; if not, write to the Free Software Foundation, Inc., 51
745 | % Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
746 | 


--------------------------------------------------------------------------------
/util/generate_spdx.sh:
--------------------------------------------------------------------------------
  1 | #! /bin/zsh
  2 | 
  3 | # Generate the SPDX document for cpfloat and print it on standard output.
  4 | # All paths are relative to the current directory.
  5 | 
  6 | VERSION='0.5.0'
  7 | 
  8 | # Compute package hash using spdx-tools.
  9 | CURL=curl
 10 | DEPSDIR=./deps/
 11 | SPDX_TOOLS_JAR='spdx-tools.jar'
 12 | SPDX_TOOLS_URL='https://github.com/spdx/tools/releases/download/v2.2.4/spdx-tools-2.2.4-jar-with-dependencies.jar'
 13 | if [ ! -f "$DEPSDIR$SPDX_TOOLS_JAR" ]; then
 14 |     # curl -o does not create missing directories: make sure $DEPSDIR exists.
 15 |     mkdir -p "$DEPSDIR"
 16 |     $CURL -sL -o "$DEPSDIR$SPDX_TOOLS_JAR" "$SPDX_TOOLS_URL"
 17 | fi
 18 | 
 19 | # The verification code is the fourth field on the first line of the output.
 20 | OUTPUT=$(java -jar "$DEPSDIR$SPDX_TOOLS_JAR" \
 21 |       GenerateVerificationCode . ".*\.spdx|.*/deps/.*|.*\.sh" \
 22 |       | awk -F ' ' 'NR==1{print $4}')
 23 | 
 24 | 
 25 | # Add document and package information.
 26 | echo "##
 27 | ## Document Creation Information
 28 | ##
 29 | 
 30 | SPDXVersion: SPDX-2.2
 31 | DataLicense: CC0-1.0
 32 | SPDXID: SPDXRef-DOCUMENT
 33 | DocumentName: cpfloat-$VERSION
 34 | DocumentNamespace: https://raw.githubusercontent.com/north-numerical-computing/cpfloat/master/license.spdx
 35 | Creator: Person: Massimiliano Fasi (massimiliano.fasi@durham.ac.uk)
 36 | Creator: Person: Mantas Mikaitis (mantas.mikaitis@manchester.ac.uk)
 37 | Created: $(date -u +%Y-%m-%dT%H:%M:%SZ)
 38 | 
 39 | 
 40 | 
 41 | ##
 42 | ## Package Information
 43 | ##
 44 | 
 45 | PackageName: cpfloat
 46 | SPDXID: SPDXRef-1
 47 | PackageVersion: $VERSION
 48 | PackageDownloadLocation: git://github.com/north-numerical-computing/cpfloat
 49 | PackageVerificationCode: $OUTPUT (excludes: ./license.spdx)
 50 | PackageHomePage: https://github.com/north-numerical-computing/cpfloat
 51 | PackageLicenseConcluded: LGPL-2.1-or-later
 52 | PackageLicenseInfoFromFiles: LGPL-2.1-or-later
 53 | PackageLicenseDeclared: LGPL-2.1-or-later
 54 | PackageCopyrightText: <text>Copyright 2020 Massimiliano Fasi and Mantas Mikaitis</text>
 55 | PackageSummary:<text>Custom Precision Floating-point numbers.</text>
 56 | 
 57 | 
 58 | 
 59 | ##
 60 | ## File Information
 61 | ##"
 62 | 
 63 | # Add file information.
 64 | counter=1
 65 | # Split the output of find on newlines only (zsh flag f), so that a path
 66 | # containing spaces is not broken into several words.
 67 | for file in ${(f)"$(find .)"}; do
 68 |     if [[ ! -d $file && $file != (./.git*|./deps/*|*.spdx) ]]; then
 69 |         echo ""
 70 |         echo "FileName: $file"
 71 |         echo "SPDXID: SPDXRef-1-$counter"
 72 |         counter=$((counter+1))
 73 |         # find prints every path with a leading "./", so a bare file name
 74 |         # needs a "*/" prefix in order to match.
 75 |         case $file in
 76 |             *.sh|*/.git*|*/deps/*|*/license.spdx)
 77 |             # Ignore:
 78 |             # * housekeeping scripts;
 79 |             # * git files;
 80 |             # * third-party files;
 81 |             # * license.spdx file.
 82 |             ;;
 83 |             *doc*|*/Doxyfile|*/cpfloat.m)
 84 |                 echo "FileType: DOCUMENTATION"
 85 |                 ;;
 86 |             */Makefile|*.c|*.h|*.ts|*.m|*.cpp)
 87 |                 echo "FileType: SOURCE"
 88 |                 ;;
 89 |             *.md|*.txt)
 90 |                 echo "FileType: TEXT"
 91 |                 ;;
 92 |             *.spdx)
 93 |                 echo "FileType: SPDX"
 94 |                 ;;
 95 |             *)
 96 |                 echo "FileType: OTHER"
 97 |                 ;;
 98 |         esac
 99 |         echo "FileChecksum: SHA1: $(shasum -a 1 "$file" | \
100 |                                       awk -F ' ' '{print $1}')"
101 |         echo "FileChecksum: MD5: $(md5sum "$file" | awk -F ' ' '{print $1}')"
102 |         echo "LicenseConcluded: LGPL-2.1-or-later"
103 |         # Pass the identifier as an argument of printf, not as its format.
104 |         LICENSE=$(grep "SPDX-License-Identifier" "$file" | \
105 |                       awk -F ' ' '{printf "%s", $3}')
106 |         if [[ -z $LICENSE ]]; then
107 |             LICENSE=NONE
108 |         fi
109 |         echo "LicenseInfoInFile: $LICENSE"
110 |     fi
111 | done
112 | 


--------------------------------------------------------------------------------