├── .gitignore ├── CMakeLists.txt ├── LICENCE ├── README.md ├── build ├── qfpio.s.o └── qfplib.s.o ├── library.json ├── qfpio.S ├── qfpio.h ├── qfplib.S └── qfplib.h /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | 3 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Build Qfplib, the ARM Cortex-M0 floating-point library 2 | # Both sources are assembly (.S), so ASM is enabled explicitly; it is listed after C as CMake recommends 3 | project(qfplib LANGUAGES C ASM) 4 | 5 | # enable verbose log. Must be after project() 6 | # set(CMAKE_VERBOSE_MAKEFILE ON) 7 | # set(ENV{VERBOSE} "1") 8 | # message("VERBOSE: $ENV{VERBOSE}") 9 | 10 | # apply codal build settings 11 | include("${CODAL_UTILS_LOCATION}") 12 | 13 | # CMAKE_CURRENT_SOURCE_DIR is this library's own directory (e.g. codal/libraries/qfplib) 14 | set(LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}") 15 | 16 | ############################################################################### 17 | # qfplib 18 | 19 | set(QFPLIB_DIR "${LIB_DIR}") 20 | message(STATUS "QFPLIB_DIR: ${QFPLIB_DIR}") 21 | 22 | # sources are listed explicitly; headers (qfplib.h, qfpio.h) live next to them 23 | set(TOP_LEVEL_INCLUDE_DIRS "${QFPLIB_DIR}") 24 | # RECURSIVE_FIND_FILE(TOP_LEVEL_SOURCE_FILES "${QFPLIB_DIR}" "*.S") 25 | set( 26 | TOP_LEVEL_SOURCE_FILES 27 | "qfpio.S" 28 | "qfplib.S" 29 | ) 30 | 31 | # create library 32 | add_library(qfplib ${TOP_LEVEL_SOURCE_FILES}) 33 | message(STATUS "TOP_LEVEL_SOURCE_FILES: ${TOP_LEVEL_SOURCE_FILES}") 34 | 35 | ############################################################################### 36 | # global settings 37 | 38 | # target for STM32 Blue Pill (Arm Cortex-M3), even though code is for Arm Cortex-M0 (armv6-m) 39 | set(CMAKE_SYSTEM_PROCESSOR "armv7-m" PARENT_SCOPE) 40 | 41 | # include directories used only to build qfplib itself (target-scoped, not directory-scoped) 42 | target_include_directories( 43 | qfplib PRIVATE 44 | ${INCLUDE_DIRS} 45 | ${LIB_DIR} 46 | ) 47 | 48 | # expose include directories to parent cmake 49 | target_include_directories(qfplib PUBLIC ${TOP_LEVEL_INCLUDE_DIRS}) 
50 | 51 | # include all functions plus fast divide and square root. not required, all are selected by default. 52 | # set( 53 | # CMAKE_ASM_FLAGS 54 | # "${CMAKE_ASM_FLAGS} -Dinclude_faster=1 -Dinclude_conversions=1 -Dinclude_scientific=1" 55 | # ) 56 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 
30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Qfplib: an ARM Cortex-M0 floating-point library in 1 kbyte 2 | 3 | From: https://www.quinapalus.com/qfplib.html 4 | 5 | Qfplib is open source, licensed under version 2 of the GNU GPL. A copy 6 | of that licence is included in this archive. The archive also contains: 7 | 8 | - qfplib.S, the source code to qfplib. The GNU assembler syntax is used. 9 | 10 | - qfplib.h, a C header file giving prototypes for the qfplib functions. 11 | 12 | - qfpio.S, the source code to qfpio, routines for converting between 13 | strings and floating-point values. 14 | 15 | - qfpio.h, a C header file giving prototypes for the qfpio functions. 16 | 17 | Visit https://www.quinapalus.com/qfplib.html for more information. 
18 | -------------------------------------------------------------------------------- /build/qfpio.s.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lupyuen/qfplib/db64866a734417a718af5de4fc419ef2f23f1536/build/qfpio.s.o -------------------------------------------------------------------------------- /build/qfplib.s.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lupyuen/qfplib/db64866a734417a718af5de4fc419ef2f23f1536/build/qfplib.s.o -------------------------------------------------------------------------------- /library.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "qfplib", 3 | "frameworks": "*", 4 | "platforms": "ststm32", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/lupyuen/qfplib" 8 | }, 9 | "description": "Stub for compiling qfplib under PlatformIO", 10 | "build": { 11 | "unflags": [ 12 | ], 13 | "flags": [ 14 | ], 15 | "srcFilter": [ 16 | "+<./*.S>" 17 | ] 18 | } 19 | } -------------------------------------------------------------------------------- /qfpio.S: -------------------------------------------------------------------------------- 1 | @ Copyright 2015 Mark Owen 2 | @ http://www.quinapalus.com 3 | @ E-mail: qfp@quinapalus.com 4 | @ 5 | @ This file is free software: you can redistribute it and/or modify 6 | @ it under the terms of version 2 of the GNU General Public License 7 | @ as published by the Free Software Foundation. 8 | @ 9 | @ This file is distributed in the hope that it will be useful, 10 | @ but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | @ GNU General Public License for more details. 13 | @ 14 | @ You should have received a copy of the GNU General Public License 15 | @ along with this file. 
If not, see <http://www.gnu.org/licenses/> or 16 | @ write to the Free Software Foundation, Inc., 51 Franklin Street, 17 | @ Fifth Floor, Boston, MA 02110-1301, USA. 18 | 19 | .syntax unified 20 | .cpu cortex-m0 21 | .thumb 22 | 23 | @ exported symbols 24 | 25 | .global qfp_float2str 26 | .global qfp_str2float 27 | 28 | @ C code in comments is intended to give an idea of the function 29 | @ of the following assembler code. The translation is not exact. 30 | 31 | @ // multiply by 128/125: used by conversions in both directions 32 | @ unsigned int div125(unsigned int u) { 33 | @ unsigned int a,b,c,k0=0x4189; // 0x4189~=128/125 Q14 34 | @ a=u>>14; 35 | @ a=a*k0; // calculate first approximation to answer, good to about 14 bits 36 | @ b=((a>>1)+(a>>2))>>4; 37 | @ b=a-(b>>1)-(b&1); // find error in approximation 38 | @ c=(u-b)*k0; 39 | @ return a+(c>>14)+1; // result good to about 28 bits 40 | @ } 41 | @ in: r0=u; out: r0=result; r1-r4 are saved and restored 42 | div125: 43 | push {r1-r4,r14} 44 | ldr r4,=#0x4189 @ k0=0x4189; 45 | lsrs r1,r0,#14 @ a=u>>14; 46 | muls r1,r4 @ a=a*k0; 47 | lsrs r2,r1,#1 @ a>>1 48 | lsrs r3,r1,#2 @ a>>2 49 | add r2,r3 @ (a>>1)+(a>>2) 50 | lsrs r2,#4 @ b=((a>>1)+(a>>2))>>4; 51 | subs r0,r1 @ u-a 52 | lsrs r2,#1 @ b>>1, carry=b&1 53 | adcs r0,r2 @ u-a+(b>>1)+(b&1) 54 | muls r0,r4 @ c=(u-b)*k0; 55 | lsrs r0,#14 @ c>>14 56 | add r0,r1 @ a+(c>>14) 57 | adds r0,#1 @ a+(c>>14)+1 58 | pop {r1-r4,r15} @ restore r1-r4 and return (saved r14 is popped into pc) 59 | 60 | .ltorg @ literal pool for the ldr =constant above 61 | @ character output helpers: r1 is the output pointer, r3 the value to output 62 | opoint: @ output decimal point 63 | adds r5,#2 @ set f0 (r5 b1): a '.' has been output 64 | movs r3,#'.' 
65 | b och 66 | ozero: @ output '0' 67 | movs r3,#0 @ digit 0; falls through to odig 68 | odig: @ output one digit from r3 69 | adds r3,#'0' @ digit to ASCII; falls through to och 70 | och: @ output one character from r3 71 | strb r3,[r1] @ store low byte of r3 at the output pointer 72 | adds r1,#1 @ advance the output pointer 73 | bx r14 74 | 75 | naninf: @ r4=0 for Inf, otherwise NaN 76 | ldr r3,=#0x00666e49 @ "Inf", first character in the low byte 77 | cmp r4,#0 78 | beq 1f 79 | ldr r3,=#0x004e614e @ "NaN" 80 | 1: 81 | bl och @ output the low byte of r3 82 | lsrs r3,#8 @ move on to the next character; zero when all three are done 83 | bne 1b 84 | b 10f @ continue at label 10: in qfp_float2str (terminator and return) 85 | 86 | @ fmt is format control word: 87 | @ b7..b0: number of significant figures 88 | @ b15..b8: -(minimum exponent printable in F format) 89 | @ b23..b16: maximum exponent printable in F format-1 90 | @ b24: output positive mantissas with ' ' 91 | @ b25: output positive mantissas with '+' 92 | @ b26: output positive exponents with ' ' 93 | @ b27: output positive exponents with '+' 94 | @ b28: suppress trailing zeros in fraction 95 | @ b29: fixed-point output: b7..0 give number of decimal places 96 | @ default: 0x18060406 97 | @ Note that if b28 is set (as it is in the default format value) the code will 98 | @ write the trailing decimal point and zeros to the output buffer before truncating 99 | @ the string. Thus it is essential that the output buffer is large enough to accommodate 100 | @ these characters temporarily. 101 | @ 102 | @ Overall accuracy is sufficient to print all exactly-representable integers up to 10^8 correctly 103 | @ in 0x18160408 format. 
104 | @ 105 | @ void float2str(float f,char*s,unsigned int fmt) { 106 | @ in: r0=f (raw bits), r1=s (output pointer), r2=fmt 107 | qfp_float2str: 108 | push {r4-r7,r14} @ save r4-r7 and return address 109 | 110 | @ if(fmt==0) fmt=0x18060406; // default format 111 | 112 | cmp r2,#0 113 | bne 1f 114 | ldr r2,=#0x18060406 115 | 1: 116 | 117 | @ i=*(int*)&f; 118 | @ if(i&0x80000000) { // output sign of mantissa 119 | @ *p++='-'; 120 | @ i&=0x7fffffff; 121 | @ } else { 122 | @ if(fmt&0x01000000) *p++=' '; 123 | @ else if(fmt&0x02000000) *p++='+'; 124 | @ } 125 | 126 | movs r3,#'-' 127 | lsls r0,#1 @ carry=sign bit; r0 now holds exponent and mantissa only 128 | bcs 2f 129 | movs r3,#' ' 130 | lsrs r4,r2,#25 @ carry=fmt b24 131 | bcs 2f 132 | movs r3,#'+' 133 | lsrs r4,r2,#26 @ carry=fmt b25 134 | bcc 3f 135 | 2: 136 | bl och 137 | 3: 138 | 139 | @ e2=(i>>23)-127; // get binary exponent e2 140 | 141 | movs r4,#0 142 | lsrs r3,r0,#24 @ biased exponent (sign was shifted out above) 143 | beq 1f @ treat zero case specially 144 | subs r3,#127 145 | 146 | @ m=((i&0x7fffff)|0x800000)<<8; // get mantissa, restore implied 1, make Q31 147 | 148 | lsls r4,r0,#8 149 | cmp r3,#128 @ e2==128 means the biased exponent was 255 150 | beq naninf @ handle NaN/Inf cases 151 | adds r4,#1 @ low bits of r4 are zero, so this sets b0 152 | 153 | @ if(e2==-127) {e2=0; m=0;} // flush denormals to zero 154 | 155 | movs r5,#1 156 | rors r4,r5 @ rotate b0 round into b31: the implied 1 157 | 1: 158 | movs r0,r4 159 | 160 | @ now binary exponent e2 in r3, mantissa in r0 161 | 162 | @ e10=0; // decimal exponent 163 | @ overall plan is to manipulate m, e2 and e10 so as to take e2 to zero, while maintaining the 164 | @ invariant m * 2^e2 * 10^e10 165 | 166 | movs r4,#0 167 | 168 | @ while(e2>0) { // add 3 to e10, take 10 off e2, multiply m by 1024/1000=128/125 169 | @ if(m>=0xf0000000) m>>=1,e2++; 170 | @ m=div125(m); 171 | @ e2-=10; 172 | @ e10+=3; 173 | @ } // now e2<=0 174 | 175 | b 2f 176 | 1: 177 | lsrs r5,r0,#28 178 | cmp r5,#0x0f 179 | blo 3f 180 | lsrs r0,#1 181 | adds r3,#1 182 | 3: 183 | bl div125 184 | subs r3,#10 185 | adds r4,#3 186 | 2: 187 | cmp r3,#0 188 | bgt 1b 189 | 190 | @ while(e2<=-10) { // take 3 off e10, add 10 to e2, multiply m by 1000/1024=125/128 191 | @ m0=(m>>5)+(m>>6); 192 | @ m-=(m0>>1)+(m0&1); // *125/128, more 
accurate than using multiply instruction 193 | @ e2+=10; 194 | @ e10-=3; 195 | @ } // now -10 < e2 <= 0 196 | 197 | b 2f 198 | 1: 199 | lsrs r5,r0,#5 200 | lsrs r6,r0,#6 201 | add r5,r6 202 | movs r6,#0 203 | lsrs r5,#1 @ m0>>1, carry=m0&1 204 | adcs r5,r6 @ (m0>>1)+(m0&1); r6 is 0 205 | subs r0,r5 206 | subs r4,#3 207 | 2: 208 | adds r3,#10 @ e2+=10, also the loop test: result<=0 means e2 was <=-10 209 | ble 1b 210 | subs r3,#10 @ undo the final add that ended the loop 211 | 212 | @ m>>=1; // Q30; make sure m will not overflow 213 | 214 | lsrs r0,#1 215 | 216 | @ while(e2<=-3) { // take 1 off e10, add 3 to e2, multiply m by 10/8 217 | @ m0=m>>1; 218 | @ m+=(m0>>1)+(m0&1); // *10/8 219 | @ e2+=3; 220 | @ e10--; 221 | @ } // now -3 < e2 <=0 222 | 223 | b 2f 224 | 1: 225 | lsrs r5,r0,#1 226 | lsrs r5,#1 227 | adcs r0,r5 228 | subs r4,#1 229 | 2: 230 | adds r3,#3 @ e2+=3, also the loop test 231 | ble 1b 232 | subs r3,#3 @ undo the final add 233 | 234 | @ while(e2<0) { // add 1 to e2, halve m 235 | @ m>>=1; // *1/2 236 | @ e2++; 237 | @ } // now e2==0 238 | 239 | b 2f 240 | 1: 241 | lsrs r0,#1 242 | 2: 243 | adds r3,#1 @ e2++, also the loop test 244 | ble 1b 245 | subs r3,#1 @ undo the final add 246 | 247 | @ if(m>=0x40000000) m>>=2; // convert Q30 to Q28 248 | @ else { 249 | @ m=(m<<1)+(m>>1)+(m&1); // multiply by 10 (maintaining accuracy) if result will not overflow, compensate e10 250 | @ e10--; 251 | @ } 252 | 253 | lsrs r5,r0,#30 @ nonzero if m>=0x40000000 254 | beq 1f 255 | lsrs r0,#2 256 | b 2f 257 | 1: 258 | lsls r5,r0,#1 259 | lsrs r0,#1 260 | adcs r0,r5 261 | subs r4,#1 262 | 2: 263 | 264 | @ now all of binary exponent has been transferred to decimal exponent 265 | @ we have 266 | @ r0: mantissa m, Q28, 1<=m<10 267 | @ r1: output pointer 268 | @ r2: format 269 | @ r3: 0 (was binary exponent) 270 | @ r4: decimal exponent e10 271 | 272 | @ sf=fmt&0xff; // number of significant figures 273 | 274 | uxtb r3,r2 @ e2 is no longer used 275 | 276 | @ ff=0; // flag to indicate that output is in "F" format (i.e., will not use "E" notation) 277 | 278 | movs r5,#0 279 | 280 | @ d0=e10; // first digit output has significance 10^d0 wrt output '.' 281 | @ d1=d0-sf; // last digit output has significance 10^(d1+1) wrt output '.' 
282 | 283 | movs r6,r4 284 | subs r7,r6,r3 285 | 286 | @ r0: mantissa m, Q28, 1<=m<10 287 | @ r1: output pointer 288 | @ r2: format 289 | @ r3: sf 290 | @ r4: decimal exponent e10 291 | @ r5b0: ff 292 | @ r6: d0 293 | @ r7: d1 294 | 295 | @ if(fmt&0x20000000) { // forced "F" output format? 296 | @ d1=-(fmt&0xff)-1; 297 | @ sf=d0-d1; 298 | @ ff=1; 299 | @ } 300 | 301 | push {r1,r2} 302 | lsrs r1,r2,#30 @ carry=fmt b29; r1 is scratch until the pop {r1,r2} below 303 | bcc 1f 304 | mvns r7,r3 @ d1=~sf, i.e. -sf-1 305 | subs r3,r6,r7 306 | movs r5,#1 307 | 1: 308 | 309 | @ m0=0x08000000; // 0.5 Q28 310 | @ for(i=1;i<sf;i++) { 311 | @ m0+=m0>>1; // multiply by 0.1 312 | @ m0+=m0>>4; 313 | @ m0+=m0>>8; 314 | @ m0+=m0>>16; 315 | @ m0>>=4; 316 | @ } 317 | @ m+=m0; // rounding 318 | 319 | push {r3} 320 | movs r1,#8 321 | lsls r1,#24 @ m0=0x08000000 322 | 2: 323 | subs r3,#1 @ r3 counts down from sf; the saved copy is popped after the loop 324 | ble 1f 325 | lsrs r2,r1,#1 326 | add r1,r2 327 | lsrs r2,r1,#4 328 | add r1,r2 329 | lsrs r2,r1,#8 330 | add r1,r2 331 | lsrs r2,r1,#16 332 | add r1,r2 333 | lsrs r1,#4 334 | b 2b 335 | 1: 336 | add r0,r1 337 | pop {r3} 338 | 339 | @ if(m>=0xa0000000) { // has rounding pushed m to 10 (Q28)? if so, set to 1 and increment decimal exponent 340 | @ m=0x10000000; 341 | @ e10++; 342 | @ d0++; 343 | @ if((fmt&0x20000000)==0) d1++; 344 | @ } 345 | 346 | lsrs r1,r0,#28 347 | cmp r1,#0x0a 348 | pop {r1,r2} @ restore output pointer and format; the blo below still uses the cmp flags 349 | blo 1f 350 | lsrs r0,r2,#30 @ carry=fmt b29; r0 is reloaded just below 351 | bcs 2f 352 | adds r7,#1 353 | 2: 354 | movs r0,#0x10 355 | lsls r0,#24 356 | adds r4,#1 357 | adds r6,#1 358 | 1: 359 | 360 | @ if(d0>=-(int)((fmt>>8)&0xff)&&d0<(int)((fmt>>16)&0xff)) ff=1; // in range for F format? 361 | 362 | push {r4} @ save e10; r4 is scratch until the matching pop {r4} 363 | lsrs r4,r2,#8 364 | uxtb r4,r4 365 | adds r4,r6 366 | blt 1f 367 | lsrs r4,r2,#16 368 | uxtb r4,r4 369 | cmp r6,r4 370 | bge 1f 371 | movs r5,#1 372 | 1: 373 | 374 | @ if(!ff) d0=0,d1=-sf; // for E format we have one digit before the decimal point 375 | 376 | cmp r5,#0 377 | bne 1f 378 | movs r6,#0 379 | rsbs r7,r3,#0 380 | 1: 381 | 382 | @ sf (r3) no longer used 383 | 384 | @ f0=0; // flag to indicate whether we have output a '.' 
385 | 386 | @ f0 in r5b1 387 | 388 | @ if(d0<0) *p++='0',*p++='.',f0=1,i=-1; // value <1, so output "0." 389 | @ else i=d0; 390 | 391 | mov r4,r6 392 | cmp r6,#0 393 | bge 1f 394 | bl ozero 395 | bl opoint 396 | movs r4,#0 397 | mvns r4,r4 @ i=-1 398 | 1: 399 | 400 | @ while(i>d0&&i>d1) *p++='0',i--; // output leading zeros before significand as necessary 401 | 402 | 2: 403 | cmp r4,r6 404 | ble 1f 405 | cmp r4,r7 406 | ble 1f 407 | bl ozero 408 | subs r4,#1 409 | b 2b 410 | 1: 411 | 412 | @ d0 (r6) no longer used 413 | 414 | @ for(;i>d1;i--) { // now output digits of significand 415 | @ *p++='0'+(m>>28); // output integer part of Q28 value 416 | @ m&=0x0fffffff; // fractional part of Q28 value 417 | @ m=(m<<1)+(m<<3); // multiply by 10 418 | @ if(i==0) *p++='.',f0=1; // output decimal point as significance goes through 10^0 419 | @ } 420 | 421 | 2: 422 | cmp r4,r7 423 | ble 1f 424 | lsrs r3,r0,#28 425 | bl odig 426 | lsls r0,#4 @ shift out the integer digit 427 | lsrs r0,#1 @ fraction*8 in Q28 428 | lsrs r3,r0,#2 @ fraction*2 in Q28 429 | add r0,r3 @ fraction*10 430 | subs r4,#1 431 | bcs 2b @ no borrow, so i was not 0: next digit 432 | bl opoint 433 | b 2b 434 | 1: 435 | 436 | @ m (r0) no longer used 437 | @ d1 (r7) no longer used 438 | 439 | @ for(;i>=0;i--) *p++='0'; // output remaining zeros of integer part 440 | 441 | 2: 442 | cmp r4,#0 443 | blt 1f 444 | bl ozero 445 | subs r4,#1 446 | b 2b 447 | 1: 448 | 449 | @ i (r4) no longer used 450 | 451 | @ if(f0) { // remove trailing zeros and decimal point? 452 | @ if(fmt&0x10000000) while(p[-1]=='0') p--; 453 | @ if(p[-1]=='.') p--; 454 | @ *p=0; 455 | @ } 456 | 457 | lsrs r4,r5,#2 @ carry=f0 (r5 b1) 458 | bcc 1f 459 | lsrs r4,r2,#29 @ carry=fmt b28 460 | bcc 2f 461 | 3: 462 | subs r1,#1 463 | ldrb r4,[r1] 464 | cmp r4,#'0' 465 | beq 3b 466 | adds r1,#1 467 | 2: 468 | subs r1,#1 469 | ldrb r4,[r1] 470 | cmp r4,#'.' 
471 | beq 4f 472 | adds r1,#1 473 | 4: 474 | 1: 475 | pop {r4} 476 | 477 | @ now: 478 | @ r0 479 | @ r1: output pointer 480 | @ r2: format 481 | @ r3 482 | @ r4: decimal exponent e10 483 | @ r5b0: ff 484 | @ r6: 485 | @ r7: 486 | 487 | @ if(!ff) { // output exponent? 488 | 489 | lsrs r5,#1 @ carry=ff 490 | bcs 10f 491 | 492 | @ *p++='E'; 493 | 494 | movs r3,#'E' 495 | bl och 496 | 497 | @ if(e10<0) *p++='-',e10=-e10; // output exponent sign 498 | @ else { 499 | @ if(fmt&0x04000000) *p++=' '; 500 | @ else if(fmt&0x08000000) *p++='+'; 501 | @ } 502 | 503 | cmp r4,#0 504 | bge 2f 505 | rsbs r4,#0 506 | movs r3,#'-' 507 | b 3f 508 | 2: 509 | movs r3,#' ' 510 | lsrs r6,r2,#27 @ carry=fmt b26 511 | bcs 3f 512 | movs r3,#'+' 513 | lsrs r6,r2,#28 @ carry=fmt b27 514 | bcc 4f 515 | 3: 516 | bl och 517 | 4: 518 | 519 | @ m=(e10*0xcd)>>11; // tens digit of exponent 520 | @ *p++='0'+m; 521 | @ e10-=m*10; // units digit of exponent 522 | @ *p++='0'+e10; 523 | 524 | movs r3,#0xcd 525 | muls r3,r4 526 | lsrs r3,#11 527 | movs r0,#10 528 | muls r0,r3 @ r0=m*10, computed before odig changes r3 529 | bl odig 530 | subs r3,r4,r0 531 | bl odig 532 | 533 | @ } 534 | 535 | 10: 536 | 537 | @ *p++=0; 538 | 539 | movs r3,#0 540 | bl och 541 | 542 | @ } 543 | 544 | pop {r4-r7,r15} @ restore r4-r7 and return 545 | 546 | 547 | 548 | 549 | 550 | @ Convert string pointed to by p into float, stored at f. On failure 551 | @ return 1; on success, return 0 and store pointer to first non-converted 552 | @ character at endptr if endptr!=0. 
553 | 554 | @ #define ISDIG(x) ((x)>='0'&&(x)<='9') 555 | 556 | isdig: @ convert ASCII to digit 557 | subs r2,#'0' 558 | cmp r2,#10 @ clear carry if digit 559 | bx r14 560 | 561 | @ int str2float(float*f,char*p,char**endptr) { 562 | 563 | qfp_str2float: 564 | 565 | @ if(*p=='+') p++; 566 | @ else if(*p=='-') sm=0x80000000,p++; // capture mantissa sign 567 | 568 | push {r0,r2,r4-r7,r14} 569 | movs r7,#0 570 | ldrb r2,[r1] 571 | cmp r2,#'+' 572 | beq 1f 573 | cmp r2,#'-' 574 | bne 2f 575 | movs r7,#1 576 | 1: 577 | adds r1,#1 578 | 2: 579 | movs r0,#0 @ mantissa 580 | movs r3,#0 @ f0: have we seen a '.'? 581 | movs r5,#0 @ exponent 582 | movs r6,#0 @ count of mantissa digits processed 583 | 584 | @ r0: m 585 | @ r1: input pointer 586 | @ r3: f0 587 | @ r5: e 588 | @ r6: d 589 | @ r7b0: sm 590 | @ stack: output pointer, end pointer 591 | 592 | @ for(;;) { 593 | @ if(f0==0&&*p=='.') {f0=1; p++; continue;} 594 | @ if(!ISDIG(*p)) goto l0; // break out on non-digit 595 | @ if(m<0x10000000) { // accumulate digits (up to about 8 significant figures) 596 | @ m=m*10+*p-'0'; 597 | @ if(f0==1) e--; // decrement exponent if we are past the decimal point 598 | @ } else if(f0==0) e++; // just increment exponent after we have captured enough significance in m 599 | @ d++; 600 | @ p++; 601 | @ } 602 | @ l0: 603 | 604 | 2: 605 | ldrb r2,[r1] 606 | cmp r2,#'.' 
607 | bne 1f 608 | cmp r3,#0 609 | bne 1f 610 | movs r3,#1 611 | b 3f 612 | 1: 613 | bl isdig 614 | bcs 4f 615 | lsrs r4,r0,#28 616 | bne 5f 617 | movs r4,#10 618 | muls r0,r4 619 | add r0,r2 620 | subs r5,#1 621 | 5: 622 | adds r5,#1 623 | subs r5,r3 624 | adds r6,#1 625 | 3: 626 | adds r1,#1 627 | b 2b 628 | 4: 629 | 630 | @ if(d==0) return 1; // no digits seen: error 631 | 632 | cmp r6,#0 633 | bne 1f 634 | movs r0,#1 635 | pop {r2-r7,r15} 636 | 637 | @ f0 (r3) no longer used 638 | @ d (r6) no longer used 639 | 640 | @ e10=0; // decimal exponent 641 | 642 | 1: 643 | movs r3,#0 644 | 645 | @ if(*p=='e'||*p=='E') { // exponent given? 646 | @ se=0; 647 | @ p++; 648 | @ if(*p=='+') p++; 649 | @ else if(*p=='-') se=1,p++; // capture exponent sign 650 | @ while(ISDIG(*p)) { // capture exponent digits 651 | @ if(e10<0x01000000) e10=e10*10+*p-'0'; // prevent overflow 652 | @ p++; 653 | @ } 654 | @ if(se) e10=-e10; // apply exponent sign 655 | @ } 656 | 657 | mov r6,r1 @ save r1 658 | ldrb r2,[r1] 659 | cmp r2,#'e' 660 | beq 1f 661 | cmp r2,#'E' 662 | bne 2f 663 | 1: 664 | adds r1,#1 665 | ldrb r2,[r1] 666 | cmp r2,#'+' 667 | beq 3f 668 | cmp r2,#'-' 669 | bne 4f 670 | adds r7,#2 @ se in r7b1 671 | 3: 672 | adds r1,#1 673 | ldrb r2,[r1] 674 | 4: 675 | bl isdig 676 | bcc 6f 677 | mov r1,r6 @ E without following digits: restore r1 678 | b 2f 679 | 6: 680 | lsrs r4,r3,#24 681 | bne 5f 682 | movs r4,#10 683 | muls r3,r4 684 | add r3,r2 685 | 5: 686 | adds r1,#1 687 | ldrb r2,[r1] 688 | bl isdig 689 | bcc 6b 690 | cmp r7,#2 691 | blo 2f 692 | rsbs r3,#0 693 | 2: 694 | 695 | @ if(m==0) goto l2; // zero? 
then we have finished 696 | 697 | movs r2,#0 698 | cmp r0,#0 699 | beq 11f 700 | 701 | @ e10+=e; // offset e by captured exponent 702 | @ if(e10> 127) e10=127; // clip overflows: 10^127 will be converted later to Inf, 10^-128 to zero 703 | @ if(e10<-128) e10=-128; 704 | 705 | add r3,r5 706 | lsls r4,r3,#2 @ temporarily set e2 to e10*4: this will cause subsequent conversion to Inf/zero if required 707 | sxtb r5,r3 708 | cmp r5,r3 709 | bne 12f @ not equal to its sign-extended version? 710 | 711 | @ e (r5) no longer used 712 | 713 | @ r0: m 714 | @ r1: input pointer 715 | @ r3: e10 716 | @ r7b0: sm 717 | @ stack: output pointer, end pointer 718 | 719 | @ e2=31; // binary exponent 720 | @ plan is to manipulate m, e2 and e10 so as to take e10 to zero, while maintaining the 721 | @ invariant m * 2^e2 * 10^e10 722 | 723 | movs r4,#31 724 | 725 | @ while(m<0x40000000) m+=m,e2--; // normalise so m is now 0x40000000..0xa0000000 726 | 727 | 2: 728 | lsrs r2,r0,#30 729 | bne 1f 730 | lsls r0,#1 731 | subs r4,#1 732 | b 2b 733 | 1: 734 | 735 | @ while(e10<0) { // add 3 to e10, take 10 off e2 and multiply m by 1024/1000=128/125 736 | @ m=div125(m); 737 | @ e10+=3; e2-=10; 738 | @ if(m>=0x80000000) m>>=1,e2++; 739 | @ } // now e10 >= 0 740 | 741 | 2: 742 | cmp r3,#0 743 | bge 1f 744 | bl div125 745 | adds r3,#3 746 | subs r4,#10 747 | lsrs r2,r0,#31 748 | beq 2b 749 | lsrs r0,#1 750 | adds r4,#1 751 | b 2b 752 | 1: 753 | 754 | @ while(e10>2) { // take 3 off e10, add 10 to e2 and multiply m by 1000/1024=125/128 755 | @ m0=(m>>6)+(m>>5); 756 | @ m-=(m0>>1)+(m0&1); // *125/128 757 | @ e10-=3; e2+=10; 758 | @ } // now 0 <= e10 < 3 759 | 760 | 2: 761 | cmp r3,#2 762 | ble 1f 763 | lsrs r2,r0,#6 764 | lsrs r5,r0,#5 765 | add r2,r5 766 | movs r5,#0 767 | lsrs r2,#1 768 | adcs r2,r5 769 | subs r0,r2 770 | subs r3,#3 771 | adds r4,#10 772 | b 2b 773 | 1: 774 | 775 | @ while(e10>0) { // take 1 off e10, add 3 to e2 and multiply m by 10/8 = 5/4 776 | @ m0=(m>>1); 777 | @ m+=(m0>>1)+(m0&1);
// *5/4 778 | @ e10-=1; e2+=3; 779 | @ } // now e10==0 780 | 781 | 2: 782 | cmp r3,#0 783 | ble 1f 784 | lsrs r2,r0,#1 785 | lsrs r2,#1 786 | adcs r0,r2 787 | subs r3,#1 788 | adds r4,#3 789 | b 2b 790 | 1: 791 | 792 | @ e10 (r3) no longer used 793 | 794 | @ while(m<0x80000000) m+=m,e2--; // renormalise m so MSB is set 795 | 796 | cmp r0,#0 797 | blt 1f 798 | 2: 799 | subs r4,#1 800 | adds r0,r0 801 | bpl 2b 802 | 1: 803 | 804 | @ m=((m>>7)+1)>>1; // to 24 bits, with rounding 805 | 806 | lsrs r0,#7 807 | adds r0,#1 808 | lsrs r0,#1 809 | 810 | @ if(m==0x01000000) m>>=1,e2++; // has rounding pushed m to 25 bits? renormalise if so 811 | 812 | lsrs r2,r0,#24 813 | beq 1f 814 | lsrs r0,#1 815 | adds r4,#1 816 | 1: 817 | 818 | @ e2+=127; // add exponent offset 819 | 820 | 12: 821 | movs r2,#0 822 | movs r3,#0 823 | adds r4,#127 824 | 825 | @ if(e2<=0) {m=0; goto l1;} // too small? flush to zero 826 | 827 | ble 10f 828 | 829 | @ if(e2>=255) {m=0x7f800000; goto l1;} // too big? make infinity 830 | 831 | movs r3,#255 832 | cmp r4,#255 833 | bge 10f 834 | 835 | @ m&=0x007fffff; // remove implied 1 836 | 837 | lsls r2,r0,#9 838 | lsrs r2,#9 839 | mov r3,r4 840 | 841 | @ m|=e2<<23; // insert exponent bits 842 | 843 | 10: 844 | lsls r3,#23 845 | orrs r2,r3 846 | 847 | @ m|=sm; // apply mantissa sign 848 | 849 | 11: 850 | lsls r7,#31 851 | orrs r2,r7 852 | 853 | @ *f=*(float*)&m; // write output 854 | @ if(end) *end=p; 855 | 856 | pop {r0,r3} 857 | str r2,[r0] 858 | cmp r3,#0 859 | beq 1f 860 | str r1,[r3] 861 | 1: 862 | 863 | @ return 0; 864 | 865 | movs r0,#0 866 | pop {r4-r7,r15} 867 | 868 | @ } 869 | -------------------------------------------------------------------------------- /qfpio.h: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Mark Owen 2 | // http://www.quinapalus.com 3 | // E-mail: qfp@quinapalus.com 4 | // 5 | // This file is free software: you can redistribute it and/or modify 6 | // it under the terms of 
version 2 of the GNU General Public License 7 | // as published by the Free Software Foundation. 8 | // 9 | // This file is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU General Public License 15 | // along with this file. If not, see <http://www.gnu.org/licenses/> or 16 | // write to the Free Software Foundation, Inc., 51 Franklin Street, 17 | // Fifth Floor, Boston, MA 02110-1301, USA. 18 | 19 | #ifndef _QFPIO_H_ 20 | #define _QFPIO_H_ 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | extern void qfp_float2str(float f,char*s,unsigned int fmt); 27 | extern int qfp_str2float(float*f,char*p,char**endptr); 28 | 29 | #ifdef __cplusplus 30 | } // extern "C" 31 | #endif 32 | #endif 33 | -------------------------------------------------------------------------------- /qfplib.S: -------------------------------------------------------------------------------- 1 | @ Copyright 2015 Mark Owen 2 | @ http://www.quinapalus.com 3 | @ E-mail: qfp@quinapalus.com 4 | @ 5 | @ This file is free software: you can redistribute it and/or modify 6 | @ it under the terms of version 2 of the GNU General Public License 7 | @ as published by the Free Software Foundation. 8 | @ 9 | @ This file is distributed in the hope that it will be useful, 10 | @ but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | @ GNU General Public License for more details. 13 | @ 14 | @ You should have received a copy of the GNU General Public License 15 | @ along with this file. If not, see <http://www.gnu.org/licenses/> or 16 | @ write to the Free Software Foundation, Inc., 51 Franklin Street, 17 | @ Fifth Floor, Boston, MA 02110-1301, USA. 18 | 19 | @.equ include_faster,0 @ include fast divide and square root?
20 | @.equ include_conversions,1 @ include float <-> fixed point conversion functions? 21 | @.equ include_scientific,1 @ include trigonometric, exponential etc. functions? 22 | 23 | .ifndef include_faster 24 | .equ include_faster,1 25 | .endif 26 | 27 | .ifndef include_conversions 28 | .equ include_conversions,1 29 | .endif 30 | 31 | .ifndef include_scientific 32 | .equ include_scientific,1 33 | .endif 34 | 35 | .if include_scientific 36 | .equ include_conversions,1 37 | .endif 38 | 39 | .syntax unified 40 | .cpu cortex-m0 41 | .thumb 42 | 43 | @ exported symbols 44 | 45 | .global qfp_fadd 46 | .global qfp_fsub 47 | .global qfp_fmul 48 | .global qfp_fdiv 49 | .global qfp_fcmp 50 | .if include_conversions 51 | .global qfp_float2int 52 | .global qfp_float2fix 53 | .global qfp_float2uint 54 | .global qfp_float2ufix 55 | .global qfp_int2float 56 | .global qfp_fix2float 57 | .global qfp_uint2float 58 | .global qfp_ufix2float 59 | .endif 60 | .if include_scientific 61 | .global qfp_fcos 62 | .global qfp_fsin 63 | .global qfp_ftan 64 | .global qfp_fatan2 65 | .global qfp_fexp 66 | .global qfp_fln 67 | .global qfp_fsqrt 68 | .endif 69 | 70 | .if include_faster 71 | .global qfp_fdiv_fast 72 | .global qfp_fsqrt_fast 73 | .endif 74 | 75 | @ exchange r0<->r1, r2<->r3 76 | xchxy: 77 | push {r0,r2,r14} 78 | mov r0,r1 79 | mov r2,r3 80 | pop {r1,r3,r15} 81 | 82 | @ IEEE single precision floats in r0,r1-> mantissae in r1,r0 exponents in r3,r2 *respectively* 83 | @ trashes r4 84 | unpackxy: 85 | push {r14} 86 | bl unpackx 87 | bl xchxy 88 | pop {r4} 89 | mov r14,r4 90 | 91 | @ IEEE single in r0-> signed (two's complement) mantissa in r0 9Q23 (24 significant bits), signed exponent (bias removed) in r2 92 | @ trashes r4; zero, denormal -> mantissa=+/-1, exponent=-380; Inf, NaN -> mantissa=+/-1, exponent=+640 93 | unpackx: 94 | lsrs r2,r0,#23 @ save exponent and sign 95 | lsls r0,#9 @ extract mantissa 96 | lsrs r0,#9 97 | movs r4,#1 98 | lsls r4,#23 99 | orrs r0,r4 @ reinstate implied
leading 1 100 | cmp r2,#255 @ test sign bit 101 | uxtb r2,r2 @ clear it 102 | bls 1f @ branch on positive 103 | rsbs r0,#0 @ negate mantissa 104 | 1: 105 | subs r2,#1 106 | cmp r2,#254 @ zero/denormal/Inf/NaN? 107 | bhs 2f 108 | subs r2,#126 @ remove exponent bias: can now be -126..+127 109 | bx r14 110 | 111 | 2: @ here with special-case values 112 | cmp r0,#0 113 | mov r0,r4 @ set mantissa to +1 114 | bpl 3f 115 | rsbs r0,#0 @ zero/denormal/Inf/NaN: mantissa=+/-1 116 | 3: 117 | subs r2,#126 @ zero/denormal: exponent -> -127; Inf, NaN: exponent -> 128 118 | lsls r2,#2 @ zero/denormal: exponent -> -508; Inf, NaN: exponent -> 512 119 | adds r2,#128 @ zero/denormal: exponent -> -380; Inf, NaN: exponent -> 640 120 | bx r14 121 | 122 | @ normalise and pack signed mantissa in r0 nominally 3Q29, signed exponent in r2-> IEEE single in r0 123 | @ trashes r4, preserves r1,r3 124 | @ r5: "sticky bits", must be zero iff all result bits below r0 are zero for correct rounding 125 | packx: 126 | lsrs r4,r0,#31 @ save sign bit 127 | lsls r4,r4,#31 @ sign now in b31 128 | bpl 2f @ skip if positive 129 | cmp r5,#0 130 | beq 11f 131 | adds r0,#1 @ fiddle carry in to following rsb if sticky bits are non-zero 132 | 11: 133 | rsbs r0,#0 @ can now treat r0 as unsigned 134 | packx0: 135 | bmi 3f @ catch r0=0x80000000 case 136 | 2: 137 | subs r2,#1 @ normalisation loop 138 | adds r0,r0 139 | beq 1f @ zero? special case 140 | bpl 2b @ normalise so leading "1" in bit 31 141 | 3: 142 | adds r2,#129 @ (mis-)offset exponent 143 | bne 12f @ special case: highest denormal can round to lowest normal 144 | adds r0,#0x80 @ in special case, need to add 256 to r0 for rounding 145 | bcs 4f @ tripped carry? then have leading 1 in C as required 146 | 12: 147 | adds r0,#0x80 @ rounding 148 | bcs 4f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits) 149 | cmp r5,#0 150 | beq 7f @ sticky bits zero? 
151 | 8: 152 | lsls r0,#1 @ remove leading 1 153 | 9: 154 | subs r2,#1 @ compensate exponent on this path 155 | 4: 156 | cmp r2,#254 157 | bge 5f @ overflow? 158 | adds r2,#1 @ correct exponent offset 159 | ble 10f @ denormal/underflow? 160 | lsrs r0,#9 @ align mantissa 161 | lsls r2,#23 @ align exponent 162 | orrs r0,r2 @ assemble exponent and mantissa 163 | 6: 164 | orrs r0,r4 @ apply sign 165 | 1: 166 | bx r14 167 | 168 | 5: 169 | movs r0,#0xff @ create infinity 170 | lsls r0,#23 171 | b 6b 172 | 173 | 10: 174 | movs r0,#0 @ create zero 175 | bx r14 176 | 177 | 7: @ sticky bit rounding case 178 | lsls r5,r0,#24 @ check bottom 8 bits of r0 179 | bne 8b @ in rounding-tie case? 180 | lsrs r0,#9 @ ensure even result 181 | lsls r0,#10 182 | b 9b 183 | 184 | @ unpack two arguments (r0,r1) and shift one down to have common exponent, returned in r2; note that arguments are exchanged 185 | @ sticky bits shifted off bottom of smaller argument in r5 186 | @ following code is unnecessarily general for fadd, but is shared with atan2 187 | unpackxyalign: 188 | push {r14} 189 | bl unpackxy 190 | lsls r0,r0,#6 @ Q29 191 | lsls r1,r1,#6 @ Q29 192 | subs r4,r2,r3 @ calculate shift 193 | bge 1f @ x>=y? 
194 | mov r2,r3 @ no: take common exponent from y 195 | mov r5,r0 @ potential sticky bits from x 196 | rsbs r4,#0 @ make shift positive 197 | asrs r0,r4 198 | cmp r4,#32 199 | blo 2f 200 | movs r0,#0 @ large shift, so all bits are sticky and result is zero 201 | pop {r15} 202 | 1: 203 | mov r5,r1 @ common exponent from x; potential sticky bits from y 204 | asrs r1,r4 205 | cmp r4,#32 206 | blo 2f 207 | movs r1,#0 @ large shift, so all bits are sticky and result is zero 208 | pop {r15} 209 | 2: 210 | rsbs r4,#0 211 | adds r4,#32 212 | lsls r5,r4 @ extract sticky bits 213 | pop {r15} 214 | 215 | .thumb_func 216 | qfp_fsub: 217 | movs r2,#1 @ subtract: flip sign bit of second argument and fall through to fadd 218 | lsls r2,#31 219 | eors r1,r2 220 | .thumb_func 221 | qfp_fadd: 222 | push {r4,r5,r14} 223 | bl unpackxyalign 224 | adds r0,r1 @ do addition 225 | bne 2f @ not in Inf-Inf case? 226 | cmp r2,#200 227 | blt 2f 228 | movs r0,#1 229 | lsls r0,#29 @ for Inf-Inf, set mantissa to +1 to prevent zero result 230 | 2: 231 | packret: @ common return point: "pack and return" 232 | bl packx 233 | pop {r4,r5,r15} 234 | 235 | @ signed multiply r0 1Q23 by r1 4Q23, result in r0 7Q25, sticky bits in r5 236 | @ trashes r3,r4 237 | mul0: 238 | uxth r3,r0 @ Q23 239 | asrs r4,r1,#16 @ Q7 240 | muls r3,r4 @ L*H, Q30 signed 241 | asrs r4,r0,#16 @ Q7 242 | uxth r5,r1 @ Q23 243 | muls r4,r5 @ H*L, Q30 signed 244 | adds r3,r4 @ sum of middle partial products 245 | uxth r4,r0 246 | muls r4,r5 @ L*L, Q46 unsigned 247 | lsls r5,r4,#16 @ initialise sticky bits from low half of low partial product 248 | lsrs r4,#16 @ Q25 249 | adds r3,r4 @ add high half of low partial product to sum of middle partial products 250 | @ (cannot generate carry by limits on input arguments) 251 | asrs r0,#16 @ Q7 252 | asrs r1,#16 @ Q7 253 | muls r0,r1 @ H*H, Q14 signed 254 | lsls r0,#11 @ high partial product Q25 255 | lsls r1,r3,#27 @ sticky 256 | orrs r5,r1 @ collect further sticky bits 257 | asrs r1,r3,#5 @ 
middle partial products Q25 258 | adds r0,r1 @ final result 259 | bx r14 260 | 261 | .thumb_func 262 | qfp_fcmp: 263 | movs r2,#1 @ initialise result 264 | lsls r3,r2,#31 @ r3=0x80000000 265 | tst r0,r3 @ check sign of first argument 266 | beq 1f 267 | subs r0,r3,r0 @ convert to 2's complement form for direct comparison 268 | 1: 269 | tst r1,r3 @ repeat for second argument 270 | beq 2f 271 | subs r1,r3,r1 272 | 2: 273 | subs r0,r1 @ perform comparison 274 | beq 4f @ equal? return 0 275 | bgt 3f @ r0>r1? return +1 276 | rsbs r2,#0 @ r0>20 Q7 365 | ldrb r4,[r4,r3] @ m=rcpapp[(y>>20)&7]; // Q8, .5>=21; // Q12 371 | muls r3,r4 @ s*=m; // Q28 372 | asrs r3,#12 @ s>>=12; // Q16 373 | subs r4,r3 @ m=m-s; // Q16 374 | 375 | mov r3,r4 @ s=y*m // Q39 second Newton-Raphson iteration 376 | muls r4,r0 @ ... 377 | asrs r4,#16 @ s>>=16; // Q23 378 | muls r4,r3 @ s*=m; // Q39 379 | lsls r3,#8 @ m<<=8; // Q24 380 | asrs r4,#15 @ s>>=15; // Q24 381 | subs r3,r4 @ m=m-s; // Q24 382 | 383 | lsls r4,r3,#7 @ \/ s=y*m; // Q47 third Newton-Raphson iteration 384 | muls r3,r0 @ /\ m<<=7; // Q31 385 | asrs r3,#15 @ s>>=15; // Q32 386 | lsrs r0,r4,#16 @ s*=(m>>16); // Q47 387 | muls r3,r0 @ ... 388 | asrs r3,#16 @ s>>=16; // Q31 389 | subs r0,r4,r3 @ m=m-s; // Q31 390 | div0: 391 | adds r0,#7 @ rounding; reduce systematic error 392 | lsrs r0,#4 @ Q27 393 | b fmul0 @ drop into multiplication code to calculate result 394 | 395 | @ The fast square root routine uses an initial approximation to the reciprocal of the square root of the argument based 396 | @ on the top four bits of the mantissa (possibly shifted one place to make the exponent even). It then performs three 397 | @ Newton-Raphson iterations, resulting in about 28-29 bits of accuracy. This reciprocal is then multiplied by 398 | @ the original argument to produce the result. 
399 | @ Again, the fixed-point calculation is carefully implemented to preserve accuracy, and similar comments to those 400 | @ made above on the fast division routine apply. 401 | @ The reciprocal square root calculation has been tested for all possible (possibly shifted) input mantissa values. 402 | .thumb_func 403 | qfp_fsqrt_fast: 404 | push {r4,r5,r14} 405 | bl unpackx 406 | movs r1,r0 407 | bmi infret @ negative? return -Inf 408 | asrs r0,r2,#1 @ check LSB of exponent 409 | bcc 1f 410 | lsls r1,#1 @ was odd: double mantissa; mantissa y now 1..4 Q23 411 | 1: 412 | adds r2,#4 @ correction for packing 413 | adr r4,rsqrtapp-4@ first four table entries are never accessed because of the mantissa's leading 1 414 | lsrs r3,r1,#21 @ y>>21 Q2 415 | ldrb r4,[r4,r3] @ initial approximation to reciprocal square root m Q8 416 | 417 | lsrs r0,r1,#7 @ y>>7 // Q16 first Newton-Raphson iteration 418 | muls r0,r4 @ m*y 419 | muls r0,r4 @ s=m*y*y // Q32 420 | asrs r0,#12 @ s>>12 421 | muls r0,r4 @ m*s // Q28 422 | asrs r0,#13 @ m*s // Q15 423 | lsls r4,#8 @ m // Q16 424 | subs r4,r0 @ m=(m<<8)-(s>>13) // Q16-Q15/2 -> Q16 425 | 426 | mov r0,r4 @ // second Newton-Raphson iteration 427 | muls r0,r0 @ u=m*m // Q32 428 | lsrs r0,#16 @ u>>16 // Q16 429 | lsrs r3,r1,#7 @ y>>7 // Q16 430 | muls r0,r3 @ s=u*(y>>7) // Q32 431 | asrs r0,#12 @ s>>12 // Q20 432 | muls r0,r4 @ s*m // Q36 433 | asrs r0,#21 @ s*m // Q15 434 | subs r4,r0 @ m=m-s // Q16-Q15/2 435 | 436 | mov r0,r4 @ // third Newton-Raphson iteration 437 | muls r0,r0 @ u=m*m // Q32 438 | lsrs r3,r0,#12 @ now multiply u and y in two parts: u>>12 439 | muls r3,r1 @ first partial product (u>>12)*y Q43 440 | lsls r0,#20 441 | lsrs r0,#20 @ u&0xfff 442 | lsrs r5,r1,#12 @ y>>12 443 | muls r0,r5 @ second partial product (u&0xfff)*(y>>12) Q43 444 | add r0,r3 @ s=u*y // Q43 445 | asrs r0,#15 @ s>>15 // Q28 446 | muls r0,r4 @ (s>>15)*m // Q44 447 | lsls r4,#13 @ m<<13 // Q29 448 | asrs r0,#16 @ s>>16 // Q28 449 | subs r0,r4,r0 @ // 
Q29-Q28/2 450 | 451 | asrs r2,#1 @ halve exponent 452 | bcc div0 @ was y shifted? 453 | lsrs r0,#1 454 | lsls r1,#1 @ shift y back 455 | b div0 @ round and complete with multiplication 456 | 457 | .align 2 458 | 459 | @ round(2^15./[136:16:248]) 460 | rcpapp: 461 | .byte 0xf1,0xd8,0xc3,0xb2, 0xa4,0x98,0x8d,0x84 462 | 463 | @ round(sqrt(2^22./[72:16:248])) 464 | rsqrtapp: 465 | .byte 0xf1,0xda,0xc9,0xbb, 0xb0,0xa6,0x9e,0x97, 0x91,0x8b,0x86,0x82 466 | 467 | .endif 468 | 469 | .if include_conversions 470 | 471 | @ convert float to signed int, rounding towards -Inf, clamping 472 | .thumb_func 473 | qfp_float2int: 474 | movs r1,#0 @ fall through 475 | 476 | @ convert float in r0 to signed fixed point in r0, clamping 477 | .thumb_func 478 | qfp_float2fix: 479 | push {r4,r14} 480 | bl unpackx 481 | add r2,r1 @ incorporate binary point position into exponent 482 | subs r2,#23 @ r2 is now amount of left shift required 483 | blt 1f @ requires right shift? 484 | cmp r2,#7 @ overflow? 485 | ble 4f 486 | 3: @ overflow 487 | asrs r1,r0,#31 @ +ve:0 -ve:0xffffffff 488 | mvns r1,r1 @ +ve:0xffffffff -ve:0 489 | movs r0,#1 490 | lsls r0,#31 491 | 5: 492 | eors r0,r1 @ +ve:0x7fffffff -ve:0x80000000 (unsigned path: 0xffffffff) 493 | pop {r4,r15} 494 | 1: 495 | rsbs r2,#0 @ right shift for r0, >0 496 | cmp r2,#32 497 | blt 2f @ more than 32 bits of right shift? 498 | movs r2,#32 499 | 2: 500 | asrs r0,r0,r2 501 | pop {r4,r15} 502 | 503 | @ unsigned version 504 | .thumb_func 505 | qfp_float2uint: 506 | movs r1,#0 @ fall through 507 | .thumb_func 508 | qfp_float2ufix: 509 | push {r4,r14} 510 | bl unpackx 511 | add r2,r1 @ incorporate binary point position into exponent 512 | movs r1,r0 513 | bmi 5b @ negative? return zero 514 | subs r2,#23 @ r2 is now amount of left shift required 515 | blt 1b @ requires right shift? 516 | mvns r1,r0 @ ready to return 0xffffffff 517 | cmp r2,#8 @ overflow? 
518 | bgt 5b 519 | 4: 520 | lsls r0,r0,r2 @ result fits, left shifted 521 | pop {r4,r15} 522 | 523 | @ convert signed int to float, rounding 524 | .thumb_func 525 | qfp_int2float: 526 | movs r1,#0 @ fall through 527 | 528 | @ convert signed fix to float, rounding; number of r0 bits after point in r1 529 | .thumb_func 530 | qfp_fix2float: 531 | push {r4,r5,r14} 532 | 1: 533 | movs r2,#29 534 | subs r2,r1 @ fix exponent 535 | packretns: @ pack and return, sticky bits=0 536 | movs r5,#0 537 | b packret 538 | 539 | @ unsigned version 540 | .thumb_func 541 | qfp_uint2float: 542 | movs r1,#0 @ fall through 543 | .thumb_func 544 | qfp_ufix2float: 545 | push {r4,r5,r14} 546 | cmp r0,#0 547 | bge 1b @ treat <2^31 as signed 548 | movs r2,#30 549 | subs r2,r1 @ fix exponent 550 | lsls r5,r0,#31 @ one sticky bit 551 | lsrs r0,#1 552 | b packret 553 | 554 | .endif 555 | 556 | .if include_scientific 557 | 558 | @ All the scientific functions are implemented using the CORDIC algorithm. For notation, 559 | @ details not explained in the comments below, and a good overall survey see 560 | @ "50 Years of CORDIC: Algorithms, Architectures, and Applications" by Meher et al., 561 | @ IEEE Transactions on Circuits and Systems Part I, Volume 56 Issue 9. 
562 | 563 | @ Register use: 564 | @ r0: x 565 | @ r1: y 566 | @ r2: z/omega 567 | @ r3: coefficient pointer 568 | @ r4,r8: m 569 | @ r5: i (shift) 570 | 571 | cordic_start: @ initialisation 572 | mov r7,r8 573 | push {r7} 574 | movs r5,#0 @ initial shift=0 575 | mov r8,r4 576 | b 5f 577 | 578 | cordic_vstep: @ one step of algorithm in vector mode 579 | cmp r1,#0 @ check sign of y 580 | bgt 4f 581 | b 1f 582 | cordic_rstep: @ one step of algorithm in rotation mode 583 | cmp r2,#0 @ check sign of angle 584 | bge 1f 585 | 4: 586 | subs r1,r6 @ negative rotation: y=y-(x>>i) 587 | rsbs r7,#0 588 | adds r2,r4 @ accumulate angle 589 | b 2f 590 | 1: 591 | adds r1,r6 @ positive rotation: y=y+(x>>i) 592 | subs r2,r4 @ accumulate angle 593 | 2: 594 | mov r4,r8 595 | muls r7,r4 @ apply sign from m 596 | subs r0,r7 @ finish rotation: x=x{+/-}(y>>i) 597 | 5: 598 | ldr r4,[r3] @ fetch next angle from table 599 | adds r3,#4 @ bump pointer 600 | lsrs r4,#1 @ repeated angle? 601 | bcs 3f 602 | adds r5,#1 @ adjust shift if not 603 | 3: 604 | mov r6,r0 605 | asrs r6,r5 @ x>>i 606 | mov r7,r1 607 | asrs r7,r5 @ y>>i 608 | lsrs r4,#1 @ shift end flag into carry 609 | bx r14 610 | 611 | @ CORDIC rotation mode 612 | cordic_rot: 613 | push {r6,r7,r14} 614 | bl cordic_start @ initialise 615 | 1: 616 | bl cordic_rstep 617 | bcc 1b @ step until table finished 618 | asrs r6,r0,#14 @ remaining small rotations can be linearised: see IV.B of paper referenced above 619 | asrs r7,r1,#14 620 | asrs r2,#3 621 | muls r6,r2 @ all remaining CORDIC steps in a multiplication 622 | muls r7,r2 623 | mov r4,r8 624 | muls r7,r4 625 | asrs r6,#12 626 | asrs r7,#12 627 | subs r0,r7 @ x=x{+/-}(yz>>k) 628 | adds r1,r6 @ y=y+(xz>>k) 629 | cordic_exit: 630 | pop {r7} 631 | mov r8,r7 632 | pop {r6,r7,r15} 633 | 634 | @ CORDIC vector mode 635 | cordic_vec: 636 | push {r6,r7,r14} 637 | bl cordic_start @ initialise 638 | 1: 639 | bl cordic_vstep 640 | bcc 1b @ step until table finished 641 | 4: 642 | cmp r1,#0 @ 
continue as in cordic_vstep but without using table; x is not affected as y is small 643 | bgt 2f @ check sign of y 644 | adds r1,r6 @ positive rotation: y=y+(x>>i) 645 | subs r2,r4 @ accumulate angle 646 | b 3f 647 | 2: 648 | subs r1,r6 @ negative rotation: y=y-(x>>i) 649 | adds r2,r4 @ accumulate angle 650 | 3: 651 | asrs r6,#1 652 | asrs r4,#1 @ next "table entry" 653 | bne 4b 654 | b cordic_exit 655 | 656 | .thumb_func 657 | qfp_fsin: @ calculate sin and cos using CORDIC rotation method 658 | push {r4,r5,r14} 659 | movs r1,#24 660 | bl qfp_float2fix @ range reduction by repeated subtraction/addition in fixed point 661 | ldr r4,pi_q29 662 | lsrs r4,#4 @ 2pi Q24 663 | 1: 664 | subs r0,r4 665 | bge 1b 666 | 1: 667 | adds r0,r4 668 | bmi 1b @ now in range 0..2pi 669 | lsls r2,r0,#2 @ z Q26 670 | lsls r5,r4,#1 @ pi Q26 (r4=pi/2 Q26) 671 | ldr r0,=#0x136e9db4 @ initialise CORDIC x,y with scaling 672 | movs r1,#0 673 | 1: 674 | cmp r2,r4 @ >pi/2? 675 | blt 2f 676 | subs r2,r5 @ reduce range to -pi/2..pi/2 677 | rsbs r0,#0 @ rotate vector by pi 678 | b 1b 679 | 2: 680 | lsls r2,#3 @ Q29 681 | adr r3,tab_cc @ circular coefficients 682 | movs r4,#1 @ m=1 683 | bl cordic_rot 684 | adds r1,#9 @ fiddle factor to make sin(0)==0 685 | movs r2,#0 @ exponents to zero 686 | movs r3,#0 687 | movs r5,#0 @ no sticky bits 688 | bl packx @ pack cosine 689 | bl xchxy 690 | b packretns @ pack sine 691 | 692 | .thumb_func 693 | qfp_fcos: 694 | push {r14} 695 | bl qfp_fsin 696 | mov r0,r1 @ extract cosine result 697 | pop {r15} 698 | 699 | .thumb_func 700 | qfp_ftan: 701 | push {r4,r5,r14} 702 | bl qfp_fsin @ sine in r0/r2, cosine in r1/r3 703 | .if include_faster 704 | b fdiv_fast_n @ sin/cos 705 | .else 706 | b fdiv_n 707 | 708 | .endif 709 | 710 | .thumb_func 711 | qfp_fexp: @ calculate cosh and sinh using rotation method; add to obtain exp 712 | push {r4,r5,r14} 713 | movs r1,#24 714 | bl qfp_float2fix @ Q24: covers entire valid input range 715 | asrs r1,r0,#16 @ Q8 716 | ldr 
r2,=#5909 @ log_2(e) Q12 717 | muls r1,r2 @ estimate exponent of result Q20 718 | asrs r1,#19 @ Q1 719 | adds r1,#1 @ rounding 720 | asrs r1,#1 @ rounded estimate of exponent of result 721 | push {r1} @ save for later 722 | lsls r2,r0,#5 @ Q29 723 | ldr r0,=#0x162e42ff @ ln(2) Q29 724 | muls r1,r0 @ accurate contribution of estimated exponent 725 | subs r2,r1 @ residual to be exponentiated, approximately -.5..+.5 Q29 726 | ldr r0,=#0x2c9e15ca @ initialise CORDIC x,y with scaling 727 | movs r1,#0 728 | adr r3,tab_ch @ hyperbolic coefficients 729 | mvns r4,r1 @ m=-1 730 | bl cordic_rot @ calculate cosh and sinh 731 | add r0,r1 @ exp=cosh+sinh 732 | pop {r2} @ recover exponent 733 | b packretns @ pack result 734 | 735 | .thumb_func 736 | qfp_fsqrt: @ calculate sqrt and ln using vector method 737 | push {r4,r5,r14} 738 | bl unpackx 739 | movs r1,r0 @ -ve argument? 740 | bmi 3f @ return -Inf, -Inf 741 | ldr r1,=#0x0593C2B9 @ scale factor for CORDIC 742 | bl mul0 @ Q29 743 | asrs r1,r2,#1 @ halve exponent 744 | bcc 1f 745 | adds r1,#1 @ was odd: add 1 and shift mantissa 746 | asrs r0,#1 747 | 1: 748 | push {r1} @ save exponent/2 for later 749 | mov r1,r0 750 | ldr r3,=#0x0593C2B9 @ re-use constant 751 | lsls r3,#2 752 | adds r0,r3 @ "a+1" 753 | subs r1,r3 @ "a-1" 754 | movs r2,#0 755 | adr r3,tab_ch @ hyperbolic coefficients 756 | mvns r4,r2 @ m=-1 757 | bl cordic_vec 758 | mov r1,r2 @ keep ln result 759 | pop {r2} @ retrieve exponent/2 760 | 2: 761 | movs r3,r2 762 | b packretns @ pack sqrt result 763 | 764 | 3: 765 | movs r2,#255 766 | b 2b 767 | 768 | .thumb_func 769 | qfp_fln: 770 | push {r4,r5,r14} 771 | bl qfp_fsqrt @ get unpacked ln in r1/r3; exponent has been halved 772 | cmp r3,#70 @ ln(Inf)? 773 | bgt 2f @ return Inf 774 | rsbs r3,#0 775 | cmp r3,#70 776 | bgt 1f @ ln(0)? 
return -Inf 777 | 3: 778 | ldr r0,=#0x0162e430 @ ln(4) Q24 779 | muls r0,r3 @ contribution from negated, halved exponent 780 | adds r1,#8 @ round result of ln 781 | asrs r1,#4 @ Q24 782 | subs r0,r1,r0 @ add in contribution from (negated) exponent 783 | movs r2,#5 @ pack expects Q29 784 | b packretns 785 | 1: 786 | mvns r0,r0 @ make result -Inf 787 | 2: 788 | movs r2,#255 789 | b packretns 790 | 791 | .thumb_func 792 | qfp_fatan2: 793 | push {r4,r5,r14} 794 | bl unpackxyalign @ convert to fixed point (ensure common exponent, which is discarded) 795 | movs r2,#0 @ initial angle 796 | cmp r0,#0 @ x negative 797 | bge 5f 798 | rsbs r0,#0 @ rotate to 1st/4th quadrants 799 | rsbs r1,#0 800 | ldr r2,pi_q29 @ pi Q29 801 | 5: 802 | adr r3,tab_cc @ circular coefficients 803 | movs r4,#1 @ m=1 804 | bl cordic_vec @ also produces magnitude (with scaling factor 1.646760119), which is discarded 805 | mov r0,r2 @ result here is -pi/2..3pi/2 Q29 806 | ldr r2,pi_q29 @ pi Q29 807 | adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case 808 | bcs 6f @ -pi/2..0? 
leave result as is 809 | subs r4,r0,r2 @ <pi? leave result as is 810 | bmi 6f 811 | subs r0,r4,r2 @ >pi: take off 2pi 812 | 6: 813 | subs r0,#1 @ fiddle factor so atan2(0,1)==0 814 | movs r2,#0 @ exponent for pack 815 | b packretns 816 | 817 | .align 2 818 | .ltorg 819 | 820 | @ first entry in following table is pi Q29 821 | pi_q29: 822 | @ circular CORDIC coefficients: atan(2^-i), b0=flag for preventing shift, b1=flag for end of table 823 | tab_cc: 824 | .word 0x1921fb54*4+1 @ no shift before first iteration 825 | .word 0x0ed63383*4+0 826 | .word 0x07d6dd7e*4+0 827 | .word 0x03fab753*4+0 828 | .word 0x01ff55bb*4+0 829 | .word 0x00ffeaae*4+0 830 | .word 0x007ffd55*4+0 831 | .word 0x003fffab*4+0 832 | .word 0x001ffff5*4+0 833 | .word 0x000fffff*4+0 834 | .word 0x0007ffff*4+0 835 | .word 0x00040000*4+0 836 | .word 0x00020000*4+0+2 @ +2 marks end 837 | 838 | @ hyperbolic CORDIC coefficients: atanh(2^-i), flags as above 839 | tab_ch: 840 | .word 0x1193ea7b*4+0 841 | .word 0x1193ea7b*4+1 @ repeat i=1 842 | .word 0x082c577d*4+0 843 | .word 0x04056247*4+0 844 | .word 0x0200ab11*4+0 845 | .word 0x0200ab11*4+1 @ repeat i=4 846 | .word 0x01001559*4+0 847 | .word 0x008002ab*4+0 848 | .word 0x00400055*4+0 849 | .word 0x0020000b*4+0 850 | .word 0x00100001*4+0 851 | .word 0x00080001*4+0 852 | .word 0x00040000*4+0 853 | .word 0x00020000*4+0 854 | .word 0x00020000*4+1+2 @ repeat i=12 855 | 856 | .endif 857 | 858 | qfp_lib_end: 859 | -------------------------------------------------------------------------------- /qfplib.h: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Mark Owen 2 | // http://www.quinapalus.com 3 | // E-mail: qfp@quinapalus.com 4 | // 5 | // Thanks to Bill Westfield 6 | // 7 | // This file is free software: you can redistribute it and/or modify 8 | // it under the terms of version 2 of the GNU General Public License 9 | // as published by the Free Software Foundation. 
10 | // 11 | // This file is distributed in the hope that it will be useful, 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | // GNU General Public License for more details. 15 | // 16 | // You should have received a copy of the GNU General Public License 17 | // along with this file. If not, see <http://www.gnu.org/licenses/> or 18 | // write to the Free Software Foundation, Inc., 51 Franklin Street, 19 | // Fifth Floor, Boston, MA 02110-1301, USA. 20 | 21 | #ifndef _QFPLIB_H_ 22 | #define _QFPLIB_H_ 23 | 24 | #ifdef __cplusplus 25 | extern "C" { 26 | #endif 27 | 28 | extern float qfp_fadd(float x,float y); 29 | extern float qfp_fsub(float x,float y); 30 | extern float qfp_fmul(float x,float y); 31 | extern float qfp_fdiv(float x,float y); 32 | extern float qfp_fdiv_fast(float x,float y); 33 | extern int qfp_float2int(float x); 34 | extern int qfp_float2fix(float x,int y); 35 | extern unsigned int qfp_float2uint(float x); 36 | extern unsigned int qfp_float2ufix(float x,int y); 37 | extern float qfp_int2float(int x); 38 | extern float qfp_fix2float(int x,int y); 39 | extern float qfp_uint2float(unsigned int x); 40 | extern float qfp_ufix2float(unsigned int x,int y); 41 | extern int qfp_fcmp(float x,float y); 42 | extern float qfp_fcos(float x); 43 | extern float qfp_fsin(float x); 44 | extern float qfp_ftan(float x); 45 | extern float qfp_fatan2(float y,float x); 46 | extern float qfp_fexp(float x); 47 | extern float qfp_fln(float x); 48 | extern float qfp_fsqrt(float x); 49 | extern float qfp_fsqrt_fast(float x); 50 | 51 | #ifdef __cplusplus 52 | } // extern "C" 53 | #endif 54 | #endif 55 | --------------------------------------------------------------------------------