├── .gitignore
├── CMakeLists.txt
├── LICENCE
├── README.md
├── build
    ├── qfpio.s.o
    └── qfplib.s.o
├── library.json
├── qfpio.S
├── qfpio.h
├── qfplib.S
└── qfplib.h


/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | 
3 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Build Qfplib, the ARM Cortex-M0 floating-point library
 2 | #
 3 | # CODAL_UTILS_LOCATION and INCLUDE_DIRS are not set in this file; they are
 4 | # expected to be provided by the including (codal) build.
 5 | 
 6 | project(qfplib)
 7 | 
 8 | # enable verbose log. Must be after project()
 9 | # set(CMAKE_VERBOSE_MAKEFILE ON)
10 | # set(ENV{VERBOSE} "1")
11 | # message("VERBOSE: $ENV{VERBOSE}")
12 | 
13 | # apply codal build settings
14 | include("${CODAL_UTILS_LOCATION}")
15 | 
16 | # CMAKE_CURRENT_SOURCE_DIR is the directory containing this CMakeLists.txt,
17 | # i.e. the qfplib checkout (qfplib.S, qfpio.S and their headers)
18 | set(LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
19 | 
20 | ###############################################################################
21 | # qfplib
22 | 
23 | set(QFPLIB_DIR "${LIB_DIR}")
24 | message("QFPLIB_DIR: ${QFPLIB_DIR}")
25 | 
26 | # sources are listed explicitly; headers live next to them in QFPLIB_DIR
27 | set(TOP_LEVEL_INCLUDE_DIRS "${QFPLIB_DIR}")
28 | # RECURSIVE_FIND_FILE(TOP_LEVEL_SOURCE_FILES "${QFPLIB_DIR}" "*.S")
29 | set(
30 |     TOP_LEVEL_SOURCE_FILES
31 |     "qfpio.S"
32 |     "qfplib.S"
33 | )
34 | 
35 | # create library
36 | add_library(qfplib ${TOP_LEVEL_SOURCE_FILES})
37 | message("TOP_LEVEL_SOURCE_FILES: ${TOP_LEVEL_SOURCE_FILES}")
38 | 
39 | ###############################################################################
40 | # global settings
41 | 
42 | # target for STM32 Blue Pill (Arm Cortex-M3), even though code is for Arm Cortex-M0 (armv6-m).
43 | # PARENT_SCOPE: the value is written to the including directory's scope, not this one.
44 | set(CMAKE_SYSTEM_PROCESSOR "armv7-m" PARENT_SCOPE)
45 | 
46 | # include directories, scoped to the qfplib target instead of the whole directory:
47 | # - PRIVATE: INCLUDE_DIRS from the parent build, needed only to build qfplib itself
48 | # - PUBLIC:  the qfplib headers, also exposed to targets that link against qfplib
49 | target_include_directories(
50 |     qfplib
51 |     PRIVATE ${INCLUDE_DIRS}
52 |     PUBLIC ${TOP_LEVEL_INCLUDE_DIRS}
53 | )
54 | 
55 | # include all functions plus fast divide and square root. not required, all are selected by default.
56 | # set(
57 | #     CMAKE_ASM_FLAGS 
58 | #     "${CMAKE_ASM_FLAGS} -Dinclude_faster=1 -Dinclude_conversions=1 -Dinclude_scientific=1"
59 | # )
60 | 


--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
  1 | 		    GNU GENERAL PUBLIC LICENSE
  2 | 		       Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 | 			    Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 | 		    GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 | 			    NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 | 		     END OF TERMS AND CONDITIONS
281 | 
282 | 	    How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Qfplib: an ARM Cortex-M0 floating-point library in 1 kbyte
 2 | 
 3 | From: https://www.quinapalus.com/qfplib.html
 4 | 
 5 | Qfplib is open source, licensed under version 2 of the GNU GPL. A copy
 6 | of that licence is included in this archive. The archive also contains:
 7 | 
 8 | - qfplib.S, the source code to qfplib. The GNU assembler syntax is used.
 9 | 
10 | - qfplib.h, a C header file giving prototypes for the qfplib functions.
11 | 
12 | - qfpio.S, the source code to qfpio, routines for converting between
13 | strings and floating-point values.
14 | 
15 | - qfpio.h, a C header file giving prototypes for the qfpio functions.
16 | 
17 | Visit http://www.quinapalus.com/qfplib.html for more information.
18 | 


--------------------------------------------------------------------------------
/build/qfpio.s.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lupyuen/qfplib/db64866a734417a718af5de4fc419ef2f23f1536/build/qfpio.s.o


--------------------------------------------------------------------------------
/build/qfplib.s.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lupyuen/qfplib/db64866a734417a718af5de4fc419ef2f23f1536/build/qfplib.s.o


--------------------------------------------------------------------------------
/library.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "qfplib",
 3 |     "frameworks": "*",
 4 |     "platforms": "ststm32",
 5 |     "repository": {
 6 |         "type": "git",
 7 |         "url": "https://github.com/lupyuen/qfplib"
 8 |     },
 9 |     "description": "Stub for compiling qfplib under PlatformIO",
10 |     "build": {        
11 |         "unflags": [
12 |         ],
13 |         "flags": [
14 |         ],
15 |         "srcFilter": [
16 |             "+<./*.S>"
17 |         ]
18 |     }
19 | }


--------------------------------------------------------------------------------
/qfpio.S:
--------------------------------------------------------------------------------
  1 | @ Copyright 2015 Mark Owen
  2 | @ http://www.quinapalus.com
  3 | @ E-mail: qfp@quinapalus.com
  4 | @
  5 | @ This file is free software: you can redistribute it and/or modify
  6 | @ it under the terms of version 2 of the GNU General Public License
  7 | @ as published by the Free Software Foundation.
  8 | @
  9 | @ This file is distributed in the hope that it will be useful,
 10 | @ but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | @ GNU General Public License for more details.
 13 | @
 14 | @ You should have received a copy of the GNU General Public License
 15 | @ along with this file.  If not, see <http://www.gnu.org/licenses/> or
 16 | @ write to the Free Software Foundation, Inc., 51 Franklin Street,
 17 | @ Fifth Floor, Boston, MA  02110-1301, USA.
 18 | 
 19 | .syntax unified
 20 | .cpu cortex-m0
 21 | .thumb
 22 | 
 23 | @ exported symbols
 24 | 
 25 | .global qfp_float2str
 26 | .global qfp_str2float
 27 | 
 28 | @ C code in comments is intended to give an idea of the function
 29 | @ of the following assembler code. The translation is not exact.
 30 | 
 31 | @ // multiply by 128/125: used by conversions in both directions
 32 | @ unsigned int div125(unsigned int u) {
 33 | @   unsigned int a,b,c,k0=0x4189; // 0x4189~=128/125 Q14
 34 | @   a=u>>14;
 35 | @   a=a*k0;               // calculate first approximation to answer, good to about 14 bits
 36 | @   b=((a>>1)+(a>>2))>>4;
 37 | @   b=a-(b>>1)-(b&1);     // find error in approximation
 38 | @   c=(u-b)*k0;
 39 | @   return a+(c>>14)+1;   // result good to about 28 bits
 40 | @   }
 41 | 
 42 | div125:           @ in: r0=u; out: r0=result; r1-r4 are saved and restored, flags are changed
 43 |  push {r1-r4,r14} @ save scratch registers and the return address
 44 |  ldr r4,=#0x4189  @ k0=0x4189;
 45 |  lsrs r1,r0,#14   @ a=u>>14;
 46 |  muls r1,r4       @ a=a*k0;
 47 |  lsrs r2,r1,#1    @ a>>1
 48 |  lsrs r3,r1,#2    @ a>>2
 49 |  add r2,r3        @ (a>>1)+(a>>2)
 50 |  lsrs r2,#4       @ b=((a>>1)+(a>>2))>>4;
 51 |  subs r0,r1       @ u-a
 52 |  lsrs r2,#1       @ b>>1
 53 |  adcs r0,r2       @ u-a+(b>>1)+(b&1)
 54 |  muls r0,r4       @ c=(u-b)*k0;
 55 |  lsrs r0,#14      @ c>>14
 56 |  add r0,r1        @ a+(c>>14)
 57 |  adds r0,#1       @ a+(c>>14)+1
 58 |  pop {r1-r4,r15}  @ restore r1-r4; popping the saved r14 into r15 (pc) returns
 59 | 
 60 | .ltorg
 61 | 
 62 | opoint: @ output decimal point
 63 |  adds r5,#2       @ add 2 to r5 (NOTE(review): presumably records in r5 that the point was written; the reader of r5 is not visible here - confirm)
 64 |  movs r3,#'.'
 65 |  b och            @ skip the digit conversion and emit the character as is
 66 | ozero: @ output '0'
 67 |  movs r3,#0       @ digit value 0, then fall through into odig
 68 | odig:  @ output one digit from r3
 69 |  adds r3,#'0'     @ digit value to ASCII, then fall through into och
 70 | och:   @ output one character from r3
 71 |  strb r3,[r1]     @ store the low byte of r3 at the address in r1
 72 |  adds r1,#1       @ advance r1 past the byte just written
 73 |  bx r14           @ return to the caller
 74 | 
 75 | naninf: @ r4=0 for Inf, otherwise NaN
 76 |  ldr r3,=#0x00666e49 @ "fnI": the characters I,n,f packed with I in the lowest byte
 77 |  cmp r4,#0
 78 |  beq 1f              @ r4==0: keep the Inf text
 79 |  ldr r3,=#0x004e614e @ "NaN"
 80 | 1:                   @ emit the bytes packed in r3, lowest byte first, until r3 is zero
 81 |  bl och
 82 |  lsrs r3,#8          @ shift the next character into the low byte; Z is set once none remain
 83 |  bne 1b
 84 |  b 10f               @ continue at the next local label 10 further down the file
 85 | 
 86 | @ fmt is format control word:
 87 | @ b7..b0: number of significant figures
 88 | @ b15..b8: -(minimum exponent printable in F format)
 89 | @ b23..b16: maximum exponent printable in F format-1
 90 | @ b24: output positive mantissas with ' '
 91 | @ b25: output positive mantissas with '+'
 92 | @ b26: output positive exponents with ' '
 93 | @ b27: output positive exponents with '+'
 94 | @ b28: suppress trailing zeros in fraction
 95 | @ b29: fixed-point output: b7..0 give number of decimal places
 96 | @ default: 0x18060406
 97 | @ Note that if b28 is set (as it is in the default format value) the code will
 98 | @ write the trailing decimal point and zeros to the output buffer before truncating
 99 | @ the string. Thus it is essential that the output buffer is large enough to accommodate
100 | @ these characters temporarily.
101 | @
102 | @ Overall accuracy is sufficient to print all exactly-representable integers up to 10^8 correctly
103 | @ in 0x18160408 format.
104 | @ 
105 | @ void float2str(float f,char*s,unsigned int fmt) {
106 | 
107 | qfp_float2str:
108 |  push {r4-r7,r14}
109 | 
110 | @   if(fmt==0) fmt=0x18060406; // default format
111 | 
112 |  cmp r2,#0
113 |  bne 1f
114 |  ldr r2,=#0x18060406
115 | 1:
116 | 
117 | @   i=*(int*)&f;
118 | @   if(i&0x80000000) { // output sign of mantissa
119 | @     *p++='-';
120 | @     i&=0x7fffffff;
121 | @   } else {
122 | @     if(fmt&0x01000000) *p++=' ';
123 | @     else if(fmt&0x02000000) *p++='+';
124 | @     }
125 | 
126 |  movs r3,#'-'
127 |  lsls r0,#1
128 |  bcs 2f
129 |  movs r3,#' '
130 |  lsrs r4,r2,#25
131 |  bcs 2f
132 |  movs r3,#'+'
133 |  lsrs r4,r2,#26
134 |  bcc 3f
135 | 2:
136 |  bl och
137 | 3:
138 | 
139 | @   e2=(i>>23)-127; // get binary exponent e2
140 | 
141 |  movs r4,#0
142 |  lsrs r3,r0,#24
143 |  beq 1f  @ treat zero case specially
144 |  subs r3,#127
145 | 
146 | @   m=((i&0x7fffff)|0x800000)<<8; // get mantissa, restore implied 1, make Q31
147 | 
148 |  lsls r4,r0,#8
149 |  cmp r3,#128
150 |  beq naninf @ handle NaN/Inf cases
151 |  adds r4,#1
152 | 
153 | @   if(e2==-127) {e2=0; m=0;} // flush denormals to zero
154 | 
155 |  movs r5,#1
156 |  rors r4,r5
157 | 1:
158 |  movs r0,r4
159 | 
160 | @ now binary exponent e2 in r3, mantissa in r0
161 | 
162 | @   e10=0;  // decimal exponent
163 | @ overall plan is to manipulate m, e2 and e10 so as to take e2 to zero, while maintaining the
164 | @ invariant m * 2^e2 * 10^e10
165 | 
166 |  movs r4,#0
167 | 
168 | @   while(e2>0) { // add 3 to e10, take 10 off e2, multiply m by 1024/1000=128/125
169 | @     if(m>=0xf0000000) m>>=1,e2++;
170 | @     m=div125(m);
171 | @     e2-=10;
172 | @     e10+=3;
173 | @     } // now e2<=0
174 | 
175 |  b 2f
176 | 1:
177 |  lsrs r5,r0,#28
178 |  cmp r5,#0x0f
179 |  blo 3f
180 |  lsrs r0,#1
181 |  adds r3,#1
182 | 3:
183 |  bl div125
184 |  subs r3,#10
185 |  adds r4,#3
186 | 2:
187 |  cmp r3,#0
188 |  bgt 1b
189 | 
190 | @   while(e2<=-10) { // take 3 off e10, add 10 to e2, multiply m by 1000/1024=125/128
191 | @     m0=(m>>5)+(m>>6);
192 | @     m-=(m0>>1)+(m0&1); // *125/128, more accurate than using multiply instruction
193 | @     e2+=10;
194 | @     e10-=3;
195 | @     } // now -10 < e2 <= 0
196 | 
197 |  b 2f
198 | 1:
199 |  lsrs r5,r0,#5
200 |  lsrs r6,r0,#6
201 |  add r5,r6
202 |  movs r6,#0
203 |  lsrs r5,#1
204 |  adcs r5,r6
205 |  subs r0,r5
206 |  subs r4,#3
207 | 2:
208 |  adds r3,#10
209 |  ble 1b
210 |  subs r3,#10
211 | 
212 | @   m>>=1; // Q30; make sure m will not overflow
213 | 
214 |  lsrs r0,#1
215 | 
216 | @   while(e2<=-3) { // take 1 off e10, add 3 to e2, multiply m by 10/8
217 | @     m0=m>>1;
218 | @     m+=(m0>>1)+(m0&1); // *10/8
219 | @     e2+=3;
220 | @     e10--;
221 | @     } // now -3 < e2 <=0
222 | 
223 |  b 2f
224 | 1:
225 |  lsrs r5,r0,#1
226 |  lsrs r5,#1
227 |  adcs r0,r5
228 |  subs r4,#1
229 | 2:
230 |  adds r3,#3
231 |  ble 1b
232 |  subs r3,#3
233 | 
234 | @   while(e2<0) { // add 1 to e2, halve m
235 | @     m>>=1; // *1/2
236 | @     e2++;
237 | @     } // now e2==0
238 | 
239 |  b 2f
240 | 1:
241 |  lsrs r0,#1
242 | 2:
243 |  adds r3,#1
244 |  ble 1b
245 |  subs r3,#1
246 | 
247 | @   if(m>=0x40000000) m>>=2; // convert Q30 to Q28
248 | @   else {
249 | @     m=(m<<1)+(m>>1)+(m&1); // multiply by 10 (maintaining accuracy) if result will not overflow, compensate e10
250 | @     e10--;
251 | @     }
252 | 
253 |  lsrs r5,r0,#30
254 |  beq 1f
255 |  lsrs r0,#2
256 |  b 2f
257 | 1:
258 |  lsls r5,r0,#1
259 |  lsrs r0,#1
260 |  adcs r0,r5
261 |  subs r4,#1
262 | 2:
263 | 
264 | @ now all of binary exponent has been transferred to decimal exponent
265 | @ we have 
266 | @ r0: mantissa m, Q28, 1<=m<10
267 | @ r1: output pointer
268 | @ r2: format
269 | @ r3: 0 (was binary exponent)
270 | @ r4: decimal exponent e10
271 | 
272 | @   sf=fmt&0xff; // number of significant figures
273 | 
274 |  uxtb r3,r2 @ e2 is no longer used
275 | 
276 | @   ff=0; // flag to indicate that output is in "F" format (i.e., will not use "E" notation)
277 | 
278 |  movs r5,#0
279 | 
280 | @   d0=e10; // first digit output has significance 10^d0 wrt output '.'
281 | @   d1=d0-sf; // last digit output has significance 10^(d1+1) wrt output '.'
282 | 
283 |  movs r6,r4
284 |  subs r7,r6,r3
285 | 
286 | @ r0: mantissa m, Q28, 1<=m<10
287 | @ r1: output pointer
288 | @ r2: format
289 | @ r3: sf
290 | @ r4: decimal exponent e10
291 | @ r5b0: ff
292 | @ r6: d0
293 | @ r7: d1
294 | 
295 | @   if(fmt&0x20000000) { // forced "F" output format?
296 | @     d1=-(fmt&0xff)-1;
297 | @     sf=d0-d1;
298 | @     ff=1;
299 | @     }
300 | 
301 |  push {r1,r2}
302 |  lsrs r1,r2,#30
303 |  bcc 1f
304 |  mvns r7,r3
305 |  subs r3,r6,r7
306 |  movs r5,#1
307 | 1:
308 | 
309 | @   m0=0x08000000; // 0.5 Q28
310 | @   for(i=1;i<sf;i++) { // calculate amount to add to m for decimal rounding
311 | @     m0+=m0>>1; // multiply by 0.1
312 | @     m0+=m0>>4;
313 | @     m0+=m0>>8;
314 | @     m0+=m0>>16;
315 | @     m0>>=4;
316 | @     }
317 | @   m+=m0; // rounding
318 | 
319 |  push {r3}
320 |  movs r1,#8
321 |  lsls r1,#24
322 | 2:
323 |  subs r3,#1
324 |  ble 1f
325 |  lsrs r2,r1,#1
326 |  add r1,r2
327 |  lsrs r2,r1,#4
328 |  add r1,r2
329 |  lsrs r2,r1,#8
330 |  add r1,r2
331 |  lsrs r2,r1,#16
332 |  add r1,r2
333 |  lsrs r1,#4
334 |  b 2b
335 | 1:
336 |  add r0,r1
337 |  pop {r3}
338 | 
339 | @   if(m>=0xa0000000) { // has rounding pushed m to 10 (Q28)? if so, set to 1 and increment decimal exponent
340 | @     m=0x10000000;
341 | @     e10++;
342 | @     d0++;
343 | @     if((fmt&0x20000000)==0) d1++;
344 | @     }
345 | 
346 |  lsrs r1,r0,#28
347 |  cmp r1,#0x0a
348 |  pop {r1,r2}
349 |  blo 1f
350 |  lsrs r0,r2,#30
351 |  bcs 2f
352 |  adds r7,#1
353 | 2:
354 |  movs r0,#0x10
355 |  lsls r0,#24
356 |  adds r4,#1
357 |  adds r6,#1
358 | 1:
359 | 
360 | @   if(d0>=-(int)((fmt>>8)&0xff)&&d0<(int)((fmt>>16)&0xff)) ff=1; // in range for F format?
361 | 
362 |  push {r4}
363 |  lsrs r4,r2,#8
364 |  uxtb r4,r4
365 |  adds r4,r6
366 |  blt 1f
367 |  lsrs r4,r2,#16
368 |  uxtb r4,r4
369 |  cmp r6,r4
370 |  bge 1f
371 |  movs r5,#1
372 | 1:
373 | 
374 | @   if(!ff) d0=0,d1=-sf; // for E format we have one digit before the decimal point
375 | 
376 |  cmp r5,#0
377 |  bne 1f
378 |  movs r6,#0
379 |  rsbs r7,r3,#0
380 | 1:
381 | 
382 | @ sf (r3) no longer used
383 | 
384 | @   f0=0; // flag to indicate whether we have output a '.'
385 | 
386 | @ f0 in r5b1
387 | 
388 | @   if(d0<0) *p++='0',*p++='.',f0=1,i=-1; // value <1, so output "0."
389 | @   else i=d0;
390 | 
391 |  mov r4,r6
392 |  cmp r6,#0
393 |  bge 1f
394 |  bl ozero
395 |  bl opoint
396 |  movs r4,#0
397 |  mvns r4,r4
398 | 1:
399 | 
400 | @   while(i>d0&&i>d1) *p++='0',i--; // output leading zeros before significand as necessary
401 | 
402 | 2:
403 |  cmp r4,r6
404 |  ble 1f
405 |  cmp r4,r7
406 |  ble 1f
407 |  bl ozero
408 |  subs r4,#1
409 |  b 2b
410 | 1:
411 | 
412 | @ d0 (r6) no longer used
413 | 
414 | @   for(;i>d1;i--) {          // now output digits of significand
415 | @     *p++='0'+(m>>28);       // output integer part of Q28 value
416 | @     m&=0x0fffffff;          // fractional part of Q28 value
417 | @     m=(m<<1)+(m<<3);        // multiply by 10
418 | @     if(i==0) *p++='.',f0=1; // output decimal point as significance goes through 10^0
419 | @     }
420 | 
421 | 2:
422 |  cmp r4,r7
423 |  ble 1f
424 |  lsrs r3,r0,#28
425 |  bl odig
426 |  lsls r0,#4
427 |  lsrs r0,#1
428 |  lsrs r3,r0,#2
429 |  add r0,r3
430 |  subs r4,#1
431 |  bcs 2b
432 |  bl opoint
433 |  b 2b
434 | 1:
435 | 
436 | @ m (r0) no longer used
437 | @ d1 (r7) no longer used
438 | 
439 | @   for(;i>=0;i--) *p++='0'; // output remaining zeros of integer part
440 | 
441 | 2:
442 |  cmp r4,#0
443 |  blt 1f
444 |  bl ozero
445 |  subs r4,#1
446 |  b 2b
447 | 1:
448 | 
449 | @ i (r4) no longer used
450 | 
451 | @   if(f0) { // remove trailing zeros and decimal point?
452 | @     if(fmt&0x10000000) while(p[-1]=='0') p--;
453 | @     if(p[-1]=='.') p--;
454 | @     *p=0;
455 | @     }
456 | 
457 |  lsrs r4,r5,#2
458 |  bcc 1f
459 |  lsrs r4,r2,#29
460 |  bcc 2f
461 | 3:
462 |  subs r1,#1
463 |  ldrb r4,[r1]
464 |  cmp r4,#'0'
465 |  beq 3b
466 |  adds r1,#1
467 | 2:
468 |  subs r1,#1
469 |  ldrb r4,[r1]
470 |  cmp r4,#'.'
471 |  beq 4f
472 |  adds r1,#1
473 | 4:
474 | 1:
475 |  pop {r4}
476 | 
477 | @ now:
478 | @ r0
479 | @ r1: output pointer
480 | @ r2: format
481 | @ r3
482 | @ r4: decimal exponent e10
483 | @ r5b0: ff
484 | @ r6:
485 | @ r7:
486 | 
487 | @   if(!ff) { // output exponent?
488 | 
489 |  lsrs r5,#1
490 |  bcs 10f
491 | 
492 | @     *p++='E';
493 | 
494 |  movs r3,#'E'
495 |  bl och
496 | 
497 | @     if(e10<0) *p++='-',e10=-e10; // output exponent sign
498 | @     else {
499 | @            if(fmt&0x04000000) *p++=' ';
500 | @       else if(fmt&0x08000000) *p++='+';
501 | @       }
502 | 
503 |  cmp r4,#0
504 |  bge 2f
505 |  rsbs r4,#0
506 |  movs r3,#'-'
507 |  b 3f
508 | 2:
509 |  movs r3,#' '
510 |  lsrs r6,r2,#27
511 |  bcs 3f
512 |  movs r3,#'+'
513 |  lsrs r6,r2,#28
514 |  bcc 4f
515 | 3:
516 |  bl och
517 | 4:
518 | 
519 | @     m=(e10*0xcd)>>11; // tens digit of exponent
520 | @     *p++='0'+m;
521 | @     e10-=m*10;        // units digit of exponent
522 | @     *p++='0'+e10;
523 | 
524 |  movs r3,#0xcd
525 |  muls r3,r4
526 |  lsrs r3,#11
527 |  movs r0,#10
528 |  muls r0,r3
529 |  bl odig
530 |  subs r3,r4,r0
531 |  bl odig
532 | 
533 | @     }
534 | 
535 | 10:
536 | 
537 | @   *p++=0;
538 | 
539 |  movs r3,#0
540 |  bl och
541 | 
542 | @   }
543 | 
544 |  pop {r4-r7,r15}
545 | 
546 | 
547 | 
548 | 
549 | 
550 | @ Convert string pointed to by p into float, stored at f. On failure
551 | @ return 1; on success, return 0 and store pointer to first non-converted
552 | @ character at endptr if endptr!=0.
553 | 
554 | @ #define ISDIG(x) ((x)>='0'&&(x)<='9')
555 | 
556 | isdig: @ convert ASCII to digit
557 |  subs r2,#'0'
558 |  cmp r2,#10 @ clear carry if digit
559 |  bx r14
560 | 
561 | @ int str2float(float*f,char*p,char**endptr) {
562 | 
563 | qfp_str2float:
564 | 
565 | @   if(*p=='+') p++;
566 | @   else if(*p=='-') sm=0x80000000,p++; // capture mantissa sign
567 | 
568 |  push {r0,r2,r4-r7,r14}
569 |  movs r7,#0
570 |  ldrb r2,[r1]
571 |  cmp r2,#'+'
572 |  beq 1f
573 |  cmp r2,#'-'
574 |  bne 2f
575 |  movs r7,#1
576 | 1:
577 |  adds r1,#1
578 | 2:
579 |  movs r0,#0 @ mantissa
580 |  movs r3,#0 @ f0: have we seen a '.'?
581 |  movs r5,#0 @ exponent
582 |  movs r6,#0 @ count of mantissa digits processed
583 | 
584 | @ r0: m
585 | @ r1: input pointer
586 | @ r3: f0
587 | @ r5: e
588 | @ r6: d
589 | @ r7b0: sm
590 | @ stack: output pointer, end pointer
591 | 
592 | @   for(;;) {
593 | @     if(f0==0&&*p=='.') {f0=1; p++; continue;}
594 | @     if(!ISDIG(*p)) goto l0; // break out on non-digit
595 | @     if(m<0x10000000) { // accumulate digits (up to about 8 significant figures)
596 | @       m=m*10+*p-'0';
597 | @       if(f0==1) e--; // decrement exponent if we are past the decimal point
598 | @     } else if(f0==0) e++; // just increment exponent after we have captured enough significance in m
599 | @     d++;
600 | @     p++;
601 | @     }
602 | @ l0:
603 | 
604 | 2:
605 |  ldrb r2,[r1]
606 |  cmp r2,#'.'
607 |  bne 1f
608 |  cmp r3,#0
609 |  bne 1f
610 |  movs r3,#1
611 |  b 3f
612 | 1:
613 |  bl isdig
614 |  bcs 4f
615 |  lsrs r4,r0,#28
616 |  bne 5f
617 |  movs r4,#10
618 |  muls r0,r4
619 |  add r0,r2
620 |  subs r5,#1
621 | 5:
622 |  adds r5,#1
623 |  subs r5,r3
624 |  adds r6,#1
625 | 3:
626 |  adds r1,#1
627 |  b 2b
628 | 4:
629 | 
630 | @   if(d==0) return 1; // no digits seen: error
631 | 
632 |  cmp r6,#0
633 |  bne 1f
634 |  movs r0,#1
635 |  pop {r2-r7,r15}
636 | 
637 | @ f0 (r3) no longer used
638 | @ d (r6) no longer used
639 | 
640 | @   e10=0; // decimal exponent
641 | 
642 | 1:
643 |  movs r3,#0
644 | 
645 | @   if(*p=='e'||*p=='E') { // exponent given?
646 | @     se=0;
647 | @     p++;
648 | @     if(*p=='+') p++; 
649 | @     else if(*p=='-') se=1,p++; // capture exponent sign
650 | @     while(ISDIG(*p)) { // capture exponent digits
651 | @       if(e10<0x01000000) e10=e10*10+*p-'0'; // prevent overflow
652 | @       p++;
653 | @       }
654 | @     if(se) e10=-e10; // apply exponent sign
655 | @     }
656 | 
657 |  mov r6,r1 @ save r1
658 |  ldrb r2,[r1]
659 |  cmp r2,#'e'
660 |  beq 1f
661 |  cmp r2,#'E'
662 |  bne 2f
663 | 1:
664 |  adds r1,#1
665 |  ldrb r2,[r1]
666 |  cmp r2,#'+'
667 |  beq 3f
668 |  cmp r2,#'-'
669 |  bne 4f
670 |  adds r7,#2 @ se in r7b1
671 | 3:
672 |  adds r1,#1
673 |  ldrb r2,[r1]
674 | 4:
675 |  bl isdig
676 |  bcc 6f
677 |  mov r1,r6 @ E without following digits: restore r1
678 |  b 2f
679 | 6:
680 |  lsrs r4,r3,#24
681 |  bne 5f
682 |  movs r4,#10
683 |  muls r3,r4
684 |  add r3,r2
685 | 5:
686 |  adds r1,#1
687 |  ldrb r2,[r1]
688 |  bl isdig
689 |  bcc 6b
690 |  cmp r7,#2
691 |  blo 2f
692 |  rsbs r3,#0
693 | 2:
694 | 
695 | @   if(m==0) goto l2; // zero? then we have finished
696 | 
697 |  movs r2,#0
698 |  cmp r0,#0
699 |  beq 11f
700 | 
701 | @   e10+=e; // offset e by captured exponent
702 | @   if(e10> 127) e10=127; // clip overflows: 10^127 will be converted later to Inf, 10^-128 to zero
703 | @   if(e10<-128) e10=-128;
704 | 
705 |  add r3,r5
706 |  lsls r4,r3,#2 @ temporarily set e2 to e10*4: this will cause subsequent conversion to Inf/zero if required
707 |  sxtb r5,r3
708 |  cmp r5,r3
709 |  bne 12f @ not equal to its sign-extended version?
710 | 
711 | @ e (r5) no longer used
712 | 
713 | @ r0: m
714 | @ r1: input pointer
715 | @ r3: e10
716 | @ r7b0: sm
717 | @ stack: output pointer, end pointer
718 | 
719 | @   e2=31; // binary exponent
720 | @ plan is to manipulate m, e2 and e10 so as to take e10 to zero, while maintaining the
721 | @ invariant m * 2^e2 * 10^e10
722 | 
723 |  movs r4,#31
724 | 
725 | @   while(m<0x40000000) m+=m,e2--; // normalise so m is now 0x40000000..0xa0000000
726 | 
727 | 2:
728 |  lsrs r2,r0,#30
729 |  bne 1f
730 |  lsls r0,#1
731 |  subs r4,#1
732 |  b 2b
733 | 1:
734 | 
735 | @   while(e10<0) { // add 3 to e10, take 10 off e2 and multiply m by 1024/1000=128/125
736 | @     m=div125(m);
737 | @     e10+=3; e2-=10;
738 | @     if(m>=0x80000000) m>>=1,e2++;
739 | @     } // now e10 >= 0
740 | 
741 | 2:
742 |  cmp r3,#0
743 |  bge 1f
744 |  bl div125
745 |  adds r3,#3
746 |  subs r4,#10
747 |  lsrs r2,r0,#31
748 |  beq 2b
749 |  lsrs r0,#1
750 |  adds r4,#1
751 |  b 2b
752 | 1:
753 | 
754 | @   while(e10>2) { // take 3 off e10, add 10 to e2 and multiply m by 1000/1024=125/128
755 | @     m0=(m>>6)+(m>>5);
756 | @     m-=(m0>>1)+(m0&1); // *125/128
757 | @     e10-=3; e2+=10;
758 | @     } // now 0 <= e10 < 3
759 | 
760 | 2:
761 |  cmp r3,#2
762 |  ble 1f
763 |  lsrs r2,r0,#6
764 |  lsrs r5,r0,#5
765 |  add r2,r5
766 |  movs r5,#0
767 |  lsrs r2,#1
768 |  adcs r2,r5
769 |  subs r0,r2
770 |  subs r3,#3
771 |  adds r4,#10
772 |  b 2b
773 | 1:
774 | 
775 | @   while(e10>0) { // take 1off e10, add 3 to e2 and multiply m by 10/8 = 5/4
776 | @     m0=(m>>1);
777 | @     m+=(m0>>1)+(m0&1); // *5/4
778 | @     e10-=1; e2+=3;
779 | @     } // now e10==0
780 | 
781 | 2:
782 |  cmp r3,#0
783 |  ble 1f
784 |  lsrs r2,r0,#1
785 |  lsrs r2,#1
786 |  adcs r0,r2
787 |  subs r3,#1
788 |  adds r4,#3
789 |  b 2b
790 | 1:
791 | 
792 | @ e10 (r3) no longer used
793 | 
794 | @   while(m<0x80000000) m+=m,e2--; // renormalise m so MSB is set
795 | 
796 |  cmp r0,#0
797 |  blt 1f
798 | 2:
799 |  subs r4,#1
800 |  adds r0,r0
801 |  bpl 2b
802 | 1:
803 | 
804 | @   m=((m>>7)+1)>>1; // to 24 bits, with rounding
805 | 
806 |  lsrs r0,#7
807 |  adds r0,#1
808 |  lsrs r0,#1
809 | 
810 | @   if(m==0x01000000) m>>=1,e2++; // has rounding pushed m to 25 bits? renormalise if so
811 | 
812 |  lsrs r2,r0,#24
813 |  beq 1f
814 |  lsrs r0,#1
815 |  adds r4,#1
816 | 1:
817 | 
818 | @   e2+=127; // add exponent offset
819 | 
820 | 12:
821 |  movs r2,#0
822 |  movs r3,#0
823 |  adds r4,#127
824 | 
825 | @   if(e2<=0) {m=0; goto l1;} // too small? flush to zero
826 | 
827 |  ble 10f
828 | 
829 | @   if(e2>=255) {m=0x7f800000; goto l1;} // too big? make infinity
830 | 
831 |  movs r3,#255
832 |  cmp r4,#255
833 |  bge 10f
834 | 
835 | @   m&=0x007fffff; // remove implied 1
836 | 
837 |  lsls r2,r0,#9
838 |  lsrs r2,#9
839 |  mov r3,r4
840 | 
841 | @   m|=e2<<23; // insert exponent bits
842 | 
843 | 10:
844 |  lsls r3,#23
845 |  orrs r2,r3
846 | 
847 | @   m|=sm; // apply mantissa sign
848 | 
849 | 11:
850 |  lsls r7,#31
851 |  orrs r2,r7
852 | 
853 | @   *f=*(float*)&m; // write output
854 | @   if(end) *end=p;
855 | 
856 |  pop {r0,r3}
857 |  str r2,[r0]
858 |  cmp r3,#0
859 |  beq 1f
860 |  str r1,[r3]
861 | 1:
862 | 
863 | @   return 0;
864 | 
865 |  movs r0,#0
866 |  pop {r4-r7,r15}
867 | 
868 | @   }
869 | 


--------------------------------------------------------------------------------
/qfpio.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 Mark Owen
 2 | // http://www.quinapalus.com
 3 | // E-mail: qfp@quinapalus.com
 4 | //
 5 | // This file is free software: you can redistribute it and/or modify
 6 | // it under the terms of version 2 of the GNU General Public License
 7 | // as published by the Free Software Foundation.
 8 | //
 9 | // This file is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | // GNU General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU General Public License
15 | // along with this file.  If not, see <http://www.gnu.org/licenses/> or
16 | // write to the Free Software Foundation, Inc., 51 Franklin Street,
17 | // Fifth Floor, Boston, MA  02110-1301, USA.
18 | 
19 | #ifndef _QFPIO_H_
20 | #define _QFPIO_H_
21 | 
22 | #ifdef __cplusplus
23 |   extern "C" {
24 | #endif
25 | 
26 | extern void qfp_float2str(float f,char*s,unsigned int fmt);
27 | extern int qfp_str2float(float*f,char*p,char**endptr);
28 | 
29 | #ifdef __cplusplus
30 |   } // extern "C"
31 | #endif
32 | #endif
33 | 


--------------------------------------------------------------------------------
/qfplib.S:
--------------------------------------------------------------------------------
  1 | @ Copyright 2015 Mark Owen
  2 | @ http://www.quinapalus.com
  3 | @ E-mail: qfp@quinapalus.com
  4 | @
  5 | @ This file is free software: you can redistribute it and/or modify
  6 | @ it under the terms of version 2 of the GNU General Public License
  7 | @ as published by the Free Software Foundation.
  8 | @
  9 | @ This file is distributed in the hope that it will be useful,
 10 | @ but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | @ GNU General Public License for more details.
 13 | @
 14 | @ You should have received a copy of the GNU General Public License
 15 | @ along with this file.  If not, see <http://www.gnu.org/licenses/> or
 16 | @ write to the Free Software Foundation, Inc., 51 Franklin Street,
 17 | @ Fifth Floor, Boston, MA  02110-1301, USA.
 18 | 
 19 | @.equ include_faster,0        @ include fast divide and square root?
 20 | @.equ include_conversions,1   @ include float <-> fixed point conversion functions?
 21 | @.equ include_scientific,1    @ include trignometic, exponential etc. functions?
 22 | 
 23 | .ifndef include_faster
 24 | .equ include_faster,1
 25 | .endif
 26 | 
 27 | .ifndef include_conversions
 28 | .equ include_conversions,1
 29 | .endif
 30 | 
 31 | .ifndef include_scientific
 32 | .equ include_scientific,1
 33 | .endif
 34 | 
 35 | .if include_scientific
 36 | .equ include_conversions,1
 37 | .endif
 38 | 
 39 | .syntax unified
 40 | .cpu cortex-m0
 41 | .thumb
 42 | 
 43 | @ exported symbols
 44 | 
 45 | .global qfp_fadd
 46 | .global qfp_fsub
 47 | .global qfp_fmul
 48 | .global qfp_fdiv
 49 | .global qfp_fcmp
 50 | .if include_conversions
 51 | .global qfp_float2int
 52 | .global qfp_float2fix
 53 | .global qfp_float2uint
 54 | .global qfp_float2ufix
 55 | .global qfp_int2float
 56 | .global qfp_fix2float
 57 | .global qfp_uint2float
 58 | .global qfp_ufix2float
 59 | .endif
 60 | .if include_scientific
 61 | .global qfp_fcos
 62 | .global qfp_fsin
 63 | .global qfp_ftan
 64 | .global qfp_fatan2
 65 | .global qfp_fexp
 66 | .global qfp_fln
 67 | .global qfp_fsqrt
 68 | .endif
 69 | 
 70 | .if include_faster
 71 | .global qfp_fdiv_fast
 72 | .global qfp_fsqrt_fast
 73 | .endif
 74 | 
 75 | @ exchange r0<->r1, r2<->r3
 76 | xchxy:
 77 |  push {r0,r2,r14}
 78 |  mov r0,r1
 79 |  mov r2,r3
 80 |  pop {r1,r3,r15}
 81 | 
 82 | @ IEEE single precision floats in r0,r1-> mantissae in r1,r0 exponents in r3,r2 *respectively*
 83 | @ trashes r4
 84 | unpackxy:
 85 |  push {r14}
 86 |  bl unpackx
 87 |  bl xchxy
 88 |  pop {r4}
 89 |  mov r14,r4
 90 | 
 91 | @ IEEE single in r0-> signed (two's complemennt) mantissa in r0 9Q23 (24 significant bits), signed exponent (bias removed) in r2
 92 | @ trashes r4; zero, denormal -> mantissa=+/-1, exponent=-380; Inf, NaN -> mantissa=+/-1, exponent=+640
 93 | unpackx:
 94 |  lsrs r2,r0,#23 @ save exponent and sign
 95 |  lsls r0,#9     @ extract mantissa
 96 |  lsrs r0,#9
 97 |  movs r4,#1
 98 |  lsls r4,#23
 99 |  orrs r0,r4     @ reinstate implied leading 1
100 |  cmp r2,#255    @ test sign bit
101 |  uxtb r2,r2     @ clear it
102 |  bls 1f         @ branch on positive
103 |  rsbs r0,#0     @ negate mantissa
104 | 1:
105 |  subs r2,#1
106 |  cmp r2,#254    @ zero/denormal/Inf/NaN?
107 |  bhs 2f
108 |  subs r2,#126   @ remove exponent bias: can now be -126..+127
109 |  bx r14
110 | 
111 | 2:              @ here with special-case values
112 |  cmp r0,#0
113 |  mov r0,r4      @ set mantissa to +1
114 |  bpl 3f
115 |  rsbs r0,#0     @ zero/denormal/Inf/NaN: mantissa=+/-1
116 | 3:
117 |  subs r2,#126   @ zero/denormal: exponent -> -127; Inf, NaN: exponent -> 128
118 |  lsls r2,#2     @ zero/denormal: exponent -> -508; Inf, NaN: exponent -> 512
119 |  adds r2,#128   @ zero/denormal: exponent -> -380; Inf, NaN: exponent -> 640
120 |  bx r14
121 | 
122 | @ normalise and pack signed mantissa in r0 nominally 3Q29, signed exponent in r2-> IEEE single in r0
123 | @ trashes r4, preserves r1,r3
124 | @ r5: "sticky bits", must be zero iff all result bits below r0 are zero for correct rounding
125 | packx:
126 |  lsrs r4,r0,#31 @ save sign bit
127 |  lsls r4,r4,#31 @ sign now in b31
128 |  bpl 2f         @ skip if positive
129 |  cmp r5,#0
130 |  beq 11f
131 |  adds r0,#1     @ fiddle carry in to following rsb if sticky bits are non-zero
132 | 11:
133 |  rsbs r0,#0     @ can now treat r0 as unsigned
134 | packx0:
135 |  bmi 3f         @ catch r0=0x80000000 case
136 | 2:
137 |  subs r2,#1     @ normalisation loop
138 |  adds r0,r0
139 |  beq 1f         @ zero? special case
140 |  bpl 2b         @ normalise so leading "1" in bit 31
141 | 3:
142 |  adds r2,#129   @ (mis-)offset exponent
143 |  bne 12f        @ special case: highest denormal can round to lowest normal
144 |  adds r0,#0x80  @ in special case, need to add 256 to r0 for rounding
145 |  bcs 4f         @ tripped carry? then have leading 1 in C as required
146 | 12:
147 |  adds r0,#0x80  @ rounding
148 |  bcs 4f         @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)
149 |  cmp r5,#0
150 |  beq 7f         @ sticky bits zero?
151 | 8:
152 |  lsls r0,#1     @ remove leading 1
153 | 9:
154 |  subs r2,#1     @ compensate exponent on this path
155 | 4:
156 |  cmp r2,#254
157 |  bge 5f         @ overflow?
158 |  adds r2,#1     @ correct exponent offset
159 |  ble 10f        @ denormal/underflow?
160 |  lsrs r0,#9     @ align mantissa
161 |  lsls r2,#23    @ align exponent
162 |  orrs r0,r2     @ assemble exponent and mantissa
163 | 6:
164 |  orrs r0,r4     @ apply sign
165 | 1:
166 |  bx r14
167 | 
168 | 5:
169 |  movs r0,#0xff  @ create infinity
170 |  lsls r0,#23
171 |  b 6b
172 | 
173 | 10:
174 |  movs r0,#0     @ create zero
175 |  bx r14
176 | 
177 | 7:              @ sticky bit rounding case
178 |  lsls r5,r0,#24 @ check bottom 8 bits of r0
179 |  bne 8b         @ in rounding-tie case?
180 |  lsrs r0,#9     @ ensure even result
181 |  lsls r0,#10
182 |  b 9b
183 | 
184 | @ unpack two arguments (r0,r1) and shift one down to have common exponent, returned in r2; note that arguments are exchanged
185 | @ sticky bits shifted off bottom of smaller argument in r5
186 | @ following code is unnecessarily general for fadd, but is shared with atan2
187 | unpackxyalign:
188 |  push {r14}
189 |  bl unpackxy
190 |  lsls r0,r0,#6  @ Q29
191 |  lsls r1,r1,#6  @ Q29
192 |  subs r4,r2,r3  @ calculate shift
193 |  bge 1f         @ x>=y?
194 |  mov r2,r3      @ no: take common exponent from y
195 |  mov r5,r0      @ potential sticky bits from x
196 |  rsbs r4,#0     @ make shift positive
197 |  asrs r0,r4
198 |  cmp r4,#32
199 |  blo 2f
200 |  movs r0,#0    @ large shift, so all bits are sticky and result is zero
201 |  pop {r15}
202 | 1:
203 |  mov r5,r1     @ common exponent from x; potential sticky bits from y
204 |  asrs r1,r4
205 |  cmp r4,#32
206 |  blo 2f
207 |  movs r1,#0    @ large shift, so all bits are sticky and result is zero
208 |  pop {r15}
209 | 2:
210 |  rsbs r4,#0
211 |  adds r4,#32
212 |  lsls r5,r4    @ extract sticky bits
213 |  pop {r15}
214 | 
215 | .thumb_func
216 | qfp_fsub:
217 |  movs r2,#1    @ subtract: flip sign bit of second argument and fall through to fadd
218 |  lsls r2,#31
219 |  eors r1,r2
220 | .thumb_func
221 | qfp_fadd:
222 |  push {r4,r5,r14}
223 |  bl unpackxyalign
224 |  adds r0,r1    @ do addition
225 |  bne 2f        @ not in Inf-Inf case?
226 |  cmp r2,#200
227 |  blt 2f
228 |  movs r0,#1
229 |  lsls r0,#29   @ for Inf-Inf, set mantissa to +1 to prevent zero result
230 | 2:
231 | packret:       @ common return point: "pack and return"
232 |  bl packx
233 |  pop {r4,r5,r15}
234 | 
235 | @ signed multiply r0 1Q23 by r1 4Q23, result in r0 7Q25, sticky bits in r5
236 | @ trashes r3,r4
237 | mul0:
238 |  uxth r3,r0      @ Q23
239 |  asrs r4,r1,#16  @ Q7
240 |  muls r3,r4      @ L*H, Q30 signed
241 |  asrs r4,r0,#16  @ Q7
242 |  uxth r5,r1      @ Q23
243 |  muls r4,r5      @ H*L, Q30 signed
244 |  adds r3,r4      @ sum of middle partial products
245 |  uxth r4,r0
246 |  muls r4,r5      @ L*L, Q46 unsigned
247 |  lsls r5,r4,#16  @ initialise sticky bits from low half of low partial product
248 |  lsrs r4,#16     @ Q25
249 |  adds r3,r4      @ add high half of low partial product to sum of middle partial products
250 |                  @ (cannot generate carry by limits on input arguments)
251 |  asrs r0,#16     @ Q7
252 |  asrs r1,#16     @ Q7
253 |  muls r0,r1      @ H*H, Q14 signed
254 |  lsls r0,#11     @ high partial product Q25
255 |  lsls r1,r3,#27  @ sticky
256 |  orrs r5,r1      @ collect further sticky bits
257 |  asrs r1,r3,#5   @ middle partial products Q25
258 |  adds r0,r1      @ final result
259 |  bx r14
260 | 
261 | .thumb_func
262 | qfp_fcmp:
263 |  movs r2,#1      @ initialise result
264 |  lsls r3,r2,#31  @ r3=0x80000000
265 |  tst r0,r3       @ check sign of first argument
266 |  beq 1f
267 |  subs r0,r3,r0   @ convert to 2's complement form for direct comparison
268 | 1:
269 |  tst r1,r3       @ repeat for second argument
270 |  beq 2f
271 |  subs r1,r3,r1
272 | 2:
273 |  subs r0,r1     @ perform comparison
274 |  beq 4f         @ equal? return 0
275 |  bgt 3f         @ r0>r1? return +1
276 |  rsbs r2,#0     @ r0<r1: return -1
277 | 3:
278 |  mov r0,r2
279 | 4:
280 |  bx r14
281 | 
282 | .thumb_func
283 | qfp_fmul:
284 |  push {r4,r5,r14}
285 |  bl unpackxy
286 |  add r2,r3      @ sum exponents
287 |  adds r2,#4     @ adjust exponent for pack which expects Q29
288 | fmul0:
289 |  bl mul0
290 |  b packret
291 | 
292 | .thumb_func
293 | qfp_fdiv:
294 |  push {r4,r5,r14}
295 | fdiv_n:
296 |  bl unpackxy
297 |  movs r5,#1      @ result cannot fall exactly half-way between two representable numbers (exercise for reader - note that
298 |                  @ we do not handle denormals) so there will always be sticky bits
299 |  cmp r0,#0       @ check divisor sign
300 |  bpl 1f
301 |  rsbs r0,#0      @ ensure divisor positive
302 |  rsbs r1,#0      @ preserve sign of result
303 | 1:
304 |  movs r4,#0
305 |  cmp r1,#0       @ check sign of dividend
306 |  bpl 2f
307 |  rsbs r1,#0      @ result will be negative
308 |  mvns r4,r4      @ save sign as 0x00000000 or 0xffffffff
309 | 2:               @ now do unsigned division on unpacked values {r1,r3}/{r0,r2}
310 |  cmp r3,#200     @ dividend is an infinity? return it
311 |  bge 3f
312 |  rsbs r2,#0
313 |  cmp r2,#200     @ divisor is zero? return infinity
314 |  bge 3f
315 |  adds r2,r3      @ difference of exponents
316 |  movs r3,#0x40   @ termination marker (calculate enough bits to do rounding correctly)
317 | 2:               @ division loop
318 |  subs r1,r0      @ trial subtraction
319 |  bcs 1f
320 |  add r1,r0       @ restore if failed
321 | 1:
322 |  adcs r3,r3      @ shift in result bit
323 |  add r1,r1       @ shift up dividend
324 |  bcc 2b          @ loop until marker appears in carry
325 |  lsls r0,r3,#4   @ align for packing
326 | 4:
327 |  eors r0,r4      @ apply sign
328 |  b packret
329 | infret:
330 |  movs r4,#0
331 | 3:
332 |  mov r0,r1
333 |  movs r2,#255    @ return infinity
334 |  b 4b
335 | 
336 | .if include_faster
337 | 
338 | @ The fast divide routine uses an initial approximation to the reciprocal of the divisor based on the top four bits of the mantissa
339 | @ followed by three Newton-Raphson iterations, resulting in about 27 bits of accuracy. This reciprocal is then multiplied by
340 | @ the dividend.
341 | @ The (fixed-point) reciprocal calculation is carefully implemented to preserve the necessary accuracy throughout. In places
342 | @ the implied binary point is not within the stored value. For example where "Q47" is shown below it means that the least
343 | @ significant bit of the value has significance 2^-47 and hence the most significant bit has significance 2^-16. In these
344 | @ cases the value is known to be very close to an integer (usually 1) and so the bits of greater significance do not need
345 | @ to be stored.
346 | @ The reciprocal calculation has been tested for all possible input mantissa values.
347 | .thumb_func
348 | qfp_fdiv_fast:
349 |  push {r4,r5,r14}
350 | fdiv_fast_n:
351 |  bl unpackxy
352 |  cmp r0,#0
353 |  bpl 1f
354 |  rsbs r0,#0
355 |  rsbs r1,#0      @ ensure divisor positive
356 | 1:
357 |  cmp r3,#200
358 |  bge infret      @ dividend is an infinity? return it
359 |  rsbs r2,#0
360 |  cmp r2,#200     @ divisor is zero?
361 |  bge infret      @ return infinity
362 |  adds r2,r3      @ difference of exponents
363 |  adr r4,rcpapp-8 @ the first 8 elements of the table are never accessed because of the mantissa's leading 1
364 |  lsrs r3,r0,#20  @ y Q23; y>>20 Q7
365 |  ldrb r4,[r4,r3] @ m=rcpapp[(y>>20)&7]; // Q8, .5<m<1
366 | 
367 |  lsls r3,r4,#2   @ m<<2         // Q10  first Newton-Raphson iteration
368 |  muls r3,r0      @ s=y*(m<<2);  // Q33
369 |  lsls r4,#8      @ m<<=8;       // Q16
370 |  asrs r3,#21     @ s>>=21;      // Q12
371 |  muls r3,r4      @ s*=m;        // Q28
372 |  asrs r3,#12     @ s>>=12;      // Q16
373 |  subs r4,r3      @ m=m-s;       // Q16
374 | 
375 |  mov r3,r4       @ s=y*m        // Q39 second Newton-Raphson iteration
376 |  muls r4,r0      @ ...
377 |  asrs r4,#16     @ s>>=16;      // Q23
378 |  muls r4,r3      @ s*=m;        // Q39
379 |  lsls r3,#8      @ m<<=8;       // Q24
380 |  asrs r4,#15     @ s>>=15;      // Q24
381 |  subs r3,r4      @ m=m-s;       // Q24
382 | 
383 |  lsls r4,r3,#7   @ \/ s=y*m;    // Q47 third Newton-Raphson iteration
384 |  muls r3,r0      @ /\ m<<=7;    // Q31
385 |  asrs r3,#15     @ s>>=15;      // Q32
386 |  lsrs r0,r4,#16  @ s*=(m>>16);  // Q47
387 |  muls r3,r0      @ ...
388 |  asrs r3,#16     @ s>>=16;      // Q31
389 |  subs r0,r4,r3   @ m=m-s;       // Q31
390 | div0:
391 |  adds r0,#7      @ rounding; reduce systematic error
392 |  lsrs r0,#4      @ Q27
393 |  b fmul0         @ drop into multiplication code to calculate result
394 | 
395 | @ The fast square root routine uses an initial approximation to the reciprocal of the square root of the argument based
396 | @ on the top four bits of the mantissa (possibly shifted one place to make the exponent even). It then performs three
397 | @ Newton-Raphson iterations, resulting in about 28-29 bits of accuracy. This reciprocal is then multiplied by
398 | @ the original argument to produce the result.
399 | @ Again, the fixed-point calculation is carefully implemented to preserve accuracy, and similar comments to those
400 | @ made above on the fast division routine apply.
401 | @ The reciprocal square root calculation has been tested for all possible (possibly shifted) input mantissa values.
402 | .thumb_func
403 | qfp_fsqrt_fast:
404 |  push {r4,r5,r14}
405 |  bl unpackx
406 |  movs r1,r0
407 |  bmi infret       @ negative? return -Inf
408 |  asrs r0,r2,#1    @ check LSB of exponent
409 |  bcc 1f
410 |  lsls r1,#1       @ was odd: double mantissa; mantissa y now 1..4 Q23
411 | 1:
412 |  adds r2,#4       @ correction for packing
413 |  adr r4,rsqrtapp-4@ first four table entries are never accessed because of the mantissa's leading 1
414 |  lsrs r3,r1,#21   @ y>>21 Q2
415 |  ldrb r4,[r4,r3]  @ initial approximation to reciprocal square root m Q8
416 | 
417 |  lsrs r0,r1,#7    @ y>>7             // Q16 first Newton-Raphson iteration
418 |  muls r0,r4       @ m*y
419 |  muls r0,r4       @ s=m*y*y          // Q32
420 |  asrs r0,#12      @ s>>12
421 |  muls r0,r4       @ m*s              // Q28
422 |  asrs r0,#13      @ m*s              // Q15
423 |  lsls r4,#8       @ m                // Q16
424 |  subs r4,r0       @ m=(m<<8)-(s>>13) // Q16-Q15/2 -> Q16
425 | 
426 |  mov r0,r4        @                  // second Newton-Raphson iteration
427 |  muls r0,r0       @ u=m*m            // Q32
428 |  lsrs r0,#16      @ u>>16            // Q16
429 |  lsrs r3,r1,#7    @ y>>7             // Q16
430 |  muls r0,r3       @ s=u*(y>>7)       // Q32
431 |  asrs r0,#12      @ s>>12            // Q20
432 |  muls r0,r4       @ s*m              // Q36
433 |  asrs r0,#21      @ s*m              // Q15
434 |  subs r4,r0       @ m=m-s            // Q16-Q15/2
435 | 
436 |  mov r0,r4        @                  // third Newton-Raphson iteration
437 |  muls r0,r0       @ u=m*m            // Q32
438 |  lsrs r3,r0,#12   @ now multiply u and y in two parts: u>>12
439 |  muls r3,r1       @ first partial product (u>>12)*y Q43
440 |  lsls r0,#20
441 |  lsrs r0,#20      @ u&0xfff
442 |  lsrs r5,r1,#12   @ y>>12
443 |  muls r0,r5       @ second partial product (u&0xfff)*(y>>12) Q43
444 |  add r0,r3        @ s=u*y            // Q43
445 |  asrs r0,#15      @ s>>15            // Q28
446 |  muls r0,r4       @ (s>>15)*m        // Q44
447 |  lsls r4,#13      @ m<<13            // Q29
448 |  asrs r0,#16      @ s>>16            // Q28
449 |  subs r0,r4,r0    @                  // Q29-Q28/2
450 | 
451 |  asrs r2,#1       @ halve exponent
452 |  bcc div0         @ was y shifted?
453 |  lsrs r0,#1
454 |  lsls r1,#1       @ shift y back
455 |  b div0           @ round and complete with multiplication
456 | 
457 | .align 2
458 | 
459 | @ rcpapp: 8 one-byte entries, round(2^15./[136:16:248]) in MATLAB-style notation; the code that reads it is outside this section
460 | rcpapp:
461 | .byte 0xf1,0xd8,0xc3,0xb2, 0xa4,0x98,0x8d,0x84
462 | 
463 | @ rsqrtapp: 12 one-byte initial reciprocal square root approximations, round(sqrt(2^22./[72:16:248])); read above through rsqrtapp-4 indexed by y>>21
464 | rsqrtapp:
465 | .byte 0xf1,0xda,0xc9,0xbb, 0xb0,0xa6,0x9e,0x97, 0x91,0x8b,0x86,0x82
466 | 
467 | .endif
468 | 
469 | .if include_conversions
470 | 
471 | @ convert float in r0 to signed int in r0, rounding towards -Inf, clamping (qfp_float2fix with binary point position 0)
472 | .thumb_func
473 | qfp_float2int:
474 |  movs r1,#0      @ binary point position 0, then fall through into qfp_float2fix
475 | 
476 | @ convert float in r0 to signed fixed point in r0, clamping; r1 = binary point position, added to the exponent below
477 | .thumb_func
478 | qfp_float2fix:
479 |  push {r4,r14}   @ every exit path returns with pop {r4,r15}
480 |  bl unpackx      @ defined outside this section; presumably leaves signed mantissa in r0 and exponent in r2 (TODO confirm)
481 |  add r2,r1       @ incorporate binary point position into exponent
482 |  subs r2,#23     @ r2 is now amount of left shift required
483 |  blt 1f          @ requires right shift?
484 |  cmp r2,#7       @ overflow?
485 |  ble 4f          @ no: use the shared left-shift-and-return tail at label 4 (inside qfp_float2ufix)
486 | 3:               @ overflow
487 |  asrs r1,r0,#31  @ +ve:0 -ve:0xffffffff
488 |  mvns r1,r1      @ +ve:0xffffffff -ve:0
489 |  movs r0,#1
490 |  lsls r0,#31     @ r0=0x80000000
491 | 5:               @ qfp_float2ufix also branches here with its own r0,r1
492 |  eors r0,r1      @ +ve:0x7fffffff -ve:0x80000000 (unsigned path: r0^r0=0 for negative input, r0^~r0=0xffffffff on overflow)
493 |  pop {r4,r15}
494 | 1:               @ right-shift path, shared with qfp_float2ufix
495 |  rsbs r2,#0      @ right shift for r0, >0
496 |  cmp r2,#32
497 |  blt 2f          @ more than 32 bits of right shift?
498 |  movs r2,#32     @ 32 or more: clamp shift count to 32
499 | 2:
500 |  asrs r0,r0,r2   @ arithmetic shift, so discarded bits round the result towards -Inf
501 |  pop {r4,r15}
502 | 
503 | @ unsigned version: convert float in r0 to unsigned int in r0; negative input returns 0, overflow returns 0xffffffff
504 | .thumb_func
505 | qfp_float2uint:
506 |  movs r1,#0      @ binary point position 0, then fall through into qfp_float2ufix
507 | .thumb_func
508 | qfp_float2ufix:  @ as qfp_float2uint, with r1 = binary point position of the unsigned fixed-point result
509 |  push {r4,r14}
510 |  bl unpackx      @ defined outside this section; presumably leaves signed mantissa in r0 and exponent in r2 (TODO confirm)
511 |  add r2,r1       @ incorporate binary point position into exponent
512 |  movs r1,r0      @ r1=r0 so the eors at label 5 yields 0; also sets the N flag tested next
513 |  bmi 5b          @ negative? return zero
514 |  subs r2,#23     @ r2 is now amount of left shift required
515 |  blt 1b          @ requires right shift?
516 |  mvns r1,r0      @ ready to return 0xffffffff
517 |  cmp r2,#8       @ overflow?
518 |  bgt 5b          @ yes: r0^~r0 at label 5 (inside qfp_float2fix) gives 0xffffffff
519 | 4:               @ qfp_float2fix also branches here when its result fits
520 |  lsls r0,r0,r2   @ result fits, left shifted
521 |  pop {r4,r15}
522 | 
523 | @ convert signed int in r0 to float, rounding (qfp_fix2float with no bits after the point)
524 | .thumb_func
525 | qfp_int2float:
526 |  movs r1,#0      @ no bits after point, then fall through into qfp_fix2float
527 | 
528 | @ convert signed fix to float, rounding; number of r0 bits after point in r1
529 | .thumb_func
530 | qfp_fix2float:
531 |  push {r4,r5,r14} @ packret (outside this section) presumably pops these to return - TODO confirm
532 | 1:               @ qfp_ufix2float joins here for values below 2^31
533 |  movs r2,#29
534 |  subs r2,r1      @ fix exponent: r2=29-r1
535 | packretns:       @ pack and return, sticky bits=0; also the common exit of the scientific functions below
536 |  movs r5,#0      @ r5 carries the sticky bits
537 |  b packret
538 | 
539 | @ unsigned version: convert unsigned int in r0 to float
540 | .thumb_func
541 | qfp_uint2float:
542 |  movs r1,#0      @ no bits after point, then fall through into qfp_ufix2float
543 | .thumb_func
544 | qfp_ufix2float:  @ convert unsigned fix in r0 to float; r1 is subtracted from the exponent exactly as in qfp_fix2float
545 |  push {r4,r5,r14}
546 |  cmp r0,#0
547 |  bge 1b          @ treat <2^31 as signed
548 |  movs r2,#30
549 |  subs r2,r1      @ fix exponent: r2=30-r1, one more than the signed case because r0 is halved below
550 |  lsls r5,r0,#31  @ one sticky bit
551 |  lsrs r0,#1      @ halve r0, clearing its top bit; the bit shifted out was saved in r5
552 |  b packret
553 | 
554 | .endif
555 | 
556 | .if include_scientific
557 | 
558 | @ All the scientific functions are implemented using the CORDIC algorithm. For notation,
559 | @ details not explained in the comments below, and a good overall survey see
560 | @ "50 Years of CORDIC: Algorithms, Architectures, and Applications" by Meher et al.,
561 | @ IEEE Transactions on Circuits and Systems Part I, Volume 56 Issue 9.
562 | 
563 | @ Register use:
564 | @ r0: x
565 | @ r1: y
566 | @ r2: z/omega
567 | @ r3: coefficient pointer
568 | @ r4,r8: m (r4 holds m only on entry to cordic_start; afterwards it holds the current table angle and r8 holds m)
569 | @ r5: i (shift); r6: x>>i; r7: y>>i
570 | 
571 | cordic_start: @ initialisation: save r8, set shift to 0, move m to r8 and fetch the first table entry
572 |  mov r7,r8    @ save caller's r8 on the stack, going via r7 (restored at cordic_exit)
573 |  push {r7}
574 |  movs r5,#0   @ initial shift=0
575 |  mov r8,r4    @ keep m in r8 so that r4 can hold table entries
576 |  b 5f         @ join cordic_rstep at its table fetch, which also returns to the caller
577 | 
578 | cordic_vstep: @ one step of algorithm in vector mode; only the direction test differs from cordic_rstep
579 |  cmp r1,#0    @ check sign of y
580 |  bgt 4f       @ y>0: negative rotation (label 4 in cordic_rstep)
581 |  b 1f         @ y<=0: positive rotation (label 1 in cordic_rstep)
582 | cordic_rstep: @ one step of algorithm in rotation mode; uses r4,r6,r7 from the previous fetch, then fetches the next entry
583 |  cmp r2,#0    @ check sign of angle
584 |  bge 1f       @ angle>=0: positive rotation
585 | 4:            @ cordic_vstep enters here when y>0
586 |  subs r1,r6   @ negative rotation: y=y-(x>>i)
587 |  rsbs r7,#0   @ negate y>>i so that the x update below goes the opposite way
588 |  adds r2,r4   @ accumulate angle
589 |  b 2f
590 | 1:            @ cordic_vstep enters here when y<=0
591 |  adds r1,r6   @ positive rotation: y=y+(x>>i)
592 |  subs r2,r4   @ accumulate angle
593 | 2:
594 |  mov r4,r8    @ r4=m
595 |  muls r7,r4   @ apply sign from m
596 |  subs r0,r7   @ finish rotation: x=x{+/-}(y>>i)
597 | 5:            @ cordic_start enters here to fetch the first entry
598 |  ldr r4,[r3]  @ fetch next angle from table
599 |  adds r3,#4   @ bump pointer
600 |  lsrs r4,#1   @ repeated angle?
601 |  bcs 3f       @ flag bit 0 set: keep the current shift
602 |  adds r5,#1   @ adjust shift if not
603 | 3:
604 |  mov r6,r0
605 |  asrs r6,r5   @ x>>i
606 |  mov r7,r1
607 |  asrs r7,r5   @ y>>i
608 |  lsrs r4,#1   @ shift end flag into carry
609 |  bx r14       @ return: carry set if the entry just fetched is marked as the end of the table; r4=angle with both flags removed
610 | 
611 | @ CORDIC rotation mode; in: r0=x, r1=y, r2=angle, r3=coefficient table, r4=m; out: rotated x,y in r0,r1
612 | cordic_rot:
613 |  push {r6,r7,r14}
614 |  bl cordic_start   @ initialise
615 | 1:
616 |  bl cordic_rstep
617 |  bcc 1b            @ step until table finished
618 |  asrs r6,r0,#14    @ remaining small rotations can be linearised: see IV.B of paper referenced above
619 |  asrs r7,r1,#14    @ y>>14
620 |  asrs r2,#3        @ residual angle z>>3
621 |  muls r6,r2        @ all remaining CORDIC steps in a multiplication
622 |  muls r7,r2
623 |  mov r4,r8         @ r4=m
624 |  muls r7,r4        @ apply sign from m
625 |  asrs r6,#12
626 |  asrs r7,#12
627 |  subs r0,r7        @ x=x{+/-}(yz>>k)
628 |  adds r1,r6        @ y=y+(xz>>k)
629 | cordic_exit:        @ common exit, also used by cordic_vec
630 |  pop {r7}          @ value of r8 saved by cordic_start
631 |  mov r8,r7         @ restore caller's r8
632 |  pop {r6,r7,r15}   @ matches the push {r6,r7,r14} on entry; returns
633 | 
634 | @ CORDIC vector mode; in: r0=x, r1=y, r2=initial angle, r3=coefficient table, r4=m; drives y towards zero, accumulating the angle in r2
635 | cordic_vec:
636 |  push {r6,r7,r14}
637 |  bl cordic_start   @ initialise
638 | 1:
639 |  bl cordic_vstep
640 |  bcc 1b            @ step until table finished
641 | 4:
642 |  cmp r1,#0         @ continue as in cordic_vstep but without using table; x is not affected as y is small
643 |  bgt 2f            @ check sign of y
644 |  adds r1,r6        @ positive rotation: y=y+(x>>i)
645 |  subs r2,r4        @ accumulate angle
646 |  b 3f
647 | 2:
648 |  subs r1,r6        @ negative rotation: y=y-(x>>i)
649 |  adds r2,r4        @ accumulate angle
650 | 3:
651 |  asrs r6,#1        @ halve x>>i for the next step
652 |  asrs r4,#1        @ next "table entry"
653 |  bne 4b            @ repeat until the angle has been shifted down to zero
654 |  b cordic_exit     @ restore r8 and return through the exit code in cordic_rot
655 | 
656 | .thumb_func
657 | qfp_fsin:            @ calculate sin and cos using CORDIC rotation method; callers below take sine from r0 and cosine from r1
658 |  push {r4,r5,r14}
659 |  movs r1,#24         @ binary point position 24, i.e. Q24
660 |  bl qfp_float2fix    @ range reduction by repeated subtraction/addition in fixed point
661 |  ldr r4,pi_q29       @ pi Q29 (first word of tab_cc)
662 |  lsrs r4,#4          @ 2pi Q24
663 | 1:
664 |  subs r0,r4          @ subtract 2pi until the value goes negative...
665 |  bge 1b
666 | 1:
667 |  adds r0,r4          @ ...then add 2pi until it is non-negative
668 |  bmi 1b              @ now in range 0..2pi
669 |  lsls r2,r0,#2       @ z Q26
670 |  lsls r5,r4,#1       @ pi Q26 (r4=pi/2 Q26)
671 |  ldr r0,=#0x136e9db4 @ initialise CORDIC x,y with scaling
672 |  movs r1,#0          @ y=0
673 | 1:
674 |  cmp r2,r4           @ >pi/2?
675 |  blt 2f
676 |  subs r2,r5          @ reduce range to -pi/2..pi/2
677 |  rsbs r0,#0          @ rotate vector by pi
678 |  b 1b
679 | 2:
680 |  lsls r2,#3          @ Q29
681 |  adr r3,tab_cc       @ circular coefficients
682 |  movs r4,#1          @ m=1
683 |  bl cordic_rot       @ r0=x (cosine), r1=y (sine)
684 |  adds r1,#9          @ fiddle factor to make sin(0)==0
685 |  movs r2,#0          @ exponents to zero
686 |  movs r3,#0
687 |  movs r5,#0          @ no sticky bits
688 |  bl packx            @ pack cosine
689 |  bl xchxy            @ defined outside this section; presumably exchanges the x (r0/r2) and y (r1/r3) values - TODO confirm
690 |  b packretns         @ pack sine
691 | 
692 | .thumb_func
693 | qfp_fcos:            @ cosine: run qfp_fsin and return its second result
694 |  push {r14}
695 |  bl qfp_fsin         @ leaves the cosine in r1
696 |  mov r0,r1           @ extract cosine result
697 |  pop {r15}
698 | 
699 | .thumb_func
700 | qfp_ftan:            @ tangent: compute sine and cosine, then finish in the divide code (outside this section)
701 |  push {r4,r5,r14}    @ no matching pop here: the divide code branched to below presumably returns for us - TODO confirm
702 |  bl qfp_fsin         @ sine in r0/r2, cosine in r1/r3
703 | .if include_faster
704 |  b fdiv_fast_n       @ sin/cos
705 | .else
706 |  b fdiv_n            @ sin/cos
707 | 
708 | .endif
709 | 
710 | .thumb_func
711 | qfp_fexp:            @ calculate cosh and sinh using rotation method; add to obtain exp
712 |  push {r4,r5,r14}
713 |  movs r1,#24         @ binary point position 24
714 |  bl qfp_float2fix    @ Q24: covers entire valid input range
715 |  asrs r1,r0,#16      @ Q8
716 |  ldr r2,=#5909       @ log_2(e) Q12
717 |  muls r1,r2          @ estimate exponent of result Q20
718 |  asrs r1,#19         @ Q1
719 |  adds r1,#1          @ rounding
720 |  asrs r1,#1          @ rounded estimate of exponent of result
721 |  push {r1}           @ save for later
722 |  lsls r2,r0,#5       @ Q29
723 |  ldr r0,=#0x162e42ff @ ln(2) Q29
724 |  muls r1,r0          @ accurate contribution of estimated exponent
725 |  subs r2,r1          @ residual to be exponentiated, approximately -.5..+.5 Q29
726 |  ldr r0,=#0x2c9e15ca @ initialise CORDIC x,y with scaling
727 |  movs r1,#0          @ y=0
728 |  adr r3,tab_ch       @ hyperbolic coefficients
729 |  mvns r4,r1          @ m=-1
730 |  bl cordic_rot       @ calculate cosh and sinh
731 |  add r0,r1           @ exp=cosh+sinh
732 |  pop {r2}            @ recover exponent
733 |  b packretns         @ pack result
734 | 
735 | .thumb_func
736 | qfp_fsqrt:           @ calculate sqrt and ln using vector method
737 |  push {r4,r5,r14}
738 |  bl unpackx          @ defined outside this section; presumably leaves signed mantissa in r0 and exponent in r2 (TODO confirm)
739 |  movs r1,r0          @ -ve argument?
740 |  bmi 3f              @ return -Inf, -Inf
741 |  ldr r1,=#0x0593C2B9 @ scale factor for CORDIC
742 |  bl mul0             @ Q29
743 |  asrs r1,r2,#1       @ halve exponent
744 |  bcc 1f              @ carry = bit shifted out: clear means the exponent was even
745 |  adds r1,#1          @ was odd: add 1 and shift mantissa
746 |  asrs r0,#1
747 | 1:
748 |  push {r1}           @ save exponent/2 for later
749 |  mov r1,r0
750 |  ldr r3,=#0x0593C2B9 @ re-use constant
751 |  lsls r3,#2
752 |  adds r0,r3          @ "a+1"
753 |  subs r1,r3          @ "a-1"
754 |  movs r2,#0          @ initial angle 0
755 |  adr r3,tab_ch       @ hyperbolic coefficients
756 |  mvns r4,r2          @ m=-1
757 |  bl cordic_vec
758 |  mov r1,r2           @ keep ln result
759 |  pop {r2}            @ retrieve exponent/2
760 | 2:
761 |  movs r3,r2          @ give the ln result in r1 the same exponent, for qfp_fln
762 |  b packretns         @ pack sqrt result
763 | 
764 | 3:                   @ negative argument
765 |  movs r2,#255        @ exponent 255, packed as an infinity
766 |  b 2b
767 | 
768 | .thumb_func
769 | qfp_fln:                 @ natural logarithm, built from the unpacked ln value that qfp_fsqrt leaves behind
770 |  push {r4,r5,r14}
771 |  bl qfp_fsqrt            @ get unpacked ln in r1/r3; exponent has been halved
772 |  cmp r3,#70              @ ln(Inf)?
773 |  bgt 2f                  @ return Inf
774 |  rsbs r3,#0              @ negate the halved exponent
775 |  cmp r3,#70
776 |  bgt 1f                  @ ln(0)? return -Inf
777 | 3:
778 |  ldr r0,=#0x0162e430     @ ln(4) Q24
779 |  muls r0,r3              @ contribution from negated, halved exponent
780 |  adds r1,#8              @ round result of ln
781 |  asrs r1,#4              @ Q24
782 |  subs r0,r1,r0           @ add in contribution from (negated) exponent
783 |  movs r2,#5              @ pack expects Q29
784 |  b packretns
785 | 1:
786 |  mvns r0,r0              @ make result -Inf
787 | 2:
788 |  movs r2,#255            @ exponent 255, packed as an infinity
789 |  b packretns
790 | 
791 | .thumb_func
792 | qfp_fatan2:              @ two-argument arctangent using CORDIC vector mode with the circular coefficients
793 |  push {r4,r5,r14}
794 |  bl unpackxyalign        @ convert to fixed point (ensure common exponent, which is discarded)
795 |  movs r2,#0              @ initial angle
796 |  cmp r0,#0               @ x negative
797 |  bge 5f
798 |  rsbs r0,#0              @ rotate to 1st/4th quadrants
799 |  rsbs r1,#0
800 |  ldr r2,pi_q29           @ pi Q29
801 | 5:
802 |  adr r3,tab_cc           @ circular coefficients
803 |  movs r4,#1              @ m=1
804 |  bl cordic_vec           @ also produces magnitude (with scaling factor 1.646760119), which is discarded
805 |  mov r0,r2               @ result here is -pi/2..3pi/2 Q29
806 |  ldr r2,pi_q29           @ pi Q29
807 |  adds r4,r0,r2           @ attempt to fix -3pi/2..-pi case
808 |  bcs 6f                  @ -pi/2..0? leave result as is
809 |  subs r4,r0,r2           @ <pi? leave as is
810 |  bmi 6f
811 |  subs r0,r4,r2           @ >pi: take off 2pi
812 | 6:
813 |  subs r0,#1              @ fiddle factor so atan2(0,1)==0
814 |  movs r2,#0              @ exponent for pack
815 |  b packretns
816 | 
817 | .align 2
818 | .ltorg
819 | 
820 | @ first entry in following table is pi Q29 (the same word is atan(1) shifted left by 2 with flag b0 set)
821 | pi_q29:
822 | @ circular CORDIC coefficients: atan(2^-i), b0=flag for preventing shift, b1=flag for end of table; each word is angle*4+flags
823 | tab_cc:
824 | .word 0x1921fb54*4+1     @ no shift before first iteration
825 | .word 0x0ed63383*4+0
826 | .word 0x07d6dd7e*4+0
827 | .word 0x03fab753*4+0
828 | .word 0x01ff55bb*4+0
829 | .word 0x00ffeaae*4+0
830 | .word 0x007ffd55*4+0
831 | .word 0x003fffab*4+0
832 | .word 0x001ffff5*4+0
833 | .word 0x000fffff*4+0
834 | .word 0x0007ffff*4+0
835 | .word 0x00040000*4+0
836 | .word 0x00020000*4+0+2   @ +2 marks end
837 | 
838 | @ hyperbolic CORDIC coefficients: atanh(2^-i), flags as above; the first entry has b0 clear, so the shift becomes 1 before the first step
839 | tab_ch:
840 | .word 0x1193ea7b*4+0
841 | .word 0x1193ea7b*4+1   @ repeat i=1
842 | .word 0x082c577d*4+0
843 | .word 0x04056247*4+0
844 | .word 0x0200ab11*4+0
845 | .word 0x0200ab11*4+1   @ repeat i=4
846 | .word 0x01001559*4+0
847 | .word 0x008002ab*4+0
848 | .word 0x00400055*4+0
849 | .word 0x0020000b*4+0
850 | .word 0x00100001*4+0
851 | .word 0x00080001*4+0
852 | .word 0x00040000*4+0
853 | .word 0x00020000*4+0
854 | .word 0x00020000*4+1+2 @ repeat i=12; +2 marks end
855 | 
856 | .endif
857 | 
858 | qfp_lib_end:
859 | 


--------------------------------------------------------------------------------
/qfplib.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 Mark Owen
 2 | // http://www.quinapalus.com
 3 | // E-mail: qfp@quinapalus.com
 4 | //
 5 | // Thanks to Bill Westfield
 6 | //
 7 | // This file is free software: you can redistribute it and/or modify
 8 | // it under the terms of version 2 of the GNU General Public License
 9 | // as published by the Free Software Foundation.
10 | //
11 | // This file is distributed in the hope that it will be useful,
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | // GNU General Public License for more details.
15 | //
16 | // You should have received a copy of the GNU General Public License
17 | // along with this file.  If not, see <http://www.gnu.org/licenses/> or
18 | // write to the Free Software Foundation, Inc., 51 Franklin Street,
19 | // Fifth Floor, Boston, MA  02110-1301, USA.
20 | 
21 | #ifndef _QFPLIB_H_
22 | #define _QFPLIB_H_
23 | 
24 | #ifdef __cplusplus
25 |   extern "C" {  // the routines are assembly labels, so they need unmangled C names
26 | #endif
27 | 
28 | extern          float qfp_fadd(float x,float y);
29 | extern          float qfp_fsub(float x,float y);
30 | extern          float qfp_fmul(float x,float y);
31 | extern          float qfp_fdiv(float x,float y);
32 | extern          float qfp_fdiv_fast(float x,float y);
33 | extern          int   qfp_float2int(float x);                 // rounds towards -Inf; clamps on overflow
34 | extern          int   qfp_float2fix(float x,int y);           // y = binary point position of the fixed-point result; clamps on overflow
35 | extern unsigned int   qfp_float2uint(float x);                // negative x gives 0; overflow gives 0xffffffff
36 | extern unsigned int   qfp_float2ufix(float x,int y);          // unsigned counterpart of qfp_float2fix
37 | extern          float qfp_int2float(int x);                   // rounding
38 | extern          float qfp_fix2float(int x,int y);             // y = number of bits of x after the binary point; rounding
39 | extern          float qfp_uint2float(unsigned int x);         // unsigned counterpart of qfp_int2float
40 | extern          float qfp_ufix2float(unsigned int x,int y);   // unsigned counterpart of qfp_fix2float
41 | extern          int   qfp_fcmp(float x,float y);
42 | extern          float qfp_fcos(float x);                      // CORDIC rotation mode, computed via qfp_fsin
43 | extern          float qfp_fsin(float x);                      // CORDIC rotation mode
44 | extern          float qfp_ftan(float x);                      // sine divided by cosine
45 | extern          float qfp_fatan2(float y,float x);            // CORDIC vector mode
46 | extern          float qfp_fexp(float x);                      // cosh+sinh via hyperbolic CORDIC
47 | extern          float qfp_fln(float x);                       // built on the qfp_fsqrt calculation
48 | extern          float qfp_fsqrt(float x);                     // hyperbolic CORDIC vector mode
49 | extern          float qfp_fsqrt_fast(float x);
50 | 
51 | #ifdef __cplusplus
52 |   } // extern "C"
53 | #endif
54 | #endif // _QFPLIB_H_
55 | 


--------------------------------------------------------------------------------