├── .gitignore
├── .tfignore
├── ArmMathM0.atsln
├── ArmMathM0.componentinfo.xml
├── ArmMathM0.cproj
├── Denormals
│   ├── .gitignore
│   ├── Makefile
│   ├── libMathM0den.a
│   ├── makedep.mk
│   └── options.inc
├── LICENSE.txt
├── NoDenormals
│   ├── .gitignore
│   ├── Makefile
│   ├── libMathM0.a
│   ├── makedep.mk
│   └── options.inc
├── ReadMe.md
├── include
│   └── mathm0.h
├── lib-out
│   ├── libMathM0.a
│   └── libMathM0den.a
└── src
    ├── common
    │   └── clz_denormal.s
    ├── double
    │   ├── dadd.s
    │   ├── ddenormal_result.s
    │   ├── ddiv.s
    │   ├── dmul.s
    │   ├── dop1_normalize.s
    │   ├── dop2_normalize.s
    │   ├── drsub.s
    │   ├── dsub.s
    │   └── sqrt.s
    ├── float
    │   ├── asinf.S
    │   ├── atan2f.s
    │   ├── atantablef.s
    │   ├── cosf.s
    │   ├── faddsub.s
    │   ├── fdenormal_result.s
    │   ├── fdiv.s
    │   ├── fmul.s
    │   ├── frsub.s
    │   ├── sincosf.s
    │   ├── sinf.s
    │   ├── sqrtf.s
    │   └── tanf.s
    └── include
        ├── ieee.inc
        ├── macros.inc
        └── trigf.inc
/.gitignore:
--------------------------------------------------------------------------------
1 | Thumbs.db
2 | *.obj
3 | *.exe
4 | *.pdb
5 | *.user
6 | *.aps
7 | *.pch
8 | *.vspscc
9 | *_i.c
10 | *_p.c
11 | *.ncb
12 | *.suo
13 | *.sln.docstates
14 | *.tlb
15 | *.tlh
16 | *.bak
17 | *.cache
18 | *.ilk
19 | *.log
20 | *.xlsx
21 | .vs/
22 | [Bb]in
23 | [Dd]ebug*/
24 | *.lib
25 | *.sbr
26 | obj/
27 | [Rr]elease*/
28 | _ReSharper*/
29 | [Tt]est[Rr]esult*
30 | *.vssscc
31 | $tf*/
--------------------------------------------------------------------------------
/.tfignore:
--------------------------------------------------------------------------------
1 | \.git
--------------------------------------------------------------------------------
/ArmMathM0.atsln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Atmel Studio Solution File, Format Version 11.00
4 | VisualStudioVersion = 14.0.23107.0
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{54F91283-7BC4-4236-8FF9-10F437C3AD48}") = "ArmMathM0", "ArmMathM0.cproj", "{DCE6C7E3-EE26-4D79-826B-08594B9AD897}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|ARM = Debug|ARM
11 | Denormals|ARM = Denormals|ARM
12 | NoDenormals|ARM = NoDenormals|ARM
13 | EndGlobalSection
14 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
15 | {DCE6C7E3-EE26-4D79-826B-08594B9AD897}.Debug|ARM.ActiveCfg = Denormals|ARM
16 | {DCE6C7E3-EE26-4D79-826B-08594B9AD897}.Debug|ARM.Build.0 = Denormals|ARM
17 | {DCE6C7E3-EE26-4D79-826B-08594B9AD897}.Denormals|ARM.ActiveCfg = Denormals|ARM
18 | {DCE6C7E3-EE26-4D79-826B-08594B9AD897}.Denormals|ARM.Build.0 = Denormals|ARM
19 | {DCE6C7E3-EE26-4D79-826B-08594B9AD897}.NoDenormals|ARM.ActiveCfg = NoDenormals|ARM
20 | {DCE6C7E3-EE26-4D79-826B-08594B9AD897}.NoDenormals|ARM.Build.0 = NoDenormals|ARM
21 | EndGlobalSection
22 | GlobalSection(SolutionProperties) = preSolution
23 | HideSolutionNode = FALSE
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/ArmMathM0.componentinfo.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | CMSIS
8 | CORE
9 |
10 |
11 | ARM
12 | 5.1.2
13 | C:/Program Files (x86)\Atmel\Studio\7.0\Packs
14 |
15 |
16 |
17 |
18 | C:/Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Documentation\Core\html\index.html
19 |
20 | doc
21 |
22 |
23 |
24 | CMSIS/Documentation/Core/html/index.html
25 |
26 |
27 |
28 |
29 | C:/Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include\
30 |
31 | include
32 |
33 |
34 |
35 | CMSIS/Core/Include/
36 |
37 |
38 |
39 |
40 | CMSIS
41 | C:/Program Files (x86)/Atmel/Studio/7.0/Packs/arm/CMSIS/5.4.0/ARM.CMSIS.pdsc
42 | 5.4.0
43 | true
44 | ARMv6_7_8-M Device
45 |
46 |
47 |
48 | Resolved
49 | Fixed
50 | true
51 |
52 |
53 |
54 |
55 | Device
56 | Startup
57 |
58 |
59 | Atmel
60 | 1.2.0
61 | C:/Program Files (x86)\Atmel\Studio\7.0\Packs
62 |
63 |
64 |
65 |
66 |
67 |
68 | C:/Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include
69 |
70 | include
71 | C
72 |
73 |
74 | samc21/include
75 |
76 |
77 |
78 |
79 | C:/Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include\sam.h
80 |
81 | header
82 | C
83 | 2Rk75rgdSdFbgPkrL7+Wow==
84 |
85 | samc21/include/sam.h
86 |
87 |
88 |
89 |
90 | C:/Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\templates\library.c
91 | template
92 | source
93 | C Lib
94 | 0p1ZCrRdnMJDSphzDtne/w==
95 |
96 | samc21/templates/library.c
97 | Main file (.c)
98 |
99 |
100 |
101 | C:/Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\templates\library.cpp
102 | template
103 | source
104 | C Lib
105 | A6CtkScmRPo5iQcWfSbq4Q==
106 |
107 | samc21/templates/library.cpp
108 | Main file (.cpp)
109 |
110 |
111 |
112 | SAMC21_DFP
113 | C:/Program Files (x86)/Atmel/Studio/7.0/Packs/atmel/SAMC21_DFP/1.2.176/Atmel.SAMC21_DFP.pdsc
114 | 1.2.176
115 | true
116 | ATSAMC21G17A
117 |
118 |
119 |
120 | Resolved
121 | Fixed
122 | true
123 |
124 |
125 |
--------------------------------------------------------------------------------
/ArmMathM0.cproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 2.0
5 | 7.0
6 | com.Atmel.ARMGCC.C
7 | dce6c7e3-ee26-4d79-826b-08594b9ad897
8 | ATSAMC21G17A
9 | none
10 | StaticLibrary
11 | C
12 | lib$(MSBuildProjectName)
13 | .a
14 | $(MSBuildProjectDirectory)\$(Configuration)
15 |
16 |
17 | ArmMathM0
18 | ArmMathM0
19 | ArmMathM0
20 | Native
21 | true
22 | false
23 | true
24 | true
25 |
26 |
27 | true
28 |
29 | 2
30 | 0
31 | 0
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 | True
50 | True
51 | True
52 | True
53 | True
54 |
55 |
56 | DEBUG
57 |
58 |
59 |
60 |
61 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
62 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
63 |
64 |
65 | Optimize debugging experience (-Og)
66 | True
67 | Maximum (-g3)
68 | True
69 | True
70 | False
71 |
72 |
73 | ../src/include
74 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
75 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
76 |
77 |
78 | Default (-g)
79 |
80 |
81 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
82 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
83 |
84 |
85 | Default (-Wa,-g)
86 |
87 |
88 | libMathM0
89 | .a
90 | cmd /c del $(SolutionDir)$(Configuration)\TestDriver.elf
91 | echo F|xcopy $(OutputDirectory)\$(OutputFileName)$(OutputFileExtension) $(MSBuildProjectDirectory)\lib-out\$(OutputFileName)$(OutputFileExtension) /Y /Q
92 |
93 |
94 |
95 |
96 | True
97 | True
98 | True
99 | True
100 | True
101 |
102 |
103 | DEBUG
104 |
105 |
106 |
107 |
108 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
109 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
110 |
111 |
112 | Optimize debugging experience (-Og)
113 | True
114 | Maximum (-g3)
115 | True
116 | True
117 | False
118 |
119 |
120 | ../src/include
121 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
122 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
123 |
124 |
125 | Default (-g)
126 |
127 |
128 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
129 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
130 |
131 |
132 | Default (-Wa,-g)
133 |
134 |
135 | bin\Denormals\
136 | libMathM0den
137 | .a
138 | cmd /c del $(SolutionDir)$(Configuration)\TestDriver.elf
139 | echo F|xcopy $(OutputDirectory)\$(OutputFileName)$(OutputFileExtension) $(MSBuildProjectDirectory)\lib-out\$(OutputFileName)$(OutputFileExtension) /Y /Q
140 |
141 |
142 |
143 |
144 | True
145 | True
146 | True
147 | True
148 | True
149 |
150 |
151 | DEBUG
152 |
153 |
154 |
155 |
156 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
157 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
158 |
159 |
160 | Optimize debugging experience (-Og)
161 | True
162 | Maximum (-g3)
163 | True
164 | True
165 | False
166 |
167 |
168 | ../src/include
169 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
170 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
171 |
172 |
173 | Default (-g)
174 |
175 |
176 | %24(PackRepoDir)\arm\CMSIS\5.4.0\CMSIS\Core\Include\
177 | %24(PackRepoDir)\atmel\SAMC21_DFP\1.2.176\samc21\include
178 |
179 |
180 | Default (-Wa,-g)
181 |
182 |
183 | libMathM0
184 | .a
185 | cmd /c del $(SolutionDir)$(Configuration)\TestDriver.elf
186 | echo F|xcopy $(OutputDirectory)\$(OutputFileName)$(OutputFileExtension) $(MSBuildProjectDirectory)\lib-out\$(OutputFileName)$(OutputFileExtension) /Y /Q
187 | bin\Debug\
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 | compile
199 |
200 |
201 | compile
202 |
203 |
204 | compile
205 |
206 |
207 | compile
208 |
209 |
210 | compile
211 |
212 |
213 | compile
214 |
215 |
216 | compile
217 |
218 |
219 | compile
220 |
221 |
222 | compile
223 |
224 |
225 | compile
226 |
227 |
228 | compile
229 |
230 |
231 | compile
232 |
233 |
234 | compile
235 |
236 |
237 | compile
238 |
239 |
240 | compile
241 |
242 |
243 | compile
244 |
245 |
246 | compile
247 |
248 |
249 | compile
250 |
251 |
252 | compile
253 |
254 |
255 | compile
256 |
257 |
258 | compile
259 |
260 |
261 | compile
262 |
263 |
264 | compile
265 |
266 |
267 |
268 |
269 | compile
270 |
271 |
272 | compile
273 |
274 |
275 | compile
276 |
277 |
278 |
279 |
--------------------------------------------------------------------------------
/Denormals/.gitignore:
--------------------------------------------------------------------------------
1 | /src/
2 |
--------------------------------------------------------------------------------
/Denormals/Makefile:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
5 | SHELL := cmd.exe
6 | RM := rm -rf
7 |
8 | USER_OBJS :=
9 |
10 | LIBS :=
11 | PROJ :=
12 |
13 | O_SRCS :=
14 | C_SRCS :=
15 | S_SRCS :=
16 | S_UPPER_SRCS :=
17 | OBJ_SRCS :=
18 | ASM_SRCS :=
19 | PREPROCESSING_SRCS :=
20 | OBJS :=
21 | OBJS_AS_ARGS :=
22 | C_DEPS :=
23 | C_DEPS_AS_ARGS :=
24 | EXECUTABLES :=
25 | OUTPUT_FILE_PATH :=
26 | OUTPUT_FILE_PATH_AS_ARGS :=
27 | AVR_APP_PATH :=$$$AVR_APP_PATH$$$
28 | QUOTE := "
29 | ADDITIONAL_DEPENDENCIES:=
30 | OUTPUT_FILE_DEP:=
31 | LIB_DEP:=
32 | LINKER_SCRIPT_DEP:=
33 |
34 | # Every subdirectory with source files must be described here
35 | SUBDIRS := \
36 | ../src/ \
37 | ../src/common/ \
38 | ../src/double/ \
39 | ../src/float/ \
40 | ../src/include/
41 |
42 |
43 | # Add inputs and outputs from these tool invocations to the build variables
44 | C_SRCS +=
45 |
46 |
47 | PREPROCESSING_SRCS += \
48 | ../src/common/clz_denormal.s \
49 | ../src/double/dadd.s \
50 | ../src/double/ddenormal_result.s \
51 | ../src/double/ddiv.s \
52 | ../src/double/dmul.s \
53 | ../src/double/dop1_normalize.s \
54 | ../src/double/dop2_normalize.s \
55 | ../src/double/drsub.s \
56 | ../src/double/dsub.s \
57 | ../src/double/sqrt.s \
58 | ../src/float/asinf.s \
59 | ../src/float/atan2f.s \
60 | ../src/float/atantablef.s \
61 | ../src/float/cosf.s \
62 | ../src/float/faddsub.s \
63 | ../src/float/fdenormal_result.s \
64 | ../src/float/fdiv.s \
65 | ../src/float/fmul.s \
66 | ../src/float/frsub.s \
67 | ../src/float/sincosf.s \
68 | ../src/float/sinf.s \
69 | ../src/float/sqrtf.s \
70 | ../src/float/tanf.s
71 |
72 |
73 | ASM_SRCS +=
74 |
75 |
76 | OBJS += \
77 | src/common/clz_denormal.o \
78 | src/double/dadd.o \
79 | src/double/ddenormal_result.o \
80 | src/double/ddiv.o \
81 | src/double/dmul.o \
82 | src/double/dop1_normalize.o \
83 | src/double/dop2_normalize.o \
84 | src/double/drsub.o \
85 | src/double/dsub.o \
86 | src/double/sqrt.o \
87 | src/float/asinf.o \
88 | src/float/atan2f.o \
89 | src/float/atantablef.o \
90 | src/float/cosf.o \
91 | src/float/faddsub.o \
92 | src/float/fdenormal_result.o \
93 | src/float/fdiv.o \
94 | src/float/fmul.o \
95 | src/float/frsub.o \
96 | src/float/sincosf.o \
97 | src/float/sinf.o \
98 | src/float/sqrtf.o \
99 | src/float/tanf.o
100 |
101 | OBJS_AS_ARGS += \
102 | src/common/clz_denormal.o \
103 | src/double/dadd.o \
104 | src/double/ddenormal_result.o \
105 | src/double/ddiv.o \
106 | src/double/dmul.o \
107 | src/double/dop1_normalize.o \
108 | src/double/dop2_normalize.o \
109 | src/double/drsub.o \
110 | src/double/dsub.o \
111 | src/double/sqrt.o \
112 | src/float/asinf.o \
113 | src/float/atan2f.o \
114 | src/float/atantablef.o \
115 | src/float/cosf.o \
116 | src/float/faddsub.o \
117 | src/float/fdenormal_result.o \
118 | src/float/fdiv.o \
119 | src/float/fmul.o \
120 | src/float/frsub.o \
121 | src/float/sincosf.o \
122 | src/float/sinf.o \
123 | src/float/sqrtf.o \
124 | src/float/tanf.o
125 |
126 | C_DEPS += \
127 | src/common/clz_denormal.d \
128 | src/double/dadd.d \
129 | src/double/ddenormal_result.d \
130 | src/double/ddiv.d \
131 | src/double/dmul.d \
132 | src/double/dop1_normalize.d \
133 | src/double/dop2_normalize.d \
134 | src/double/drsub.d \
135 | src/double/dsub.d \
136 | src/double/sqrt.d \
137 | src/float/asinf.d \
138 | src/float/atan2f.d \
139 | src/float/atantablef.d \
140 | src/float/cosf.d \
141 | src/float/faddsub.d \
142 | src/float/fdenormal_result.d \
143 | src/float/fdiv.d \
144 | src/float/fmul.d \
145 | src/float/frsub.d \
146 | src/float/sincosf.d \
147 | src/float/sinf.d \
148 | src/float/sqrtf.d \
149 | src/float/tanf.d
150 |
151 | C_DEPS_AS_ARGS += \
152 | src/common/clz_denormal.d \
153 | src/double/dadd.d \
154 | src/double/ddenormal_result.d \
155 | src/double/ddiv.d \
156 | src/double/dmul.d \
157 | src/double/dop1_normalize.d \
158 | src/double/dop2_normalize.d \
159 | src/double/drsub.d \
160 | src/double/dsub.d \
161 | src/double/sqrt.d \
162 | src/float/asinf.d \
163 | src/float/atan2f.d \
164 | src/float/atantablef.d \
165 | src/float/cosf.d \
166 | src/float/faddsub.d \
167 | src/float/fdenormal_result.d \
168 | src/float/fdiv.d \
169 | src/float/fmul.d \
170 | src/float/frsub.d \
171 | src/float/sincosf.d \
172 | src/float/sinf.d \
173 | src/float/sqrtf.d \
174 | src/float/tanf.d
175 |
176 | OUTPUT_FILE_PATH +=libMathM0den.a
177 |
178 | OUTPUT_FILE_PATH_AS_ARGS +=libMathM0den.a
179 |
180 | ADDITIONAL_DEPENDENCIES:=
181 |
182 | OUTPUT_FILE_DEP:= ./makedep.mk
183 |
184 | LIB_DEP+=
185 |
186 | LINKER_SCRIPT_DEP+=
187 |
188 |
189 | # ARM/GNU C Compiler
190 |
191 |
192 |
193 | # ARM/GNU Preprocessing Assembler
194 |
195 |
196 |
197 | # ARM/GNU Assembler
198 | src/common/clz_denormal.o: ../src/common/clz_denormal.s
199 | @echo Building file: $<
200 | @echo Invoking: ARM/GNU Assembler : 6.3.1
201 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
202 | @echo Finished building: $<
203 |
204 |
205 | src/double/dadd.o: ../src/double/dadd.s
206 | @echo Building file: $<
207 | @echo Invoking: ARM/GNU Assembler : 6.3.1
208 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
209 | @echo Finished building: $<
210 |
211 |
212 | src/double/ddenormal_result.o: ../src/double/ddenormal_result.s
213 | @echo Building file: $<
214 | @echo Invoking: ARM/GNU Assembler : 6.3.1
215 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
216 | @echo Finished building: $<
217 |
218 |
219 | src/double/ddiv.o: ../src/double/ddiv.s
220 | @echo Building file: $<
221 | @echo Invoking: ARM/GNU Assembler : 6.3.1
222 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
223 | @echo Finished building: $<
224 |
225 |
226 | src/double/dmul.o: ../src/double/dmul.s
227 | @echo Building file: $<
228 | @echo Invoking: ARM/GNU Assembler : 6.3.1
229 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
230 | @echo Finished building: $<
231 |
232 |
233 | src/double/dop1_normalize.o: ../src/double/dop1_normalize.s
234 | @echo Building file: $<
235 | @echo Invoking: ARM/GNU Assembler : 6.3.1
236 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
237 | @echo Finished building: $<
238 |
239 |
240 | src/double/dop2_normalize.o: ../src/double/dop2_normalize.s
241 | @echo Building file: $<
242 | @echo Invoking: ARM/GNU Assembler : 6.3.1
243 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
244 | @echo Finished building: $<
245 |
246 |
247 | src/double/drsub.o: ../src/double/drsub.s
248 | @echo Building file: $<
249 | @echo Invoking: ARM/GNU Assembler : 6.3.1
250 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
251 | @echo Finished building: $<
252 |
253 |
254 | src/double/dsub.o: ../src/double/dsub.s
255 | @echo Building file: $<
256 | @echo Invoking: ARM/GNU Assembler : 6.3.1
257 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
258 | @echo Finished building: $<
259 |
260 |
261 | src/double/sqrt.o: ../src/double/sqrt.s
262 | @echo Building file: $<
263 | @echo Invoking: ARM/GNU Assembler : 6.3.1
264 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
265 | @echo Finished building: $<
266 |
267 |
268 | src/float/asinf.o: ../src/float/asinf.s
269 | @echo Building file: $<
270 | @echo Invoking: ARM/GNU Assembler : 6.3.1
271 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
272 | @echo Finished building: $<
273 |
274 |
275 | src/float/atan2f.o: ../src/float/atan2f.s
276 | @echo Building file: $<
277 | @echo Invoking: ARM/GNU Assembler : 6.3.1
278 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
279 | @echo Finished building: $<
280 |
281 |
282 | src/float/atantablef.o: ../src/float/atantablef.s
283 | @echo Building file: $<
284 | @echo Invoking: ARM/GNU Assembler : 6.3.1
285 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
286 | @echo Finished building: $<
287 |
288 |
289 | src/float/cosf.o: ../src/float/cosf.s
290 | @echo Building file: $<
291 | @echo Invoking: ARM/GNU Assembler : 6.3.1
292 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
293 | @echo Finished building: $<
294 |
295 |
296 | src/float/faddsub.o: ../src/float/faddsub.s
297 | @echo Building file: $<
298 | @echo Invoking: ARM/GNU Assembler : 6.3.1
299 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
300 | @echo Finished building: $<
301 |
302 |
303 | src/float/fdenormal_result.o: ../src/float/fdenormal_result.s
304 | @echo Building file: $<
305 | @echo Invoking: ARM/GNU Assembler : 6.3.1
306 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
307 | @echo Finished building: $<
308 |
309 |
310 | src/float/fdiv.o: ../src/float/fdiv.s
311 | @echo Building file: $<
312 | @echo Invoking: ARM/GNU Assembler : 6.3.1
313 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
314 | @echo Finished building: $<
315 |
316 |
317 | src/float/fmul.o: ../src/float/fmul.s
318 | @echo Building file: $<
319 | @echo Invoking: ARM/GNU Assembler : 6.3.1
320 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
321 | @echo Finished building: $<
322 |
323 |
324 | src/float/frsub.o: ../src/float/frsub.s
325 | @echo Building file: $<
326 | @echo Invoking: ARM/GNU Assembler : 6.3.1
327 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
328 | @echo Finished building: $<
329 |
330 |
331 | src/float/sincosf.o: ../src/float/sincosf.s
332 | @echo Building file: $<
333 | @echo Invoking: ARM/GNU Assembler : 6.3.1
334 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
335 | @echo Finished building: $<
336 |
337 |
338 | src/float/sinf.o: ../src/float/sinf.s
339 | @echo Building file: $<
340 | @echo Invoking: ARM/GNU Assembler : 6.3.1
341 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
342 | @echo Finished building: $<
343 |
344 |
345 | src/float/sqrtf.o: ../src/float/sqrtf.s
346 | @echo Building file: $<
347 | @echo Invoking: ARM/GNU Assembler : 6.3.1
348 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
349 | @echo Finished building: $<
350 |
351 |
352 | src/float/tanf.o: ../src/float/tanf.s
353 | @echo Building file: $<
354 | @echo Invoking: ARM/GNU Assembler : 6.3.1
355 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
356 | @echo Finished building: $<
357 |
358 |
359 |
360 |
361 |
362 |
363 | ifneq ($(MAKECMDGOALS),clean)
364 | ifneq ($(strip $(C_DEPS)),)
365 | -include $(C_DEPS)
366 | endif
367 | endif
368 |
369 | # Add inputs and outputs from these tool invocations to the build variables
370 |
371 | # All Target
372 | all: $(OUTPUT_FILE_PATH) $(ADDITIONAL_DEPENDENCIES)
373 |
374 |
375 | $(OUTPUT_FILE_PATH): $(OBJS) $(USER_OBJS) $(OUTPUT_FILE_DEP)
376 | @echo Building target: $@
377 | @echo Invoking: ARM/GNU Archiver : 6.3.1
378 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-ar.exe$(QUOTE) -r -o$(OUTPUT_FILE_PATH_AS_ARGS) $(OBJS_AS_ARGS) $(USER_OBJS) $(LIBS)
379 | @echo Finished building target: $@
380 |
381 |
382 |
383 |
384 |
385 |
386 | # Other Targets
387 | clean:
388 | -$(RM) $(OBJS_AS_ARGS) $(EXECUTABLES)
389 | -$(RM) $(C_DEPS_AS_ARGS)
390 | rm -rf "libMathM0den.elf" "libMathM0den.a" "libMathM0den.hex" "libMathM0den.bin" "libMathM0den.lss" "libMathM0den.eep" "libMathM0den.map" "libMathM0den.srec"
391 |
--------------------------------------------------------------------------------
/Denormals/libMathM0den.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimPaterson/ArmMathM0/91b866409651f47093542df41b34701f7d8b7f6b/Denormals/libMathM0den.a
--------------------------------------------------------------------------------
/Denormals/makedep.mk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit or delete the file
3 | ################################################################################
4 |
5 | src\common\clz_denormal.s
6 |
7 | src\double\dadd.s
8 |
9 | src\double\ddenormal_result.s
10 |
11 | src\double\ddiv.s
12 |
13 | src\double\dmul.s
14 |
15 | src\double\dop1_normalize.s
16 |
17 | src\double\dop2_normalize.s
18 |
19 | src\double\drsub.s
20 |
21 | src\double\dsub.s
22 |
23 | src\double\sqrt.s
24 |
25 | src\float\asinf.s
26 |
27 | src\float\atan2f.s
28 |
29 | src\float\atantablef.s
30 |
31 | src\float\cosf.s
32 |
33 | src\float\faddsub.s
34 |
35 | src\float\fdenormal_result.s
36 |
37 | src\float\fdiv.s
38 |
39 | src\float\fmul.s
40 |
41 | src\float\frsub.s
42 |
43 | src\float\sincosf.s
44 |
45 | src\float\sinf.s
46 |
47 | src\float\sqrtf.s
48 |
49 | src\float\tanf.s
50 |
51 |
--------------------------------------------------------------------------------
/Denormals/options.inc:
--------------------------------------------------------------------------------
1 | //****************************************************************************
2 | // options.inc
3 | //
4 | // Created 6/14/2021 4:59:55 PM by Tim Paterson
5 | //
6 | //****************************************************************************
7 |
8 | // Leave option undefined to disable (comment it out)
9 |
10 | // Enabling this option eliminates denormal support, saving code size
11 | //.set NO_DENORMALS, 1
12 |
13 | // Enabling this option extends the maximum angle accepted by the trig functions
14 | // to just under 32768 radians. Without it, the maximum is 64 * pi (about 201 radians).
15 | //.set WIDE_TRIG_RANGE, 1
16 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/NoDenormals/.gitignore:
--------------------------------------------------------------------------------
1 | /src/
2 |
--------------------------------------------------------------------------------
/NoDenormals/Makefile:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
5 | SHELL := cmd.exe
6 | RM := rm -rf
7 |
8 | USER_OBJS :=
9 |
10 | LIBS :=
11 | PROJ :=
12 |
13 | O_SRCS :=
14 | C_SRCS :=
15 | S_SRCS :=
16 | S_UPPER_SRCS :=
17 | OBJ_SRCS :=
18 | ASM_SRCS :=
19 | PREPROCESSING_SRCS :=
20 | OBJS :=
21 | OBJS_AS_ARGS :=
22 | C_DEPS :=
23 | C_DEPS_AS_ARGS :=
24 | EXECUTABLES :=
25 | OUTPUT_FILE_PATH :=
26 | OUTPUT_FILE_PATH_AS_ARGS :=
27 | AVR_APP_PATH :=$$$AVR_APP_PATH$$$
28 | QUOTE := "
29 | ADDITIONAL_DEPENDENCIES:=
30 | OUTPUT_FILE_DEP:=
31 | LIB_DEP:=
32 | LINKER_SCRIPT_DEP:=
33 |
34 | # Every subdirectory with source files must be described here
35 | SUBDIRS := \
36 | ../src/ \
37 | ../src/common/ \
38 | ../src/double/ \
39 | ../src/float/ \
40 | ../src/include/
41 |
42 |
43 | # Add inputs and outputs from these tool invocations to the build variables
44 | C_SRCS +=
45 |
46 |
47 | PREPROCESSING_SRCS += \
48 | ../src/common/clz_denormal.s \
49 | ../src/double/dadd.s \
50 | ../src/double/ddenormal_result.s \
51 | ../src/double/ddiv.s \
52 | ../src/double/dmul.s \
53 | ../src/double/dop1_normalize.s \
54 | ../src/double/dop2_normalize.s \
55 | ../src/double/drsub.s \
56 | ../src/double/dsub.s \
57 | ../src/double/sqrt.s \
58 | ../src/float/asinf.s \
59 | ../src/float/atan2f.s \
60 | ../src/float/atantablef.s \
61 | ../src/float/cosf.s \
62 | ../src/float/faddsub.s \
63 | ../src/float/fdenormal_result.s \
64 | ../src/float/fdiv.s \
65 | ../src/float/fmul.s \
66 | ../src/float/frsub.s \
67 | ../src/float/sincosf.s \
68 | ../src/float/sinf.s \
69 | ../src/float/sqrtf.s \
70 | ../src/float/tanf.s
71 |
72 |
73 | ASM_SRCS +=
74 |
75 |
76 | OBJS += \
77 | src/common/clz_denormal.o \
78 | src/double/dadd.o \
79 | src/double/ddenormal_result.o \
80 | src/double/ddiv.o \
81 | src/double/dmul.o \
82 | src/double/dop1_normalize.o \
83 | src/double/dop2_normalize.o \
84 | src/double/drsub.o \
85 | src/double/dsub.o \
86 | src/double/sqrt.o \
87 | src/float/asinf.o \
88 | src/float/atan2f.o \
89 | src/float/atantablef.o \
90 | src/float/cosf.o \
91 | src/float/faddsub.o \
92 | src/float/fdenormal_result.o \
93 | src/float/fdiv.o \
94 | src/float/fmul.o \
95 | src/float/frsub.o \
96 | src/float/sincosf.o \
97 | src/float/sinf.o \
98 | src/float/sqrtf.o \
99 | src/float/tanf.o
100 |
101 | OBJS_AS_ARGS += \
102 | src/common/clz_denormal.o \
103 | src/double/dadd.o \
104 | src/double/ddenormal_result.o \
105 | src/double/ddiv.o \
106 | src/double/dmul.o \
107 | src/double/dop1_normalize.o \
108 | src/double/dop2_normalize.o \
109 | src/double/drsub.o \
110 | src/double/dsub.o \
111 | src/double/sqrt.o \
112 | src/float/asinf.o \
113 | src/float/atan2f.o \
114 | src/float/atantablef.o \
115 | src/float/cosf.o \
116 | src/float/faddsub.o \
117 | src/float/fdenormal_result.o \
118 | src/float/fdiv.o \
119 | src/float/fmul.o \
120 | src/float/frsub.o \
121 | src/float/sincosf.o \
122 | src/float/sinf.o \
123 | src/float/sqrtf.o \
124 | src/float/tanf.o
125 |
126 | C_DEPS += \
127 | src/common/clz_denormal.d \
128 | src/double/dadd.d \
129 | src/double/ddenormal_result.d \
130 | src/double/ddiv.d \
131 | src/double/dmul.d \
132 | src/double/dop1_normalize.d \
133 | src/double/dop2_normalize.d \
134 | src/double/drsub.d \
135 | src/double/dsub.d \
136 | src/double/sqrt.d \
137 | src/float/asinf.d \
138 | src/float/atan2f.d \
139 | src/float/atantablef.d \
140 | src/float/cosf.d \
141 | src/float/faddsub.d \
142 | src/float/fdenormal_result.d \
143 | src/float/fdiv.d \
144 | src/float/fmul.d \
145 | src/float/frsub.d \
146 | src/float/sincosf.d \
147 | src/float/sinf.d \
148 | src/float/sqrtf.d \
149 | src/float/tanf.d
150 |
151 | C_DEPS_AS_ARGS += \
152 | src/common/clz_denormal.d \
153 | src/double/dadd.d \
154 | src/double/ddenormal_result.d \
155 | src/double/ddiv.d \
156 | src/double/dmul.d \
157 | src/double/dop1_normalize.d \
158 | src/double/dop2_normalize.d \
159 | src/double/drsub.d \
160 | src/double/dsub.d \
161 | src/double/sqrt.d \
162 | src/float/asinf.d \
163 | src/float/atan2f.d \
164 | src/float/atantablef.d \
165 | src/float/cosf.d \
166 | src/float/faddsub.d \
167 | src/float/fdenormal_result.d \
168 | src/float/fdiv.d \
169 | src/float/fmul.d \
170 | src/float/frsub.d \
171 | src/float/sincosf.d \
172 | src/float/sinf.d \
173 | src/float/sqrtf.d \
174 | src/float/tanf.d
175 |
176 | OUTPUT_FILE_PATH +=libMathM0.a
177 |
178 | OUTPUT_FILE_PATH_AS_ARGS +=libMathM0.a
179 |
180 | ADDITIONAL_DEPENDENCIES:=
181 |
182 | OUTPUT_FILE_DEP:= ./makedep.mk
183 |
184 | LIB_DEP+=
185 |
186 | LINKER_SCRIPT_DEP+=
187 |
188 |
189 | # AVR32/GNU C Compiler
190 |
191 |
192 |
193 | # AVR32/GNU Preprocessing Assembler
194 |
195 |
196 |
197 | # AVR32/GNU Assembler
198 | src/common/clz_denormal.o: ../src/common/clz_denormal.s
199 | @echo Building file: $<
200 | @echo Invoking: ARM/GNU Assembler : 6.3.1
201 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
202 | @echo Finished building: $<
203 |
204 |
205 | src/double/dadd.o: ../src/double/dadd.s
206 | @echo Building file: $<
207 | @echo Invoking: ARM/GNU Assembler : 6.3.1
208 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
209 | @echo Finished building: $<
210 |
211 |
212 | src/double/ddenormal_result.o: ../src/double/ddenormal_result.s
213 | @echo Building file: $<
214 | @echo Invoking: ARM/GNU Assembler : 6.3.1
215 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
216 | @echo Finished building: $<
217 |
218 |
219 | src/double/ddiv.o: ../src/double/ddiv.s
220 | @echo Building file: $<
221 | @echo Invoking: ARM/GNU Assembler : 6.3.1
222 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
223 | @echo Finished building: $<
224 |
225 |
226 | src/double/dmul.o: ../src/double/dmul.s
227 | @echo Building file: $<
228 | @echo Invoking: ARM/GNU Assembler : 6.3.1
229 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
230 | @echo Finished building: $<
231 |
232 |
233 | src/double/dop1_normalize.o: ../src/double/dop1_normalize.s
234 | @echo Building file: $<
235 | @echo Invoking: ARM/GNU Assembler : 6.3.1
236 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
237 | @echo Finished building: $<
238 |
239 |
240 | src/double/dop2_normalize.o: ../src/double/dop2_normalize.s
241 | @echo Building file: $<
242 | @echo Invoking: ARM/GNU Assembler : 6.3.1
243 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
244 | @echo Finished building: $<
245 |
246 |
247 | src/double/drsub.o: ../src/double/drsub.s
248 | @echo Building file: $<
249 | @echo Invoking: ARM/GNU Assembler : 6.3.1
250 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
251 | @echo Finished building: $<
252 |
253 |
254 | src/double/dsub.o: ../src/double/dsub.s
255 | @echo Building file: $<
256 | @echo Invoking: ARM/GNU Assembler : 6.3.1
257 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
258 | @echo Finished building: $<
259 |
260 |
261 | src/double/sqrt.o: ../src/double/sqrt.s
262 | @echo Building file: $<
263 | @echo Invoking: ARM/GNU Assembler : 6.3.1
264 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
265 | @echo Finished building: $<
266 |
267 |
268 | src/float/asinf.o: ../src/float/asinf.s
269 | @echo Building file: $<
270 | @echo Invoking: ARM/GNU Assembler : 6.3.1
271 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
272 | @echo Finished building: $<
273 |
274 |
275 | src/float/atan2f.o: ../src/float/atan2f.s
276 | @echo Building file: $<
277 | @echo Invoking: ARM/GNU Assembler : 6.3.1
278 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
279 | @echo Finished building: $<
280 |
281 |
282 | src/float/atantablef.o: ../src/float/atantablef.s
283 | @echo Building file: $<
284 | @echo Invoking: ARM/GNU Assembler : 6.3.1
285 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
286 | @echo Finished building: $<
287 |
288 |
289 | src/float/cosf.o: ../src/float/cosf.s
290 | @echo Building file: $<
291 | @echo Invoking: ARM/GNU Assembler : 6.3.1
292 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
293 | @echo Finished building: $<
294 |
295 |
296 | src/float/faddsub.o: ../src/float/faddsub.s
297 | @echo Building file: $<
298 | @echo Invoking: ARM/GNU Assembler : 6.3.1
299 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
300 | @echo Finished building: $<
301 |
302 |
303 | src/float/fdenormal_result.o: ../src/float/fdenormal_result.s
304 | @echo Building file: $<
305 | @echo Invoking: ARM/GNU Assembler : 6.3.1
306 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
307 | @echo Finished building: $<
308 |
309 |
310 | src/float/fdiv.o: ../src/float/fdiv.s
311 | @echo Building file: $<
312 | @echo Invoking: ARM/GNU Assembler : 6.3.1
313 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
314 | @echo Finished building: $<
315 |
316 |
317 | src/float/fmul.o: ../src/float/fmul.s
318 | @echo Building file: $<
319 | @echo Invoking: ARM/GNU Assembler : 6.3.1
320 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
321 | @echo Finished building: $<
322 |
323 |
324 | src/float/frsub.o: ../src/float/frsub.s
325 | @echo Building file: $<
326 | @echo Invoking: ARM/GNU Assembler : 6.3.1
327 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
328 | @echo Finished building: $<
329 |
330 |
331 | src/float/sincosf.o: ../src/float/sincosf.s
332 | @echo Building file: $<
333 | @echo Invoking: ARM/GNU Assembler : 6.3.1
334 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
335 | @echo Finished building: $<
336 |
337 |
338 | src/float/sinf.o: ../src/float/sinf.s
339 | @echo Building file: $<
340 | @echo Invoking: ARM/GNU Assembler : 6.3.1
341 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
342 | @echo Finished building: $<
343 |
344 |
345 | src/float/sqrtf.o: ../src/float/sqrtf.s
346 | @echo Building file: $<
347 | @echo Invoking: ARM/GNU Assembler : 6.3.1
348 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
349 | @echo Finished building: $<
350 |
351 |
352 | src/float/tanf.o: ../src/float/tanf.s
353 | @echo Building file: $<
354 | @echo Invoking: ARM/GNU Assembler : 6.3.1
355 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-as.exe$(QUOTE) -mcpu=cortex-m0plus -mthumb -I "../src/include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\arm\CMSIS\5.4.0\CMSIS\Core\Include" -I "C:\Program Files (x86)\Atmel\Studio\7.0\Packs\atmel\SAMC21_DFP\1.2.176\samc21\include" -g -o "$@" "$<"
356 | @echo Finished building: $<
357 |
358 |
359 |
360 |
361 |
362 |
363 | ifneq ($(MAKECMDGOALS),clean)
364 | ifneq ($(strip $(C_DEPS)),)
365 | -include $(C_DEPS)
366 | endif
367 | endif
368 |
369 | # Add inputs and outputs from these tool invocations to the build variables
370 |
371 | # All Target
372 | all: $(OUTPUT_FILE_PATH) $(ADDITIONAL_DEPENDENCIES)
373 |
374 |
375 | $(OUTPUT_FILE_PATH): $(OBJS) $(USER_OBJS) $(OUTPUT_FILE_DEP)
376 | @echo Building target: $@
377 | @echo Invoking: ARM/GNU Archiver : 6.3.1
378 | $(QUOTE)C:\Program Files (x86)\Atmel\Studio\7.0\toolchain\arm\arm-gnu-toolchain\bin\arm-none-eabi-ar.exe$(QUOTE) -r -o$(OUTPUT_FILE_PATH_AS_ARGS) $(OBJS_AS_ARGS) $(USER_OBJS) $(LIBS)
379 | @echo Finished building target: $@
380 |
381 |
382 |
383 |
384 |
385 |
386 | # Other Targets
387 | clean:
388 | -$(RM) $(OBJS_AS_ARGS) $(EXECUTABLES)
389 | -$(RM) $(C_DEPS_AS_ARGS)
390 | rm -rf "libMathM0.elf" "libMathM0.a" "libMathM0.hex" "libMathM0.bin" "libMathM0.lss" "libMathM0.eep" "libMathM0.map" "libMathM0.srec"
391 |
--------------------------------------------------------------------------------
/NoDenormals/libMathM0.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimPaterson/ArmMathM0/91b866409651f47093542df41b34701f7d8b7f6b/NoDenormals/libMathM0.a
--------------------------------------------------------------------------------
/NoDenormals/makedep.mk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit or delete the file
3 | ################################################################################
4 |
5 | src\common\clz_denormal.s
6 |
7 | src\double\dadd.s
8 |
9 | src\double\ddenormal_result.s
10 |
11 | src\double\ddiv.s
12 |
13 | src\double\dmul.s
14 |
15 | src\double\dop1_normalize.s
16 |
17 | src\double\dop2_normalize.s
18 |
19 | src\double\drsub.s
20 |
21 | src\double\dsub.s
22 |
23 | src\double\sqrt.s
24 |
25 | src\float\asinf.s
26 |
27 | src\float\atan2f.s
28 |
29 | src\float\atantablef.s
30 |
31 | src\float\cosf.s
32 |
33 | src\float\faddsub.s
34 |
35 | src\float\fdenormal_result.s
36 |
37 | src\float\fdiv.s
38 |
39 | src\float\fmul.s
40 |
41 | src\float\frsub.s
42 |
43 | src\float\sincosf.s
44 |
45 | src\float\sinf.s
46 |
47 | src\float\sqrtf.s
48 |
49 | src\float\tanf.s
50 |
51 |
--------------------------------------------------------------------------------
/NoDenormals/options.inc:
--------------------------------------------------------------------------------
1 | //****************************************************************************
2 | // options.inc
3 | //
4 | // Created 6/14/2021 4:59:55 PM by Tim Paterson
5 | //
6 | //****************************************************************************
7 |
8 | // Build options, tested by the sources with .ifdef/.ifndef. Leave an option undefined to disable it (comment out its .set line).
9 |
10 | // Enabling this option eliminates denormal support, saving code size. For example, in src/double/dadd.s an operand with a zero exponent is then treated as zero and a result too small to be normal is returned as a signed zero.
11 | .set NO_DENORMALS, 1
12 |
13 | // This option extends max angle accepted by trig functions to < 32768 radians.
14 | // Otherwise, the max is 64 * pi, about 201 radians. (Disabled here: the .set below is commented out.)
15 | //.set WIDE_TRIG_RANGE, 1
16 |
--------------------------------------------------------------------------------
/ReadMe.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimPaterson/ArmMathM0/91b866409651f47093542df41b34701f7d8b7f6b/ReadMe.md
--------------------------------------------------------------------------------
/include/mathm0.h:
--------------------------------------------------------------------------------
1 | //****************************************************************************
2 | // mathm0.h
3 | //
4 | // Created 4/15/2025 6:45:10 PM by Tim Paterson
5 | //
6 | //****************************************************************************
7 |
8 | #pragma once
9 |
10 |
11 | #ifdef __cplusplus
12 | extern "C" {	// C linkage so C++ callers bind to the unmangled symbol name
13 | #endif
14 |
15 | // sincosf: get both sinf(radians) and cosf(radians) from one call, taking advantage of the two being computed simultaneously.
16 | extern void sincosf(float radians, float *ptrSin, float *ptrCos);	// results presumably stored through ptrSin / ptrCos -- confirm in src/float/sincosf.s
17 |
18 | #ifdef __cplusplus
19 | }
20 | #endif
--------------------------------------------------------------------------------
/lib-out/libMathM0.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimPaterson/ArmMathM0/91b866409651f47093542df41b34701f7d8b7f6b/lib-out/libMathM0.a
--------------------------------------------------------------------------------
/lib-out/libMathM0den.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimPaterson/ArmMathM0/91b866409651f47093542df41b34701f7d8b7f6b/lib-out/libMathM0den.a
--------------------------------------------------------------------------------
/src/common/clz_denormal.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * clz_denormal.s
4 | *
5 | * Created: 6/23/2021 2:41:16 PM
6 | * Author: Tim Paterson
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 |
15 | .global __clz_denormal
16 | .global __clz_denormal_ext
17 |
18 | //*********************************************************************
19 | // Count Leading Zeros for denormal handling
20 | // Two entry points: __clz_denormal loads r5 with 31 and falls through into __clz_denormal_ext, which runs the CLZ_EXT macro (from macros.inc) and returns.
21 | // WARNING!!: This function does not follow the standard
22 | // calling convention!
23 | //
24 | // Entry:
25 | // r4 = argument to count leading zeros, non-zero; when entering at __clz_denormal_ext, r5 must already hold the value CLZ_EXT expects (see macros.inc)
26 | // Exit:
27 | // r4 = count of leading zeros, 0 - 31
28 | // r0, r1, r2, r3, r7, r12 preserved
29 | // r5, r6 destroyed
30 | //*********************************************************************
31 |
32 | .func __clz_denormal
33 |
34 | .thumb_func	// NOTE(review): marks only __clz_denormal as a Thumb function; __clz_denormal_ext below is left unmarked -- confirm that is intended
35 | __clz_denormal:
36 | movs r5, #31	// default r5 for the plain entry point
37 | __clz_denormal_ext:	// alternate entry: r5 already set by the caller
38 | CLZ_EXT r4, r5, r6
39 | bx lr	// return to caller
40 |
41 | .endfunc
42 |
--------------------------------------------------------------------------------
/src/double/dadd.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * dadd.s
4 | *
5 | * Created: 9/17/2021
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 | .global __dadd_saved
18 |
19 | // 64-bit IEEE floating-point add
20 | //
21 | // Entry:
22 | // r1:r0 = op1
23 | // r3:r2 = op2
24 | // Exit:
25 | // r1:r0 = op1 + op2
26 |
27 | .func __dadd
28 |
29 | .ifdef NO_DENORMALS
30 |
31 | .set Op2ZeroExp, ReturnOp1
32 | .set Op1ZeroExp, ReturnOp2
33 |
34 | .else
35 |
36 | Op2ZeroExp:
37 | // r1:r0 = op1
38 | // r3:r2 = op2
39 | // r4 = op1 exponent
40 | // r5 = op2 exponent (zero)
41 | // r7 = bit 0 = sign of op1, bit 31 = sign xor
42 | lsls r6, r3, #1 // clear existing sign
43 | orrs r6, r2
44 | beq ReturnOp1 // op2 is zero, return op1
45 | // __dop2_normalize uses tailored calling convention
46 | // input: r3:r2 = op2
47 | // returns r5 = op2 exponent (< 0)
48 | // all other registers preserved
49 | bl __dop2_normalize
50 | str r2, [sp] // update pushed r2, might be needed
51 | b Op2Normalized
52 |
53 | Op1ZeroExp:
54 | // r1:r0 = op1
55 | // r3:r2 = op2
56 | // r4 = op1 exponent (zero)
57 | // r7 = bit 0 = sign of op1, bit 31 = sign xor
58 | lsls r5, r1, #1 // clear existing sign
59 | orrs r5, r0
60 | beq ReturnOp2 // op1 is zero, return op2
61 | // op1 is denormal, check op2 for zero
62 | lsls r5, r3, #1 // scrape off sign
63 | orrs r5, r2
64 | beq ReturnOp1 // op2 is zero, return op1
65 | // __dop1_normalize uses tailored calling convention
66 | // input: r1:r0 = op1
67 | // returns r4 = op1 exponent (< 0)
68 | // r5 trashed
69 | // all other registers preserved
70 | bl __dop1_normalize
71 | b Op1Normalized
72 |
73 | .endif
74 |
75 | ENTRY_POINT __dadd, __aeabi_dadd
76 | push {r2, r4-r7, lr} // must match __dsub & __drsub, which enter next
77 | __dadd_saved:
78 | lsrs r7, r1, #31 // grab sign of op1
79 | lsrs r4, r3, #31 // sign of op2
80 | eors r4, r7 // see if signs the same
81 | lsls r4, #31 // back to sign position
82 | orrs r7, r4 // combine sign info
83 |
84 | lsls r4, r1, #1 // clear op1 sign
85 | lsrs r4, #MANT_BITS_HI64 + 1 // op1 exponent
86 | beq Op1ZeroExp
87 | Op1Normalized:
88 | lsls r5, r3, #1 // clear op2 sign
89 | lsrs r5, #MANT_BITS_HI64 + 1 // op2 exponent
90 | beq Op2ZeroExp
91 | Op2Normalized:
92 | ldr r6, =#EXP_SPECIAL64 - 1
93 | mov lr, r6
94 | cmp r5, lr
95 | bgt Op2SpclExp
96 | // If op1 is special (and op2 not), just return op1
97 | cmp r4, lr
98 | bgt ReturnOp1
99 |
100 | // r1:r0 = op1
101 | // r3:r2 = op2
102 | // r4 = op1 exponent
103 | // r5 = op2 exponent
104 | // r7 = bit 0 = sign of op1, bit 31 = sign xor
105 | // lr = max allowed exponent
106 |
107 | // Clear exponent, set implied bit
108 | movs r6, #1
109 | lsls r6, #31 // sign position
110 | lsls r1, #EXP_BITS64
111 | lsls r3, #EXP_BITS64
112 | orrs r1, r6
113 | orrs r3, r6
114 | lsrs r1, #EXP_BITS64
115 | lsrs r3, #EXP_BITS64
116 |
117 | subs r6, r4, r5 // op1 exp - op2 exp
118 | bmi Op2Larger
119 | // op1 is larger (or same)
120 | mov r12, r4 // save exponent
121 | subs r6, #32
122 | bhi LongOp2Shift
123 | negs r5, r6 // no. of bits to shift off for sticky bits
124 | adds r6, #32
125 | // 64-bit right shift of r3:r2
126 | movs r4, r3
127 | lsls r4, r5
128 | lsrs r3, r6
129 | lsrs r2, r6
130 | orrs r2, r4
131 | ldr r4, [sp] // recover original low half of op2
132 | lsls r4, r5 // round & sticky bits for what we're shifting off
133 | CheckSign:
134 | orrs r7, r7 // check sign flags
135 | bmi SubOp2 // signs were different, subtract
136 | AddOps:
137 | // signs are the same, add operands
138 | // r1:r0 = op1 mantissa
139 | // r3:r2 = op2 mantissa, aligned to match op1 exponent
140 | // r4 = sticky bits
141 | // r7 = bit 0 is sign of result
142 | // r12 = result exponent
143 | // lr = max allowed exponent
144 | adds r0, r2 // sum it
145 | adcs r1, r3
146 | mov r2, r12 // bring back exponent
147 | // See if carried into the next bit
148 | lsls r5, r1, #EXP_BITS64
149 | bcs AddOverflow
150 | Round:
151 | // r1:r0 = result
152 | // r2 = result exponent
153 | // r4 = sticky bits
154 | // r7 = bit 0 is sign of result
155 | // lr = max allowed exponent
156 | .ifndef NO_DENORMALS
157 | cmp r2, #0
158 | ble TinyExp
159 | .endif
160 | lsls r5, r4, #1 // rounding bit to CY
161 | bcc NoRound // no rounding bit
162 | bne RoundUp // have rounding bit and sticky bits
163 | RoundEven:
164 | // Have rounding bit but no sticky bits, so round even
165 | lsrs r5, r0, #1 // LSB to CY
166 | bcc NoRound // already even
167 | RoundUp:
168 | movs r3, #0
169 | adds r0, #1 // add to rounding bit
170 | adcs r1, r3
171 | NoRound:
172 | subs r2, #1 // adjust for adding implied bit
173 | lsls r2, #MANT_BITS_HI64
174 | adds r1, r2
175 | SetSign:
176 | lsls r7, #31
177 | adds r1, r7
178 | Exit:
179 | pop {r2, r4-r7, pc}
180 |
181 | ReturnOp2:
182 | movs r0, r2
183 | movs r1, r3
184 | ReturnOp1:
185 | pop {r2, r4-r7, pc}
186 |
187 | Op2SpclExp:
188 | // r1:r0 = op1
189 | // r3:r2 = op2
190 | // r4 = op1 exponent
191 | // r5 = op2 exponent
192 | // r6 = EXP_SPECIAL64 - 1 (same value as lr; overwritten below)
193 | // r7 = bit 0 = sign of op1, bit 31 = sign xor
194 | // lr = max allowed exponent
195 | //
196 | // op2 mantissa == 0?
197 | lsls r6, r3, #(EXP_BITS64 + 1)
198 | orrs r6, r2
199 | bne ReturnOp2 // op2 is NAN, return it
200 | // op2 is Infinity
201 | // if (expOp1 == EXP_SPECIAL)
202 | cmp r4, lr
203 | ble ReturnOp2 // op1 not special, return op2
204 | // op1 mantissa == 0?
205 | lsls r6, r1, #(EXP_BITS64 + 1)
206 | orrs r6, r0
207 | bne ReturnOp1 // op1 is NAN, return it
208 | // Both op1 & op2 are infinity. If signs differ, return NAN
209 | eors r3, r1
210 | bpl ReturnOp1 // signs the same, return infinity
211 | // return NAN
212 | ldr r1, =#NAN64
213 | movs r0, #0
214 | b ReturnOp1
215 |
216 | Op2Larger:
217 | // r1:r0 = op1 mantissa
218 | // r3:r2 = op2 mantissa
219 | // r4 = op1 exponent
220 | // r5 = op2 exponent
221 | // r6 = exponent difference (< 0)
222 | // r7 = bit 0 = sign of op1, bit 31 = sign xor
223 | // lr = max allowed exponent
224 | //
225 | // op2 has a larger exponent, so it's bigger for sure
226 | mov r12, r5 // save exponent
227 | negs r5, r6
228 | adds r6, #32
229 | bmi LongOp1Shift
230 | // 64-bit right shift of r1:r0
231 | // r2 was saved on entry so can be used as temp
232 | movs r4, r0
233 | movs r2, r1
234 | lsls r2, r6
235 | lsrs r1, r5
236 | lsrs r0, r5
237 | orrs r0, r2
238 | lsls r4, r6 // round & sticky bits for what we're shifting off
239 | ldr r2, [sp] // recover original r2
240 | Op2CheckSign:
241 | orrs r7, r7 // check sign flags
242 | bpl AddOps
243 | // op2 - op1
244 | adds r7, #1 // flip sign bit in LSB
245 | negs r4, r4 // 0 - sticky bits
246 | sbcs r2, r0
247 | sbcs r3, r1
248 | movs r0, r2
249 | movs r1, r3
250 | b Normalize
251 |
252 | LongOp2Shift:
253 | // r1:r0 = op1 mantissa
254 | // r3:r2 = op2 mantissa
255 | // r6 = exp1 - exp2 - 32 (> 0)
256 | // r7 = bit 0 = sign of op1, bit 31 = sign xor
257 | // r12 = result exponent
258 | // lr = max allowed exponent
259 | cmp r6, #MANT_BITS_HI64 + 3 // include implied, round & sticky bits
260 | bhi UseOp1
261 | negs r5, r6
262 | adds r5, #32 // r5 = 32 - r6
263 | // keep round & sticky bits for what we're shifting off
264 | movs r4, r2
265 | lsrs r4, r6
266 | lsls r2, r5
267 | beq 1f
268 | orrs r4, r3 // non-zero value that doesn't touch rounding bit
269 | 1:
270 | movs r2, r3
271 | lsrs r2, r6
272 | lsls r3, r5
273 | orrs r4, r3
274 | movs r3, #0
275 | b CheckSign
276 |
277 | AddOverflow:
278 | cmp r2, lr // lr = EXP_SPECIAL64 - 1
279 | beq RetInfinity
280 | .ifndef NO_DENORMALS
281 | bhi TinyExp
282 | .endif
283 | adds r2, #1 // adjust exponent
284 | // shift right 1 bit
285 | lsls r3, r1, #31
286 | lsrs r1, #1
287 | lsrs r0, #1 // CY = rounding bit
288 | add r0, r3 // combine without affecting flags
289 | bcc NoRound
290 | cmp r4, #0
291 | bne RoundUp
292 | b RoundEven
293 |
294 | SubOp2:
295 | // op1 - op2
296 | // However, it could be op1 <= op2 with same exponent
297 | negs r4, r4 // 0 - sticky bits
298 | sbcs r0, r2
299 | sbcs r1, r3
300 | bpl Normalize
301 | // Subtracted wrong way.
302 | adds r7, #1 // flip sign bit in LSB
303 | movs r3, #0
304 | mvns r1, r1
305 | negs r0, r0
306 | adcs r1, r3
307 | Normalize:
308 | // flags set according to r1
309 | mov r2, r12 // bring back exponent, flags not affected
310 | bne HaveBits // r1 not zero
311 | orrs r1, r0
312 | orrs r1, r4
313 | beq Exit // return zero result
314 | NormWord:
315 | // shift in bits from r0
316 | subs r2, #MANT_BITS_HI64 + 1
317 | lsrs r1, r0, #31 - MANT_BITS_HI64
318 | lsls r0, #MANT_BITS_HI64 + 1
319 | lsrs r5, r4, #31 - MANT_BITS_HI64
320 | orrs r0, r5
321 | lsls r4, #MANT_BITS_HI64 + 1
322 | cmp r1, #0
323 | beq NormWord
324 | HaveBits:
325 | lsls r5, r1, #EXP_BITS64 + 1 // shift normalization bit into CY
326 | bcs Round
327 | movs r3, #0 // shift counter
328 | // Check for big chunks of leading zeros
329 | // These checks need to leave at least 1 leading zero
330 | // for the final step.
331 | .set NORM, 5
332 | lsrs r6, r5, #32 - NORM
333 | bne NormLoop
334 | LongNorm:
335 | adds r3, #NORM // count
336 | lsls r5, #NORM // normalize a bunch
337 | lsrs r6, r5, #32 - NORM
338 | beq LongNorm
339 | // Finish off bit-by-bit
340 | NormLoop:
341 | adds r3, #1 // count bits to shift
342 | lsls r5, #1 // normalize one bit
343 | bcc NormLoop
344 | // Shift result in r1:r0:r4 left by count in r3
345 | movs r5, r0
346 | lsls r1, r3
347 | lsls r0, r3
348 | negs r6, r3
349 | adds r6, #32
350 | lsrs r5, r6
351 | orrs r1, r5
352 | movs r5, r4
353 | lsrs r5, r6
354 | orrs r0, r5
355 | lsls r4, r3
356 | subs r2, r3 // adjust exponent
357 | bgt Round
358 | TinyExp:
359 |
360 | .ifdef NO_DENORMALS
361 | // return zero of correct sign
362 | movs r0, #0
363 | lsls r1, r7, #31 // zero with sign
364 | pop {r2, r4-r7, pc}
365 | .else
366 | // r1:r0 = result mantissa
367 | // r2 = result exponent
368 | // r4 = sticky bits
369 | // r7 = sign in LSB
370 | subs r2, #1 // helper needs exponent - 1
371 | bl __ddenormal_result
372 | b SetSign
373 | .endif
374 |
375 | LongOp1Shift:
376 | // r1:r0 = op1 mantissa
377 | // r3:r2 = op2 mantissa
378 | // r5 = exp2 - exp1 (> 32)
379 | // r6 = exp1 - exp2 + 32 (< 0)
380 | // r7 = bit 0 = sign of op1, bit 31 = sign xor
381 | // r12 = result exponent
382 | // lr = max allowed exponent
383 | cmp r5, #MANT_BITS64 + 3 // include implied, round & sticky bits
384 | bhi UseOp2
385 | subs r5, #32 // r5 = right shift count
386 | adds r6, #32 // r6 = 32 - r5
387 | // keep round & sticky bits for what we're shifting off
388 | movs r4, r0
389 | lsrs r4, r5
390 | lsls r0, r6
391 | beq 1f
392 | orrs r4, r1 // non-zero value that doesn't touch rounding bit
393 | 1:
394 | movs r0, r1
395 | lsrs r0, r5
396 | lsls r1, r6
397 | orrs r4, r1
398 | movs r1, #0
399 | b Op2CheckSign
400 |
401 | UseOp2:
402 | movs r0, r2
403 | movs r1, r3
404 | lsrs r5, r7, #31 // xor of signs to LSB
405 | eors r7, r5 // sign of op2
406 | UseOp1:
407 | mov r2, r12 // bring back exponent
408 | b NoRound
409 |
410 | RetInfinity:
411 | // return infinity
412 | ldr r1, =#INFINITY64
413 | movs r0, #0
414 | b SetSign
415 |
416 | .endfunc
417 |
--------------------------------------------------------------------------------
/src/double/ddenormal_result.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * ddenormal_result.s
4 | *
5 | * Created: 8/8/2021 2:48:57 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 | .global __ddenormal_result
18 |
19 | //*********************************************************************
20 | // Denormalize a tiny result
21 | //
22 | // Entry:
23 | // r1:r0 = result
24 | // r2 = biased result exponent - 1 (negative)
25 | // r4 = sticky bits
26 | // Exit:
27 | // r1:r0 = final result w/exponent
28 | //*********************************************************************
29 |
30 | .func __ddenormal_result
31 |
32 | .thumb_func
33 | __ddenormal_result:
34 | negs r3, r2 // r3 = right-shift count (r2 is negative on entry)
35 | adds r2, #32 // r2 = 32 - shift count
36 | bmi BigShift // shift count > 32
37 | movs r5, r1 // r5 = copy of high word (r5 is not preserved)
38 | movs r6, r0 // r6 = copy of low word for the sticky test (r6 is not preserved)
39 | lsrs r1, r3 // shift high word right
40 | lsls r5, r2 // high-word bits that move down into the low word
41 | lsrs r0, r3 // CY is rounding bit
42 | add r0, r5 // CY not affected
43 | RoundTest:
44 | bcc Exit // no rounding, all done
45 | lsls r6, r2 // rounding and sticky bits
46 | lsls r6, #1 // drop rounding bit
47 | orrs r4, r6 // any sticky bits?
48 | bne RoundUp // yes: more than halfway, round up
49 | lsls r4, r0, #31 // test LSB for round even
50 | bpl Exit // LSB clear: already even, leave as is
51 | RoundUp:
52 | adds r0, #1 // round up
53 | bcc Exit
54 | adds r1, #1 // propagate carry into high word
55 | // If this round up caused a carry into the bottom of the
56 | // exponent (leaving the mantissa zero), then we're all
57 | // set up with the smallest normalized number.
58 | Exit:
59 | bx lr
60 |
61 | BigShift:
62 | orrs r4, r0 // whole low word is shifted off: fold it into sticky bits
63 | movs r0, r1 // shift right by 32: high word becomes low word
64 | movs r1, #0
65 | adds r2, #32 // r2 = 64 - shift count
66 | bmi RetZero // shift count > 64: nothing left
67 | subs r3, #32 // remaining shift count
68 | movs r6, r0 // copy for the sticky test in RoundTest
69 | lsrs r0, r3 // CY is rounding bit
70 | b RoundTest
71 |
72 | RetZero:
73 | movs r0, r1 // both zero
74 | bx lr
75 |
76 | .endfunc
77 |
--------------------------------------------------------------------------------
/src/double/ddiv.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * ddiv.s
4 | *
5 | * Created: 10/17/2021 4:55:10 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 64-bit IEEE floating-point divide
19 | //
20 | // Entry:
21 | // r1:r0 = num
22 | // r3:r2 = den
23 | // Exit:
24 | // r1:r0 = num / den
25 | //
26 | // The calculation will use iteration to refine an approximation of the
27 | // reciprocal of the denominator (saw it on Wikipedia).
28 | // The initial guess will be calculated by subtracting the upper mantissa
29 | // bits from the constant 2.92, or 0xBB. See fdiv.s for a full discussion
30 | // of how this constant was derived. It gives a result accurate to about 3.7 bits.
31 | //
32 | // The "p" notation used throughout is the position of the binary point
33 | // (p16 means there are 16 bits to the right).
34 |
35 | .set GuessBase, 0xBB // reciprocal-guess constant: 0xBB = 2.92 with 6 fraction bits
36 |
37 |
38 | .func __ddiv
39 |
40 | .ifdef NO_DENORMALS
41 |
42 | NumZeroExp:
43 | // r1:r0 = num
44 | // r3:r2 = den
45 | // r4 = num exponent (zero)
46 | // r6 = 0x80000000 (sign bit position)
47 | // r7 = max allowed exponent
48 | // r12 = result sign
49 | lsrs r5, r3, #MANT_BITS_HI64 // den exponent
50 | beq ReturnNan // 0/0, return NAN
51 | cmp r5, r7
52 | ble ZeroResult
53 | // is den NAN?
54 | lsls r4, r3, #(EXP_BITS64 + 1)
55 | orrs r4, r2
56 | bne ReturnDen // yes, return the NAN
57 | ZeroResult:
58 | movs r0, #0
59 | movs r1, #0
60 | b SetSign
61 |
62 | DenZeroExp:
63 | // r1:r0 = num
64 | // r3:r2 = den
65 | // r4 = num exponent
66 | // r5 = den exponent (zero)
67 | // r6 = 0x80000000 (sign bit position)
68 | // r7 = max allowed exponent
69 | // r12 = result sign
70 | cmp r4, r7 // check num exponent
71 | bgt SavedSign // Return whatever num is, Infinity or NAN
72 | b RetInfinity
73 |
74 | .else // NO_DENORMALS
75 |
76 | NumZeroExp:
77 | // r1:r0 = num
78 | // r3:r2 = den
79 | // r4 = num exponent (zero)
80 | // r6 = 0x80000000 (sign bit position)
81 | // r7 = max allowed exponent
82 | // r12 = result sign
83 | movs r4, r1
84 | orrs r4, r0
85 | beq NumIsZero
86 | // __dop1_normalize uses tailored calling convention
87 | // input: r1:r0 = op1
88 | // returns r4 = op1 exponent (< 0)
89 | // r5 trashed
90 | // all other registers preserved
91 | bl __dop1_normalize
92 | str r0, [sp] // update saved value
93 | b NumNormalized
94 |
95 | DenZeroExp:
96 | // r1:r0 = num
97 | // r3:r2 = den
98 | // r4 = num exponent
99 | // r5 = den exponent
100 | // r6 = 0x80000000 (sign bit position)
101 | // r7 = max allowed exponent
102 | // r12 = result sign
103 | movs r5, r3
104 | orrs r5, r2
105 | beq DenIsZero
106 | // __dop2_normalize uses tailored calling convention
107 | // input: r3:r2 = op2
108 | // returns r5 = op2 exponent (< 0)
109 | // all other registers preserved
110 | bl __dop2_normalize
111 | str r2, [sp, #4] // update saved value
112 | b DenNormalized
113 |
114 | DenIsZero:
115 | cmp r4, r7 // check num exponent
116 | bgt SavedSign // Return whatever num is, Infinity or NAN
117 | b RetInfinity
118 |
119 | NumIsZero:
120 | lsrs r5, r3, #MANT_BITS_HI64 // den exponent
121 | bne DenNotZero
122 | movs r4, r3
123 | orrs r4, r2
124 | beq ReturnNan // 0/0, return NAN
125 | DenNotZero:
126 | cmp r5, r7
127 | ble ZeroResult
128 | // is den NAN?
129 | lsls r4, r3, #(EXP_BITS64 + 1)
130 | orrs r4, r2
131 | bne ReturnDen
132 | ZeroResult:
133 | movs r0, #0
134 | movs r1, #0
135 | b SetSign
136 |
137 | .endif // else NO_DENORMALS
138 |
139 | DenSpclExp:
140 | // r1:r0 = num
141 | // r3:r2 = den
142 | // r4 = num exponent
143 | // r5 = den exponent
144 | // r6 = 0x80000000 (sign bit position)
145 | // r7 = max allowed exponent
146 | // r12 = result sign
147 | //
148 | // mantissa == 0?
149 | lsls r6, r3, #(EXP_BITS64 + 1)
150 | orrs r6, r2
151 | bne ReturnDen // den is NAN, return it
152 | // Den is Infinity
153 | cmp r4, r7 // num special?
154 | ble ZeroResult // zero if den is infinity & num normal
155 | ReturnNan:
156 | ldr r1, =#NAN64
157 | movs r0, #0
158 | b SetSign
159 |
160 | ReturnDen:
161 | movs r0, r2
162 | movs r1, r3
163 | SavedSign:
164 | b SetSign // return r1:r0 after adding the result sign
165 |
166 |
167 | ENTRY_POINT __ddiv, __aeabi_ddiv
168 | push {r0, r2, r4-r7, lr} // saved r0 is at [sp], saved r2 at [sp, #4]
169 | // compute final sign
170 | movs r6, #1
171 | lsls r6, #31 // sign position
172 | movs r7, r3
173 | eors r7, r1
174 | ands r7, r6 // final sign
175 | mov r12, r7
176 |
177 | // r1:r0 = num
178 | // r3:r2 = den
179 | // r6 = 0x80000000 (sign bit position)
180 | // r12 = result sign
181 |
182 | // clear signs
183 | bics r1, r6
184 | bics r3, r6
185 |
186 | ldr r7, =#EXP_SPECIAL64 - 1 // r7 = max allowed exponent
187 | lsrs r4, r1, #MANT_BITS_HI64 // num exponent
188 | beq NumZeroExp
189 | NumNormalized:
190 | lsrs r5, r3, #MANT_BITS_HI64 // den exponent
191 | beq DenZeroExp
192 | DenNormalized:
193 | cmp r5, r7
194 | bgt DenSpclExp
195 | cmp r4, r7
196 | bgt SavedSign // just return num if special
197 |
198 | // r1:r0 = num
199 | // r3:r2 = den
200 | // r4 = num exponent
201 | // r5 = den exponent
202 | // r12 = result sign
203 |
204 | subs r4, r5 // compute exponent, unbiased
205 | mov lr, r4 // save exponent
206 |
207 | // Clear exponent, set implied bit
208 | lsls r1, #EXP_BITS64
209 | lsls r3, #EXP_BITS64
210 | orrs r1, r6
211 | orrs r3, r6
212 | // add lower bits to upper words
213 | lsrs r6, r0, #32 - EXP_BITS64
214 | orrs r1, r6
215 | lsrs r6, r2, #32 - EXP_BITS64
216 | orrs r3, r6
217 |
218 | // r0 = num p52
219 | // r1 = num p31 (lower bits overlap r0)
220 | // r2 = den p52
221 | // r3 = den p31 (lower bits overlap r2)
222 | // r12 = result sign
223 | // lr = result exponent
224 | //
225 | // Compute guess for 1/den = (K - den)/2. K is nearly 3.
226 | // den in [1, 2).
227 | lsrs r4, r3, #15 // den p16
228 | movs r5, #GuessBase
229 | lsls r5, #25 - 15 // MSB one bit left of den
230 | subs r5, r4 // x p17, < 1
231 | lsrs r5, #1 // x p16
232 |
233 | // Use iteration for refining the guess for 1/den. This algorithm
234 | // cubes the error (triples the number of bits) on each iteration.
235 | // (Newton-Raphson squares the error/doubles the bits per iteration.)
236 | //
237 | // next = x - x*(d*x - 1) + x*(d*x - 1)^2
238 | //
239 | // refactored as:
240 | //
241 | // next = x - x*((d*x - 1) - (d*x - 1)^2)
242 | //
243 | // d*x is very close to 1. We calculate it p32 or greater so the
244 | // leading 1, if present, just drops off. If it is less than 1, we
245 | // treat the result as a signed (now negative) number, also
246 | // effectively subtracting 1.
247 |
248 | // r0 = num p52
249 | // r1 = num p31 (lower bits overlap r0)
250 | // r2 = den p52
251 | // r3 = den p31 (lower bits overlap r2)
252 | // r4 = den p16
253 | // r5 = x p16
254 | // r12 = result sign
255 | // lr = result exponent
256 | muls r4, r5 // d*x - 1 p32, call it e (error)
257 | asrs r6, r4, #16 // e p16
258 | muls r6, r6 // e^2 p32
259 | subs r4, r6 // e - e^2 p32
260 | asrs r4, #16 // e - e^2 p16
261 | muls r4, r5 // x*(e - e^2) p32
262 | asrs r4, #16 // x*(e - e^2) p16
263 | subs r5, r4 // x - x*(e - e^2) p16
264 |
265 | // We have about 11 bits for x = 1/den, meaning e = d*x - 1 has
266 | // 11 leading zeros.
267 | lsrs r4, r3, #2 // den p29
268 | lsrs r5, #5 // x p11
269 | muls r4, r5 // d*x - 1 p40, call it e (error)
270 | asrs r6, r4, #15 // e p25
271 | muls r6, r6 // e^2 p50
272 | lsrs r6, #10 // e^2 p40
273 | subs r4, r6 // e - e^2 p40
274 | asrs r4, #11 // e - e^2 p29
275 | muls r4, r5 // x*(e - e^2) p40
276 | asrs r4, #9 // x*(e - e^2) p31
277 | lsls r5, #20 // x p31
278 | subs r5, r4 // x - x*(e - e^2) p31
279 |
280 | // r0 = num p52
281 | // r1 = num p31 (lower bits overlap r0)
282 | // r2 = den p52
283 | // r3 = den p31 (lower bits overlap r2)
284 | // r5 = x p31 (reciprocal estimate)
285 | // r12 = result sign
286 | // lr = result exponent
287 | //
288 | // compute quotient
289 | // q0 = x*(num hi32), rough quotient (27+ bits)
290 | // result p31 * p31 = p62, lower 32 bits discarded for p30
291 | // lowest partial product not needed
292 |
293 | lsrs r6, r5, #16 // xH
294 | uxth r7, r1 // numL
295 | muls r7, r6 // xH * numL = mid 1
296 | lsrs r4, r1, #16 // numH
297 | muls r6, r4 // xH * numH = hi
298 | lsrs r1, r7, #16 // hi half of mid 1
299 | adds r6, r1
300 | uxth r1, r5 // xL
301 | muls r1, r4 // xL * numH = mid 2
302 | uxth r7, r7 // lo half of mid1
303 | adds r1, r7 // sum mids
304 | lsrs r1, #16
305 | adds r4, r6, r1 // q0 = x*num p30
306 |
307 | // r0 = num p52
308 | // r2 = den p52
309 | // r3 = den p31 (lower bits overlap r2)
310 | // r4 = q0 p30
311 | // r5 = x p31 (reciprocal estimate)
312 | // r12 = result sign
313 | // lr = result exponent
314 | //
315 | // Compute q0*den exactly, except upper bits aren't needed since
316 | // they will be the same as num. We already tossed upper bits of num.
317 |
318 | mul32x32 r4, r2, r7, r2, r1, r6, r0 // r2:r7 = lo product
319 |
320 | ldr r0, [sp] // recover num p52
321 | lsrs r1, r3, #EXP_BITS64 // den p20
322 | muls r1, r4 // hi product, upper bits discarded
323 | adds r2, r1 // r2:r7 = q0*den p82
324 | // rem = num - q0*den, exact remainder from q0
325 | lsls r2, #7
326 | lsrs r7, #32 - 7
327 | orrs r2, r7
328 | lsls r7, r0, #5 // num p57
329 | subs r1, r7, r2 // rem p57
330 |
331 | // r0 = num p52
332 | // r1 = rem p57
333 | // r3 = den p31
334 | // r4 = q0 p30
335 | // r5 = x p31 (reciprocal estimate)
336 | // r12 = result sign
337 | // lr = result exponent
338 | //
339 | // q1 = x*rem, quotient from remainder (approx rem/den)
340 | // Note this macro multiplies unsigned * signed.
341 |
342 | mul32x32s r5, r1, r5, r1, r2, r6, r7 // r1 = q1 p56
343 |
344 | // quo = q0 + q1
345 | lsrs r5, r4, #6 // q0 p24
346 | lsls r4, #32 - 6 // q0 p56
347 | asrs r6, r1, #31 // sign extend q1
348 | adds r4, r1
349 | adcs r5, r6 // r5:r4 = quo p56
350 |
351 | // r0 = num p52
352 | // r3 = den p31
353 | // r5:r4 = quo p56
354 | // r12 = result sign
355 | // lr = result exponent
356 |
357 | mov r2, lr // unbiased exponent
358 | lsls r6, r5, #8 // normalized?
359 | bcs Normalized
360 | // shift quo & num for normalization
361 | lsl64const r4, r5, 1
362 | lsls r0, #1
363 | subs r2, #1 // adjust exponent
364 | Normalized:
365 | ldr r6, =#EXP_BIAS64
366 | adds r2, r6 // add bias
367 | subs r2, #1 // biased exponent - 1
368 | adds r6, r6 // max exponent (0x7FE)
369 | lsls r7, r2, #MANT_BITS_HI64 // exponent final position
370 | mov lr, r7
371 | cmp r2, r6 // r6 = max exponent
372 | bhs BigExp // catches exp < 0 too
373 |
374 | // Result quotient is accurate, but rounding is tricky because
375 | // the error, no matter how small, can straddle a rounding boundary.
376 | // First check to see if it does by looking at the rounding bit and
377 | // the guard bit below it:
378 | //
379 | // 00 - never round up
380 | // 01 - maybe round up
381 | // 10 - maybe round up
382 | // 11 - always round up
383 | //
384 | // This is tested by adding 1 to the guard bit. This will leave the
385 | // rounding and guard bits:
386 | //
387 | // 01 - never round up
388 | // 10 - maybe round up
389 | // 11 - maybe round up
390 | // 00 - already rounded up
391 | //
392 | // So if the round bit ends up 1, we need to calculate the final
393 | // remainder for rounding.
394 |
395 | adds r4, #4 // add to guard bit in quo
396 | bcc 1f
397 | adds r5, #1
398 | 1:
399 | lsrs r6, r4, #4 // check if rounding needed
400 | bcc NoRound
401 |
402 | // r0 = num p52
403 | // r3 = den p31
404 | // r5:r4 = quo p56
405 | // r12 = result sign
406 | // lr = exponent in final position
407 | //
408 | // Compute rem = num - quo*den, except upper bits aren't needed
409 | // (they're zero). We already tossed upper bits of num.
410 | // If rem >= den / 2, then round up.
411 | // Including the rounding bit in quo, which is 1, we're computing
412 | // num - (quo + 0.5)*den = rem - den/2, so a non-negative result
413 | // means round up.
414 | lsls r0, #24 // num p76
415 | Remainder:
416 | lsrs r4, #3
417 | lsls r4, #3 // zero out bits below rounding bit
418 | lsrs r3, #11 // restore den p20
419 | muls r3, r4 // denH * quoL p76
420 | subs r0, r3
421 | movs r1, r5 // quoH p24
422 | ldr r2, [sp, #4] // denL p52
423 | muls r1, r2 // quoH * denL p76
424 | subs r0, r1
425 | mul32x32 r2, r4, r2, r3, r1, r6, r7 // r3:r2 = quoL * denL p108
426 | negs r2, r2
427 | sbcs r0, r3
428 | RemTest:
429 | bmi NoRound
430 | .ifndef NO_DENORMALS
431 | beq RoundEven // round even if remainder is zero
432 | .endif
433 | // If the mantissa is all ones, this will round up into the exponent
434 | // field, incrementing it correctly. If that in turn becomes the max
435 | // exponent, it will be correctly formatted as infinity.
436 | RoundUp:
437 | adds r4, #0x10 // round up
438 | bcc NoRound
439 | adds r5, #1
440 | NoRound:
441 | // r5:r4 = quo p56
442 | // r12 = result sign
443 | // lr = exponent in final position
444 | lsrs r0, r4, #4
445 | lsrs r1, r5, #4
446 | lsls r5, #32 - 4
447 | orrs r0, r5
448 | add r1, lr // combine exponent
449 | SetSign:
450 | add r1, r12 // combine sign
451 | pop {r2-r7, pc} // saved r0/r2 slots land in r2/r3; r4-r7 restored; return
452 |
453 | .ifndef NO_DENORMALS
454 | RoundEven:
455 | cmp r2, #0 // check low half of remainder
456 | bne RoundUp
457 | // Remainder is exactly zero. We're halfway, so round even.
458 | lsrs r0, r4, #5 // final LSB to CY
459 | bcs RoundUp
460 | b NoRound
461 | .endif
462 |
463 | RetInfinity:
464 | // Build infinity
465 | ldr r1, =#INFINITY64
466 | movs r0, #0
467 | b SetSign
468 |
469 | BigExp:
470 | // r0 = num p52
471 | // r2 = result exponent - 1
472 | // r3 = den p31
473 | // r5:r4 = quo p55
474 | // r12 = result sign
475 | // lr = exponent in final position
476 | bge RetInfinity
477 | .ifdef NO_DENORMALS
478 | // See if it could round up
479 | adds r6, r2, #1 // was exponent -1?
480 | bne ReturnZero
481 | adds r4, #0x10 // round up
482 | bcs 1f
483 | adds r4, #4 // try a bigger nudge
484 | 1:
485 | adcs r5, r6 // r6 == 0
486 | lsls r3, r5, #EXP_BITS64 - 4
487 | bcs NoRound // it rounded up
488 | ReturnZero:
489 | b ZeroResult
490 | .else
491 | // Denormalize
492 | //
493 | // If we're losing lots of bits, we'll just round with the ones
494 | // we have using the shared denormalizer. Otherwise, we'll adjust
495 | // precision and go through the remainder calculation.
496 | negs r6, r2 // count to denormalize by
497 | cmp r6, #32
498 | bgt DenormHelp
499 | // 64-bit right shift by count in r6
500 | adds r2, #32
501 | lsrs r4, r6
502 | movs r7, r5
503 | lsrs r5, r6
504 | lsls r7, r2
505 | orrs r4, r7
506 |
507 | movs r1, #0
508 | mov lr, r1 // exponent is zero
509 |
510 | // See if we need to compute remainder for rounding
511 | adds r4, #4 // add to guard bit in quo
512 | bcc 1f
513 | adds r5, #1
514 | 1:
515 | lsrs r7, r4, #4 // check if rounding needed
516 | bcc NoRound
517 | // Effectively shift num in r0 right the same amount by reducing
518 | // its normal left shift of 24.
519 | subs r2, #32 - 24
520 | blt LongDenorm // whoops, really need to shift num right
521 | lsls r0, r2
522 | b Remainder
523 |
524 | LongDenorm:
525 | // shift num right instead of left
526 | negs r6, r2
527 | adds r2, #32
528 | movs r7, r0
529 | lsls r7, r2 // save low bits of num
530 | lsrs r0, r6
531 | // start our own version of remainder calc
532 | lsrs r4, #3
533 | lsls r4, #3 // zero out bits below rounding bit
534 | lsrs r3, #11 // restore den p20
535 | muls r3, r4 // denH * quoL p76
536 | subs r0, r3
537 | ldr r2, [sp, #4] // denL p52
538 | // quoH is zero, so skip that multiply
539 | push {r6, r7} // r6, r7 are also handed to mul32x32 below; keep their values
540 | mul32x32 r2, r4, r2, r3, r1, r6, r7 // r3:r2 = quoL * denL p108
541 | pop {r6, r7}
542 | // r6 = amount num shifted right
543 | // r7 = num extension shifted off
544 | subs r2, r7, r2
545 | sbcs r0, r3 // calculate final remainder
546 | // Upper bits of rem are not valid because we shifted in zeros
547 | // when num was shifted right. Discard those bits.
548 | lsls r0, r6 // upper bits invalid
549 | b RemTest
550 |
551 | DenormHelp:
552 | lsrs r0, r4, #4
553 | lsrs r1, r5, #4
554 | lsls r5, #32 - 4
555 | orrs r0, r5
556 | lsls r4, #32 - 4 // sticky bits
557 | bl __ddenormal_result
558 | b SetSign
559 | .endif
560 |
561 | .endfunc
562 |
--------------------------------------------------------------------------------
/src/double/dmul.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * dmul.s
4 | *
5 | * Created: 10/7/2021 4:48:03 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 96-bit left shift macro: hi:mid:lo <<= cnt (cnt is an immediate); tmp is scratch
19 | .macro lsl96 lo, mid, hi, cnt, tmp
20 | lsls \hi, #\cnt // make room at the bottom of hi
21 | lsrs \tmp, \mid, #32 - \cnt // top cnt bits of mid
22 | orrs \hi, \tmp // ...move into the bottom of hi
23 | lsls \mid, #\cnt // make room at the bottom of mid
24 | lsrs \tmp, \lo, #32 - \cnt // top cnt bits of lo
25 | orrs \mid, \tmp // ...move into the bottom of mid
26 | lsls \lo, #\cnt // zeros enter at the bottom of lo
27 | .endm
28 |
29 | // 64-bit IEEE floating-point multiply
30 | //
31 | // Entry:
32 | // r1:r0 = op1
33 | // r3:r2 = op2
34 | // Exit:
35 | // r1:r0 = op1 * op2
36 |
37 | .func __dmul
38 |
39 | .ifdef NO_DENORMALS
40 |
41 | Op1ZeroExp:
42 | // r1:r0 = op1
43 | // r3:r2 = op2
44 | // r4 = op1 exponent (zero)
45 | // r7 = max allowed exponent
46 | lsrs r4, r3, #MANT_BITS_HI64 // op2 exponent
47 | Op2ZeroExp:
48 | // r1:r0 = op1
49 | // r3:r2 = op2
50 | // r4 = exponent of other operand
51 | // r7 = max allowed exponent
52 | //
53 | // zero * infinity or zero * NAN?
54 | cmp r4, r7 // check other op
55 | bhi ReturnNan
56 | movs r0, #0
57 | movs r1, #0
58 | b SavedSign
59 |
60 | ReturnNan:
61 | ldr r1, =#NAN64
62 | movs r0, #0
63 | b SavedSign
64 |
65 | .else // NO_DENORMALS
66 |
67 | ZeroResult:
68 | movs r0, #0
69 | movs r1, #0
70 | b SavedSign
71 |
72 | Op2ChkZero:
73 | // op1 is special
74 | orrs r2, r3
75 | beq ReturnNan
76 | b SavedSign // not zero, return op1
77 |
78 | Op1ChkZero:
79 | // op2 is special
80 | orrs r0, r1
81 | bne ReturnOp2 // not zero, return op2
82 | ReturnNan:
83 | ldr r1, =#NAN64 // 0*infinity or NAN, return NAN
84 | movs r0, #0
85 | b SavedSign
86 |
87 | Op2ZeroExp:
88 | // r1:r0 = op1
89 | // r3:r2 = op2
90 | // r4 = op1 exponent
91 | // r5 = op2 exponent (zero)
92 | // r6 = 0x80000000 (sign bit position)
93 | // r7 = max allowed exponent
94 | // r12 = result sign
95 | cmp r4, r7
96 | bgt Op2ChkZero // op1 special exponent
97 | movs r5, r3
98 | orrs r5, r2
99 | beq ZeroResult // op2 is zero
100 | // __dop2_normalize uses tailored calling convention
101 | // input: r3:r2 = op2
102 | // returns r5 = op2 exponent (< 0)
103 | // all other registers preserved
104 | bl __dop2_normalize
105 | b Op2Normalized
106 |
107 | Op1ZeroExp:
108 | // r1:r0 = op1
109 | // r3:r2 = op2
110 | // r4 = op1 exponent (zero)
111 | // r6 = 0x80000000 (sign bit position)
112 | // r7 = max allowed exponent
113 | // r12 = result sign
114 | lsrs r5, r3, #MANT_BITS_HI64 // op2 exponent
115 | cmp r5, r7
116 | bgt Op1ChkZero // op2 special exponent
117 | movs r4, r1
118 | orrs r4, r0
119 | beq ZeroResult // op1 is zero
120 | // __dop1_normalize uses tailored calling convention
121 | // input: r1:r0 = op1
122 | // returns r4 = op1 exponent (< 0)
123 | // r5 trashed
124 | // all other registers preserved
125 | bl __dop1_normalize
126 | b Op1Normalized
127 |
128 | .endif // else NO_DENORMALS
129 |
130 | Op2SpclExp:
131 | // mantissa == 0?
132 | lsls r6, r3, #(EXP_BITS64 + 1)
133 | orrs r6, r2
134 | beq Op2Inf
135 | ReturnOp2:
136 | movs r0, r2
137 | movs r1, r3
138 | b SavedSign
139 |
140 | Op2Inf:
141 | // op2 is Infinity
142 | // if (expOp1 == EXP_SPECIAL)
143 | cmp r4, r7
144 | ble ReturnOp2 // op1 not special, return op2
145 | SavedSign:
146 | add r1, r12 // apply result sign to the value in r1:r0
147 | pop {r4-r7, pc}
148 |
149 |
150 | ENTRY_POINT __dmul, __aeabi_dmul
151 | push {r4-r7, lr} // lr is saved because helpers are called with bl
152 | // compute final sign
153 | movs r6, #1
154 | lsls r6, #31 // sign position
155 | movs r7, r3
156 | eors r7, r1
157 | ands r7, r6 // final sign
158 | mov r12, r7
159 |
160 | // r1:r0 = op1
161 | // r3:r2 = op2
162 | // r6 = 0x80000000 (sign bit position)
163 | // r12 = result sign
164 |
165 | // clear signs
166 | bics r1, r6
167 | bics r3, r6
168 |
169 | ldr r7, =#EXP_SPECIAL64 - 1 // r7 = max allowed exponent
170 | lsrs r4, r1, #MANT_BITS_HI64 // op1 exponent
171 | beq Op1ZeroExp
172 | Op1Normalized:
173 | lsrs r5, r3, #MANT_BITS_HI64 // op2 exponent
174 | beq Op2ZeroExp
175 | Op2Normalized:
176 | cmp r5, r7
177 | bgt Op2SpclExp
178 | cmp r4, r7
179 | bgt SavedSign // just return op1 if special
180 |
181 | // r1:r0 = op1
182 | // r3:r2 = op2
183 | // r4 = op1 exponent
184 | // r5 = op2 exponent
185 | // r6 = 0x80000000 (sign bit position)
186 | // r7 = max allowed exponent
187 | // r12 = result sign
188 |
189 | adds r4, r5 // compute exponent
190 |
191 | // Clear exponent, set implied bit
192 | lsls r1, #EXP_BITS64
193 | lsls r3, #EXP_BITS64
194 | orrs r1, r6
195 | orrs r3, r6
196 | lsrs r1, #EXP_BITS64
197 | lsrs r3, #EXP_BITS64
198 |
199 | // compute 106-bit product in r1:r0:r4:r5
200 | push {r0, r4} // op1 lo and save exponent
201 | mul32x32 r0, r2, r0, r4, r5, r6, r7 // r4:r0 lo
202 | mov lr, r0 // lowest (sticky) bits
203 | mul32x32 r2, r1, r2, r0, r5, r6, r7 // r0:r2 mid1
204 |
205 | // Use two 11-bit by 21-bit multiplies for top partial product into r1:r5
206 | lsrs r5, r3, #11 // upper 11 (really 10) bits
207 | muls r5, r1 // 30 or 31-bit result
208 | lsls r6, r3, #32 - 11
209 | lsrs r6, #32 - 11 // lower 11 bits
210 | muls r6, r1
211 | // upper product shift left by 11 bits (but leaving only 9 or 10)
212 | lsrs r1, r5, #32 - 11
213 | lsls r5, #11
214 | // combine partial products
215 | adds r5, r6
216 | bcc 1f
217 | adds r1, #1
218 | 1:
219 | // combine mid1 (r0:r2) with r1:r5:r4 into r1:r0:r4
220 | adds r4, r2
221 | adcs r0, r5
222 | bcc 2f
223 | adds r1, #1
224 | 2:
225 | pop {r2} // restore op1 lo
226 | mul32x32 r2, r3, r2, r3, r5, r6, r7 // r3:r2 mid2
227 | adds r4, r2
228 | adcs r0, r3
229 | bcc 3f
230 | adds r1, #1
231 | 3:
232 | mov r5, lr // get low bits back
233 | // full result in r1:r0:r4:r5
234 | pop {r2} // restore exponent
235 | // start normalization
236 | lsls r3, r1, #23 // bit 9 of r1 (top product bit) to CY
237 | bcs NormCy
238 | // we need one extra left shift
239 | adds r4, r4
240 | adcs r0, r0
241 | adcs r1, r1
242 | subs r2, #1 // adjust exponent
243 | NormCy:
244 | lsl96 r4, r0, r1, 11, r3
245 | CheckExp:
246 | ldr r6, =#EXP_BIAS64
247 | subs r2, r6 // remove double bias
248 | lsls r6, #1 // max exponent (0x7FE)
249 | cmp r2, r6
250 | bhs BigExp // too big or negative
251 |
252 | // r1:r0 = result mantissa
253 | // r2 = exponent - 1
254 | // r4 = round & sticky bits
255 | // r5 = more sticky bits
256 | // r12 = sign bit
257 | lsls r4, #1 // extract rounding bit to CY
258 | bcc NoRound
259 | orrs r4, r5 // any sticky bits?
260 | bne RoundUp
261 | // round even
262 | lsrs r3, r0, #1 // LSB to CY
263 | bcc NoRound
264 | RoundUp:
265 | adds r0, #1
266 | bcc NoRound
267 | adds r1, #1
268 | NoRound:
269 | lsls r2, #MANT_BITS_HI64 // exponent - 1 into final position
270 | adds r1, r2 // implied bit in the mantissa supplies the remaining +1
271 | SetSign:
272 | add r1, r12
273 | pop {r4-r7, pc}
274 |
275 | BigExp:
276 | // r1:r0 = result mantissa
277 | // r2 = result exponent - 1
278 | // r4 = round & sticky bits
279 | // r5 = more sticky bits
280 | // r12 = sign bit
281 | bge RetInfinity
282 | .ifdef NO_DENORMALS
283 | // See if it could round up
284 | adds r6, r2, #1 // was exponent -1?
285 | bne ReturnZero
286 | adds r0, #1 // round up LSB
287 | adcs r1, r6 // r6 == 0
288 | lsls r3, r1, #EXP_BITS64
289 | bcs NoRound // it rounded up
290 | ReturnZero:
291 | movs r0, #0
292 | movs r1, #0
293 | .else
294 | orrs r4, r5 // combine all sticky bits
295 | bl __ddenormal_result // takes r1:r0 mantissa, r2 = exponent - 1, r4 = sticky bits
296 | .endif
297 | b SetSign
298 |
299 | RetInfinity:
300 | // return infinity
301 | ldr r1, =#INFINITY64
302 | movs r0, #0
303 | b SetSign
304 |
305 |
306 | .endfunc
307 |
--------------------------------------------------------------------------------
/src/double/dop1_normalize.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * dop1_normalize.s
4 | *
5 | * Created: 10/10/2021 11:03:01 AM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 | .global __dop1_normalize
18 |
19 | //*********************************************************************
20 | // Normalize a denormalized number passed in r1:r0 (op1)
21 | //
22 | // Entry:
23 | // r1:r0 = op1
24 | // Exit:
25 | // r1:r0 = op1 fully normalized
26 | // r4 = op1 exponent (< 0)
27 | // r5 trashed
28 | // all other registers preserved
29 | //*********************************************************************
30 |
31 | .func __dop1_normalize
32 |
33 | .thumb_func
34 | __dop1_normalize:
35 | push {r6, r7, lr} // r6, r7 are scratch for lsl64; lr is overwritten by bl below
36 | // see which word has first non-zero bit
37 | movs r5, #MANT_BITS_HI64
38 | lsls r4, r1, #1 // clear sign
39 | bne DenormClz // high word has a set bit: count within it
40 | adds r5, #31 // high word is empty: raise the max count
41 | movs r4, r0 // ...and count within the low word
42 | DenormClz:
43 | // __clz_denormal_ext uses tailored calling convention
44 | // r4 = input to count leading zeros
45 | // r5 = max count
46 | // r0 - r3, r7 preserved
47 | // r5, r6 trashed
48 | bl __clz_denormal_ext // Get leading zeros in op1
49 | adds r5, r4, #1 // shift count
50 | negs r4, r4 // op1 exponent
51 | // 64-bit shift macro
52 | //.macro lsl64 lo, hi, cnt, tmp1, tmp2
53 | lsl64 r0, r1, r5, r6, r7
54 | pop {r6, r7, pc}
55 |
56 | .endfunc // close the .func above, as the other source files do
57 |
--------------------------------------------------------------------------------
/src/double/dop2_normalize.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * dop2_normalize.s
4 | *
5 | * Created: 10/10/2021 11:03:01 AM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 | .global __dop2_normalize
18 |
19 | //*********************************************************************
20 | // Normalize a denormalized number passed in r3:r2 (op2)
21 | //
22 | // Entry:
23 | // r3:r2 = op2
24 | // Exit:
25 | // r3:r2 = op2 fully normalized
26 | // r5 = op2 exponent (< 0)
27 | // all other registers preserved
28 | //*********************************************************************
29 |
30 | .func __dop2_normalize
31 |
32 | .thumb_func
33 | __dop2_normalize:
34 | push {r4, r6, r7, lr} // r4, r6, r7 are used as scratch; lr is overwritten by bl below
35 | // see which word has first non-zero bit
36 | movs r5, #MANT_BITS_HI64
37 | lsls r4, r3, #1 // clear sign
38 | bne DenormClz // high word has a set bit: count within it
39 | adds r5, #31 // high word is empty: raise the max count
40 | movs r4, r2 // ...and count within the low word
41 | DenormClz:
42 | // __clz_denormal_ext uses tailored calling convention
43 | // r4 = input to count leading zeros
44 | // r5 = max count
45 | // r0 - r3, r7 preserved
46 | // r5, r6 trashed
47 | bl __clz_denormal_ext // Get leading zeros in op2
48 | negs r5, r4 // op2 exponent
49 | adds r4, #1 // shift count
50 | // 64-bit shift macro
51 | //.macro lsl64 lo, hi, cnt, tmp1, tmp2
52 | lsl64 r2, r3, r4, r6, r7
53 | pop {r4, r6, r7, pc}
54 |
55 | .endfunc // close the .func above, as the other source files do
56 |
--------------------------------------------------------------------------------
/src/double/drsub.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * drsub.s
4 | *
5 | * Created: 9/17/2021
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 64-bit IEEE floating-point subtract reverse
19 | //
20 | // Entry:
21 | // r1:r0 = op1
22 | // r3:r2 = op2
23 | // Exit:
24 | // r1:r0 = op2 - op1
25 |
26 | FUNC_START __drsub, __aeabi_drsub
27 | push {r2, r4-r7, lr} // must match __dadd
28 | movs r4, #1 // build the sign mask in r4 (saved by the push above)
29 | lsls r4, #31 // r4 = 0x80000000, the sign bit of the high word
30 | eors r1, r4 // invert sign of op1
31 | b __dadd_saved // op2 + (-op1), joining the add routine after its register save
32 |
33 | .endfunc
34 |
--------------------------------------------------------------------------------
/src/double/dsub.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * dsub.s
4 | *
5 | * Created: 9/17/2021
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 64-bit IEEE floating-point subtract
19 | //
20 | // Entry:
21 | // r1:r0 = op1
22 | // r3:r2 = op2
23 | // Exit:
24 | // r1:r0 = op1 - op2
25 |
26 | FUNC_START __dsub, __aeabi_dsub
27 | push {r2, r4-r7, lr} // must match __dadd
28 | movs r4, #1 // build the sign mask in r4 (saved by the push above)
29 | lsls r4, #31 // r4 = 0x80000000, the sign bit of the high word
30 | eors r3, r4 // invert sign of op2
31 | b __dadd_saved // op1 + (-op2), joining the add routine after its register save
32 |
33 | .endfunc
34 |
--------------------------------------------------------------------------------
/src/double/sqrt.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * sqrt.s
4 | *
5 | * Created: 11/15/2021 11:48:59 AM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | //*********************************************************************
19 | // macro to compute 64-bit square of 32-bit number
20 | // pl can be same as x
21 |
22 | .macro sqr32 x, pl, ph, t1, t2
23 | uxth \t1, \x // t1 = low 16 bits of x
24 | muls \t1, \t1 // t1 = low product
25 | uxth \ph, \x // ph = low 16 bits of x
26 | lsrs \t2, \x, #16 // t2 = high 16 bits of x
27 | muls \ph, \t2 // ph = mid product
28 | muls \t2, \t2 // t2 = hi product
29 | // add 2*mid product
30 | lsls \pl, \ph, #17 // low word of mid << 17; x is no longer read, so pl may alias it
31 | lsrs \ph, \ph, #15 // high word of mid << 17
32 | adds \pl, \t1 // low word of result, carry out to the high word
33 | adcs \ph, \t2 // high word of result: ph:pl = x * x
34 | .endm
35 |
36 |
37 | // 64-bit IEEE floating-point square root
38 | //
39 | // Entry:
40 | // r1:r0 = input
41 | // Exit:
42 | // r1:r0 = root
43 | //
44 | // The calculation will use Newton-Raphson iteration on inverse square root.
45 | // The initial guess will be calculated by subtracting the upper mantissa bits
46 | // from one of two constants -- one for [1, 2) and the other for [2, 4). The
47 | // values were determined using a spreadsheet.
48 | //
49 | // [1, 2) Mlo = 1.2109375; Y0 = Mlo - X / 4; in hex, 0x9B p7 (0x9B / 0x80)
50 | // [2, 4) Mhi = 0.96875; Y0 = Mhi - X / 8; in hex, 0x7C p7 (0x7C / 0x80)
51 | //
52 | // The guess will have more than 4 bits of accuracy, allowing 4 iterations to
53 | // get to the required accuracy. The notation p7 means there are 7 bits to the
54 | // right of the binary point, and this notation is used throughout the comments.
55 | //
56 | // Mark Owen demonstrates in Qfplib (http://www.quinapalus.com) some clever
57 | // arrangements that simplify the code for an iteration. First, the common
58 | // representation of an iteration is refactored (x = input, y = guess for
59 | // 1/sqrt(x)):
60 | //
61 | // next = 1.5*y - x*y^3/2 = y - y*(x*y^2 - 1)/2
62 | //
63 | // Note that since y is a guess for 1/sqrt(x), the inner term x*y^2 will
64 | // be close to 1. By computing this so the binary point is left of the
65 | // 32-bit word, the integer portion just falls off.
66 |
67 | .set Mlo, 0x9B // magic number for lo range, [1, 2)
68 | .set Mhi, 0x7C // magic number for hi range, [2, 4)
69 |
70 |
71 | .func __sqrt
72 |
73 | .ifndef NO_DENORMALS
74 | ZeroExp:
75 | // Is input zero?
76 | movs r2, r1 // r2 = high word (sign is known clear on this path)
77 | orrs r2, r0 // combine with the low word
78 | beq Exit // all bits zero: return the input unchanged
79 | Denormal:
80 | // r1:r0 = input
81 | //
82 | // __dop1_normalize uses tailored calling convention
83 | // input: r1:r0 = op1
84 | // returns r4 = op1 exponent (< 0)
85 | // r5 trashed
86 | // all other registers preserved
87 | bl __dop1_normalize
88 | b Normalized
89 | .endif
90 |
91 | Special:
92 | lsls r3, r1, #1 // strip sign
93 | bcc Exit // if not set, return input, +infinity or NAN
94 | .ifdef NO_DENORMALS
95 | lsrs r3, #MANT_BITS_HI64 + 1 // exponent tells if it's zero
96 | .else
97 | orrs r3, r0 // must be all zero to be zero
98 | .endif
99 | beq Exit // input is -0, return it
100 | // negative input, return NAN
101 | ldr r1, =#NAN64
102 | movs r0, #0
103 | Exit:
104 | pop {r4-r7, pc} // shared return: result is whatever is in r1:r0
105 |
106 |
107 | ENTRY_POINT __sqrt, sqrt
108 | push {r4-r7, lr}
109 | ldr r2, =INFINITY64
110 | cmp r1, r2 // unsigned compare, so a set sign bit also compares high
111 | bhs Special // catch negative, infinity, NAN
112 | lsrs r4, r1, #MANT_BITS_HI64 // input exponent
113 | .ifdef NO_DENORMALS
114 | beq Exit // input == 0, return it
115 | .else
116 | beq ZeroExp
117 | .endif
118 | Normalized:
119 | lsls r1, #EXP_BITS64
120 | lsrs r1, #EXP_BITS64 // clear out exponent bits
121 | // Set implied bit
122 | movs r2, #1
123 | lsls r2, #MANT_BITS_HI64 // implied bit position in the high word
124 | orrs r1, r2
125 |
126 | // r1:r0 = input p52 (r1 p20)
127 | // r4 = exponent
128 |
129 | movs r3, #Mlo // assume [1, 2)
130 | lsrs r2, r1, #15 // save top bits p5
131 |
132 | // Result exponent is current exponent / 2
133 | // Double the bias before halving. Implied bit position will get
134 | // added at end, so counteract it as well.
135 | ldr r5, =#EXP_BIAS64 - 2
136 | adds r4, r5
137 | asrs r4, #1 // exp >>= 1
138 | bcc 1f // was it even?
139 | lsl64const r0, r1, 1 // if not, shift left 1
140 | movs r3, #Mhi // input interval [2, 4)
141 | 1:
142 | // Compute guess by subtracting upper bits from magic number in r3
143 | subs r3, r2
144 | lsls r4, #MANT_BITS_HI64 // move result exponent to its field in the high word
145 | mov r12, r4 // save final exponent
146 |
147 | // First iteration
148 | // r1:r0 = input p52 (r1 p20) interval [1, 4)
149 | // r3 = guess p7, accurate to 4 bits
150 | // r12 = final exponent
151 | lsrs r2, r1, #2 // x p18
152 | muls r2, r3 // x*y, p25
153 | muls r2, r3 // x*y^2 p32
154 | // As described above, we now view r2 as signed and really have
155 | // x*y^2 - 1, p32
156 | asrs r2, #9 // p23
157 | muls r2, r3 // y*(x*y^2 - 1) p30 = y*(x*y^2 - 1)/2 p31
158 | lsls r3, #24 // y p31
159 | subs r3, r2 // y - y*(x*y^2 - 1)/2 p31
160 | lsrs r3, #15 // p16
161 |
162 | // Do it again.
163 | movs r2, r3 // y p16
164 | muls r2, r2 // y^2 p32
165 | lsrs r2, #14 // y^2 p18
166 | lsrs r5, r1, #2 // x p18
167 | muls r2, r5 // x*y^2 p36 => x*y^2 - 1 p36
168 | asrs r2, #15 // p21
169 | muls r2, r3 // y*(x*y^2 - 1) p37 = y*(x*y^2 - 1)/2 p38
170 | asrs r2, #22 // p16
171 | subs r3, r2 // y - y*(x*y^2 - 1)/2 p16
172 | // if result is exactly 1, reduce it so next y^2 doesn't overflow
173 | lsrs r2, r3, #16 // integer bit to LSB
174 | subs r3, r2 // if non-zero, subtract 1 to get 0xFFFF
175 |
176 | // Third iteration needs to preserve more accuracy
177 | // r1:r0 = x p52 (r1 p20, up to 2 integer bits)
178 | // r3 = y p16
179 | // r12 = final exponent
180 | movs r2, r3 // y p16
181 | muls r3, r2 // y^2 p32
182 | // collect upper 32 bits of input
183 | lsls r1, #10
184 | lsrs r5, r0, #32 - 10
185 | orrs r1, r5 // x p30
186 |
187 | mul32x32 r3, r1, r3, r4, r5, r6, r7 // r4:r3 = x*y^2 p62
188 | // shift off the leading 1 and upper fraction bits
189 | lsrs r3, #20
190 | lsls r4, #32 - 20
191 | orrs r3, r4 // x*y^2 - 1 p42
192 | // 32x16 multiply of above term with y p16
193 | // low 16 tossed, so result is p42*p16=p58 >> 16 = p42
194 | uxth r4, r3 // low half of the term, unsigned
195 | muls r4, r2 // low partial product
196 | asrs r3, #16 // high half of the term, signed
197 | muls r3, r2 // high partial product
198 | lsrs r4, #16 // drop the low 16 bits of the low partial product
199 | adds r3, r4 // y*(x*y^2 - 1) p42 = y*(x*y^2 - 1)/2 p43
200 | asrs r3, #12 // y*(x*y^2 - 1)/2 p31
201 | lsls r2, #15 // y p31
202 | subs r2, r3 // y - y*(x*y^2 - 1)/2 p31
203 |
204 | // For the fourth iteration, we refactor again, taking into account
205 | // that we don't want y (the next guess), but x*y (the actual root).
206 | // So it becomes:
207 | //
208 | // result = x*y - x*y*(x*y^2 - 1)/2 = x*y - y*((x*y)^2 - x)/2
209 | // = x*y + y*(x - (x*y)^2)/2
210 |
211 | // r0 = x low bits, p52
212 | // r1 = x hi bits, p30 (10 bits overlap r0)
213 | // r2 = y p31
214 | // r12 = final exponent
215 | mul32x32hi r1, r2, r3, r5, r6, r7 // r3 = x*y p29 (see register summary below)
216 | sqr32 r3, r4, r5, r6, r7 // r5:r4 = (x*y)^2 p58
217 | lsls r6, r0, #6 // lo x p58
218 | lsrs r7, r1, #4 // hi x p26
219 | subs r6, r4
220 | sbcs r7, r5 // x - (x*y)^2 p58
221 | // use lo result, but pull in a few upper bits
222 | lsrs r6, #4
223 | lsls r7, #32 - 4
224 | orrs r6, r7 // x - (x*y)^2 p54
225 | mul32sx32 r6, r2, r6, r2, r4, r5, r7 // y*(x - (x*y)^2) p85 = y*(x - (x*y)^2)/2 p86
226 |
227 | // r2 = (y*(x - (x*y)^2)/2 p54
228 | // r3 = x*y p29
229 | // r12 = final exponent
230 | lsrs r4, r3, #9 // x*y hi bits p20
231 | lsls r3, #32 - 9 // r4:r3 = x*y p52
232 |
233 | // Result will be accurate, but rounding is tricky because
234 | // the error, no matter how small, can straddle a rounding boundary.
235 | // First check to see if it does by looking at the rounding bit and
236 | // the guard bit below it:
237 | //
238 | // 00 - never round up
239 | // 01 - maybe round up
240 | // 10 - maybe round up
241 | // 11 - always round up
242 | //
243 | // This is tested by adding 1 to the guard bit. This will leave the
244 | // rounding and guard bits:
245 | //
246 | // 01 - never round up
247 | // 10 - maybe round up
248 | // 11 - maybe round up
249 | // 00 - already rounded up
250 | //
251 | // So if the round bit ends up 1, we need to calculate the final
252 | // remainder for rounding.
253 |
254 | adds r2, #1 // add to guard bit at p54
255 | asrs r1, r2, #31 // sign extend
256 | asrs r2, #2 // low bits p52
257 | bcs ComputeRemainder // carry = the round bit just shifted out
258 | adds r0, r2, r3 // low word of x*y + correction
259 | adcs r1, r4 // high word, including the sign extension of the correction
260 | add r1, r12 // merge in the final exponent
261 | pop {r4-r7, pc}
262 |
263 | ComputeRemainder:
264 | // Add 1/2 LSB to result, then see if that's too big or too small by
265 | // squaring it and comparing with x. Only low bits need comparing, the
266 | // upper ones must be the same.
267 | adds r2, r3
268 | adcs r1, r4 // r1:r2 = unrounded root
269 | movs r5, r1
270 | lsls r3, r2, #1
271 | adcs r5, r5 // r5:r3 = root p53 (r5 p21)
272 | adds r3, #1 // bump by half a bit
273 | // root * root requires mid and low products
274 | muls r5, r3 // mid product p74
275 | sqr32 r3, r3, r4, r6, r7 // r4:r3 = (root + 0.5)^2 p106 (r4 p74)
276 | adds r4, r5 // sum mid + upper lo
277 | adds r4, r5 // mid used twice
278 | lsls r3, r0, #22 // x p74 (10 bits left)
279 | subs r4, r3 // (root + 0.5)^2 - x
280 | asrs r4, #31 // extend sign of result
281 | subs r0, r2, r4 // add 1 if negative
282 | sbcs r1, r4 // carry the increment into the high word
283 | add r1, r12 // merge in the final exponent
284 | pop {r4-r7, pc}
285 |
286 | .endfunc
287 |
--------------------------------------------------------------------------------
/src/float/asinf.S:
--------------------------------------------------------------------------------
1 | //****************************************************************************
2 | // asinf.s
3 | //
4 | // Created 9/2/2024 4:09:38 PM by Tim
5 | //
6 | //****************************************************************************
7 |
8 | .syntax unified
9 | .cpu cortex-m0plus
10 | .thumb
11 |
12 | .include "macros.inc"
13 | .include "ieee.inc"
14 | .include "options.inc"
15 | .include "trigf.inc"
16 |
17 |
18 | // 32-bit floating-point arc sine
19 | //
20 | // Entry:
21 | // r0 = sine of angle
22 | // Exit:
23 | // r0 = angle
24 | //
25 | // The calculation will use CORDIC rotation of the input vector to
26 | // accumulate the angle required to bring y = 0. This is done only
27 | // in the first octant, so argument reduction makes y <= x, x > 0,
28 | // y > 0.
29 | //
30 | // The calculations are semi-fixed point. y is kept normalized so
31 | // as not to lose precision, and shifted during each rotation to line
32 | // up with x. If x >> y, the result will be small and a different
33 | // table of angles [atan(2^-i)] with higher precision is used.
34 | //
35 | // Rotations are used until the angle being rotated by is so small that
36 | // tan(x) = x (for 24-bit precision), so atan(y/x) = y/x. Then we do
37 | // one last rotation by y/x.
38 | //
39 | // The "p" notation used throughout is the position of the binary point
40 | // (p16 means there are 16 bits to the right).
41 |
42 |
43 | .func asinf
44 |
45 | SpecialExp:
46 | // If argument is NAN, return it.
47 | // If it's infinity, make a new NAN and return it for both.
48 | lsls r2, r0, #(EXP_BITS32 + 1) // mantissa == 0?
49 | bne ReturnOp // input is NAN, return it
50 | ReturnNan:
51 | ldr r0, =#NAN32
52 | ReturnOp:
53 | pop {r4-r7, pc}
54 |
55 | MagnitudeOne:
56 | // Unbiased exponent is 0, so 1 <= |input| < 2. Only exactly +/-1 is
57 | // inside the domain of arc sine, and asin(+/-1) = +/-pi/2.
58 | // r3 = mantissa, implied bit in bit 31
59 | // r7 = input sign extended to all 32 bits
60 | lsls r2, r3, #1 // any fraction bits below the implied bit?
61 | bne ReturnNan // |input| > 1, out of domain
62 | ldr r0, =#0x3FC90FDB // pi/2 as a single-precision float
63 | lsls r7, #31 // input sign to sign position
64 | orrs r0, r7 // result takes the sign of the input
65 | pop {r4-r7, pc}
66 |
67 |
68 | ENTRY_POINT __asinfM0, asinftim
69 | push {r4-r7, lr}
70 | lsls r1, r0, #1 // clear input sign
71 | lsrs r1, #MANT_BITS32 + 1 // isolate exponent
72 | cmp r1, #EXP_SPECIAL32
73 | beq SpecialExp
74 | asrs r7, r0, #31 // save input sign
75 | movs r3, #1
76 | lsls r3, #MANT_BITS32 // implied bit position
77 | orrs r3, r0
78 | lsls r3, #EXP_BITS32 // isolate mantissa
79 | subs r1, EXP_BIAS32 // unbiased exponent, sets flags for both branches below
80 | bgt ReturnNan // |input| >= 2, out of domain
81 | beq MagnitudeOne // 1 <= |input| < 2: only exactly +/-1 is valid
82 | negs r1, r1
83 | // UNDONE: check for small numbers
84 | lsrs r3, r1 // denormalize mantissa: target sine p31
85 | movs r0, #0 // y = 0
86 | MOV_IMM r1, 0x80000000 // x = 1
87 | movs r2, #0 // angle = 0
88 | movs r5, #1 // shift count
89 | ldr r4, =#__sineAtanTable
90 | mov r12, r7 // keep input sign for the final result
91 | // Let's do some CORDIC vector rotations!
92 | // r0 = y p31
93 | // r1 = x p31
94 | // r2 = angle p31
95 | // r3 = target sine p31
96 | // r4 = ptr to table of angles [atan(2^-i)]
97 | // r5 = iteration i (and shift count)
98 | // r12 = sign of input (all bits)
99 | RotLoop:
100 | movs r7, r0
101 | lsrs r7, r5 // y * 2^-i
102 | movs r6, r1
103 | lsrs r6, r5 // x * 2^-i
104 | cmp r3, r0
105 | bhs FlipSign // target >= y: rotate so that y grows
106 | // round 1
107 | adds r1, r7 // x += y * 2^-i
108 | subs r0, r6 // y -= x * 2^-i
109 | // round 2
110 | movs r7, r0
111 | lsrs r7, r5 // y * 2^-i
112 | movs r6, r1
113 | lsrs r6, r5 // x * 2^-i
114 | adds r1, r7 // x += y * 2^-i
115 | subs r0, r6 // y -= x * 2^-i
116 | // accumulate angle
117 | ldmia r4!, {r6} // next atan()
118 | subs r2, r6 // new angle
119 | b AdjustSine
120 |
121 | FlipSign:
122 | subs r1, r7 // x -= y * 2^-i
123 | adds r0, r6 // y += x * 2^-i
124 | // round 2
125 | movs r7, r0
126 | lsrs r7, r5 // y * 2^-i
127 | movs r6, r1
128 | lsrs r6, r5 // x * 2^-i
129 | subs r1, r7 // x -= y * 2^-i
130 | adds r0, r6 // y += x * 2^-i
131 | // accumulate angle
132 | ldmia r4!, {r6} // next atan()
133 | adds r2, r6 // new angle
134 | AdjustSine:
135 | movs r6, r3
136 | lsrs r6, r5
137 | lsrs r6, r5
138 | adds r3, r6 // sine += sine >> (2 * shift)
139 | adds r5, #1 // shift count
140 | cmp r5, #SINE_ATAN_TABLE_ENTRIES
141 | bls RotLoop
142 | movs r1, #EXP_BIAS32 + 1 // exponent
143 | NormLoop:
144 | subs r1, #1
145 | adds r2, r2
146 | bcc NormLoop // until we shift off MSB
147 | // combine
148 | lsls r1, #MANT_BITS32 // position exponent
149 | lsrs r0, r2, #EXP_BITS32 + 1 // position mantissa
150 | adcs r0, r1 // combine exponent and rounding bit
151 | mov r3, r12
152 | lsls r3, #31 // move LSB to sign
153 | orrs r0, r3
154 | pop {r4-r7, pc}
155 |
156 | .endfunc
157 |
--------------------------------------------------------------------------------
/src/float/atan2f.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * atan2f.s
4 | *
5 | * Created: 7/3/2023 2:42:46 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 | .include "trigf.inc"
17 |
18 |
19 | // 32-bit floating-point arc tangent
20 | //
21 | // Entry:
22 | // r0 = y-coordinate
23 | // r1 = x-coordinate
24 | // Exit:
25 | // r0 = arc tangent of y/x
26 | //
27 | // The calculation will use CORDIC rotation of the input vector to
28 | // accumulate the angle required to bring y = 0. This is done only
29 | // in the first octant, so argument reduction makes y <= x, x > 0,
30 | // y > 0.
31 | //
32 | // The calculations are semi-fixed point. y is kept normalized so
33 | // as not to lose precision, and shifted during each rotation to line
34 | // up with x. If x >> y, the result will be small and a different
35 | // table of angles [atan(2^-i)] with higher precision is used.
36 | //
37 | // Rotations are used until the angle being rotated by is so small that
38 | // tan(x) = x (for 24-bit precision), so atan(y/x) = y/x. Then we do
39 | // one last rotation by y/x.
40 | //
41 | // The "p" notation used throughout is the position of the binary point
42 | // (p16 means there are 16 bits to the right).
43 |
44 |
45 | .set RECIPROCAL_GUESS_LO, 0xBB // about 2.92 p6 - see fdiv.s
46 | .set RECIPROCAL_GUESS_HI, 0xB6 // about 5.69 p5 - see below
47 | .set ODD_OCTANT_FLAG, 4
48 | .set SHIFT_START, 1
49 |
50 | SET_FLOAT PI_OVER_TWO_FLOAT, 0, 0, PI_MANTISSA_FLOAT
51 | SET_FLOAT PI_FLOAT, 0, 1, PI_MANTISSA_FLOAT
52 |
53 |
54 | .func __atan2f
55 |
56 | SpecialExpY:
57 | // r0 = |y|
58 | // r1 = |x|
59 | // r2 = y biased exponent
60 | // r5 = 0x80000000 (sign bit position)
61 | // r6 = sign of y in bit 0
62 | // r7 = sign of x in bit 0
63 | //
64 | // y is special, x unknown. Check for NAN.
65 | lsls r5, r0, #EXP_BITS32 + 1 // shift out sign and exponent; non-zero mantissa means NAN
66 | bne SetSignY // return NAN with original sign
67 | // y is infinity
68 | lsrs r3, r1, #MANT_BITS32 // isolate x exponent
69 | cmp r3, #EXP_SPECIAL32
70 | bne Yaxis
71 | lsls r5, r1, #EXP_BITS32 + 1 // is x NAN?
72 | bne Xnan
73 | // both are infinity
74 | ReturnNan:
75 | MOV_IMM r0, NAN32
76 | pop {r4-r7, pc}
77 |
78 | ZeroExpY:
79 | // r0 = |y|
80 | // r1 = |x|
81 | // r2 = y biased exponent
82 | // r5 = 0x80000000 (sign bit position)
83 | // r6 = sign of y in bit 0
84 | // r7 = sign of x in bit 0
85 | //
86 | // y exponent is zero, x unknown.
87 |
88 | .ifndef NO_DENORMALS
89 | lsls r4, r0, #EXP_BITS32
90 | beq Yzero
91 | // y is denormal, so normalize it
92 |
93 | // __clz_denormal uses tailored calling convention
94 | // r4 = input to count leading zeros
95 | // r0 - r3, r7, r12 preserved
96 | // r5, r6 trashed
97 | mov r12, r6 // park the y sign in r12 across the call
98 | bl __clz_denormal // Get leading zeros in y
99 | mov r6, r12
100 | negs r2, r4 // y biased exponent
101 | adds r2, #1
102 | lsls r0, r4 // normalize y
103 | // restore r5
104 | movs r5, #1
105 | lsls r5, #31 // sign position
106 | b YNormalized
107 | .endif
108 |
109 | Yzero:
110 | // y is zero, x unknown.
111 | .ifndef NO_DENORMALS
112 | cmp r1, #0
113 | beq ReturnNan // both 0, return NAN
114 | lsrs r3, r1, #MANT_BITS32 // isolate x exponent
115 | .else
116 | lsrs r3, r1, #MANT_BITS32 // isolate x exponent
117 | beq ReturnNan // both 0, return NAN
118 | .endif
119 | cmp r3, #EXP_SPECIAL32
120 | bne Xaxis
121 | lsls r5, r1, #EXP_BITS32 + 1 // is x NAN?
122 | beq Xaxis
123 | Xnan:
124 | lsls r7, #31 // x sign back to the sign position
125 | orrs r1, r7
126 | movs r0, r1 // return the x NAN with its original sign
127 | Exit:
128 | pop {r4-r7, pc}
129 |
130 | SpecialExpX:
131 | // r0 = |y|
132 | // r1 = |x|
133 | // r2 = y biased exponent
134 | // r3 = x biased exponent
135 | // r5 = 0x80000000 (sign bit position)
136 | // r6 = sign of y in bit 0
137 | // r7 = sign of x in bit 0
138 | //
139 | // x is special and y is not zero or special. Check for infinity.
140 | lsls r2, r1, #EXP_BITS32 + 1
141 | bne Xnan
142 | Xaxis:
143 | // x is infinity or y is 0. Return 0 for x > 0, pi * sgn(y) for x < 0
144 | movs r0, r7 // get sign of x, set flags
145 | beq SetSignY
146 | ldr r0, =#PI_FLOAT
147 | SetSignY:
148 | lsls r6, #31 // y sign back to the sign position
149 | orrs r0, r6
150 | pop {r4-r7, pc}
151 |
152 |
153 | ZeroExpX:
154 | // r0 = |y|
155 | // r1 = |x|
156 | // r2 = y biased exponent
157 | // r3 = x biased exponent
158 | // r5 = 0x80000000 (sign bit position)
159 | // r6 = sign of y in bit 0
160 | // r7 = sign of x in bit 0
161 | //
162 | // x is zero and y is not zero or special.
163 |
164 | .ifndef NO_DENORMALS
165 | lsls r4, r1, #EXP_BITS32
166 | beq Yaxis
167 | // x is denormal, so normalize it
168 |
169 | // __clz_denormal uses tailored calling convention
170 | // r4 = input to count leading zeros
171 | // r0 - r3, r7, r12 preserved
172 | // r5, r6 trashed
173 | mov r12, r6 // park the y sign in r12 across the call
174 | bl __clz_denormal // Get leading zeros in x
175 | mov r6, r12
176 | negs r3, r4 // x biased exponent
177 | adds r3, #1
178 | lsls r1, r4 // normalize x
179 | // restore r5
180 | movs r5, #1
181 | lsls r5, #31 // sign position
182 | b XNormalized
183 | .endif
184 |
185 | Yaxis:
186 | // x is zero or y is infinity. Return pi/2 signed as y.
187 | ldr r0, =#PI_OVER_TWO_FLOAT
188 | b SetSignY
189 |
190 |
191 | ENTRY_POINT __atanfM0, atanf
192 | MOV_IMM r1, ONE32
193 | //
194 | // Fall into __atan2f
195 | //
196 | ENTRY_POINT __atan2fM0, atan2f
197 | push {r4-r7, lr}
198 | lsrs r6, r0, #31 // save y input sign
199 | lsrs r7, r1, #31 // save x input sign
200 | movs r5, #1
201 | lsls r5, #31 // sign bit, implied bit when normalized
202 | bics r0, r5 // clear y sign
203 | bics r1, r5 // clear x sign
204 | lsrs r2, r0, #MANT_BITS32 // isolate y exponent
205 | beq ZeroExpY
206 | cmp r2, #EXP_SPECIAL32
207 | beq SpecialExpY
208 | YNormalized:
209 | lsrs r3, r1, #MANT_BITS32 // isolate x exponent
210 | beq ZeroExpX
211 | cmp r3, #EXP_SPECIAL32
212 | beq SpecialExpX
213 | XNormalized:
214 | subs r3, r2 // exponent difference
215 | .ifndef NO_DENORMALS
216 | blt SwapXY
217 | bgt YlessThanX
218 | .endif
219 | cmp r0, r1
220 | ble YlessThanX
221 | SwapXY:
222 | SWAP r0, r1 // make r0 the smaller magnitude
223 | adds r6, #ODD_OCTANT_FLAG // record the swap in the octant info
224 | negs r3, r3 // exponent difference for the swapped operands
225 | YlessThanX:
226 | lsls r7, #1 // x sign to bit 1
227 | orrs r7, r6 // combine signs
228 | mov r12, r7
229 | lsls r0, EXP_BITS32
230 | orrs r0, r5 // normalize y and set implied bit
231 | lsls r1, EXP_BITS32
232 | orrs r1, r5 // normalize x and set implied bit
233 | lsrs r0, #1 // y p30
234 | lsrs r1, #1 // x p30
235 | cmp r3, #-TAN_X_EQUALS_X_EXP
236 | bhi SmallAtan // no rotations needed
237 |
238 | // Let's do some CORDIC vector rotations!
239 | ldr r4, =#__fullAtanTable
240 | movs r6, #ATAN_TABLE_END_OFFSET
241 | adds r6, r4
242 | lsls r3, #1 // double the shift for y
243 | beq SkipFirstRotation
244 | cmp r3, #2*SMALL_ATAN_TABLE_START_I
245 | blt IndexTable
246 | adds r4, SMALL_ATAN_TABLE_OFFSET - SMALL_ATAN_TABLE_START_I * 4
247 | adds r6, SMALL_ATAN_TABLE_END_OFFSET
248 | IndexTable:
249 | // Skip over one entry in the table for each count of exp. difference
250 | adds r4, r3
251 | adds r4, r3 // skip exp. dif table entries (4 bytes each)
252 | // Perform first rotation inline
253 | movs r7, r0
254 | lsrs r7, r3 // account for scaling
255 | subs r0, r1 // y -= x * 2^-0
256 | adds r1, r7 // x += y * 2^-0
257 | SkipFirstRotation:
258 | ldmia r4!, {r2} // initial atan()
259 | movs r5, #SHIFT_START
260 | mov lr, r6 // lr is free as scratch: the return address was pushed on entry
261 | // r0 = y p30
262 | // r1 = x p30
263 | // r2 = z p32 (current angle)
264 | // r3 = scale factor - exp. dif * 2
265 | // r4 = ptr to table of angles [atan(2^-i)]
266 | // r5 = iteration i (and shift count)
267 | // r12 = octant info
268 | // lr = end of table
269 | RotLoop:
270 | movs r7, r0
271 | asrs r7, r3 // account for scaling
272 | asrs r7, r5 // y * 2^-i
273 | movs r6, r1
274 | lsrs r6, r5 // x * 2^-i
275 | adds r5, #1
276 | cmp r0, #0
277 | blt TooSmall
278 | adds r1, r7 // x += y * 2^-i
279 | subs r0, r6 // y -= x * 2^-i
280 | ldmia r4!, {r6} // next atan()
281 | adds r2, r6 // new angle
282 | cmp r4, lr
283 | bne RotLoop
284 | b LoopDone
285 |
286 | SmallAtan:
287 | movs r2, #0 // initialize angle to zero
288 | b ComputeYoverX
289 |
290 | TooSmall:
291 | subs r1, r7 // x -= y * 2^-i
292 | adds r0, r6 // y += x * 2^-i
293 | ldmia r4!, {r6} // next atan()
294 | subs r2, r6 // new angle
295 | cmp r4, lr
296 | bne RotLoop
297 | LoopDone:
298 | lsrs r3, #1 // restore exponent difference
299 |
300 | // We're close enough so atan(y/x) = y/x.
301 | //
302 | // Of course, division isn't fun. We'll use Newton-Raphson
303 | // iteration to calculate the reciprocal of x and multiply that
304 | // by y for y/x. You can find more details in fdiv.s.
305 | //
306 | // The maximum initial vector length occurs when
307 | // x = y = (2 - 1 ULP), for a length of sqrt(2^2 + 2^2) = 2.83.
308 | // Every rotation by atan(2^-i) increases the vector length by
309 | // 1/cos(2^-i), which is a factor of 1.164 if all rotations are
310 | // done. This gives a max final vector length of about 3.2935.
311 | // This means we have to calculate the reciprocal of x over a
312 | // longer range, or actually two ranges, [1, 2) and [2, 3.3).
313 | //
314 | // We'll use the method used in fdiv.s to make an initial guess
315 | // for the lower range: guess g = (2.92-x)/2. For the upper range,
316 | // we use g = (K-x)/8, computing K as follows to minimize error:
317 | //
318 | // error e = 1 - x*g = 1 - x*(K-x)/8 = 1 - x*K/8 + x^2/8
319 | //
320 | // Derivative to find max: e' = x/4 - K/8 = 0 => x = K/2
321 | // So e(max) = e(K/2) = 1 - K^2/16 + K^2/32 = 1 - K^2/32
322 | //
323 | // Choose K so that -e(max) = e(L), where L = upper limit 3.293:
324 | //
325 | // K^2/32 - 1 = 1 - L*K/8 + L^2/8 => K^2/32 + L*K/8 - 2 - L^2/8 = 0
326 | //
327 | // One of whose solutions is 5.69214. Round to 8 bits gives 0xB6 p5,
328 | // which is 5.6875, and max error is 0.0144, more than 6 bits, vs.
329 | // 3.68 bits for the [1, 2) range.
330 | //
331 | // r0 = y p30
332 | // r1 = x p30
333 | // r2 = z p32 or p38
334 | // r3 = exponent difference
335 | // r12 = octant info
336 | ComputeYoverX:
337 |
338 | lsrs r5, r1, #13 // 1 <= x < 3.3, p17
339 | // See if x >= 2
340 | cmp r1, #0 // was MSB set?
341 | bge SmallX
342 | movs r4, #RECIPROCAL_GUESS_HI
343 | lsls r4, #12 // approx. 5.69 p17
344 | subs r4, r5 // 1/x < 1 guess, p20
345 | lsrs r4, #5 // guess p15
346 | b Reciprocal
347 |
348 | SmallX:
349 | movs r4, #RECIPROCAL_GUESS_LO
350 | lsls r4, #11 // approx. 2.92 p17
351 | subs r4, r5 // 1/x < 1 guess, p18
352 | lsrs r4, #3 // guess p15
353 | Reciprocal:
354 | // Start Newton-Raphson iterations
355 | // error e = guess*x - 1
356 | // next = guess - guess*e
357 | //
358 | // r5 = x p17
359 | // r4 = guess p15
360 | muls r5, r4 // e = guess * x - 1 p32
361 | asrs r5, #15 // e p17
362 | muls r5, r4 // guess * e p32
363 | asrs r5, #17 // guess * e p15
364 | subs r4, r5 // guess -= guess * e = new guess p15
365 |
366 | // round two, gets us to 15 bits
367 | lsrs r5, r1, #13 // x p17
368 | muls r5, r4 // e = x*guess - 1 p32
369 | asrs r5, #15 // e p17
370 | muls r5, r4 // guess*e p32
371 | asrs r5, #17 // guess*e p15
372 | subs r4, r5 // final approximation of 1/x p15
373 |
374 | // r0 = y p30
375 | // r1 = x p30
376 | // r2 = z p32 or p38
377 | // r3 = exponent difference
378 | // r4 = approximation of 1/x p15
379 | // r12 = octant info
380 | //
381 | // q0 = y*guess, rough quotient (14+ bits)
382 | // rem = y - q0*x, exact remainder from q0
383 | // q1 = guess*rem, quotient from remainder (approx rem/x)
384 | // quo = q0 + q1
385 |
386 | asrs r6, r0, #15 // y p15
387 | muls r6, r4 // y*guess = approx quotient q0 p30
388 | asrs r6, #15 // q0 p15
389 | lsrs r5, r1, #2 // x p28
390 | muls r5, r6 // x*q0 p43
391 | lsls r0, #13 // y p43
392 | subs r5, r0, r5 // y - x*q0 = rem p43
393 | asrs r5, #14 // rem p29
394 | muls r5, r4 // rem*guess = q1 p44
395 | asrs r5, #13 // q1 p31
396 | lsls r6, #16 // q0 p31
397 | adds r0, r6, r5 // q = q0 + q1 p31
398 |
399 | // r0 = y/x p31
400 | // r2 = z p32 or p38
401 | // r3 = exponent difference
402 | // r12 = octant info
403 | // if exponent difference > SMALL_ATAN_TABLE_START_I, z is p38
404 |
405 | movs r4, #EXP_BIAS32
406 | subs r3, #1
407 | bgt TestSmallTable
408 | negs r3, r3
409 | lsls r0, r3 // align y/x with z
410 | b Sum
411 |
412 | TestSmallTable:
413 | subs r4, r3
414 | cmp r3, #SMALL_ATAN_TABLE_START_I - 1
415 | blt Align
416 | subs r3, #SMALL_ATAN_TABLE_SHIFT
417 | Align:
418 | lsls r2, r3 // align y/x with z
419 | Sum:
420 | adds r2, r0 // z + y/x, final atan mantissa
421 |
422 | // r2 = z (angle) p32
423 | // r12 = octant info
424 | mov r7, r12 // get octant info
425 |
426 | // Octant info:
427 | // bit 0 = original sign of y => copy to result
428 | // bit 1 = original sign of x
429 | // bit 2 = |y| > |x|
430 | //
431 | // bits y>x sgn(x) action
432 | //--------------------------
433 | // 00x n + none
434 | // 01x n - subtract from pi
435 | // 10x y + subtract from pi/2
436 | // 11x y - add pi/2
437 | //
438 | cmp r7, #2
439 | blt NormLoop
440 | ldr r0, =#PI_MANTISSA // p30 for pi
441 | lsrs r3, r7, #2 // isolate octant bit 2
442 | lsrs r0, r3 // if 1xx, convert to pi/2
443 | movs r5, #EXP_BIAS32 + 2
444 | subs r5, r4
445 | movs r4, #EXP_BIAS32 + 2
446 | .ifndef NO_DENORMALS
447 | // exponent difference can exceed 8 bits
448 | cmp r5, #32 // shifting to zero?
449 | blt 1f
450 | movs r2, #0
451 | 1:
452 | .endif
453 | lsrs r2, r5 // z p30
454 | cmp r7, #6
455 | bge AddCorrection
456 | negs r2, r2 // subtract instead
457 | AddCorrection:
458 | adds r2, r0
459 |
460 | // Normalize the z result in r2.
461 | NormLoop:
462 | subs r4, #1
463 | adds r2, r2
464 | bcc NormLoop // until we shift off MSB
465 |
466 | // r2 = z
467 | // r4 = exponent of z
468 | // r7 = octant info
469 | PositionExp:
470 | lsls r0, r4, #MANT_BITS32 // position exponent
471 | beq RoundChk
472 | bmi TinyResult
473 | Pack:
474 | lsrs r2, #EXP_BITS32 + 1 // position mantissa
475 | adcs r0, r2 // combine exponent, inc. rounding bit
476 | SetSign:
477 | lsls r7, #31 // original sign of y
478 | orrs r0, r7 // sign of result is sign of y
479 | pop {r4-r7, pc}
480 |
481 | .ifdef NO_DENORMALS
482 | RoundChk:
483 | // see if it could round up to a normal number
484 | movs r4, 1 // exponent if round up works
485 | MOV_IMM r3, 0x200
486 | adds r2, r3
487 | beq PositionExp
488 | TinyResult:
489 | .else
490 | RoundChk:
491 | TinyResult:
492 | // result is tiny
493 | negs r4, r4
494 | cmp r4, #25 // shifting to zero?
495 | bhs ReturnZero
496 | lsrs r2, #1 // make room for mantissa MSB, no longer implied
497 | movs r1, #1
498 | lsls r1, #31
499 | orrs r2, r1 // set result MSB
500 | lsrs r2, r4 // denormalize
501 | movs r0, #0 // zero exponent
502 | b Pack
503 |
504 | .endif
505 | ReturnZero:
506 | movs r0, #0
507 | b SetSign
508 |
509 | .endfunc
510 |
--------------------------------------------------------------------------------
/src/float/atantablef.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * atantablef.s
4 | *
5 | * Created: 7/28/2023 12:40:29 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 | .include "trigf.inc"
17 |
18 |
19 | // Tables of arc tangents for sinf() and atan2f() functions. Both
20 | // functions switch to a table with higher precision when the
21 | // argument is small.
22 | //
23 | // The "p" notation used throughout is the position of the binary point
24 | // (p16 means there are 16 bits to the right).
25 |
26 | .global __fullAtanTable
27 | .global __sineAtanTable
28 |
29 | .align 2
30 |
31 | __fullAtanTable:
32 | .word 0 // extra leading entry, present in the full table only
33 | __sineAtanTable:
34 | // atan(2^-i), i = 1 .. 13, p32
35 | .word 0x76B19C16
36 | .word 0x3EB6EBF2
37 | .word 0x1FD5BA9B
38 | .word 0xFFAADDC
39 | .word 0x7FF556F
40 | .word 0x3FFEAAB
41 | .word 0x1FFFD55
42 | .word 0xFFFFAB
43 | .word 0x7FFFF5
44 | .word 0x3FFFFF
45 | .word 0x200000
46 | .word 0x100000
47 | .word 0x80000
48 | AtanTableEnd:
49 |
50 | SmallAtanTable:
51 | // atan(2^-i), i = 7 .. 13, p38
52 | .word 0x7FFF5557
53 | SineSmallAtanTable:
54 | .word 0x3FFFEAAB
55 | .word 0x1FFFFD55
56 | .word 0xFFFFFAB
57 | .word 0x7FFFFF5
58 | .word 0x3FFFFFF
59 | .word 0x2000000
60 | SmallAtanTableEnd:
61 |
62 | // Verify constants in trigf.inc against the labels above; a mismatch fails the build
63 |
64 | .if ATAN_TABLE_END_OFFSET != AtanTableEnd - __fullAtanTable
65 | .error "Error: ATAN_TABLE_END_OFFSET constant in trigf.inc does not match actual offset."
66 | .endif
67 |
68 | .if SMALL_ATAN_TABLE_OFFSET != SmallAtanTable - __fullAtanTable
69 | .error "Error: SMALL_ATAN_TABLE_OFFSET constant in trigf.inc does not match actual offset."
70 | .endif
71 |
72 | .if SMALL_ATAN_TABLE_END_OFFSET != SmallAtanTableEnd - AtanTableEnd
73 | .error "Error: SMALL_ATAN_TABLE_END_OFFSET constant in trigf.inc does not match actual offset."
74 | .endif
75 |
76 | .if SMALL_SINE_ATAN_TABLE_OFFSET != SineSmallAtanTable - __sineAtanTable
77 | .error "Error: SMALL_SINE_ATAN_TABLE_OFFSET constant in trigf.inc does not match actual offset."
78 | .endif
79 |
80 | .if SINE_ATAN_TABLE_ENTRIES != (AtanTableEnd - __sineAtanTable) / 4
81 | .error "Error: SINE_ATAN_TABLE_ENTRIES constant in trigf.inc does not match actual count."
82 | .endif
83 |
84 | .if SMALL_SINE_ATAN_TABLE_ENTRIES != (SmallAtanTableEnd - SineSmallAtanTable) / 4
85 | .error "Error: SMALL_SINE_ATAN_TABLE_ENTRIES constant in trigf.inc does not match actual count."
86 | .endif
87 |
--------------------------------------------------------------------------------
/src/float/cosf.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * cosf.s
4 | *
5 | * Created: 6/22/2023 9:52:01 AM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 32-bit floating-point cosine
19 | //
20 | // Entry:
21 | // r0 = input angle in radians
22 | // Exit:
23 | // r0 = cosine
24 | //
25 | // This simply calls __sinfM0, which returns sinf() in r0 and cosf() in r1.
26 |
27 | FUNC_START __cosfM0, cosf
28 | push {lr} // keep return address across the call
29 | bl __sinfM0
30 | movs r0, r1 // move the r1 result into the return register
31 | pop {pc}
32 |
33 | .endfunc
34 |
--------------------------------------------------------------------------------
/src/float/faddsub.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * faddsub.s
4 | *
5 | * Created: 8/6/2021 12:33:35 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 32-bit IEEE floating-point add & subtract
19 | //
20 | // Entry:
21 | // r0 = op1
22 | // r1 = op2
23 | // Exit:
24 | // r0 = op1 + op2
25 | // or
26 | // r0 = op1 - op2
27 |
28 | FUNC_START __fsub, __aeabi_fsub
29 | movs r2, #1
30 | lsls r2, #31
31 | eors r1, r2 // flip sign for subtract
32 | //
33 | // fall into fadd
34 | //
35 | ENTRY_POINT __fadd, __aeabi_fadd
36 |
37 | .ifdef NO_DENORMALS
38 |
39 | // Handle easy cases before saving registers
40 | //
41 | // If one of the operands is zero, just return the
42 | // other -- including if it's NAN or infinity.
43 | lsls r3, r1, #1 // clear op2 sign
44 | lsrs r3, #MANT_BITS32 + 1 // op2 exponent
45 | beq ReturnOp1
46 | lsls r2, r0, #1 // clear op1 sign
47 | lsrs r2, #MANT_BITS32 + 1 // op1 exponent
48 | beq ReturnOp2
49 |
50 | cmp r3, #EXP_SPECIAL32
51 | beq Op2SpclExp
52 | // If op1 is special (and op2 not), just return op1
53 | cmp r2, #EXP_SPECIAL32
54 | beq ReturnOp1
55 |
56 | push {r4, r5, r7}
57 |
58 | movs r5, #1
59 | lsls r5, #31 // sign position
60 | lsrs r7, r0, #31 // grab sign of op1
61 | movs r4, r0
62 | eors r4, r1 // see if signs the same
63 | ands r4, r5 // isolate sign bit
64 | orrs r7, r4 // combine sign info
65 |
66 | .else
67 | // r6 must be saved too: Op1ZeroExp and __clz_denormal use it as scratch
68 | push {r4-r7, lr}
69 |
70 | movs r5, #1
71 | lsls r5, #31 // sign position
72 | lsrs r7, r0, #31 // grab sign of op1
73 | movs r4, r0
74 | eors r4, r1 // see if signs the same
75 | ands r4, r5 // isolate sign bit
76 | orrs r7, r4 // combine sign info
77 |
78 | lsls r2, r0, #1 // clear op1 sign
79 | lsls r3, r1, #1 // clear op2 sign
80 | lsrs r2, #MANT_BITS32 + 1 // op1 exponent
81 | beq Op1ZeroExp
82 | Op1Normalized:
83 | lsrs r3, #MANT_BITS32 + 1 // op2 exponent
84 | beq Op2ZeroExp
85 | Op2Normalized:
86 |
87 | cmp r3, #EXP_SPECIAL32
88 | beq Op2SpclExp
89 | // If op1 is special (and op2 not), just return op1
90 | cmp r2, #EXP_SPECIAL32
91 | beq ReturnOp1
92 |
93 | .endif
94 |
95 | // r0 = op1
96 | // r1 = op2
97 | // r2 = op1 exponent
98 | // r3 = op2 exponent
99 | // r5 = 0x80000000 (sign bit position)
100 | // r7 = bit 0 = sign of op1, bit 31 = sign xor
101 |
102 | // Clear exponent, set implied bit
103 | lsls r0, #EXP_BITS32
104 | lsls r1, #EXP_BITS32
105 | orrs r0, r5
106 | orrs r1, r5
107 | // align so bit 30 is MSB
108 | lsrs r0, #1
109 | lsrs r1, #1
110 |
111 | subs r5, r2, r3 // op1 exp - op2 exp
112 | negs r4, r5 // op2 exp - op1 exp
113 | bmi Op1Larger
114 | // op2 is larger
115 | movs r2, r3 // save exponent
116 | cmp r4, #MANT_BITS32 + 3 // include implied, round & sticky bits
117 | bhi UseOp2
118 | adds r5, #32 // no. of bits to shift off for sticky bits
119 | movs r3, r0
120 | lsls r3, r5 // sticky bits
121 | lsrs r0, r4 // align op1
122 | CheckSign:
123 | orrs r7, r7 // check sign flags
124 | bmi SubOp1 // signs were different, subtract
125 | AddOps:
126 | // signs are the same, add operands
127 | // r0 = op1 mantissa, MSB at bit 30
128 | // r1 = op2 mantissa, aligned to match r0 exponent
129 | // r2 = result exponent
130 | // r3 = sticky bits
131 | // r7 = bit 0 is sign of result
132 | adds r0, r1 // sum it
133 | // Operand MSB was at bit 30, leaving room for it to carry into
134 | // bit 31. A one-bit normalization will be needed if it didn't.
135 | bmi Round
136 | Norm1bit:
137 | subs r2, #1 // adjust exponent
138 | lsls r0, #1 // normalize
139 | Round:
140 | // r0 = result, left justified
141 | // r2 = result exponent - 1
142 | // r3 = sticky bits
143 | // r7 = bit 0 is sign of result
144 | cmp r2, #EXP_SPECIAL32 - 1
145 | bhs BigExp
146 | lsls r4, r0, #25 // look at everything below rounding bit
147 | Align:
148 | lsrs r0, #8 // normal alignment, rounding bit to CY
149 | bcc Aligned // if CY not set, no rounding needed
150 | orrs r3, r4 // any sticky bits?
151 | bne RoundUp
152 | lsls r3, r0, #31 // check LSB for even
153 | bpl Aligned // if even, leave it
154 | RoundUp:
155 | adds r0, #1 // add to rounding bit
156 | Aligned:
157 | lsls r2, #MANT_BITS32
158 | adds r0, r2
159 | SetSign:
160 | lsls r7, #31
161 | adds r0, r7
162 | Exit:
163 | .ifdef NO_DENORMALS
164 | pop {r4, r5, r7}
165 | ReturnOp1:
166 | bx lr
167 |
168 | ReturnOp2:
169 | movs r0, r1
170 | bx lr
171 | .else
172 | pop {r4-r7, pc} // must match the push in the prologue
173 |
174 | ReturnOp2:
175 | movs r0, r1
176 | ReturnOp1:
177 | pop {r4-r7, pc} // must match the push in the prologue
178 |
179 | .endif
180 |
181 | Op1Larger:
182 | // op1 has a larger exponent, so it's bigger for sure
183 | cmp r5, #MANT_BITS32 + 3 // implied, round & sticky bits
184 | bhi Norm1bit // op1 is result
185 | adds r4, #32 // no. of bits to be shifted off
186 | movs r3, r1
187 | lsls r3, r4 // sticky bits
188 | lsrs r1, r5 // align op2
189 | orrs r7, r7 // check sign flags
190 | bpl AddOps
191 | // op1 - op2
192 | negs r3, r3 // 0 - sticky bits
193 | sbcs r0, r1
194 | b Normalize
195 |
196 | UseOp2:
197 | movs r0, #0
198 | b CheckSign
199 |
200 | SubOp1:
201 | // op2 - op1
202 | // However, it could be op2 <= op1 with same exponent
203 | adds r7, #1 // flip sign bit in LSB
204 | negs r3, r3 // 0 - sticky bits
205 | sbcs r1, r0
206 | movs r0, r1
207 | beq Exit // Return zero result
208 | bpl Normalize
209 | // Subtracted wrong way. Can't be any sticky bits
210 | adds r7, #1 // flip sign bit in LSB
211 | negs r0, r0
212 | Normalize:
213 | // Check for big chunks of leading zeros
214 | .set NORM1, 12
215 | lsrs r1, r0, #31 - NORM1
216 | bne 1f
217 | subs r2, #NORM1 // adjust exponent
218 | lsls r0, #NORM1 // normalize a bunch
219 | 1:
220 | .set NORM2, 6
221 | lsrs r1, r0, #31 - NORM2
222 | bne 2f
223 | subs r2, #NORM2 // adjust exponent
224 | lsls r0, #NORM2 // normalize a bunch
225 | 2:
226 | // Finish off bit-by-bit
227 | NormLoop:
228 | subs r2, #1 // adjust exponent
229 | lsls r0, #1 // normalize one bit
230 | bpl NormLoop
231 | b Round
232 |
233 | Op2SpclExp:
234 | // r0 = op1
235 | // r1 = op2
236 | // r2 = op1 exponent
237 | // r3 = op2 exponent
238 | //
239 | // op2 mantissa == 0?
240 | lsls r3, r1, #(EXP_BITS32 + 1)
241 | bne ReturnOp2 // op2 is NAN, return it
242 | // op2 is Infinity
243 | // if (expOp1 == EXP_SPECIAL)
244 | cmp r2, #EXP_SPECIAL32
245 | bne ReturnOp2 // op1 not special, return op2
246 | // op1 mantissa == 0?
247 | lsls r3, r0, #(EXP_BITS32 + 1)
248 | bne ReturnOp1 // op1 is NAN, return it
249 | // Both op1 & op2 are infinity. If signs differ, return NAN
250 | eors r1, r0
251 | bpl ReturnOp1 // signs the same, return infinity
252 | // return NAN
253 | ldr r0, =#NAN32
254 | b ReturnOp1
255 |
256 | BigExp:
257 | // r0 = result, left justified
258 | // r2 = result exponent - 1
259 | // r3 = sticky bits
260 | // r7 = bit 0 is sign of result
261 | bge RetInfinity
262 | .ifdef NO_DENORMALS
263 | // return zero of correct sign
264 | lsls r0, r7, #31 // zero with sign
265 | b Exit
266 | .else
267 | // r0 = result mantissa left justified
268 | // r2 = result exponent - 1
269 | // r3 = sticky bits
270 | // r7 = bit 0 is sign of result
271 | bl __fdenormal_result
272 | b SetSign
273 | .endif
274 |
275 | RetInfinity:
276 | // Build infinity
277 | movs r0, #EXP_SPECIAL32
278 | lsls r0, #MANT_BITS32
279 | b SetSign
280 |
281 | .ifndef NO_DENORMALS
282 |
283 | Op2ZeroExp:
284 | // r0 = op1
285 | // r1 = op2
286 | // r2 = op1 exponent
287 | // r3 = op2 exponent (zero)
288 | // r5 = 0x80000000 (sign bit position)
289 | // r7 = sign info
290 | lsls r4, r1, #1 // clear existing sign
291 | beq ReturnOp1
292 | // op2 is denormal, so normalize it
293 |
294 | // __clz_denormal uses tailored calling convention
295 | // r4 = input to count leading zeros
296 | // r0 - r3, r7 preserved
297 | // r5, r6 trashed (r6 is restored by the epilogue)
298 | bl __clz_denormal // Get leading zeros in op2
299 | subs r4, #EXP_BITS32
300 | negs r3, r4 // op2 exponent
301 | adds r4, #1
302 | lsls r1, r4
303 | // restore r5
304 | movs r5, #1
305 | lsls r5, #31 // sign position
306 | b Op2Normalized
307 |
308 | Op1ZeroExp:
309 | // r0 = op1
310 | // r1 = op2
311 | // r2 = op1 exponent (zero)
312 | // r3 = op2 << 1 (exponent not yet extracted)
313 | // r5 = 0x80000000 (sign bit position)
314 | // r7 = sign info
315 | lsls r4, r0, #1 // clear existing sign
316 | beq ReturnOp2
317 | // op1 is denormal, check op2 for zero
318 | lsls r6, r1, #1 // scrape off sign (r6 is scratch, saved in prologue)
319 | beq ReturnOp1
320 |
321 | // __clz_denormal uses tailored calling convention
322 | // r4 = input to count leading zeros
323 | // r0 - r3, r7 preserved
324 | // r5, r6 trashed (r6 is restored by the epilogue)
325 | bl __clz_denormal // Get leading zeros in op1
326 | subs r4, #EXP_BITS32
327 | negs r2, r4 // op1 exponent
328 | adds r4, #1
329 | lsls r0, r4
330 | // restore r5
331 | movs r5, #1
332 | lsls r5, #31 // sign position
333 | b Op1Normalized
334 |
335 | .endif
336 |
337 | .endfunc
338 |
--------------------------------------------------------------------------------
/src/float/fdenormal_result.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * fdenormResult.s
4 | *
5 | * Created: 8/8/2021 2:48:57 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 | .global __fdenormal_result
18 |
19 | //*********************************************************************
20 | // Denormalize a tiny result
21 | //
22 | // Entry:
23 | // r0 = result, left justified (with trailing sticky bits)
24 | // r2 = biased result exponent - 1 (negative)
25 | // r3 = sticky bits
26 | // Exit:
27 | // r0 = final result w/exponent, right justified
28 | // r1, r2 destroyed (r3 also, when the rounding bit is set)
29 | //*********************************************************************
30 |
31 | .func __fdenormal_result
32 |
33 | .thumb_func
34 | __fdenormal_result:
35 | // There are 7 sticky bits in r0 below the rounding bit.
36 | // Keep these along with the bits we shift out denormalizing.
37 | adds r2, #32 - 7 // left-shift count that keeps only bits below the rounding bit
38 | movs r1, r0
39 | lsls r1, r2 // r1 = everything below the rounding bit
40 | // We'll shift right all the way to normal alignment,
41 | // shifting the rounding bit into the CY
42 | subs r2, #32 + 1
43 | negs r2, r2 // r2 = 8 - (entry r2): normal 8-bit alignment plus denormal shift
44 | lsrs r0, r2 // last bit shifted out (rounding bit) lands in CY
45 | bcc Exit // CY not set means no rounding bit
46 | // Round, checking for round-even if exactly halfway
47 | orrs r3, r1 // test sticky bits
48 | bne RoundUp // sticky set, round up
49 | lsls r1, r0, #31 // test LSB for round even
50 | bpl Exit
51 | RoundUp:
52 | adds r0, #1 // round up
53 | // If this round up caused a carry into the bottom of the
54 | // exponent (leaving the mantissa zero), then we're all
55 | // set up with the smallest normalized number.
56 | Exit:
57 | bx lr
58 |
59 | .endfunc
60 |
--------------------------------------------------------------------------------
/src/float/fdiv.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * fdiv.s
4 | *
5 | * Created: 8/20/2021 1:20:56 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 32-bit IEEE floating-point divide
19 | //
20 | // Entry:
21 | // r0 = num
22 | // r1 = den
23 | // Exit:
24 | // r0 = num / den
25 | //
26 | // The calculation will use Newton-Raphson iteration on reciprocal.
27 | //
28 | // The "p" notation used throughout is the position of the binary point
29 | // (p16 means there are 16 bits to the right).
30 | //
31 | // The initial guess will be calculated by subtracting the upper mantissa
32 | // bits from the constant 2.92, or 0xBB. The value was determined as follows:
33 | //
34 | // Define d as denominator and x as the guess for 1/d. We want to find K
35 | // so that x = (K-d)/2 has minimal error over d in [1, 2), error defined as
36 | //
37 | // e = |1 - d*x| = |1 - d*(K-d)/2| = |1 - K*d/2 + d^2/2|.
38 | //
39 | // A max is found by setting the derivative to zero:
40 | //
41 | // e' = -K/2 + d = 0, thus d = K/2 at max error. The error at this point is:
42 | // e(max) = e(K/2) = 1 - K^2/4 + K^2/8 = 1 - K^2/8
43 | //
44 | // The endpoints can also provide a max error:
45 | //
46 | // e(1) = 1 - K/2 + 1/2 = 1.5 - K/2 = (3 - K)/2
47 | // e(2) = 1 - K + 4/2 = 3 - K
48 | //
49 | // We'll use e(2) since e(1) has half the error. So we find where the two
50 | // errors are equal and opposite, e(max) = -e(2):
51 | //
52 | // 1 - K^2/8 = K - 3 => K^2/8 + K - 4 = 0
53 | //
54 | // One of whose solutions is 2.9282, which is 0x2ED9F p16.
55 | //
56 | // The max error with this constant is:
57 | //
58 | // e = 3 - K = 0.0718; or an accuracy of -log2(e) = 3.80 bits.
59 | //
60 | // Using only 8 bits to produce the constant K gives 0xBB p6. Converted
61 | // back to decimal that is 2.921875, which increases max error to 0.0781,
62 | // or an accuracy of 3.68 bits.
63 | //
64 | // Note that the guess could be improved by restricting it to >= 0.5
65 | // and calculating a different corresponding K. However, there is
66 | // sufficient accuracy without the added steps.
67 |
68 |
69 | .set GuessBase, 0xBB
70 |
71 |
72 | FUNC_START __fdiv, __aeabi_fdiv
73 | .ifdef NO_DENORMALS
74 | push {r4, r5} // this build makes no calls, so lr is not saved
75 | .else
76 | push {r4-r6, lr} // r6 holds a copy of num; lr saved for bl __clz_denormal
77 | .endif
78 | // compute final sign
79 | movs r5, #1
80 | lsls r5, #31 // sign position
81 | movs r3, r1
82 | eors r3, r0
83 | ands r3, r5 // final sign
84 | mov r12, r3 // keep final sign in r12 until SetSign
85 |
86 | // clear signs
87 | bics r0, r5
88 | bics r1, r5
89 |
90 | lsrs r2, r0, #MANT_BITS32 // num exponent
91 | beq NumZeroExp
92 | NumNormalized:
93 | lsrs r3, r1, #MANT_BITS32 // den exponent
94 | beq DenZeroExp
95 | DenNormalized:
96 |
97 | // r0 = num
98 | // r1 = den
99 | // r2 = num exponent
100 | // r3 = den exponent
101 | // r5 = 0x80000000 (sign bit position)
102 | // r12 = final sign
103 |
104 | cmp r3, #EXP_SPECIAL32
105 | beq DenSpclExp
106 | cmp r2, #EXP_SPECIAL32
107 | beq SetSign // return num if special
108 |
109 | subs r2, r3 // compute exponent, unbiased
110 | adds r2, #EXP_BIAS32 - 2 // r2 = biased exponent - 1
111 |
112 | // Clear exponent, set implied bit
113 | lsls r0, #EXP_BITS32
114 | lsls r1, #EXP_BITS32
115 | orrs r0, r5
116 | orrs r1, r5
117 |
118 | // Compute guess for 1/den = (K - den)/2. K is nearly 3.
119 | // den in [1, 2). The "p" notation is the position of the
120 | // binary point (p16 means there are 16 bits to the right).
121 | lsrs r3, r1, #15 // den p16
122 | movs r4, #GuessBase
123 | lsls r4, #25 - 15 // MSB one bit left of den
124 | subs r4, r3 // x p17, < 1
125 | lsrs r4, #1 // x p16
126 |
127 | // Use Newton-Raphson iteration for refining the guess for 1/den.
128 | // Using this method, the error is squared (number of bits doubled)
129 | // on each iteration, and will require two iterations to get 15 bits.
130 | // (There is another method that converges faster [cube error/triple
131 | // the bits], but it doesn't help because 1 iteration wouldn't be
132 | // enough and it's more work.) One iteration (d = den, x = 1/den guess):
133 | //
134 | // next = x - x*(d*x - 1)
135 | //
136 | // d*x is very close to 1. We calculate it p32 so the leading 1,
137 | // if present, just drops off. If it is less than 1, we treat the
138 | // result as a signed (now negative) number, also effectively
139 | // subtracting 1.
140 |
141 | // r0 = num p31
142 | // r1 = den p31
143 | // r2 = exponent - 1
144 | // r3 = den p16
145 | // r4 = x p16
146 | muls r3, r4 // d*x - 1 p32, call it e (error)
147 | asrs r3, #16 // e p16
148 | muls r3, r4 // x*e p32
149 | asrs r3, #16 // x*e p16
150 | subs r4, r3 // x - x*e p16
151 |
152 | // round two, gets us to 15 bits
153 | lsrs r3, r1, #15 // den p16
154 | muls r3, r4 // d*x - 1 p32, e
155 | asrs r3, #16 // e p16
156 | muls r3, r4 // x*e p32
157 | asrs r3, #16 // x*e p16
158 | subs r4, r3 // x - x*e p16
159 |
160 | // compute quotient
161 | // r0 = num p31
162 | // r1 = den p31
163 | // r2 = exponent - 1
164 | // r4 = x p16 (reciprocal estimate)
165 | //
166 | // q0 = x*num, rough quotient (14+ bits)
167 | // rem = num - q0*den, exact remainder from q0
168 | // q1 = x*rem, quotient from remainder (approx rem/den)
169 | // quo = q0 + q1
170 | lsrs r3, r0, #16 // num p15
171 | muls r3, r4 // num*x = approx quotient q0 p31
172 | lsrs r3, #16 // q0 p15
173 | lsrs r5, r1, #8 // den p23
174 | muls r5, r3 // den*q0 p38
175 | .ifndef NO_DENORMALS
176 | movs r6, r0 // save num p31 for denormal case
177 | .endif
178 | lsls r0, #7 // num p38
179 | subs r5, r0, r5 // num - den*q0 = rem p38
180 | asrs r5, #10 // rem p28
181 | muls r5, r4 // rem*x = q1 p44
182 | asrs r5, #14 // q1 p30
183 | lsls r3, #15 // q0 p30
184 | adds r3, r5 // q = q0 + q1 p30
185 |
186 | // Result quotient is very accurate, but rounding is tricky because
187 | // the error, no matter how small, can straddle a rounding boundary.
188 | // First check to see if it does by looking at the rounding bit and
189 | // the guard bit below it:
190 | //
191 | // 00 - never round up
192 | // 01 - maybe round up
193 | // 10 - maybe round up
194 | // 11 - always round up
195 | //
196 | // This is tested by adding 1 to the guard bit. This will leave the
197 | // rounding and guard bits:
198 | //
199 | // 01 - never round up
200 | // 10 - maybe round up
201 | // 11 - maybe round up
202 | // 00 - already rounded up
203 | //
204 | // So if the round bit ends up 1, we need to calculate the final
205 | // remainder for rounding.
206 | //
207 | // The positions of these bits depends on whether the quotient
208 | // came out normalized.
209 |
210 | // r0 = num p38
211 | // r1 = den p31
212 | // r2 = exponent - 1
213 | // r3 = quo p30
214 | // r4 = x p16
215 | // r6 = num p31 if denormal build
216 | lsls r5, r3, #2 // Normalized?
217 | bcs Normalized
218 | cmp r2, #EXP_SPECIAL32 - 1
219 | bhs BigExpNorm // catches exp < 0 too
220 | // Set up to compute remainder for rounding
221 | adds r3, #0x10 // add to guard bit
222 | lsrs r5, r3, #6 // quo p23 normalized
223 | bcc Aligned // Not near rounding boundary
224 | lsrs r3, #5 // quo p25
225 | lsls r0, #10 // num p48
226 | b Remainder
227 |
228 | Normalized:
229 | adds r2, #1 // bump exponent
230 | cmp r2, #EXP_SPECIAL32 - 1
231 | bhs BigExp // catches exp < 0 too
232 | // Set up to compute remainder for rounding
233 | adds r3, #0x20 // add to guard bit
234 | lsrs r5, r3, #7 // quo p23
235 | bcc Aligned // Not near rounding boundary
236 | lsrs r3, #6 // quo p24
237 | lsls r0, #9 // num p47
238 | Remainder:
239 | // rem = num - quo*den
240 | // If rem >= den / 2, then round up.
241 | // Including the rounding bit in quo, which is 1, we're computing
242 | // num - (quo + 0.5)*den = rem - den / 2, so a non-negative result
243 | // means round up.
244 | lsrs r1, #8 // den p23
245 | muls r1, r3 // den*quo p47/48
246 | subs r0, r1 // remainder p47/48
247 | bmi Aligned
248 | RoundUp:
249 | // If the mantissa is all ones, this will round up into the exponent
250 | // field, incrementing it correctly. If that in turn becomes the max
251 | // exponent, it will be correctly formatted as infinity.
252 | adds r5, #1 // round up
253 | Aligned:
254 | lsls r2, #MANT_BITS32
255 | AddExp:
256 | adds r0, r2, r5 // exponent field + mantissa (implied bit bumps exponent by 1)
257 | SetSign:
258 | add r0, r12 // combine sign bit
259 | .ifdef NO_DENORMALS
260 | pop {r4, r5}
261 | bx lr
262 | .else
263 | pop {r4-r6, pc}
264 | .endif
265 |
266 | DenSpclExp:
267 | // r0 = num
268 | // r1 = den
269 | // r2 = num exponent
270 | // r3 = den exponent
271 | // r5 = 0x80000000 (sign bit position)
272 | // r12 = final sign
273 | //
274 | // mantissa == 0?
275 | lsls r4, r1, #(EXP_BITS32 + 1)
276 | bne ReturnDen // den is NAN, return it
277 | // Den is Infinity
278 | // if (expNum == EXP_SPECIAL)
279 | cmp r2, #EXP_SPECIAL32
280 | bne ZeroResult // zero if den is infinity, num normal
281 | ReturnNan:
282 | ldr r0, =#NAN32 // num is infinity or NAN
283 | b SetSign
284 |
285 | ReturnDen:
286 | movs r0, r1
287 | b SetSign
288 |
289 | .ifdef NO_DENORMALS
290 |
291 | BigExpNorm:
292 | // r0 = num p38
293 | // r1 = den p31
294 | // r2 = exponent
295 | // r3 = quo p29
296 | // r4 = x p16
297 | bge RetInfinity
298 | lsls r3, #1 // quo p30
299 | b RoundChk
300 |
301 | BigExp:
302 | // r0 = num p38
303 | // r1 = den p31
304 | // r2 = exponent
305 | // r3 = quo p30
306 | // r4 = x p16
307 | bge RetInfinity
308 | RoundChk:
309 | // See if it could round up
310 | adds r2, #1 // was exponent -1?
311 | bne ZeroResult
312 | // Try rounding it up
313 | adds r3, #0x80 // treat LSB as rounding bit
314 | bpl ZeroResult
315 | lsrs r0, r3, #8 // quo p23
316 | b SetSign
317 |
318 | NumZeroExp:
319 | lsrs r3, r1, #MANT_BITS32 // den exponent
320 | beq ReturnNan // 0/0, return NAN
321 | cmp r3, #EXP_SPECIAL32
322 | bne ZeroResult
323 | lsls r4, r1, #(EXP_BITS32 + 1) // is den NAN?
324 | bne ReturnDen // yes, return the NAN
325 | ZeroResult:
326 | movs r0, #0
327 | b SetSign
328 |
329 | DenZeroExp:
330 | cmp r2, #EXP_SPECIAL32 // check num exponent
331 | beq SetSign // Return whatever num is, Infinity or NAN
332 | RetInfinity:
333 | // Build infinity
334 | movs r0, #EXP_SPECIAL32
335 | lsls r0, #MANT_BITS32
336 | b SetSign
337 |
338 | .else // NO_DENORMALS
339 |
340 | BigExp:
341 | // r1 = den p31
342 | // r2 = exponent
343 | // r3 = quo p30
344 | // r4 = x p16
345 | // r6 = num p31
346 | blt DenormRound
347 | b RetInfinity
348 |
349 | BigExpNorm:
350 | // r1 = den p31
351 | // r2 = exponent
352 | // r3 = quo p29
353 | // r4 = x p16
354 | // r6 = num p31
355 | bge RetInfinity
356 | lsls r3, #1 // quo p30
357 | lsls r6, #1 // num p32
358 | DenormRound:
359 | // Set up to compute remainder for rounding
360 | // r1 = den p31
361 | // r2 = exponent
362 | // r3 = quo p30
363 | // r4 = x p16
364 | // r6 = num p31
365 | negs r0, r2
366 | lsrs r3, r0
367 | adds r3, #0x20 // add to guard bit
368 | lsrs r5, r3, #7 // shift off rounding bit
369 | bcc DenormNoRound
370 | // Calculate the remainder, which requires adjustments to
371 | // the binary point of the num and/or den to retain precision.
372 | lsrs r3, #6 // clear off below rounding bit
373 | // In the mainline remainder calculation, we shift num
374 | // left to p47. Here we reduce that by the amount of
375 | // denormalization, but not below zero.
376 | adds r2, #16
377 | bmi AdjDen
378 | lsls r6, r2 // adjust num binary point
379 | lsrs r1, #8 // den p23
380 | b CalcRem
381 | AdjDen:
382 | // So we're leaving num at p31. In the mainline calc, we
383 | // shift den right 8 bits to p23 (keeping all significant
384 | // bits). But we've shifted so much off the quotient we
385 | // need to reduce that to leave quo * den at p48.
386 | adds r2, #8
387 | bmi DenormNoRound // underflow, leave as zero
388 | lsrs r1, r2 // adjust den binary point
389 | CalcRem:
390 | muls r1, r3 // den*quo
391 | movs r2, #0 // exponent will be zero
392 | subs r6, r1 // remainder
393 | bmi AddExp
394 | bne RoundUp
395 | // Remainder was zero, round even
396 | lsrs r1, r5, #1 // move LSB into CY
397 | adcs r5, r2 // r2 == 0, so add CY
398 | b AddExp
399 |
400 | DenormNoRound:
401 | movs r0, r5
402 | b SetSign
403 |
404 | DenZeroExp:
405 | // r0 = num, not zero
406 | // r1 = den, sign cleared
407 | // r2 = num exponent
408 | // r3 = den exponent
409 | // r5 = 0x80000000 (sign bit position)
410 | // r12 = final sign
411 | lsls r4, r1, #1
412 | beq DenIsZero
413 | // den is denormal, so normalize it
414 |
415 | // __clz_denormal uses tailored calling convention
416 | // r4 = input to count leading zeros
417 | // r0 - r3, r7 preserved
418 | // r5, r6 trashed
419 | bl __clz_denormal // Get leading zeros in den
420 | subs r4, #EXP_BITS32
421 | negs r3, r4 // den exponent
422 | adds r4, #1
423 | lsls r1, r4
424 | // restore r5
425 | movs r5, #1
426 | lsls r5, #31 // sign position
427 | b DenNormalized
428 |
429 | NumZeroExp:
430 | // r0 = num
431 | // r1 = den
432 | // r2 = num exponent
433 | // r5 = 0x80000000 (sign bit position)
434 | // r12 = final sign
435 | lsls r4, r0, #1
436 | beq NumIsZero
437 | // num is denormal, so normalize it
438 |
439 | // __clz_denormal uses tailored calling convention
440 | // r4 = input to count leading zeros
441 | // r0 - r3, r7 preserved
442 | // r5, r6 trashed
443 | bl __clz_denormal // Get leading zeros in num
444 | subs r4, #EXP_BITS32
445 | negs r2, r4 // num exponent
446 | adds r4, #1
447 | lsls r0, r4
448 | // restore r5
449 | movs r5, #1
450 | lsls r5, #31 // sign position
451 | b NumNormalized
452 |
453 | DenIsZero:
454 | cmp r2, #EXP_SPECIAL32 // check num exponent
455 | beq SetSign // Return whatever num is, Infinity or NAN
456 | RetInfinity:
457 | // Build infinity
458 | movs r0, #EXP_SPECIAL32
459 | lsls r0, #MANT_BITS32
460 | b SetSign
461 |
462 | NumIsZero:
463 | lsrs r3, r1, #MANT_BITS32 // den exponent
464 | bne DenNotZero
465 | cmp r1, #0
466 | beq ReturnNan // 0/0, return NAN
467 | DenNotZero:
468 | cmp r3, #EXP_SPECIAL32
469 | bne ZeroResult
470 | // is den NAN?
471 | lsls r4, r1, #(EXP_BITS32 + 1)
472 | bne ReturnDen
473 | ZeroResult:
474 | movs r0, #0
475 | b SetSign
476 |
477 | .endif // else NO_DENORMALS
478 |
479 | .endfunc
480 |
--------------------------------------------------------------------------------
/src/float/fmul.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * fmul.s
4 | *
5 | * Created: 6/23/2021 5:37:15 PM
6 | * Author: Tim Paterson
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 32-bit IEEE floating-point multiply
19 | //
20 | // Entry:
21 | // r0 = op1
22 | // r1 = op2
23 | // Exit:
24 | // r0 = op1 * op2
25 |
26 | FUNC_START __fmul, __aeabi_fmul
27 | SAVE_REG r4-r6
28 | // compute final sign
29 | movs r5, #1
30 | lsls r5, #31 // sign position
31 | movs r3, r1
32 | eors r3, r0
33 | ands r3, r5 // final sign
34 | mov r12, r3 // keep final sign in r12 until SetSign
35 |
36 | lsls r3, r1, #1 // clear op2 sign
37 | lsls r2, r0, #1 // clear op1 sign
38 | lsrs r2, #MANT_BITS32 + 1 // op1 exponent
39 | beq Op1ZeroExp
40 | Op1Normalized:
41 | lsrs r3, #MANT_BITS32 + 1 // op2 exponent
42 | beq Op2ZeroExp
43 | Op2Normalized:
44 |
45 | // r0 = op1
46 | // r1 = op2
47 | // r2 = op1 exponent
48 | // r3 = op2 exponent
49 | // r5 = 0x80000000 (sign bit position)
50 | // r12 = final sign
51 |
52 | cmp r3, #EXP_SPECIAL32
53 | beq Op2SpclExp
54 | cmp r2, #EXP_SPECIAL32
55 | beq Op1SpclExp
56 |
57 | adds r2, r3 // compute exponent
58 |
59 | // Clear exponent, set implied bit
60 | lsls r0, #EXP_BITS32
61 | lsls r1, #EXP_BITS32
62 | orrs r0, r5
63 | orrs r1, r5
64 | lsrs r0, #8
65 |
66 | // Multiply and accumulate partial products
67 | // r0 = op1 right justified
68 | // r1 = op2 left justified
69 | // r2 = final exponent, double biased
70 | // r12 = final sign
71 | lsls r5, r1, #16
72 | lsrs r5, #24 // low 8 bits of op2
73 | muls r5, r0 // low 32-bit product
74 | lsls r3, r5, #16 // keep low 16 bits
75 | lsrs r5, #16 // align to position
76 | // second partial product
77 | lsls r4, r1, #8
78 | lsrs r4, #24 // middle 8 bits of op2
79 | muls r4, r0 // mid 32-bit product
80 | lsls r6, r4, #24 // keep low 8 bits
81 | lsrs r4, #8 // align to position
82 | adds r3, r6
83 | adcs r5, r4 // accumulate partial products
84 | // third partial product
85 | lsrs r1, #24 // top 8 bits of op2
86 | muls r0, r1 // top product
87 | adds r0, r5 // accumulate
88 | bmi 1f // is it normalized?
89 | lsls r0, #1 // normalize
90 | subs r2, #1 // adjust exponent
91 | 1:
92 | subs r2, #EXP_BIAS32 // r2 = biased exponent - 1
93 | cmp r2, #EXP_SPECIAL32 - 1
94 | bhs BigExp
95 |
96 | // r0 = result, left justified
97 | // r2 = result exponent - 1
98 | // r3 = sticky bits
99 | // r12 = final sign
100 | // check low bits
101 | lsls r4, r0, #25 // look at everything below rounding bit
102 | Align:
103 | lsrs r0, #8 // normal alignment, rounding bit to CY
104 | bcc Aligned // if CY not set, no rounding needed
105 | orrs r3, r4 // any sticky bits?
106 | bne RoundUp
107 | lsls r3, r0, #31 // check LSB for even
108 | bpl Aligned // if even, leave it
109 | RoundUp:
110 | adds r0, #1 // add to rounding bit
111 | Aligned:
112 | lsls r2, #MANT_BITS32
113 | adds r0, r2 // implied bit in r0 bumps "exponent - 1" to the true exponent
114 | SetSign:
115 | add r0, r12 // combine sign bit
116 | EXIT r4-r6
117 |
118 | Op2SpclExp:
119 | // r0 = op1
120 | // r1 = op2
121 | // r2 = op1 exponent
122 | // r3 = op2 exponent
123 | // r5 = 0x80000000 (sign bit position)
124 | // r12 = final sign
125 | //
126 | // mantissa == 0?
127 | lsls r6, r1, #(EXP_BITS32 + 1)
128 | bne ReturnOp2 // op2 is NAN, return it
129 | // Op2 is Infinity
130 | // if (expOp1 == EXP_SPECIAL)
131 | cmp r2, #EXP_SPECIAL32
132 | beq Op1SpclExp
133 | ReturnOp2:
134 | movs r0, r1
135 | Op1SpclExp:
136 | // r0 = value to return (op1, or op2 when entered via ReturnOp2)
137 | // r1 = op2
138 | // r2 = op1 exponent
139 | // r3 = op2 exponent
140 | // r5 = 0x80000000 (sign bit position)
141 | // r12 = final sign
142 | bics r0, r5 // clear existing sign
143 | // Return whatever r0 is, Infinity or NAN
144 | b SetSign
145 |
146 | BigExp:
147 | bge RetInfinity
148 | .ifdef NO_DENORMALS
149 | // See if it could round up
150 | adds r2, #1 // was exponent -1?
151 | bne ZeroResult
152 | lsrs r0, #1 // make room if rounds up
153 | adds r0, #0x80 // treat LSB as rounding bit
154 | bmi Align
155 | b ZeroResult
156 | .else
157 | // r0 = result mantissa left justified
158 | // r2 = result exponent - 1
159 | // r3 = sticky bits
160 | // r12 = final sign
161 | bl __fdenormal_result
162 | b SetSign
163 | .endif
164 |
165 | RetInfinity:
166 | // Build infinity
167 | movs r0, #EXP_SPECIAL32
168 | lsls r0, #MANT_BITS32
169 | b SetSign
170 |
171 | .ifdef NO_DENORMALS
172 |
173 | Op1ZeroExp:
174 | lsrs r2, r3, #MANT_BITS32 + 1// op2 exponent
175 | Op2ZeroExp:
176 | // zero * infinity or zero * NAN?
177 | cmp r2, #EXP_SPECIAL32 // check exponent of the other operand
178 | beq ReturnNan
179 | ZeroResult:
180 | movs r0, #0
181 | b SetSign
182 |
183 | ReturnNan:
184 | ldr r0, =#NAN32
185 | b SetSign
186 |
187 | .else
188 |
189 | Op2ChkZero:
190 | // op1 is special
191 | lsls r1, #1 // clear sign bit
192 | bne Op1SpclExp // not zero, return op1
193 | ReturnNan:
194 | ldr r0, =#NAN32 // 0*infinity or NAN, return NAN
195 | b SetSign
196 |
197 | Op1ChkZero:
198 | // op2 is special
199 | lsls r0, #1 // clear sign bit
200 | bne ReturnOp2 // not zero, return op2
201 | b ReturnNan
202 |
203 | Op2ZeroExp:
204 | // r0 = op1
205 | // r1 = op2
206 | // r2 = op1 exponent
207 | // r3 = op2 exponent
208 | // r5 = 0x80000000 (sign bit position)
209 | // r12 = final sign
210 | cmp r2, #EXP_SPECIAL32 // check op1 exponent
211 | beq Op2ChkZero
212 | lsls r4, r1, #1 // clear existing sign
213 | beq ZeroResult
214 | // op2 is denormal, so normalize it
215 |
216 | // __clz_denormal uses tailored calling convention
217 | // r4 = input to count leading zeros
218 | // r0 - r3, r7 preserved
219 | // r5, r6 trashed
220 | bl __clz_denormal // Get leading zeros in op2
221 | subs r4, #EXP_BITS32
222 | negs r3, r4 // op2 exponent
223 | adds r4, #1
224 | lsls r1, r4
225 | // restore r5
226 | movs r5, #1
227 | lsls r5, #31 // sign position
228 | b Op2Normalized
229 |
230 | ZeroResult:
231 | movs r0, #0
232 | b SetSign
233 |
234 | Op1ZeroExp:
235 | // r0 = op1
236 | // r1 = op2
237 | // r2 = op1 exponent (zero)
238 | // r3 = op2 << 1 (exponent not yet extracted)
239 | // r5 = 0x80000000 (sign bit position)
240 | // r12 = final sign
241 | lsrs r4, r3, #MANT_BITS32 + 1 // op2 exponent
242 | cmp r4, #EXP_SPECIAL32
243 | beq Op1ChkZero
244 | lsls r4, r0, #1 // clear existing sign
245 | beq ZeroResult
246 | // op1 is denormal, so normalize it
247 |
248 | // __clz_denormal uses tailored calling convention
249 | // r4 = input to count leading zeros
250 | // r0 - r3, r7 preserved
251 | // r5, r6 trashed
252 | bl __clz_denormal // Get leading zeros in op1
253 | subs r4, #EXP_BITS32
254 | negs r2, r4 // op1 exponent
255 | adds r4, #1
256 | lsls r0, r4
257 | // restore r5
258 | movs r5, #1
259 | lsls r5, #31 // sign position
260 | b Op1Normalized
261 |
262 | .endif
263 |
264 | .endfunc
265 |
--------------------------------------------------------------------------------
/src/float/frsub.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * frsub.s
4 | *
5 | * Created: 9/17/2021
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // Reverse subtract for 32-bit IEEE floating point
19 | //
20 | // Entry:
21 | // r0 = op1 (the value being subtracted)
22 | // r1 = op2 (the value subtracted from)
23 | // Exit:
24 | // r0 = op2 - op1, computed as (-op1) + op2 by tail-calling __fadd
25 | 
26 | FUNC_START __frsub, __aeabi_frsub
27 | movs r2, #0x80
28 | lsls r2, #24 // r2 = 0x80000000, the sign bit
29 | eors r0, r2 // negate op1
30 | b __fadd // tail call, __fadd returns to our caller
31 | 
32 | .endfunc
33 |
--------------------------------------------------------------------------------
/src/float/sincosf.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * sincosf.s
4 | *
5 | * Created: 6/22/2023 9:57:54 AM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 32-bit floating-point sine and cosine
19 | //
20 | // Entry:
21 | // r0 = input angle in radians
22 | // r1 = pointer to location to store sin()
23 | // r2 = pointer to location to store cos()
24 | // Exit:
25 | // None.
26 | //
27 | // This simply calls __sinfM0, which returns sinf() in r0 and cosf() in r1.
28 | 
29 | FUNC_START __sincosfM0, sincosf
30 | push {r1, r2, lr} // keep both result pointers and the return address across the call
31 | bl __sinfM0
32 | pop {r2, r3} // r2 = sin() pointer, r3 = cos() pointer
33 | str r0, [r2] // store sine first
34 | str r1, [r3] // then cosine
35 | pop {pc}
36 | 
37 | .endfunc
38 |
--------------------------------------------------------------------------------
/src/float/sinf.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * sinf.s
4 | *
5 | * Created: 6/6/2023 5:46:38 PM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 | .include "trigf.inc"
17 |
18 |
19 | // 32-bit floating-point sine
20 | //
21 | // Entry:
22 | // r0 = input angle in radians
23 | // Exit:
24 | // r0 = sine
25 | // r1 = cosine
26 | //
27 | // The calculation will use CORDIC rotation on an input range reduced to
28 | // [0, pi/4], which produces both the sine and cosine. Fixed-point math
29 | // is used, which loses relative accuracy for small input. To counter
30 | // this, there is a separate calculation for small angles.
31 | //
32 | // If the angle is very small, we can just return sin(x) = x and cos(x) = 1.
33 | // The series for cos(x) starts with
34 | // cos(x) = 1 - x^2/2 + ...
35 | // So we return cos(x) = 1 (and sin(x) = x) if x^2/2 < 2^-25, or x < 2^-12.
36 | //
37 | // The "p" notation used throughout is the position of the binary point
38 | // (p16 means there are 16 bits to the right).
39 |
40 |
41 | .set FINE_REDUCTION_MAX_BITS, 8 // bit width of the largest octant count handled without BigReduction
42 | .set FINE_REDUCTION_MAX, (1 << FINE_REDUCTION_MAX_BITS) - 1 // max multiples of pi/4 for lossless reduction
43 | .set PI_HI, PI_MANTISSA >> FINE_REDUCTION_MAX_BITS // high piece of the pi/4 mantissa
44 | LSR PI_MID32, PI_MANTISSA_LO, PI_MANTISSA, 2 * FINE_REDUCTION_MAX_BITS// shift into position
45 | .set PI_MID, PI_MID32 & ((1 << (32 - FINE_REDUCTION_MAX_BITS)) - 1) // middle piece, 32 - FINE_REDUCTION_MAX_BITS bits
46 | .set PI_LO, (PI_MANTISSA_LO >> FINE_REDUCTION_MAX_BITS) & ((1 << FINE_REDUCTION_MAX_BITS) - 1) // low piece, FINE_REDUCTION_MAX_BITS bits
47 | .set SMALL_ANGLE_SHIFT, 7 // shift in y for small angles
48 | .set SHIFT_START, 2 // first rotation is atan(2^-2)
49 | .set SHIFT_END, SINE_ATAN_TABLE_ENTRIES - 1 + SHIFT_START - 1 // RotLoop stops when its iteration count reaches this
50 | .set SMALL_SHIFT_START, 2 // first iteration count used by SmallRotLoop
51 | .set SMALL_SHIFT_END, SMALL_SINE_ATAN_TABLE_ENTRIES - 1 + SMALL_SHIFT_START - 1 // SmallRotLoop stops at this iteration count
52 | .set SCALE, 0xDBD95B20 // product of cosines, p32
53 | .set SMALL_SCALE, 0xFFFF5560 // product of cosines in small table, p38
54 | .ifdef WIDE_TRIG_RANGE
55 | .set MAX_VALID_EXP, 14 // less than 2^15 radians allowed
56 | .else
57 | .set MAX_VALID_EXP, 7 // less than 2^8 radians allowed
58 | .endif
59 |
60 |
61 | .func __sinfM0
62 | 
63 | SpecialExp:
64 | // If argument is NAN, return it for both sin() and cos().
65 | // If it's infinity, make a new NAN and return it for both.
66 | lsls r2, r0, #(EXP_BITS32 + 1) // mantissa == 0?
67 | bne ReturnOp // input is NAN, return it
68 | .ifndef WIDE_TRIG_RANGE
69 | BigReduction: // without WIDE_TRIG_RANGE, more than FINE_REDUCTION_MAX octants is answered with NAN
70 | .endif
71 | ReturnNan:
72 | ldr r0, =#NAN32
73 | ReturnOp:
74 | movs r1, r0 // cosine result is the same value as the sine result
75 | pop {r4-r7, pc} // undo the push done at the __sinfM0 entry point and return
76 |
77 |
78 | .ifdef WIDE_TRIG_RANGE
79 | 
80 | BigReduction:
81 | // r1 = unbiased exponent + 1, >= 0, <= 15
82 | // r2 = input mantissa, MSB set
83 | // r3 = floor(input/(pi/4)) p0
84 | // r4 = input >> 16
85 | // r5 = 31 - r1
86 | // r6 = 4/pi * input p31
87 | // r7 = input sign
88 | //
89 | // Reduction is > FINE_REDUCTION_MAX * pi/4. Calculate it
90 | // more exactly with 32x24 multiply of input*(4/pi).
91 | ldr r3, =#ONE_OVER_PI_MANTISSA
92 | uxth r0, r3 // low 16 bits of 4/pi
93 | muls r0, r4 // 4/pi lo16 * input hi16
94 | lsrs r4, r2, #8
95 | uxtb r4, r4 // low 8 bits of the 24-bit input mantissa
96 | lsrs r3, #8 // high 24 bits of 4/pi
97 | muls r3, r4 // 4/pi hi24 * input lo8
98 | // sum partial products
99 | adds r3, r0
100 | bcc SumIt // no carry out of the 32-bit sum
101 | MOV_IMM r4, 0x10000 // the carry is worth 0x10000 once the sum is shifted right 16
102 | adds r6, r4
103 | SumIt:
104 | lsrs r3, #16
105 | adds r3, r6 // complete input * 4/pi = input/(pi/4)
106 | lsrs r3, r5 // floor(input/(pi/4)) p0
107 | 
108 | // now create that exact multiple of pi/4
109 | // use a 16x64 multiply to get exact result
110 | mov r12, r7 // park input sign, r7 is used as a temp below
111 | ldr r0, =#PI_MANTISSA_LO
112 | uxth r4, r0 // low half pi lo
113 | muls r4, r3
114 | lsls r6, r4, #16 // extended result
115 | lsrs r4, #16 // in position
116 | lsrs r0, #16 // high half pi lo
117 | muls r0, r3
118 | adds r4, r0 // sum bottom two partial products
119 | ldr r0, =#PI_MANTISSA
120 | uxth r7, r0 // low half pi hi
121 | muls r7, r3
122 | lsrs r0, #16 // high half of pi hi
123 | muls r0, r3
124 | lsrs r5, r7, #16 // align for sum
125 | lsls r7, #16
126 | adds r4, r7
127 | adcs r0, r5 // carry from the middle word goes into the top word
128 | // r0:r4:r6 = multiple of pi/4
129 | // set up normalization shift count
130 | movs r5, #16
131 | subs r5, r1
132 | lsl96short r6, r4, r0, r5, r7 // 64-bit left shift to normalize
133 | mov r7, r12 // restore input sign
134 | // r0:r4 = multiple of pi/4, p32
135 | // r1 = unbiased exponent + 1, >= 0
136 | // r2 = input mantissa, MSB set
137 | // r3 = quotient
138 | // r7 = input sign, 0 or -1
139 | b HaveReductionProduct
140 | 
141 | .endif // WIDE_TRIG_RANGE
142 |
143 |
144 | ENTRY_POINT __sinfM0, sinf
145 | push {r4-r7, lr}
146 | lsls r1, r0, #1 // clear input sign
147 | lsrs r1, #MANT_BITS32 + 1 // isolate exponent
148 | cmp r1, #EXP_SPECIAL32
149 | beq SpecialExp // infinity or NAN
150 | asrs r7, r0, #31 // save input sign (0 or -1)
151 | movs r2, #1
152 | lsls r2, #MANT_BITS32 // implied bit position
153 | orrs r2, r0
154 | lsls r2, #EXP_BITS32 // isolate mantissa
155 | movs r4, #0 // extend mantissa
156 | subs r1, EXP_BIAS32 - 1 // r1 = unbiased exp. + 1
157 | blt FullyReduced // already < 0.5 radians?
158 | cmp r1, #MAX_VALID_EXP + 1
159 | bgt ReturnNan // if immense, return NAN to say we can't do it
160 | 
161 | // r1 = unbiased exponent + 1, >= 0, <= 15
162 | // r2 = input mantissa, MSB set
163 | // r7 = input sign
164 | //
165 | // Calculate the number of multiples of pi/4
166 | ldr r3, =#ONE_OVER_PI_MANTISSA >> 16 // 4/pi p15
167 | lsrs r4, r2, #16 // input p16
168 | muls r3, r4 // p31
169 | movs r5, #31
170 | subs r5, r1
171 | .ifdef WIDE_TRIG_RANGE
172 | movs r6, r3 // keep the unshifted product for BigReduction
173 | .endif
174 | lsrs r3, r5 // floor(input/(pi/4)) p0
175 | cmp r3, #FINE_REDUCTION_MAX
176 | bhi BigReduction // too many octants for the fine reduction below
177 | ldr r0, =#PI_HI
178 | muls r0, r3
179 | ldr r4, =#PI_MID
180 | muls r4, r3 // partial products overlap by FINE_REDUCTION_MAX_BITS
181 | lsrs r6, r4, #32 - FINE_REDUCTION_MAX_BITS
182 | adds r0, r6
183 | lsls r4, #FINE_REDUCTION_MAX_BITS
184 | ldr r6, =#PI_LO
185 | muls r6, r3
186 | adds r4, r6 // r0:r4 = pi/4 reduction
187 | bcc 1f
188 | adds r0, #1 // propagate carry
189 | 1:
190 | // r0:r4 = multiple of pi/4
191 | // r1 = unbiased exponent + 1, >= 0
192 | // r2 = input mantissa, MSB set
193 | // r3 = quotient
194 | // r5 = 31 - r1
195 | // r7 = input sign, 0 or -1
196 | subs r5, #31 - FINE_REDUCTION_MAX_BITS // = FINE_REDUCTION_MAX_BITS - r1
197 | lsl64short r4, r0, r5, r6 // 64-bit left shift to normalize
198 | HaveReductionProduct:
199 | // remove pi/4 multiples, exactly
200 | negs r4, r4 // low word: 0 - r4 (input mantissa is extended with zeros)
201 | sbcs r2, r0 // high word with borrow
202 | // r1 = unbiased exponent + 1, >= 0
203 | // r2:r4 = input mod pi/4
204 | // r3 = quotient (octant)
205 | // r7 = input sign, 0 or -1
206 | lsl64short r4, r2, r1, r6
207 | ldr r0, =#PI_MANTISSA // MSB set, p32 for pi/4
208 | ldr r5, =#PI_MANTISSA_LO
209 | // Our calculation of the number of pi/4 multiples could be short 1.
210 | //
211 | // r0:r5 = pi/4 mantissa p32 (r5 is the low extension word)
212 | // r2:r4 = reduced input p32
213 | // r3 = tentative octant (zero-based)
214 | // r7 = input sign, 0 or -1
215 | cmp r2, r0
216 | blo CheckOctant // reduced input is below pi/4, octant stands
217 | adds r3, #1 // one more octant
218 | subs r4, r5
219 | sbcs r2, r0 // r2:r4 -= pi/4
220 | CheckOctant:
221 | // if odd-numbered octant, subtract from pi/4
222 | lsrs r6, r3, #1 // octant bit 0 to CY
223 | bcc SaveOctant
224 | subs r4, r5, r4
225 | sbcs r0, r2
226 | movs r2, r0 // r2:r4 = pi/4 - reduced input
227 | SaveOctant:
228 | eors r7, r3 // invert octant if negative
229 | movs r1, #0 // exponent + 1 for reduced angle
230 | FullyReduced:
231 | // Range reduction could have introduced any number of leading
232 | // zeros. See if there are enough to divert to the small angle
233 | // loop.
234 | //
235 | // r1 = unbiased exponent + 1, <= 0
236 | // r2:r4 = input mantissa
237 | // r7 = octant info
238 | mov r12, r7 // save octant info
239 | ldr r3, =#__sineAtanTable
240 | lsrs r5, r2, #32 - SMALL_ANGLE_SHIFT // any of the top SMALL_ANGLE_SHIFT bits set?
241 | beq SmallAngleUnnormal // no, at least SMALL_ANGLE_SHIFT leading zeros
242 | // input domain [0, pi/4]
243 | // r1 = unbiased exponent + 1
244 | // r2 = mantissa
245 | // r12 = octant info
246 | //
247 | // round the reduced angle in r2:r4
248 | lsls r4, #1 // top bit of the extension word to CY
249 | bcc 1f
250 | adds r2, #1
251 | 1:
252 | adds r4, r1, #SMALL_ANGLE_SHIFT - 1 // r4 = unbiased exponent + SMALL_ANGLE_SHIFT
253 | blt SmallAngle
254 | negs r1, r1 // right-shift count = -(unbiased exponent + 1)
255 | lsrs r2, r1 // mantissa p32
256 | 
257 | // The first CORDIC rotation is based on whether the input angle
258 | // is positive or negative. Since our range reduction ensures
259 | // it's positive, we can hard-code the first rotation. Also,
260 | // since input <= pi/4, we can skip over the tan() == 1 rotation.
261 | // The hard-coded rotation is tan() == 0.5 (y = x/2); the first one
262 | // done in RotLoop is tan() == 0.25, a shift of 2 bits.
263 | 
264 | ldr r1, =#SCALE
265 | lsrs r0, r1, #1 // y = x/2, the hard-coded tan() = 0.5 rotation
266 | movs r4, #SHIFT_START - 1 // we increment first thing
267 | ldmia r3!, {r5} // tan() = 0.5 rotation
268 | subs r2, r5 // remove that rotation angle from z
269 | movs r5, #0
270 | // r0 = y p32 (becomes sin)
271 | // r1 = x p32 (becomes cos)
272 | // r2 = z p32 (current error in angle)
273 | // r3 = ptr to table of angles [atan(2^-i)]
274 | // r4 = iteration i (and shift count)
275 | // r5 = extended y p64
276 | // r12 = octant info
277 | RotLoop:
278 | adds r4, #1
279 | movs r6, #32
280 | subs r6, r4
281 | movs r7, r1
282 | lsls r7, r6 // bits of x shifted out below the 32-bit word
283 | movs r6, r1
284 | lsrs r6, r4 // r6:r7 = r1 >> r4 = x * 2^-i
285 | cmp r2, #0
286 | blt TooSmall // angle error is negative, rotate the other way
287 | adds r5, r7
288 | mov r7, r0 // keep old y for the x update
289 | adcs r0, r6 // y += x * 2^-i
290 | lsrs r7, r4 // y * 2^-i
291 | subs r1, r7 // x -= y * 2^-i
292 | ldmia r3!, {r6} // next atan()
293 | subs r2, r6 // new angle
294 | cmp r4, #SHIFT_END
295 | bne RotLoop
296 | b LoopDone
297 | 
298 | TooSmall:
299 | subs r5, r7
300 | mov r7, r0 // keep old y for the x update
301 | sbcs r0, r6 // y -= x * 2^-i
302 | lsrs r7, r4 // y * 2^-i
303 | adds r1, r7 // x += y * 2^-i
304 | ldmia r3!, {r6} // next atan()
305 | adds r2, r6 // new angle
306 | cmp r4, #SHIFT_END
307 | bne RotLoop
308 | LoopDone:
309 | // r0 = y p32 (becomes sin)
310 | // r1 = x p32 (becomes cos)
311 | // r2 = z p32 (current error in angle)
312 | // r5 = extended y p64
313 | // r12 = octant info
314 | //
315 | // We have used CORDIC to get us x (cos) and y (sin) of an angle
316 | // very close to our target. The remaining error angle z is so
317 | // small that (in 24-bit precision) cos(z) = 1 and sin(z) = z.
318 | // This allows us to simplify the rotation formula:
319 | // x' = x*cos(z) - y*sin(z) => x' = x - y*z
320 | // y' = x*sin(z) + y*cos(z) => y' = x*z + y
321 | // So we can compute that last rotation in one shot.
322 | // |z| <= last table entry, 2^-13 (20 bits incl. sign, p32)
323 | // 0 < y <= sqrt(2)/2, sqrt(2)/2 <= x < 1 (angle is in [0, pi/4])
324 | 
325 | // Final computation of y for sine
326 | lsrs r4, r1, #20 // keep 12 of 32 bits, p12
327 | muls r4, r2 // high product, p44
328 | lsls r6, r1, #12 // trim off the bits we just used
329 | lsrs r6, #20 // next 12 bits, p24
330 | muls r6, r2 // low product, p56
331 | lsls r7, r6, #8 // low p64
332 | asrs r6, #24 // low p32
333 | lsls r3, r4, #20 // hi p64
334 | asrs r4, #12 // hi p32
335 | adds r3, r7
336 | adcs r4, r6 // r4:r3 = adjustment p64
337 | lsrs r7, r0, #16 // Keep 16 bits of y before we hammer it, p16
338 | adds r5, r3
339 | adcs r0, r4 // y += x*z
340 | 
341 | // Final computation of x for cosine
342 | asrs r6, r2, #6 // Keep 14 bits of z, p26
343 | muls r7, r6 // p42
344 | 
345 | movs r2, #EXP_BIAS32 // exponent
346 | // Normalize the y result. Minimum value 2^-6.
347 | Normalize:
348 | // r0 = y
349 | // r1 = x p32 uncorrected
350 | // r2 = exponent of y
351 | // r5 = extended y p63
352 | // r7 = correction for x p42
353 | // r12 = octant info
354 | asrs r7, #10 // p32
355 | subs r1, r7 // x -= y*z
356 | NormLoop:
357 | subs r2, #1 // each left shift lowers the exponent by one
358 | adds r5, r5
359 | adcs r0, r0
360 | bcc NormLoop // until we shift off MSB
361 | 
362 | // r0 = y
363 | // r1 = x, sqrt(2)/2 <= x < 1, p32
364 | // r2 = exponent of y
365 | // r12 = octant info
366 | movs r4, #0
367 | lsrs r1, #EXP_BITS32 // position mantissa
368 | adcs r1, r4 // add rounding bit
369 | movs r4, #EXP_BIAS32 - 2 // implied bit will add 1
370 | lsls r4, #MANT_BITS32 // position exponent
371 | adds r1, r4 // combine exponent
372 | CombineSine:
373 | // r0 = y, fully left justified w/o implied bit
374 | // r1 = fully completed cosine
375 | // r2 = exponent of y
376 | // r12 = octant info
377 | lsls r2, #MANT_BITS32 // position exponent
378 | lsrs r0, #EXP_BITS32 + 1 // position mantissa
379 | adcs r0, r2 // combine exponent and rounding bit
380 | 
381 | // Correct the result for original octant
382 | // Bits 0-2: octant number
383 | // octant | swap | sin | cos
384 | // 0 | no | + | +
385 | // 1 | yes | + | +
386 | // 2 | yes | + | -
387 | // 3 | no | + | -
388 | // 4 | no | - | -
389 | // 5 | yes | - | -
390 | // 6 | yes | - | +
391 | // 7 | no | - | +
392 | // ^ ^ ^
393 | // (octant+1) & 2 | | |
394 | // octant & 4 | |
395 | // (octant+2) & 4 |
396 | //
397 | // r0 = sine
398 | // r1 = cosine
399 | // r12 = octant info
400 | 
401 | mov r3, r12
402 | adds r4, r3, 1 // add to octant bit 0
403 | lsrs r4, #2 // octant bit 1 to CY
404 | bcc SetSigns
405 | // swap sin and cos
406 | SWAP r0, r1
407 | SetSigns:
408 | adds r4, r3, 2 // add to octant bit 1
409 | lsrs r4, #2 // isolate octant bit 2
410 | lsls r4, #31 // move it to the sign position
411 | orrs r1, r4 // set sign of cosine
412 | lsrs r4, r3, #2 // isolate octant bit 2
413 | lsls r4, #31 // move it to the sign position
414 | orrs r0, r4 // set sign of sine
415 | pop {r4-r7, pc}
416 |
417 | ZeroSine: // reduced angle is zero: sine must come out as 0.0 (cosine 1.0) before octant fix-up
418 | ldr r1, =#1 - EXP_BIAS32 // QuickExit adds EXP_BIAS32 - 1, so the exponent field ends up zero (r1 = 0 here would yield 0.5)
419 | b QuickExit
420 | 
421 | QuickExitUnnormal:
422 | // r1 = unbiased exponent + 1
423 | // r2:r4 = mantissa, at least -COS_X_EQUALS_1_EXP leading zeros
424 | // r12 = octant info
425 | subs r1, #-COS_X_EQUALS_1_EXP // account for the shift that follows
426 | lsl64const r4, r2, -COS_X_EQUALS_1_EXP, r6 // get rid of some leading zeros
427 | beq ZeroSine // nothing left, sine is zero (relies on flags left by lsl64const)
428 | bmi QuickExit // MSB already set, no more normalizing needed
429 | SmallNormLoop:
430 | subs r1, #1 // each left shift lowers the exponent by one
431 | adds r4, r4
432 | adcs r2, r2
433 | bpl SmallNormLoop // until MSB is set
434 | QuickExit:
435 | // r1 = unbiased exponent + 1
436 | // r2 = normalized mantissa (MSB set)
437 | // r12 = octant info
438 | lsls r0, r2, #1 // clear off implied bit
439 | adds r1, #EXP_BIAS32 - 1 // biased exponent of sine
440 | movs r2, r1
441 | MOV_IMM r1, ONE32 // cosine = 1.0
442 | // r0 = y, fully left justified w/o implied bit
443 | // r1 = fully completed cosine
444 | // r2 = exponent of y
445 | // r12 = octant info
446 | b CombineSine
447 |
448 | SmallAngleUnnormal:
449 | // r2:r4 = mantissa, at least SMALL_ANGLE_SHIFT leading zeros
450 | // r3 = __sineAtanTable pointer
451 | // r12 = octant info
452 | //
453 | // Check for more leading zeros for quick exit
454 | lsrs r5, r2, #32 + COS_X_EQUALS_1_EXP // anything left above the quick-exit threshold?
455 | beq QuickExitUnnormal
456 | lsl64const r4, r2, SMALL_ANGLE_SHIFT - 1, r6 // scale up to the small-angle fixed-point format
457 | // round the reduced angle in r2:r4
458 | lsls r4, #1 // top bit of the extension word to CY
459 | bcc SmallStartCordic
460 | adds r2, #1
461 | b SmallStartCordic
462 | 
463 | SmallAngle:
464 | // r2 = normalized mantissa (MSB set)
465 | // r3 = __sineAtanTable pointer
466 | // r4 = unbiased exponent + SMALL_ANGLE_SHIFT, < 0
467 | // r12 = octant info
468 | negs r4, r4 // right-shift count, > 0
469 | cmp r4, #-COS_X_EQUALS_1_EXP - SMALL_ANGLE_SHIFT
470 | bgt QuickExit // small enough that sin(x) = x and cos(x) = 1
471 | lsrs r2, r4 // mantissa p38
472 |
473 | SmallStartCordic:
474 | // r2 = reduced angle p38
475 | // r3 = __sineAtanTable pointer
476 | // r12 = octant info
477 | //
478 | // Like the main CORDIC loop, we can hard code the first rotation
479 | // since the angle is always positive.
480 | ldr r1, =#SMALL_SCALE
481 | lsrs r0, r1, #1 // hard-coded first rotation, y derived from x
482 | adds r3, #SMALL_SINE_ATAN_TABLE_OFFSET // step to the small-angle part of the table
483 | movs r4, #SMALL_SHIFT_START - 1 // we increment first thing
484 | ldmia r3!, {r5} // tan() = 2^-8 rotation
485 | subs r2, r5 // remove that rotation angle from z
486 | // r0 = y p39 (becomes sin)
487 | // r1 = x p32 (becomes cos)
488 | // r2 = current error in angle p38
489 | // r3 = ptr to table of angles [atan(2^-i)]
490 | // r4 = iteration i (and shift count)
491 | // r12 = octant info
492 | SmallRotLoop:
493 | adds r4, #1
494 | ldmia r3!, {r5} // next atan()
495 | lsrs r7, r0, #2*SMALL_ANGLE_SHIFT
496 | lsrs r7, r4 // y * 2^-i
497 | movs r6, r1
498 | lsrs r6, r4 // x * 2^-i
499 | cmp r2, #0
500 | blt SmallTooSmall // angle error is negative, rotate the other way
501 | subs r1, r7 // x -= y * 2^-i
502 | adds r0, r6 // y += x * 2^-i
503 | subs r2, r5 // new angle
504 | cmp r4, #SMALL_SHIFT_END
505 | bne SmallRotLoop
506 | b SmallLoopDone
507 | 
508 | SmallTooSmall:
509 | adds r1, r7 // x += y * 2^-i
510 | subs r0, r6 // y -= x * 2^-i
511 | adds r2, r5 // new angle
512 | cmp r4, #SMALL_SHIFT_END
513 | bne SmallRotLoop
514 | SmallLoopDone:
515 | // r0 = y p39 (becomes sin)
516 | // r1 = x p32 (becomes cos)
517 | // r2 = z p38 (current error in angle)
518 | // r12 = octant info
519 | // |z| <= last table entry, 26 bits incl. sign
520 | // 0 < y < 1
521 | // 0 < x < 1
522 | 
523 | lsls r3, r2, #1 // z, p39
524 | mul32sx32hi r3, r1, r4, r5, r6, r7 // upper product, p(39+32-32) = p39
525 | lsrs r7, r0, #20 // Keep 12 bits of y before we hammer it, p19
526 | adds r0, r4 // y p39
527 | 
528 | // Now finish x
529 | asrs r6, r2, #15 // Keep 12 bits of z, p23
530 | muls r7, r6 // p42
531 | 
532 | // Prepare to normalize
533 | movs r2, #EXP_BIAS32 - SMALL_ANGLE_SHIFT // exponent
534 | movs r5, #0 // no extension bits for y on this path
535 | // r0 = y
536 | // r1 = x p32 uncorrected
537 | // r2 = exponent of y
538 | // r5 = extended y p63
539 | // r7 = correction for x p42
540 | // r12 = octant info
541 | b Normalize // shared with the main CORDIC path
542 | 
543 | .endfunc
544 |
--------------------------------------------------------------------------------
/src/float/sqrtf.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * sqrtf.s
4 | *
5 | * Created: 6/13/2021 5:40:15 PM
6 | * Author: Tim Paterson
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 32-bit IEEE floating-point square root
19 | //
20 | // Entry:
21 | // r0 = input
22 | // Exit:
23 | // r0 = root
24 | //
25 | // The calculation will use Newton-Raphson iteration on inverse square root.
26 | // The initial guess will be calculated by subtracting the upper mantissa bits
27 | // from one of two constants -- one for [1, 2) and the other for [2, 4). The
28 | // values were determined using a spreadsheet.
29 | //
30 | // [1, 2) Mlo = 1.2109375; Y0 = Mlo - X / 4; in hex, 0x9B p7 (0x9B / 0x80)
31 | // [2, 4) Mhi = 0.96875; Y0 = Mhi - X / 8; in hex, 0x7C p7 (0x7C / 0x80)
32 | //
33 | // The guess will have more than 4 bits of accuracy, allowing 3 iterations to
34 | // get to the required accuracy. The notation p7 means there are 7 bits to the
35 | // right of the binary point, and this notation is used throughout the comments.
36 | //
37 | // Mark Owen demonstrates in Qfplib (http://www.quinapalus.com) some clever
38 | // arrangements that simplify the code for an iteration. First, the common
39 | // representation of an iteration is refactored (x = input, y = guess for
40 | // 1/sqrt(x)):
41 | //
42 | // next = 1.5*y - x*y^3/2 = y - y*(x*y^2 - 1)/2
43 | //
44 | // Note that since y is a guess for 1/sqrt(x), the inner term x*y^2 will
45 | // be close to 1. By computing this so the binary point is left of the
46 | // 32-bit word, the integer portion just falls off.
47 | //
48 | // Owen's implementation is brilliant and very hard to improve upon. If you
49 | // compare this one with his, you will see a great deal of similarity.
50 | //
51 | // This routine has been tested using every mantissa value in [1, 4).
52 |
53 | .set Mlo, 0x9B // magic number for lo range, [1, 2)
54 | .set Mhi, 0x7C // magic number for hi range, [2, 4)
55 |
56 |
57 | FUNC_START __sqrtf, sqrtf
58 | SAVE_REG r4
59 | lsls r1, r0, #1 // clear input sign
60 | beq Exit // input is zero, just return it, sign intact
61 | bcs ReturnNan // must not be negative
62 | lsrs r1, #MANT_BITS32 + 1 // input exponent
63 | .ifdef NO_DENORMALS
64 | beq Exit // exponent is zero (denormal input), just return it
65 | .else
66 | beq Denormal // exponent is zero, normalize the mantissa first
67 | .endif
68 | cmp r1, #EXP_SPECIAL32
69 | beq Exit // return NAN or +Infinity
70 | Normalized:
71 | // Set implied bit
72 | movs r2, #1
73 | lsls r2, #MANT_BITS32
74 | orrs r2, r0
75 | lsls r2, #EXP_BITS32 // normalize, clearing exponent
76 | 
77 | // r1 = exponent
78 | // r2 = input mantissa with implied bit set, p31
79 | 
80 | movs r3, #Mhi // assume [2, 4)
81 | lsrs r0, r2, #26 // save top bits [1, 2) p5
82 | 
83 | // Result exponent is current exponent / 2
84 | // Double the bias before halving. Implied bit position will get
85 | // added at end, so counteract it as well.
86 | adds r1, #EXP_BIAS32 - 2
87 | // exp >>= 1
88 | asrs r1, #1
89 | bcs 1f // was it odd?
90 | lsrs r2, #1 // if not, add leading zero
91 | movs r3, #Mlo // input interval [1, 2)
92 | 1:
93 | // Compute guess by subtracting upper bits from magic number in r3
94 | subs r3, r0
95 | 
96 | // First iteration
97 | // r2 = input p30
98 | // r3 = guess p7, accurate to 4 bits
99 | lsrs r0, r2, #12 // x p18
100 | muls r0, r3 // x*y, p25
101 | muls r0, r3 // x*y^2 p32
102 | // As described above, we now view r0 as signed and really have
103 | // x*y^2 - 1, p32
104 | asrs r0, #9 // p23
105 | muls r0, r3 // y*(x*y^2 - 1) p30 = y*(x*y^2 - 1)/2 p31
106 | lsls r3, #24 // y p31
107 | subs r3, r0 // y - y*(x*y^2 - 1)/2 p31
108 | lsrs r3, #16 // p15
109 | 
110 | // Do it again.
111 | movs r0, r3 // y p15
112 | muls r0, r0 // y^2 p30
113 | lsrs r0, #13 // y^2 p17
114 | lsrs r4, r2, #15 // x p15
115 | muls r0, r4 // x*y^2 p32
116 | asrs r0, #10 // p22
117 | muls r0, r3 // y*(x*y^2 - 1) p37 = y*(x*y^2 - 1)/2 p38
118 | asrs r0, #23 // p15
119 | subs r3, r0 // y - y*(x*y^2 - 1)/2 p15
120 | 
121 | // For the third iteration, we refactor again, taking into account
122 | // we don't want y (the next guess), but x*y (the actual root).
123 | // So it becomes:
124 | //
125 | // result = x*y - x*y*(x*y^2 - 1)/2 = x*y - y*((x*y)^2 - x)/2
126 | // = x*y + y*(x - (x*y)^2)/2
127 | 
128 | // last iteration left x p15 in r4
129 | muls r4, r3 // x*y p30, approximate sqrt(x)
130 | lsrs r4, #14 // x*y p16
131 | movs r0, r4
132 | muls r0, r4 // (x*y)^2 p32
133 | // With binary point at p32, we've dropped the integer bits.
134 | lsls r2, #2 // drop upper bits on x as well, p32
135 | subs r0, r2, r0 // x - (x*y)^2 p32 = (x - (x*y)^2)/2 p33
136 | // The next step is to finish calculating the error term that will
137 | // be added to the root. Since our guess is pretty close by now,
138 | // the upper bits of this term are zero and we can use a binary
139 | // point well past 32 bits.
140 | asrs r0, #6 // (x - (x*y)^2)/2 p27
141 | muls r0, r3 // y*(x - (x*y)^2)/2 p42
142 | lsls r4, #7 // x*y p23
143 | asrs r0, #17 // y*(x - (x*y)^2)/2 p25
144 | 
145 | // Result will be accurate, but rounding is tricky because
146 | // the error, no matter how small, can straddle a rounding boundary.
147 | // First check to see if it does by looking at the rounding bit and
148 | // the guard bit below it:
149 | //
150 | // 00 - never round up
151 | // 01 - maybe round up
152 | // 10 - maybe round up
153 | // 11 - always round up
154 | //
155 | // This is tested by adding 1 to the guard bit. This will leave the
156 | // rounding and guard bits:
157 | //
158 | // 01 - never round up
159 | // 10 - maybe round up
160 | // 11 - maybe round up
161 | // 00 - already rounded up
162 | //
163 | // So if the round bit ends up 1, we need to calculate the final
164 | // remainder for rounding.
165 | 
166 | adds r0, #1 // add to guard bit
167 | asrs r0, #2 // p23 - round bit to CY
168 | add r0, r4 // x*y + y*(x - (x*y)^2)/2 p23 - final result
169 | bcc RootDone // add did not affect CY, this is round bit from asrs
170 | 
171 | // Add 1/2 LSB to result, then see if that's too big or too small by
172 | // squaring it and comparing with x. Only low bits need comparing, the
173 | // upper ones must be the same.
174 | lsls r3, r0, #1 // result p24
175 | adds r3, #1 // bump by half a bit
176 | muls r3, r3 // (res + 0.5)^2 p48
177 | lsls r2, #16 // x p48 (16 low bits left)
178 | subs r3, r2 // (res + 0.5)^2 - x p48
179 | asrs r3, #31 // sign((res + 0.5)^2 - x)
180 | subs r0, r3 // add 1 if negative
181 | RootDone:
182 | // r0 = result mantissa, proper position with implied bit set
183 | // r1 = final exponent, adjusted by -1 for adding implied bit
184 | // root += exp << MANT_BITS32
185 | lsls r1, #MANT_BITS32
186 | adds r0, r1 // implied bit carries into the exponent field, adding the missing 1
187 | Exit:
188 | EXIT r4
189 |
190 | .ifndef NO_DENORMALS
191 | Denormal:
192 | // r0 = input
193 | // r1 = exponent, currently zero
194 | push {r5, r6} // __clz_denormal trashes these
195 | movs r4, r0 // pass value
196 | // __clz_denormal uses tailored calling convention
197 | // r4 = input to count leading zeros
198 | // r0 - r3, r7 preserved
199 | // r5, r6 trashed
200 | bl __clz_denormal // Get leading zeros
201 | subs r4, #EXP_BITS32 // presumably r4 returns the count; less EXP_BITS32 gives the normalizing shift
202 | lsls r0, r4 // move the leading 1 up to the implied bit position
203 | movs r1, #1
204 | subs r1, r4 // exponent = 1 - shift count
205 | pop {r5, r6}
206 | b Normalized
207 | .endif
208 |
209 | ReturnNan:
210 | ldr r0, =#NAN32 // negative input: result is a quiet NAN
211 | .ifdef NO_DENORMALS
212 | b Exit // two-instruction return
213 | .else
214 | pop {r4, pc} // matches SAVE_REG r4, which also pushed lr in this build
215 | .endif
216 | 
217 | .endfunc
218 |
--------------------------------------------------------------------------------
/src/float/tanf.s:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * tanf.s
4 | *
5 | * Created: 6/22/2023 10:05:24 AM
6 | * Author: Tim
7 | */
8 |
9 | .syntax unified
10 | .cpu cortex-m0plus
11 | .thumb
12 |
13 | .include "macros.inc"
14 | .include "ieee.inc"
15 | .include "options.inc"
16 |
17 |
18 | // 32-bit floating-point tangent
19 | //
20 | // Entry:
21 | // r0 = input angle in radians
22 | // Exit:
23 | // r0 = tangent
24 | //
25 | // This simply calls __sinfM0, which returns sinf() in r0 and cosf() in r1.
26 | // Tangent is computed by dividing them: tan(x) = sin(x)/cos(x).
27 | 
28 | FUNC_START __tanfM0, tanf
29 | push {lr}
30 | bl __sinfM0 // r0 = sin(x), r1 = cos(x)
31 | bl __fdiv // sin(x) and cos(x) are already in place as the two operands
32 | pop {pc}
33 | 
34 | .endfunc
35 |
--------------------------------------------------------------------------------
/src/include/ieee.inc:
--------------------------------------------------------------------------------
1 | /*
2 | * ieee.inc
3 | *
4 | * Created: 6/22/2020 5:23:27 PM
5 | * Author: Tim
6 | */
7 |
8 |
9 | // IEEE single
10 | // Bit fields
11 | .set MANT_BITS32, 23 // stored mantissa bits; the implied bit sits at this bit position
12 | .set EXP_BITS32, 8 // exponent field width
13 | .set EXP_BIAS32, ((1 << (EXP_BITS32 - 1)) - 1) // 127
14 | .set EXP_MIN32, (-EXP_BIAS32 + 1) // -126
15 | .set EXP_SPECIAL32, ((1 << EXP_BITS32) - 1) // 255, the all-ones exponent used by infinity and NAN
16 | .set SIGN_BIT32, (1LL << 31)
17 |
18 | .macro SET_FLOAT name, sign, exp, mant
19 | .set \name, ((\sign) << 31) | (((\exp) + EXP_BIAS32) << MANT_BITS32) | ((\mant) & ((1 << MANT_BITS32) - 1)) // define a symbol holding the float bit pattern; exp is unbiased, mant is masked to MANT_BITS32 bits
20 | .endm
21 |
22 | .macro LOAD_FLOAT reg, sign, exp, mant
23 | MOV_IMM \reg, ((\sign) << 31) | (((\exp) + EXP_BIAS32) << MANT_BITS32) | ((\mant) & ((1 << MANT_BITS32) - 1)) // same bit pattern as SET_FLOAT, handed to MOV_IMM for the given register
24 | .endm
25 |
26 | // Special values
27 | .set ONE32, EXP_BIAS32 << MANT_BITS32 // 1.0: biased exponent only, zero mantissa
28 | .set INFINITY32, EXP_SPECIAL32 << MANT_BITS32 // all-ones exponent, zero mantissa
29 | // Quiet NAN has MSB of mantissa set
30 | .set NAN32, INFINITY32 | (1 << (MANT_BITS32 - 1))
31 |
32 | // IEEE double
33 | .set MANT_BITS64, 52 // stored mantissa bits
34 | .set MANT_BITS_HI64, (MANT_BITS64 - 32) // mantissa bits that live in the high word
35 | .set EXP_BITS64, 11 // exponent field width
36 | .set EXP_BIAS64, ((1 << (EXP_BITS64 - 1)) - 1) // 1023
37 | .set EXP_MIN64, (-EXP_BIAS64 + 1) // -1022
38 | .set EXP_SPECIAL64, ((1 << EXP_BITS64) - 1) // 2047, the all-ones exponent
39 | 
40 | // Special values (high word)
41 | .set INFINITY64, EXP_SPECIAL64 << MANT_BITS_HI64
42 | // Quiet NAN has MSB of mantissa set
43 | .set NAN64, INFINITY64 | (1 << (MANT_BITS_HI64 - 1))
44 |
--------------------------------------------------------------------------------
/src/include/macros.inc:
--------------------------------------------------------------------------------
1 | /*
2 | * macros.inc
3 | *
4 | * Created: 6/21/2020 12:28:05 PM
5 | * Author: Tim Paterson
6 | */
7 |
8 |
9 | //*********************************************************************
10 | // Functions that can optionally be aliased
11 | //*********************************************************************
12 |
13 | .macro DEFINE_ENTRY local, system
14 | .thumb_set \system, \local // make the system name a Thumb-function alias of the local name
15 | .endm
16 |
17 | .macro ENTRY_POINT local, system
18 | .global \local // export the local name
19 | .thumb_func // the label that follows is a Thumb function
20 | \local :
21 | .ifndef \system // alias symbol not defined yet: export it as a Thumb alias of the local name
22 | .global \system
23 | .thumb_set \system, \local
24 | .elseif \system == \local // alias already defined with the same value: only export it
25 | .global \system
26 | .endif
27 | .endm
28 |
29 | // Same as ENTRY_POINT, but also starts a .func block (close it with .endfunc)
30 | .macro FUNC_START local, system
31 | .func \local
32 | ENTRY_POINT \local, \system
33 | .endm
34 |
35 | //*********************************************************************
36 | // Register save/restore, depending on NO_DENORMALS
37 | //*********************************************************************
38 |
39 | .macro SAVE_REG reg:vararg
40 | .ifdef NO_DENORMALS
41 | push { \reg } // lr is not saved in this build; EXIT returns with bx lr
42 | .else
43 | push { \reg, lr } // lr saved too; EXIT returns by popping it into pc
44 | .endif
45 | .endm
46 |
47 | .macro EXIT reg:vararg
48 | .ifdef NO_DENORMALS
49 | pop { \reg } // must list the same registers given to SAVE_REG
50 | bx lr
51 | .else
52 | pop { \reg, pc } // restores registers and returns through the lr that SAVE_REG pushed
53 | .endif
54 | .endm
55 |
56 | //*********************************************************************
57 | // 32x32 multiply
58 | //
59 | // You must pass 7 register operands (each one a low register, r0 - r7) as follows:
60 | //
61 | // x - left operand
62 | // y - right operand
63 | // pl - low product, can be same as x
64 | // ph - high product, can be same as y
65 | // t1, t2, t3 - temp registers, trashed
66 | //
67 | // Because x/pl and y/ph can share, a minimum of 5 different registers
68 | // are required.
69 | //
70 | //*********************************************************************
71 |
72 | // unsigned operands: the upper halves of both x and y are extracted with lsrs (zero-extended)
73 | .macro mul32x32 x, y, pl, ph, t1, t2, t3
74 | mul32x32signs \x, \y, \pl, \ph, \t1, \t2, \t3, lsrs, lsrs
75 | .endm
76 |
77 | // left operand signed, right unsigned: upper half of x is extracted with asrs, upper half of y with lsrs
78 | .macro mul32sx32 x, y, pl, ph, t1, t2, t3
79 | mul32x32signs \x, \y, \pl, \ph, \t1, \t2, \t3, asrs, lsrs
80 | .endm
81 |
82 | // left operand unsigned, right signed: upper half of x is extracted with lsrs, upper half of y with asrs
83 | .macro mul32x32s x, y, pl, ph, t1, t2, t3
84 | mul32x32signs \x, \y, \pl, \ph, \t1, \t2, \t3, lsrs, asrs
85 | .endm
86 |
87 | // both operands signed: the upper halves of both x and y are extracted with asrs (sign-extended)
88 | .macro mul32sx32s x, y, pl, ph, t1, t2, t3
89 | mul32x32signs \x, \y, \pl, \ph, \t1, \t2, \t3, asrs, asrs
90 | .endm
91 |
92 | .macro mul32x32signs x, y, pl, ph, t1, t2, t3, shrx, shry
93 | uxth \t1, \x // t1 = xl, low half of x (always zero-extended)
94 | uxth \t2, \y // t2 = yl, low half of y
95 | muls \t1, \t2 // t1 = low product
96 | \shrx \t3, \x, #16 // t3 = xh; shrx selects zero or sign extension for x
97 | muls \t2, \t3 // t2 = mid product xh * yl
98 | \shry \ph, \y, #16 // ph = yh; last read of y, which is why ph may share its register
99 | muls \t3, \ph // t3 = hi product
100 | uxth \pl, \x // pl = xl again; last read of x, which is why pl may share its register
101 | muls \pl, \ph // pl = mid product xl * yh
102 | // add pl mid product
103 | lsls \ph, \pl, #16 // low 16 bits of the mid product, moved up to line up with the low word
104 | \shry \pl, \pl, #16 // remaining upper bits; extension follows y because this term contains yh
105 | adds \t1, \ph // accumulate into the low word
106 | adcs \t3, \pl // and into the high word, with the carry from the low word
107 | // add t2 mid product
108 | lsls \pl, \t2, #16 // low 16 bits of the mid product, moved up to line up with the low word
109 | \shrx \ph, \t2, #16 // remaining upper bits; extension follows x because this term contains xh
110 | adds \pl, \t1 // pl = final low word
111 | adcs \ph, \t3 // ph = final high word
112 | .endm
113 |
114 | //*********************************************************************
115 | // 32x32 multiply, return only hi result
116 | // Doesn't compute lowest product, so can be off by almost 1 bit
117 | // product p can be same as x
118 |
119 | // unsigned operands: the upper halves of both x and y are extracted with lsrs (zero-extended)
120 | .macro mul32x32hi x, y, p, t1, t2, t3
121 | mul32x32hiSigns \x, \y, \p, \t1, \t2, \t3, lsrs, lsrs
122 | .endm
123 |
124 | // left operand signed, right unsigned: upper half of x is extracted with asrs, upper half of y with lsrs
125 | .macro mul32sx32hi x, y, p, t1, t2, t3
126 | mul32x32hiSigns \x, \y, \p, \t1, \t2, \t3, asrs, lsrs
127 | .endm
128 |
129 | // left operand unsigned, right signed: upper half of x is extracted with lsrs, upper half of y with asrs
130 | .macro mul32x32shi x, y, p, t1, t2, t3
131 | mul32x32hiSigns \x, \y, \p, \t1, \t2, \t3, lsrs, asrs
132 | .endm
133 |
134 | // both operands signed: the upper halves of both x and y are extracted with asrs (sign-extended)
135 | .macro mul32sx32shi x, y, p, t1, t2, t3
136 | mul32x32hiSigns \x, \y, \p, \t1, \t2, \t3, asrs, asrs
137 | .endm
138 |
139 | .macro mul32x32hiSigns x, y, p, t1, t2, t3, shrx, shry
140 | \shry \t1, \y, #16 // yH
141 | uxth \t2, \x // xL
142 | muls \t2, \t1 // yH * xL = mid 1
143 | \shrx \p, \x, #16 // xH (last read of x, which is why p may share its register)
144 | muls \t1, \p // yH * xH = hi
145 | \shry \t3, \t2, #16 // hi half of mid 1
146 | adds \t1, \t3 // fold it into hi
147 | uxth \t3, \y // yL
148 | muls \t3, \p // yL * xH = mid 2
149 | uxth \t2, \t2 // lo half of mid1
150 | adds \t3, \t2 // sum mids
151 | \shrx \t3, #16 // keep only the part of the sum that reaches the high word
152 | adds \p, \t1, \t3 // p = high word of the product; xL * yL is never added in
153 | .endm
154 |
155 | //*********************************************************************
156 | // 64-bit shifts
157 | //*********************************************************************
158 |
159 | .macro lsl64const lo, hi, cnt, tmp
160 | // Counts of 1 and 2: one add/adc doubling pair per bit, tmp is left untouched.
161 | // Any other count: shift both words and carry the spilled bits of lo over through tmp.
162 | .if (\cnt) == 1 || (\cnt) == 2
163 | .rept \cnt
164 | adds \lo, \lo
165 | adcs \hi, \hi
166 | .endr
167 | .else
168 | // general case
169 | lsrs \tmp, \lo, #32 - (\cnt)
170 | lsls \lo, #\cnt
171 | lsls \hi, #\cnt
172 | orrs \hi, \tmp
173 | .endif
174 | .endm
175 |
176 | .macro lsr64const lo, hi, cnt, tmp
177 | lsrs \lo, #\cnt // shift lo first, leaving its top cnt bits clear
178 | lsls \tmp, \hi, #32 - (\cnt) // tmp = low cnt bits of hi, moved to the top of the word
179 | orrs \lo, \tmp // drop them into the cleared top bits of lo
180 | lsrs \hi, #\cnt // hi is shifted last, after its low bits were copied out
181 | .endm
182 |
183 | // For shift count in register, <= 32. The cnt register is overwritten with 32 - cnt and tmp is trashed.
184 | .macro lsl64short lo, hi, cnt, tmp
185 | lsls \hi, \cnt
186 | movs \tmp, \lo // keep the unshifted lo
187 | lsls \lo, \cnt
188 | subs \cnt, #32
189 | negs \cnt, \cnt // cnt = 32 - cnt
190 | lsrs \tmp, \cnt // tmp = bits of lo that cross into hi
191 | orrs \hi, \tmp
192 | .endm
193 |
194 | .macro lsr64short lo, hi, cnt, tmp
195 | lsrs \lo, \cnt
196 | movs \tmp, \hi // keep the unshifted hi
197 | lsrs \hi, \cnt
198 | subs \cnt, #32
199 | negs \cnt, \cnt // cnt = 32 - cnt, and it is left that way on exit
200 | lsls \tmp, \cnt // tmp = bits of hi that cross into lo
201 | orrs \lo, \tmp
202 | .endm
203 |
204 | // For shift count in register, <= 64. On exit cnt holds 32 - cnt; tmp1 and tmp2 are trashed.
205 | .macro lsr64 lo, hi, cnt, tmp1, tmp2
206 | // inspired by __aeabi_llsr
207 | movs \tmp1, \hi // hi1: copy that supplies lo when cnt >= 32
208 | movs \tmp2, \hi // hi2: copy that supplies the bits crossing into lo when cnt < 32
209 |
210 | // cnt < 32 cnt >= 32
211 | // -------- ---------
212 | lsrs \lo, \cnt // lo >>= cnt lo = 0
213 | lsrs \hi, \cnt // hi >>= cnt hi = 0
214 | subs \cnt, #32 // cnt1 < 0 (=>big) 0 <= cnt1 < 32
215 | lsrs \tmp1, \cnt // hi1 = 0 hi1 >>= cnt1
216 | orrs \lo, \tmp1 // lo1 = lo lo1 = hi1
217 | negs \cnt, \cnt // cnt2 = 32 - cnt cnt2 < 0 (=>big)
218 | lsls \tmp2, \cnt // hi2 <<= cnt2 hi2 = 0
219 | orrs \lo, \tmp2 // lo1 |= hi2 lo1
220 | .endm
221 |
222 | .macro lsl64 lo, hi, cnt, tmp1, tmp2
223 | movs \tmp1, \lo // lo1: copy that supplies hi when cnt >= 32
224 | movs \tmp2, \lo // lo2: copy that supplies the bits crossing into hi when cnt < 32
225 |
226 | // cnt < 32 cnt >= 32
227 | // -------- ---------
228 | lsls \lo, \cnt // lo <<= cnt lo = 0
229 | lsls \hi, \cnt // hi <<= cnt hi = 0
230 | subs \cnt, #32 // cnt1 < 0 (=>big) 0 <= cnt1 < 32
231 | lsls \tmp1, \cnt // lo1 = 0 lo1 <<= cnt1
232 | orrs \hi, \tmp1 // hi1 = hi hi1 = lo1
233 | negs \cnt, \cnt // cnt2 = 32 - cnt cnt2 < 0 (=>big)
234 | lsrs \tmp2, \cnt // lo2 >>= cnt2 lo2 = 0
235 | orrs \hi, \tmp2 // hi1 |= lo2 hi1 (cnt is left holding 32 - cnt; tmp1 and tmp2 are trashed)
236 | .endm
237 |
238 | //*********************************************************************
239 | // 96-bit shifts
240 | //*********************************************************************
241 |
242 | // For shift count in register, <= 32. Only hi and mid receive the shifted value; lo is not shifted left and ends up holding just the bits that moved into mid. cnt ends as 32 - cnt, tmp is trashed.
243 | .macro lsl96short lo, mid, hi, cnt, tmp
244 | lsls \hi, \cnt
245 | movs \tmp, \mid // keep the unshifted mid
246 | lsls \mid, \cnt
247 | subs \cnt, #32
248 | negs \cnt, \cnt // cnt = 32 - cnt
249 | lsrs \tmp, \cnt // tmp = bits of mid that cross into hi
250 | orrs \hi, \tmp
251 | lsrs \lo, \cnt // lo = bits of lo that cross into mid (lo itself is consumed)
252 | orrs \mid, \lo
253 | .endm
254 |
255 | //*********************************************************************
256 | // Count leading zeros, you choose the registers to use
257 | //*********************************************************************
258 |
259 | // The count replaces arg; cnt and tmp are trashed. Returns 31 on input of zero.
260 | .macro CLZ arg, cnt, tmp
261 | movs \cnt, #31 // starting count, which is also the result for inputs 0 and 1
262 | CLZ_EXT \arg, \cnt, \tmp
263 | .endm
264 |
265 | // You must initialize cnt register to max value (the value returned when arg is 0 or 1). The count replaces arg; cnt and tmp are trashed. Uses numeric local labels 1 through 4.
266 | .macro CLZ_EXT arg, cnt, tmp
267 | lsrs \tmp, \arg, #16 // anything in the upper 16 bits?
268 | beq 1f // no: keep arg and cnt as they are
269 | movs \arg, \tmp // yes: continue with the upper part
270 | subs \cnt, #16 // and take 16 off the count
271 | 1:
272 | lsrs \tmp, \arg, #8 // same test, 8 bits wide
273 | beq 2f
274 | movs \arg, \tmp
275 | subs \cnt, #8
276 | 2:
277 | lsrs \tmp, \arg, #4 // same test, 4 bits wide
278 | beq 3f
279 | movs \arg, \tmp
280 | subs \cnt, #4
281 | 3:
282 | lsrs \tmp, \arg, #2 // same test, 2 bits wide
283 | beq 4f
284 | movs \arg, \tmp
285 | subs \cnt, #2
286 | 4:
287 | lsrs \arg, #1 // arg is 0..3 here, so this leaves 0 or 1
288 | subs \arg, \cnt, \arg // result = remaining count minus that last bit
289 | .endm
290 |
291 | //*********************************************************************
292 | // Swap registers
293 | //*********************************************************************
294 |
295 | .macro SWAP arg1, arg2
296 | eors \arg1, \arg2 // arg1 = a ^ b
297 | eors \arg2, \arg1 // arg2 = b ^ (a ^ b) = a
298 | eors \arg1, \arg2 // arg1 = (a ^ b) ^ a = b; passing the same register for both operands would zero it
299 | .endm
300 |
301 | //*********************************************************************
302 | // Optimize loading immediate value into register
303 | //*********************************************************************
304 |
305 | .macro MOV_IMM reg, int, shift=0
306 | .if (\int) & 0xFFFFFF00 == 0
307 | movs \reg, #\int // no bits above the low 8: load the value directly
308 | .if (\shift) != 0
309 | lsls \reg, #\shift // put back the trailing zero bits that the recursive step below stripped off
310 | .endif
311 | .elseif (\int) & 1 == 0
312 | MOV_IMM \reg, (\int) >> 1, (\shift) + 1
313 | .else
314 | ldr \reg, =#(\int) << (\shift) // odd and wider than 8 bits: give up and load the full constant with ldr =
315 | .endif
316 | .endm
317 |
318 | //*********************************************************************
319 | // Shift constants (assembler doesn't support 64-bit numbers)
320 | //*********************************************************************
321 |
322 | .macro LSL name, lo, hi, cnt
323 | .set \name, ((\hi) << (\cnt)) | ((\lo) >> (32 - (\cnt))) // hi shifted left by cnt, with the top cnt bits of lo brought in at the bottom
324 | .endm
325 |
326 | .macro LSR name, lo, hi, cnt
327 | .set \name, ((\lo) >> (\cnt)) | ((\hi) << (32 - (\cnt))) // lo shifted right by cnt, with the low cnt bits of hi brought in at the top
328 | .endm
329 |
--------------------------------------------------------------------------------
/src/include/trigf.inc:
--------------------------------------------------------------------------------
1 | /*
2 | * trigf.inc
3 | *
4 | * Created: 7/17/2023 11:53:18 AM
5 | * Author: Tim
6 | */
7 |
8 |
9 | .set TAN_X_EQUALS_X_EXP, -12 // NOTE(review): presumably the exponent below which tan(x) is returned as x - confirm in tanf.s
10 | .set COS_X_EQUALS_1_EXP, -12 // NOTE(review): presumably the exponent below which cos(x) is returned as 1 - confirm in cosf.s
11 | .set PI_MANTISSA, 0xC90FDAA2 // p30 for pi, p31 for pi/2, p32 for pi/4
12 | .set PI_MANTISSA_LO, 0x2168C235 // next 32 bits
13 | .set PI_MANTISSA_FLOAT, ((PI_MANTISSA >> (EXP_BITS32 - 1)) + 1) >> 1 // PI_MANTISSA with its low EXP_BITS32 bits dropped, rounding half up
14 | .set ONE_OVER_PI_MANTISSA, 0xa2f9836E // p33 for 1/pi, p31 for 4/pi
15 |
16 | // for atan2f
17 | .set ATAN_TABLE_ENTRIES, 14
18 | .set ATAN_TABLE_END_OFFSET, ATAN_TABLE_ENTRIES * 4 // 14 * 4 = 56; NOTE(review): the factor 4 looks like bytes per table entry - confirm in atantablef.s
19 | .set SMALL_ATAN_TABLE_ENTRIES, 7
20 | .set SMALL_ATAN_TABLE_OFFSET, ATAN_TABLE_ENTRIES * 4 // offset from __fullAtanTable (same value as ATAN_TABLE_END_OFFSET)
21 | .set SMALL_ATAN_TABLE_END_OFFSET, SMALL_ATAN_TABLE_ENTRIES * 4 // offset from AtanTableEnd (7 * 4 = 28)
22 | .set SMALL_ATAN_TABLE_START_I, ATAN_TABLE_ENTRIES - SMALL_ATAN_TABLE_ENTRIES // 14 - 7 = 7
23 | .set SMALL_ATAN_TABLE_SHIFT, 38 - 32 // = 6
24 |
25 | // for sinf
26 | .set SINE_ATAN_TABLE_ENTRIES, ATAN_TABLE_ENTRIES - 1 // 13, one fewer than the atan2f count
27 | .set SINE_TABLE_END_OFFSET, SINE_ATAN_TABLE_ENTRIES * 4 // 13 * 4 = 52
28 | .set SMALL_SINE_ATAN_TABLE_ENTRIES, SMALL_ATAN_TABLE_ENTRIES - 1 // 6, one fewer than the atan2f count
29 | .set SMALL_SINE_ATAN_TABLE_OFFSET, SMALL_ATAN_TABLE_OFFSET // offset from __sineAtanTable
30 |
--------------------------------------------------------------------------------