├── Makefile
├── VERSION
├── bfbench.c
├── bftest.c
├── cutils.c
├── cutils.h
├── libbf.c
├── libbf.h
├── pi_1e5.sha1sum
├── pi_1e6.sha1sum
├── pi_1e7.sha1sum
├── pi_1e8.sha1sum
├── pi_1e9.sha1sum
├── readme.txt
├── softfp.c
├── softfp.h
├── softfp_template.h
├── softfp_template_icvt.h
└── tinypi.c


/Makefile:
--------------------------------------------------------------------------------
  1 | # Tiny arbitrary precision floating point library
  2 | # 
  3 | # Copyright (c) 2017-2018 Fabrice Bellard
  4 | #
  5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | # of this software and associated documentation files (the "Software"), to deal
  7 | # in the Software without restriction, including without limitation the rights
  8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | # copies of the Software, and to permit persons to whom the Software is
 10 | # furnished to do so, subject to the following conditions:
 11 | #
 12 | # The above copyright notice and this permission notice shall be included in
 13 | # all copies or substantial portions of the Software.
 14 | #
 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21 | # THE SOFTWARE.
 22 | 
 23 | # Enable Windows compilation
 24 | #CONFIG_WIN32=y
 25 | # build AVX2 version
 26 | CONFIG_AVX2=y
 27 | # Enable profiling with gprof
 28 | #CONFIG_PROFILE=y
 29 | # compile the bftest utility to do regression tests and benchmarks. Must have
 30 | # the MPFR and MPDecimal libraries
 31 | #CONFIG_BFTEST=y
 32 | # 32 bit compilation
 33 | #CONFIG_M32=y
 34 | 
 35 | #CONFIG_ASAN=y
 36 | 
 37 | ifdef CONFIG_WIN32
 38 | CROSS_PREFIX=x86_64-w64-mingw32-
 39 | EXE:=.exe
 40 | else
 41 | EXE:=
 42 | endif
 43 | # -MMD writes a .d dependency file next to each object; -MP adds a phony target per header so a deleted/renamed header does not break the build
 44 | CC=$(CROSS_PREFIX)gcc
 45 | CFLAGS=-Wall -g $(PROFILE) -MMD -MP
 46 | CFLAGS+=-O2
 47 | CFLAGS+=-flto
 48 | #CFLAGS+=-Os
 49 | LDFLAGS=
 50 | ifdef CONFIG_PROFILE
 51 | CFLAGS+=-p
 52 | LDFLAGS+=-p
 53 | else
 54 | #LDFLAGS+=-s # strip output
 55 | endif
 56 | ifdef CONFIG_ASAN
 57 | CFLAGS+=-fsanitize=address
 58 | LDFLAGS+=-fsanitize=address
 59 | endif
 60 | LIBS=-lm
 61 | 
 62 | PROGS+=bfbench$(EXE) tinypi$(EXE)
 63 | ifdef CONFIG_BFTEST
 64 | PROGS+=bftest$(EXE)
 65 | ifdef CONFIG_M32
 66 | PROGS+=bftest32$(EXE)
 67 | endif
 68 | endif
 69 | ifdef CONFIG_AVX2
 70 | PROGS+=bfbench-avx2$(EXE) tinypi-avx2$(EXE)
 71 | endif
 72 | # 'all', 'test' and 'clean' name commands, not files: declare them phony so a file with the same name cannot mask them
 73 | .PHONY: all test clean
 74 | all: $(PROGS)
 75 | tinypi$(EXE): tinypi.o libbf.o cutils.o
 76 | 	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
 77 | 
 78 | tinypi-avx2$(EXE): tinypi.avx2.o libbf.avx2.o cutils.avx2.o
 79 | 	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
 80 | 
 81 | BFTEST_LIBS:=$(LIBS)
 82 | 
 83 | ifdef CONFIG_BFTEST
 84 | BFTEST_LIBS:=-lmpfr -lgmp $(BFTEST_LIBS)
 85 | bfbench.o bfbench.avx2.o: CFLAGS+=-DCONFIG_MPFR
 86 | 
 87 | bftest$(EXE): bftest.o libbf.o cutils.o softfp.o
 88 | 	$(CC) $(LDFLAGS) -o $@ $^ -lmpdec $(BFTEST_LIBS)
 89 | 
 90 | ifdef CONFIG_M32
 91 | bftest32$(EXE): bftest.m32.o libbf.m32.o cutils.m32.o softfp.m32.o
 92 | 	$(CC) $(LDFLAGS) -m32 -o $@ $^ -lmpdec $(BFTEST_LIBS)
 93 | endif
 94 | endif
 95 | 
 96 | bfbench$(EXE): bfbench.o libbf.o cutils.o
 97 | 	$(CC) $(LDFLAGS) -o $@ $^ $(BFTEST_LIBS)
 98 | 
 99 | bfbench-avx2$(EXE): bfbench.avx2.o libbf.avx2.o  cutils.avx2.o
100 | 	$(CC) $(LDFLAGS) -o $@ $^ $(BFTEST_LIBS)
101 | 
102 | test: all
103 | 	time ./tinypi 1e5 pi_1e5.txt
104 | 	sha1sum -c pi_1e5.sha1sum
105 | ifdef CONFIG_AVX2
106 | 	time ./tinypi-avx2 1e5 pi_1e5.txt
107 | 	sha1sum -c pi_1e5.sha1sum
108 | endif
109 | #
110 | 	time ./tinypi 1e6 pi_1e6.txt
111 | 	sha1sum -c pi_1e6.sha1sum
112 | ifdef CONFIG_AVX2
113 | 	time ./tinypi-avx2 1e6 pi_1e6.txt
114 | 	sha1sum -c pi_1e6.sha1sum
115 | #
116 | 	time ./tinypi-avx2 1e7 pi_1e7.txt
117 | 	sha1sum -c pi_1e7.sha1sum
118 | #
119 | #	time ./tinypi-avx2 1e8 pi_1e8.txt
120 | #	sha1sum -c pi_1e8.sha1sum
121 | endif
122 | 
123 | %.o: %.c
124 | 	$(CC) $(CFLAGS) -c -o $@ $<
125 | 
126 | %.m32.o: %.c
127 | 	$(CC) -m32 $(CFLAGS) -c -o $@ $<
128 | 
129 | %.avx2.o: %.c
130 | 	$(CC) $(CFLAGS) -mavx -mavx2 -mfma -mbmi2 -c -o $@ $<
131 | 
132 | clean:
133 | 	rm -f $(PROGS) *.o *.d *~
134 | 
135 | -include $(wildcard *.d)
136 | 


--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 2021-03-27
2 | 


--------------------------------------------------------------------------------
/bfbench.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Big float tests
  3 |  * 
  4 |  * Copyright (c) 2017 Fabrice Bellard
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | #include <stdlib.h>
 25 | #include <stdio.h>
 26 | #include <inttypes.h>
 27 | #include <math.h>
 28 | #include <string.h>
 29 | #include <assert.h>
 30 | #include <sys/time.h>
 31 | #ifdef CONFIG_MPFR
 32 | #include <mpfr.h>
 33 | #endif
 34 | 
 35 | #include "libbf.h"
 36 | 
 37 | /* number of bits per base 10 digit */
 38 | #define BITS_PER_DIGIT 3.32192809488736234786
 39 | 
 40 | static bf_context_t bf_ctx;
 41 | /* Allocation callback registered with bf_context_init() in main(); 'opaque' is unused and the request is forwarded to realloc(). */
 42 | static void *my_bf_realloc(void *opaque, void *ptr, size_t size)
 43 | {
 44 |     return realloc(ptr, size);
 45 | }
 46 | /* Return the time reported by gettimeofday() as a count of whole milliseconds. */
 47 | static int64_t get_clock_msec(void)
 48 | {
 49 |     struct timeval tv;
 50 |     gettimeofday(&tv, NULL);
 51 |     return tv.tv_sec * 1000LL + (tv.tv_usec / 1000); /* seconds and microseconds both scaled to ms */
 52 | }
 53 | 
 54 | /* Format 'val' into 'buf' (at most buf_size bytes) with an SI suffix: at least 3 significant
 55 |    digits in at most 5 chars, except above 9999T. Truncates toward zero. Returns 'buf'. */
 56 | char *get_si_prefix(char *buf, int buf_size, uint64_t val)
 57 | {
 58 |     static const char suffixes[4] = "kMGT"; /* exactly 4 chars, no NUL: only indexed, never used as a string */
 59 |     uint64_t base;
 60 |     int i;
 61 | 
 62 |     if (val <= 999) {
 63 |         snprintf(buf, buf_size, "%" PRIu64, val); /* val is unsigned: PRIu64, not PRId64 */
 64 |     } else {
 65 |         base = 1000;
 66 |         for(i=0;i<4;i++) {
 67 |             /* Note: we round to 0 */
 68 |             if (val < base * 10) {
 69 |                 snprintf(buf, buf_size, "%0.2f%c",
 70 |                          floor((val * 100.0) / base) / 100.0,
 71 |                          suffixes[i]);
 72 |                 break;
 73 |             } else if (val < base * 100) {
 74 |                 snprintf(buf, buf_size, "%0.1f%c",
 75 |                          floor((val * 10.0) / base) / 10.0,
 76 |                          suffixes[i]);
 77 |                 break;
 78 |             } else if (val < base * 1000 || (i == 3)) { /* i == 3: largest suffix, take everything left */
 79 |                 snprintf(buf, buf_size,
 80 |                          "%" PRIu64 "%c",
 81 |                          val / base,
 82 |                          suffixes[i]);
 83 |                 break;
 84 |             }
 85 |             base = base * 1000;
 86 |         }
 87 |     }
 88 |     return buf;
 89 | }
 90 | 
 91 | static uint64_t mp_random64(uint64_t *pseed)
 92 | {
 93 |     *pseed = *pseed * 6364136223846793005 + 1;
 94 |     return *pseed;
 95 | }
 96 | 
 97 | typedef enum {
 98 |     BF_OP_MUL,
 99 |     BF_OP_DIV,
100 |     BF_OP_SQRT,
101 | 
102 |     BF_OP_COUNT,
103 | } BFOPEnum;
104 | 
105 | const char *op_str[BF_OP_COUNT] = {
106 |     "mul",
107 |     "div",
108 |     "sqrt",
109 | };
110 | 
111 | static BFOPEnum get_op_from_str(const char *str)
112 | {
113 |     BFOPEnum idx;
114 |     /* linear scan of the name table; hand back the first exact match */
115 |     for(idx = 0; idx < BF_OP_COUNT; idx++) {
116 |         if (strcmp(str, op_str[idx]) == 0)
117 |             return idx;
118 |     }
119 |     /* no table entry matched: report the name and terminate */
120 |     fprintf(stderr, "Unknown operation: %s\n", str);
121 |     exit(1);
122 |     return BF_OP_COUNT; /* not reached */
123 | }
124 | 
125 | #define K_STEPS 10
126 | /* Time 'op' on operands of 10^K digits for K from k_start1 to k_end1 in steps of 1/K_STEPS: prints a table to stdout and writes one "x ns/limb" line per size to 'filename' (x is K when log_scale is set, else the digit count). Exits if 'filename' cannot be opened. */
127 | static void bf_op_speed(double k_start1, double k_end1,
128 |                         const char *filename, int log_scale, BFOPEnum op)
129 | {
130 |     int k, nb_its, it, dpl, fft_len_log2, nb_mods, k_end, k_start;
131 |     bf_t A, B, C;
132 |     limb_t n, i, prec;
133 |     int64_t start_time, ti, n_digits;
134 |     FILE *f;
135 |     double tpl, K;
136 |     char buf1[32], buf2[32];
137 |     uint64_t seed = 2;
138 |     f = fopen(filename, "wb");
139 |     if (!f) { perror(filename); exit(1); } /* fail early instead of handing NULL to fprintf()/fclose() */
140 |     printf("%5s %5s %5s", "K", "BITS", "DIGIT");
141 |     if (op == BF_OP_MUL) {
142 |         printf(" %3s %3s %2s", "FFT", "DPL", "M");
143 |     }
144 |     printf(" %10s %10s\n", "ms", "ns/limb");
145 | 
146 |     k_start = lrint(k_start1 * K_STEPS);
147 |     k_end = lrint(k_end1 * K_STEPS);
148 |     for(k = k_start; k <= k_end; k++) {
149 |         K = (double)k / K_STEPS;
150 |         n_digits = (int64_t)ceil(pow(10.0, K));
151 |         n = (limb_t)ceil(n_digits * BITS_PER_DIGIT / LIMB_BITS); /* operand size in limbs */
152 |         prec = n * LIMB_BITS;
153 |         fft_len_log2 = bf_get_fft_size(&dpl, &nb_mods, 2 * n);
154 |         printf("%5.1f %5s %5s",
155 |                K,
156 |                get_si_prefix(buf1, sizeof(buf1), prec),
157 |                get_si_prefix(buf2, sizeof(buf2),
158 |                              (int64_t)ceil(prec / BITS_PER_DIGIT)));
159 |         if (op == BF_OP_MUL) {
160 |             printf(" %3d %3d %2d",
161 |                    fft_len_log2,
162 |                    dpl,
163 |                    nb_mods);
164 |         }
165 |         fflush(stdout);
166 |         bf_init(&bf_ctx, &A);
167 |         bf_init(&bf_ctx, &B);
168 |         bf_init(&bf_ctx, &C);
169 |         bf_resize(&A, n);
170 |         bf_resize(&B, n);
171 |         A.expn = n * LIMB_BITS;
172 |         B.expn = n * LIMB_BITS;
173 |         for(i = 0; i < n; i++) {
174 |             A.tab[i] = mp_random64(&seed);
175 |             B.tab[i] = mp_random64(&seed);
176 |         }
177 |         /* normalize: force the top bit of the most significant limb */
178 |         A.tab[n - 1] |= (limb_t)1 << (LIMB_BITS - 1);
179 |         B.tab[n - 1] |= (limb_t)1 << (LIMB_BITS - 1);
180 | 
181 |         /* one multiplication to initialize the constants */
182 |         if (fft_len_log2 <= 20) {
183 |             bf_mul(&C, &A, &B, n, BF_RNDN);
184 |             bf_set_ui(&C, 0);
185 |         }
186 |         nb_its = 1;
187 |         for(;;) { /* double the batch size until one timed batch lasts at least 100 ms */
188 |             start_time = get_clock_msec();
189 |             switch(op) {
190 |             case BF_OP_MUL:
191 |                 for(it = 0; it < nb_its; it++) {
192 |                     bf_mul(&C, &A, &B, prec, BF_RNDN);
193 |                 }
194 |                 break;
195 |             case BF_OP_DIV:
196 |                 for(it = 0; it < nb_its; it++) {
197 |                     bf_div(&C, &A, &B, prec, BF_RNDF);
198 |                 }
199 |                 break;
200 |             case BF_OP_SQRT:
201 |                 for(it = 0; it < nb_its; it++) {
202 |                     bf_sqrt(&C, &A, prec, BF_RNDF);
203 |                 }
204 |                 break;
205 |             default:
206 |                 break;
207 |             }
208 |             ti = get_clock_msec() - start_time;
209 |             if (ti >= 100)
210 |                 break;
211 |             nb_its *= 2;
212 |         }
213 |         bf_delete(&A);
214 |         bf_delete(&B);
215 |         bf_delete(&C);
216 |         tpl = (double)ti / nb_its / n * 1e6; /* ms per operation per limb, scaled to ns */
217 |         printf(" %10.3f %10.1f\n",
218 |                (double)ti / nb_its,
219 |                tpl);
220 |         if (log_scale)
221 |             fprintf(f, "%f %f\n", K, tpl);
222 |         else
223 |             fprintf(f, "%" PRId64 " %f\n", n_digits, tpl); /* n_digits is int64_t: PRId64 */
224 |         fflush(f);
225 |     }
226 |     fclose(f);
227 | }
228 | 
229 | #ifdef CONFIG_MPFR
230 | /* MPFR counterpart of the multiplication benchmark: times mpfr_mul() with MPFR_RNDZ on random operands of 10^K digits and writes one "digits ns/limb" line per size to 'filename'. Exits if 'filename' cannot be opened. */
231 | static void mpfr_mul_speed(double k_start1, double k_end1,
232 |                            const char *filename)
233 | {
234 |     int k, nb_its, it, k_end, k_start;
235 |     mpfr_t A, B, C;
236 |     limb_t n, prec;
237 |     int64_t start_time, ti, n_digits;
238 |     FILE *f;
239 |     double tpl, K;
240 |     char buf1[32], buf2[32];
241 |     gmp_randstate_t rnd_state;
242 |     gmp_randinit_mt(rnd_state);
243 |     f = fopen(filename, "wb");
244 |     if (!f) { perror(filename); exit(1); } /* fail early instead of handing NULL to fprintf()/fclose() */
245 |     printf("%5s %5s %5s %10s %10s\n", "K", "BITS", "DIGIT",
246 |            "ms", "ns/limb");
247 |     k_start = lrint(k_start1 * K_STEPS);
248 |     k_end = lrint(k_end1 * K_STEPS);
249 |     for(k = k_start; k <= k_end; k++) {
250 |         K = (double)k / K_STEPS;
251 |         n_digits = (int64_t)ceil(pow(10.0, K));
252 |         n = (limb_t)ceil(n_digits * BITS_PER_DIGIT / LIMB_BITS); /* operand size in limbs */
253 |         printf("%5.1f %5s %5s",
254 |                K,
255 |                get_si_prefix(buf1, sizeof(buf1), n * LIMB_BITS),
256 |                get_si_prefix(buf2, sizeof(buf2),
257 |                              (int64_t)ceil(n * LIMB_BITS / BITS_PER_DIGIT)));
258 |         fflush(stdout);
259 |         prec = n * LIMB_BITS;
260 |         mpfr_init2(A, prec);
261 |         mpfr_init2(B, prec);
262 |         mpfr_init2(C, prec);
263 |         mpfr_urandomb(A, rnd_state);
264 |         mpfr_urandomb(B, rnd_state);
265 |         nb_its = 1;
266 |         for(;;) { /* double the batch size until one timed batch lasts at least 100 ms */
267 |             start_time = get_clock_msec();
268 |             for(it = 0; it < nb_its; it++) {
269 |                 mpfr_mul(C, A, B, MPFR_RNDZ);
270 |             }
271 |             ti = get_clock_msec() - start_time;
272 |             if (ti >= 100)
273 |                 break;
274 |             nb_its *= 2;
275 |         }
276 |         mpfr_clear(A);
277 |         mpfr_clear(B);
278 |         mpfr_clear(C);
279 |         tpl = (double)ti / nb_its / n * 1e6; /* ms per operation per limb, scaled to ns */
280 |         printf(" %10.3f %10.1f\n",
281 |                (double)ti / nb_its,
282 |                tpl);
283 |         fprintf(f, "%" PRId64 " %f\n", n_digits, tpl); /* n_digits is int64_t: PRId64 */
284 |         fflush(f);
285 |     }
286 |     fclose(f);
287 |     gmp_randclear(rnd_state);
288 | }
289 | /* Run the LIBBF and MPFR multiplication benchmarks over [k_start, k_end], write a gnuplot script to /tmp/gnuplot.cmd and run gnuplot on it; the plot goes to the PNG 'output_filename' when given, otherwise gnuplot is told to pause. */
290 | static void mpfr_bench(double k_start, double k_end,
291 |                        const char *output_filename)
292 | {
293 |     FILE *f;
294 |     const char *name;
295 |     
296 |     printf("LIBBF:\n");
297 |     bf_op_speed(k_start, k_end, "/tmp/bf_mul.txt", 0, BF_OP_MUL);
298 |     printf("MPFR:\n");
299 |     mpfr_mul_speed(k_start, k_end, "/tmp/mpfr_mul.txt");
300 |     f = fopen("/tmp/gnuplot.cmd", "wb");
301 |     if (!f) { perror("/tmp/gnuplot.cmd"); exit(1); } /* fail early instead of handing NULL to fprintf() */
302 |     if (output_filename) {
303 |         fprintf(f, "set terminal png\n"
304 |                 "set output \"%s\"\n", 
305 |                 output_filename);
306 |     }
307 |     fprintf(f, "set xlabel \"Number of digits\"\n");
308 |     fprintf(f, "set ylabel \"ns/limb\"\n");
309 |     fprintf(f, "set logscale x 10\n");
310 |     fprintf(f, "plot ");
311 | #ifdef __AVX2__
312 |     name = "LIBBF(AVX2)";
313 | #else
314 |     name = "LIBBF";
315 | #endif
316 |     fprintf(f, "\"/tmp/bf_mul.txt\" with linespoints title \"%s\","
317 |             "\"/tmp/mpfr_mul.txt\" with linespoints title \"MPFR\"\n", name);
318 |     if (!output_filename) {
319 |         fprintf(f, "pause -1\n"); /* interactive run: keep the plot window open */
320 |     }
321 |     fclose(f);
322 |     if (system("gnuplot /tmp/gnuplot.cmd") != 0) /* report a missing or failing gnuplot instead of ignoring it */
323 |         fprintf(stderr, "gnuplot failed\n");
324 | }
325 | 
326 | #endif /* CONFIG_MPFR */
327 | /* Entry point: argv[1] selects "mpfr_bench" (only when built with CONFIG_MPFR) or an operation name; optional argv[2]/argv[3] give k_start/k_end (default 4, k_end defaults to k_start). */
328 | int main(int argc, char **argv)
329 | {
330 |     const char *cmd;
331 |     
332 |     if (argc < 2) {
333 |         printf("usage: bfbench cmd [arguments...]\n"
334 |                "cmd is:\n"
335 |                "[mul|div|sqrt] [k_start] [k_end] test function on numbers of 10^k digits\n"
336 | #ifdef CONFIG_MPFR
337 |                "mpfr_bench [k_start] [k_end] [png_file] benchmark with MPFR\n"
338 | #endif
339 |                );
340 |         exit(1);
341 |     }
342 |     bf_context_init(&bf_ctx, my_bf_realloc, NULL);
343 |     cmd = argv[1];
344 | #ifdef CONFIG_MPFR
345 |     if (!strcmp(cmd, "mpfr_bench")) {
346 |         double k_start, k_end;
347 |         const char *filename;
348 |         k_start = 4;
349 |         if (argc > 2)
350 |             k_start = strtod(argv[2], NULL);
351 |         k_end = k_start;
352 |         if (argc > 3)
353 |             k_end = strtod(argv[3], NULL);
354 |         filename = NULL;
355 |         if (argc > 4)
356 |             filename = argv[4];
357 |         mpfr_bench(k_start, k_end, filename);
358 |     } else
359 | #endif
360 |     {
361 |         double k_start, k_end;
362 |         BFOPEnum op;
363 |         op = get_op_from_str(cmd); /* exits on an unknown operation name */
364 |         k_start = 4;
365 |         if (argc > 2)
366 |             k_start = strtod(argv[2], NULL);
367 |         k_end = k_start;
368 |         if (argc > 3)
369 |             k_end = strtod(argv[3], NULL);
370 |         bf_op_speed(k_start, k_end, "/tmp/plot.txt", 1, op);
371 |     }
372 |     return 0;
373 | }
374 | 


--------------------------------------------------------------------------------
/bftest.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * Tiny arbitrary precision floating point library tests
   3 |  * 
   4 |  * Copyright (c) 2017 Fabrice Bellard
   5 |  *
   6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 |  * of this software and associated documentation files (the "Software"), to deal
   8 |  * in the Software without restriction, including without limitation the rights
   9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 |  * copies of the Software, and to permit persons to whom the Software is
  11 |  * furnished to do so, subject to the following conditions:
  12 |  *
  13 |  * The above copyright notice and this permission notice shall be included in
  14 |  * all copies or substantial portions of the Software.
  15 |  *
  16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 |  * THE SOFTWARE.
  23 |  */
  24 | #include <stdlib.h>
  25 | #include <stdio.h>
  26 | #include <inttypes.h>
  27 | #include <string.h>
  28 | #include <assert.h>
  29 | #include <math.h>
  30 | #include <getopt.h>
  31 | #include <sys/time.h>
  32 | #include <gmp.h>
  33 | #include <mpfr.h>
  34 | 
  35 | #include "libbf.h"
  36 | #include "cutils.h"
  37 | #include "softfp.h"
  38 | #include "mpdecimal.h"
  39 | 
  40 | typedef enum {
  41 |     /* low level operations */
  42 |     BF_OP_MP_SQRTREM,
  43 |     BF_OP_MP_RECIP,
  44 | 
  45 |     /* binary floating point */
  46 |     BF_OP_MUL,
  47 |     BF_OP_ADD,
  48 |     BF_OP_SUB,
  49 |     BF_OP_RINT,
  50 |     BF_OP_ROUND,
  51 |     BF_OP_CMP_EQ,
  52 |     BF_OP_CMP_LT,
  53 |     BF_OP_CMP_LE,
  54 |     BF_OP_DIV,
  55 |     BF_OP_FMOD,
  56 |     BF_OP_REM,
  57 |     BF_OP_SQRT,
  58 |     BF_OP_OR,
  59 |     BF_OP_XOR,
  60 |     BF_OP_AND,
  61 |     BF_OP_CAN_ROUND,
  62 |     BF_OP_MUL_L2RADIX,
  63 |     BF_OP_DIV_L2RADIX,
  64 |     BF_OP_ATOF,
  65 |     BF_OP_FTOA,
  66 |     BF_OP_EXP,
  67 |     BF_OP_LOG,
  68 |     BF_OP_COS,
  69 |     BF_OP_SIN,
  70 |     BF_OP_TAN,
  71 |     BF_OP_ATAN,
  72 |     BF_OP_ATAN2,
  73 |     BF_OP_ASIN,
  74 |     BF_OP_ACOS,
  75 |     BF_OP_POW,
  76 | 
  77 |     /* decimal floating point */
  78 |     BF_OP_ADD_DEC,
  79 |     BF_OP_MUL_DEC,
  80 |     BF_OP_DIV_DEC,
  81 |     BF_OP_SQRT_DEC,
  82 |     BF_OP_FMOD_DEC,
  83 |     BF_OP_DIVREM_DEC,
  84 |     BF_OP_RINT_DEC,
  85 | 
  86 |     BF_OP_COUNT,
  87 | } MPFTestOPEnum;
  88 | 
  89 | const char *op_str[BF_OP_COUNT] = {
  90 |     "mp_sqrtrem",
  91 |     "mp_recip",
  92 |     "mul",
  93 |     "add",
  94 |     "sub",
  95 |     "rint",
  96 |     "round",
  97 |     "cmp_eq",
  98 |     "cmp_lt",
  99 |     "cmp_le",
 100 |     "div",
 101 |     "fmod",
 102 |     "rem",
 103 |     "sqrt",
 104 |     "or",
 105 |     "xor",
 106 |     "and",
 107 |     "can_round",
 108 |     "mul_l2radix",
 109 |     "div_l2radix",
 110 |     "atof",
 111 |     "ftoa",
 112 |     "exp",
 113 |     "log",
 114 |     "cos",
 115 |     "sin",
 116 |     "tan",
 117 |     "atan",
 118 |     "atan2",
 119 |     "asin",
 120 |     "acos",
 121 |     "pow",
 122 | 
 123 |     "add_dec",
 124 |     "mul_dec",
 125 |     "div_dec",
 126 |     "sqrt_dec",
 127 |     "fmod_dec",
 128 |     "divrem_dec",
 129 |     "rint_dec",
 130 | };
 131 | 
 132 | const char *rnd_str[7] = {
 133 |     "N",
 134 |     "Z",
 135 |     "D",
 136 |     "U",
 137 |     "NA",
 138 |     "A",
 139 |     "F",
 140 | };
 141 | 
 142 | #define SPECIAL_COUNT 7
 143 | 
 144 | static bf_context_t bf_ctx;
 145 | /* Allocation callback in the bf_context_t callback shape: 'opaque' is unused and the request is forwarded to realloc(). */
 146 | static void *my_bf_realloc(void *opaque, void *ptr, size_t size)
 147 | {
 148 |     return realloc(ptr, size);
 149 | }
 150 | /* Compare two limb arrays as unsigned integers, highest index first; an index past an array's length reads as 0. Returns -1, 0 or 1. */
 151 | int mp_cmp(const limb_t *taba, size_t na, const limb_t *tabb, size_t nb)
 152 | {
 153 |     slimb_t n, i;
 154 |     limb_t a, b;
 155 |     
 156 |     n = na;
 157 |     if (nb > n)
 158 |         n = nb;
 159 |     for(i = n - 1; i >= 0; i--) { /* n = max(na, nb): walk from the top limb down */
 160 |         if (i < na)
 161 |             a = taba[i];
 162 |         else
 163 |             a = 0;
 164 |         if (i < nb)
 165 |             b = tabb[i];
 166 |         else
 167 |             b = 0;
 168 |         if (a != b) { /* first differing limb decides the result */
 169 |             if (a < b)
 170 |                 return -1;
 171 |             else
 172 |                 return 1;
 173 |         }
 174 |     }
 175 |     return 0;
 176 | }
 177 | 
 178 | static void set_special(bf_t *a, int idx)
 179 | {
 180 |     switch(idx) {
 181 |     case 0:
 182 |         bf_set_zero(a, 0);
 183 |         break;
 184 |     case 1:
 185 |         bf_set_zero(a, 1); /* -0 */
 186 |         break;
 187 |     case 2:
 188 |         bf_set_inf(a, 0);
 189 |         break;
 190 |     case 3:
 191 |         bf_set_inf(a, 1);
 192 |         break;
 193 |     case 4:
 194 |         bf_set_si(a, 1);
 195 |         break;
 196 |     case 5:
 197 |         bf_set_si(a, -1);
 198 |         break;
 199 |     case 6:
 200 |         bf_set_nan(a);
 201 |         break;
 202 |     default:
 203 |         abort();
 204 |     }
 205 | }
 206 | 
 207 | static void set_special_dec(bfdec_t *a, int idx)
 208 | {
 209 |     switch(idx) {
 210 |     case 0:
 211 |         bfdec_set_zero(a, 0);
 212 |         break;
 213 |     case 1:
 214 |         bfdec_set_zero(a, 1); /* -0 */
 215 |         break;
 216 |     case 2:
 217 |         bfdec_set_inf(a, 0);
 218 |         break;
 219 |     case 3:
 220 |         bfdec_set_inf(a, 1);
 221 |         break;
 222 |     case 4:
 223 |         bfdec_set_si(a, 1);
 224 |         break;
 225 |     case 5:
 226 |         bfdec_set_si(a, -1);
 227 |         break;
 228 |     case 6:
 229 |         bfdec_set_nan(a);
 230 |         break;
 231 |     default:
 232 |         abort();
 233 |     }
 234 | }
 235 | 
 236 | typedef struct mp_randstate_t {
 237 |     uint64_t val;
 238 | } mp_randstate_t;
 239 | /* Seed the generator: 'seed' becomes the initial state value. */
 240 | void mp_randinit(mp_randstate_t *state, uint64_t seed)
 241 | {
 242 |     state->val = seed;
 243 | }
 244 | /* Advance the 64-bit state by one multiply-and-add step and return it with its two 32-bit halves swapped. */
 245 | static inline uint64_t mp_random64(mp_randstate_t *s)
 246 | {
 247 |     s->val = s->val * 6364136223846793005 + 1;
 248 |     /* avoid bad modulo properties 
 249 |        XXX: use a Mersenne Twister generator */
 250 |     return (s->val << 32) | (s->val >> 32);
 251 | }
 252 | 
 253 | /* random number between 0 and 1 with large sequences of identical bits */
 254 | static void mp_rrandom(limb_t *tab, limb_t prec, mp_randstate_t *state)
 255 | {
 256 |     slimb_t n, max_run_len, cur_len, j, len, bit_index, nb_bits;
 257 |     int cur_state, m;
 258 |     
 259 |     n = (prec + LIMB_BITS - 1) / LIMB_BITS; /* limbs needed to hold prec bits */
 260 |     /* same idea as GMP. It would be probably better to use a non
 261 |        uniform law */
 262 |     m = mp_random64(state) % 4 + 1;
 263 |     max_run_len = bf_max(prec / m, 1);
 264 |     cur_state = mp_random64(state) & 1; /* 1: the current run is made of one bits */
 265 |     cur_len = mp_random64(state) % max_run_len + 1;
 266 |     nb_bits = n * LIMB_BITS;
 267 |     
 268 |     memset(tab, 0, sizeof(limb_t) * n);
 269 |     bit_index = nb_bits - prec; /* the prec bits occupy the top of the array; the low padding bits stay zero */
 270 |     while (bit_index < nb_bits) {
 271 |         len = bf_min(cur_len, nb_bits - bit_index);
 272 |         if (cur_state) {
 273 |             /* XXX: inefficient */
 274 |             for(j = 0; j < len; j++) {
 275 |                 tab[bit_index >> LIMB_LOG2_BITS] |= (limb_t)1 << (bit_index & (LIMB_BITS - 1));
 276 |                 bit_index++;
 277 |             }
 278 |         }
 279 |         bit_index += len; /* NOTE(review): when cur_state is set the loop above already advanced bit_index, so an extra span of len bits is skipped and left zero - confirm this is intended */
 280 |         cur_len -= len;
 281 |         if (cur_len == 0) {
 282 |             cur_len = mp_random64(state) % max_run_len + 1;
 283 |             cur_state ^= 1;
 284 |         }
 285 |     }
 286 | }
 287 | /* Resize 'a' to hold 'prec' bits, fill its limbs with mp_rrandom(), set sign and exponent to 0, then normalize and round to 'prec' bits with BF_RNDZ. */
 288 | static void bf_rrandom(bf_t *a, limb_t prec, mp_randstate_t *state)
 289 | {
 290 |     slimb_t n;
 291 |     
 292 |     n = (prec + LIMB_BITS - 1) / LIMB_BITS;
 293 |     bf_resize(a, n);
 294 |     mp_rrandom(a->tab, prec, state);
 295 |     a->sign = 0;
 296 |     a->expn = 0;
 297 |     bf_normalize_and_round(a, prec, BF_RNDZ);
 298 | }
 299 | /* Like bf_rrandom(), but with a random precision in 1..2*prec and a random sign. */
 300 | static void bf_rrandom_large(bf_t *a, limb_t prec, mp_randstate_t *s)
 301 | {
 302 |     limb_t prec1;
 303 |     prec1 = mp_random64(s) % (2 * prec) + 1;
 304 |     bf_rrandom(a, prec1, s);
 305 |     a->sign = mp_random64(s) & 1;
 306 | }
 307 | 
 308 | /* random number between 0 and 1 with large sequences of zeros, nines or
 309 |    random digits */
 310 | static void bfdec_rrandom(bfdec_t *a, limb_t prec, mp_randstate_t *state)
 311 | {
 312 |     slimb_t n, max_run_len, cur_len, j, len, digit_index, nb_digits;
 313 |     int cur_state, m;
 314 |     
 315 |     n = (prec + LIMB_DIGITS - 1) / LIMB_DIGITS; /* limbs needed to hold prec decimal digits */
 316 |     bfdec_resize(a, n);
 317 |     
 318 |     /* same idea as GMP. It would be probably better to use a non
 319 |        uniform law */
 320 |     m = mp_random64(state) % 4 + 1;
 321 |     max_run_len = bf_max(prec / m, 1);
 322 |     cur_state = mp_random64(state) % 3; /* 0: zeros, 1: nines, 2: random digits */
 323 |     cur_len = mp_random64(state) % max_run_len + 1;
 324 |     nb_digits = n * LIMB_DIGITS;
 325 |     
 326 |     memset(a->tab, 0, sizeof(limb_t) * n);
 327 |     digit_index = nb_digits - prec; /* the prec digits occupy the top of the array; the low padding digits stay zero */
 328 |     while (digit_index < nb_digits) {
 329 |         len = bf_min(cur_len, nb_digits - digit_index);
 330 |         switch(cur_state) {
 331 |         case 0:
 332 |              /* zeros */
 333 |             break;
 334 |         case 1:
 335 |             /* nines */
 336 |             for(j = 0; j < len; j++) {
 337 |                 a->tab[digit_index / LIMB_DIGITS] +=
 338 |                     9 * mp_pow_dec[digit_index % LIMB_DIGITS];
 339 |                 digit_index++;
 340 |             }
 341 |             break;
 342 |         case 2:
 343 |             /* random */
 344 |             for(j = 0; j < len; j++) {
 345 |                 a->tab[digit_index / LIMB_DIGITS] +=
 346 |                     (mp_random64(state) % 10) *
 347 |                     mp_pow_dec[digit_index % LIMB_DIGITS];
 348 |                 digit_index++;
 349 |             }
 350 |             break;
 351 |         }
 352 |         digit_index += len; /* NOTE(review): cases 1 and 2 already advanced digit_index, so an extra span of len digits is skipped and left zero - confirm this is intended */
 353 |         cur_len -= len;
 354 |         if (cur_len == 0) {
 355 |             cur_len = mp_random64(state) % max_run_len + 1;
 356 |             cur_state ^= 1; /* NOTE(review): toggles 0<->1 and 2<->3; state 3 matches no case above, so it produces zeros - confirm */
 357 |         }
 358 |     }
 359 |     a->sign = 0;
 360 |     a->expn = 0;
 361 |     bfdec_normalize_and_round(a, prec, BF_RNDZ);
 362 | }
 363 | /* Like bfdec_rrandom(), but with a random precision in 1..2*prec digits and a random sign. */
 364 | static void bfdec_rrandom_large(bfdec_t *a, limb_t prec, mp_randstate_t *s)
 365 | {
 366 |     limb_t prec1;
 367 |     
 368 |     prec1 = mp_random64(s) % (2 * prec) + 1;
 369 |     bfdec_rrandom(a, prec1, s);
 370 |     a->sign = mp_random64(s) & 1;
 371 | }
 372 | 
 373 | /* random integer with 0 to prec bits and a random sign */
 374 | static void bf_rrandom_int(bf_t *a, limb_t prec, mp_randstate_t *rnd_state)
 375 | {
 376 |     limb_t prec1;
 377 |     prec1 = mp_random64(rnd_state) % prec + 1; /* bit count drawn from 1..prec */
 378 |     bf_rrandom(a, prec1, rnd_state);
 379 |     if (a->expn != BF_EXP_ZERO)
 380 |         a->expn += prec1; /* multiply the prec1-bit value by 2^prec1; a zero result is left untouched */
 381 |     a->sign = mp_random64(rnd_state) & 1;
 382 | }
 383 | 
 384 | /* random integer of 'len' bits (0 < len < 64) with long sequences of '0' and '1' */
 385 | uint64_t rrandom_u(int len, mp_randstate_t *s)
 386 | {
 387 |     int bit, pos, n, end;
 388 |     uint64_t a;
 389 |     
 390 |     bit = mp_random64(s) & 1; /* value of the first run */
 391 |     pos = 0;
 392 |     a = 0;
 393 |     for(;;) {
 394 |         n = (mp_random64(s) % len) + 1; /* run length in 1..len */
 395 |         end = pos + n;
 396 |         if (end > len)
 397 |             end = len; /* clamp the last run to the requested width */
 398 |         if (bit) {
 399 |             n = end - pos;
 400 |             a |= (((uint64_t)1 << n) - 1) << pos; /* widen before shifting: n can exceed the width of int */
 401 |         }
 402 |         if (end >= len)
 403 |             break;
 404 |         pos = end;
 405 |         bit ^= 1; /* runs alternate between zeros and ones */
 406 |     }
 407 |     return a;
 408 | }
 409 | 
 410 | #define F64_MANT_SIZE 52
 411 | #define F64_EXP_MASK ((1 << 11) - 1)
 412 | /* Build a random 64-bit float bit pattern: sign in bit 63, 11-bit exponent field above the 52-bit mantissa taken from rrandom_u(). */
 413 | uint64_t rrandom_sf64(mp_randstate_t *s)
 414 | {
 415 |     uint32_t a_exp, a_sign;
 416 |     uint64_t a_mant;
 417 |     a_sign = mp_random64(s) & 1;
 418 | 
 419 |     /* generate exponent close to the min/max more often than random */
 420 |     switch(mp_random64(s) & 15) {
 421 |     case 0:
 422 |         a_exp = (mp_random64(s) % (2 * F64_MANT_SIZE)) & F64_EXP_MASK; /* within 2*F64_MANT_SIZE of the smallest field value */
 423 |         break;
 424 |     case 1:
 425 |         a_exp = (F64_EXP_MASK - (mp_random64(s) % (2 * F64_MANT_SIZE))) & F64_EXP_MASK; /* within 2*F64_MANT_SIZE of the largest field value */
 426 |         break;
 427 |     default:
 428 |         a_exp = mp_random64(s) & F64_EXP_MASK;
 429 |         break;
 430 |     }
 431 |     a_mant = rrandom_u(F64_MANT_SIZE, s);
 432 |     return ((uint64_t)a_sign << 63) | ((uint64_t)a_exp << F64_MANT_SIZE) | a_mant;
 433 | }
 434 | /* Return the time reported by gettimeofday() as a count of whole milliseconds. */
 435 | static int64_t get_clock_msec(void)
 436 | {
 437 |     struct timeval tv;
 438 |     gettimeofday(&tv, NULL);
 439 |     return tv.tv_sec * 1000LL + (tv.tv_usec / 1000); /* seconds and microseconds both scaled to ms */
 440 | }
 441 | /* Read a 64-bit counter with the "rdtsc" instruction (inline asm, so this only builds for targets that provide it) and join its two 32-bit halves. */
 442 | static inline uint64_t get_cycles(void)
 443 | {
 444 |     uint32_t low,high;
 445 |     uint64_t val;
 446 |     asm volatile("rdtsc" : "=a" (low), "=d" (high)); /* low half via the "a" constraint, high half via "d" */
 447 |     val = high;
 448 |     val <<= 32;
 449 |     val |= low;
 450 |     return val;
 451 | }
 452 | /* Translate a bf_rnd_t value into the MPFR rounding mode at the same position in a lookup table. */
 453 | static mpfr_rnd_t mpfr_get_rnd_mode(bf_rnd_t rnd_mode)
 454 | {
 455 |     const mpfr_rnd_t rnd_mode_tab[] = {
 456 |         MPFR_RNDN,
 457 |         MPFR_RNDZ,
 458 |         MPFR_RNDD,
 459 |         MPFR_RNDU,
 460 |         MPFR_RNDNA,
 461 |         MPFR_RNDA,
 462 |     };
 463 |     return rnd_mode_tab[rnd_mode]; /* NOTE(review): 6 entries and no range check, while rnd_str[] lists a 7th mode ("F") - presumably never passed here; confirm against callers */
 464 | }
 465 | 
 466 | static void mpfr_to_bf(bf_t *r1, mpfr_t r)
 467 | {
 468 |     char *str;
 469 |     mpfr_asprintf(&str, "%Ra", r);
 470 |     //    printf("mpfr r=%s\n", str);
 471 |     assert(bf_atof(r1, str, NULL, 16, BF_PREC_INF, BF_RNDZ) == 0);
 472 |     mpfr_free_str(str);
 473 | }
 474 | 
 475 | static void bf_to_mpfr(mpfr_t a, const bf_t *a1)
 476 | {
 477 |     char *str;
 478 |     //    bf_print_str("a", a1);
 479 |     str = bf_ftoa(NULL, a1, 16, BF_PREC_INF, BF_RNDZ | BF_FTOA_FORMAT_FREE |
 480 |                   BF_FTOA_ADD_PREFIX);
 481 |     //    printf("mpfr a=%s\n", str);
 482 |     mpfr_set_str(a, str, 0, MPFR_RNDZ);
 483 |     free(str);
 484 | }
 485 | 
 486 | void mpfr_exec_init(void)
 487 | {
 488 |     slimb_t e_max, e_min;
 489 |     e_max = (limb_t)1 << (BF_EXP_BITS_MAX - 1);
 490 |     e_min = -e_max + 3;
 491 |     mpfr_set_emin(e_min);
 492 |     mpfr_set_emax(e_max);
 493 | }
 494 | 
 495 | int mpfr_exec_op(MPFTestOPEnum op, bf_t *r1, bf_t *a1, bf_t *b1,
 496 |                  int64_t prec, int rnd_mode1, int64_t *pcycles)
 497 | {
 498 |     mpfr_t a, b, r;
 499 |     mpfr_rnd_t rnd_mode;
 500 |     int ret, mpfr_ret;
 501 |     
 502 |     mpfr_init2(a, bf_max(a1->len, 1) * LIMB_BITS);
 503 |     mpfr_init2(b, bf_max(b1->len, 1) * LIMB_BITS);
 504 |     if (op == BF_OP_RINT) {
 505 |         /* infinite precision for rint */
 506 |         mpfr_init2(r, bf_max(a1->len, 1) * LIMB_BITS);
 507 |     } else {
 508 |         mpfr_init2(r, prec);
 509 |     }
 510 | 
 511 |     bf_to_mpfr(a, a1);
 512 |     bf_to_mpfr(b, b1);
 513 |     
 514 |     rnd_mode = mpfr_get_rnd_mode(rnd_mode1);
 515 | 
 516 |     ret = 0;
 517 |     mpfr_ret = 0;
 518 |     *pcycles -= get_cycles();
 519 |     switch(op) {
 520 |     case BF_OP_MUL:
 521 |         mpfr_ret = mpfr_mul(r, a, b, rnd_mode);
 522 |         break;
 523 |     case BF_OP_ADD:
 524 |         mpfr_ret = mpfr_add(r, a, b, rnd_mode);
 525 |         break;
 526 |     case BF_OP_SUB:
 527 |         mpfr_ret = mpfr_sub(r, a, b, rnd_mode);
 528 |         break;
 529 |     case BF_OP_RINT:
 530 |         mpfr_ret = mpfr_rint(r, a, rnd_mode);
 531 |         break;
 532 |     case BF_OP_ROUND:
 533 |         mpfr_ret = mpfr_set(r, a, rnd_mode);
 534 |         break;
 535 |     case BF_OP_CMP_EQ:
 536 |         ret = mpfr_equal_p(a, b);
 537 |         break;
 538 |     case BF_OP_CMP_LT:
 539 |         ret = mpfr_less_p(a, b);
 540 |         break;
 541 |     case BF_OP_CMP_LE:
 542 |         ret = mpfr_lessequal_p(a, b);
 543 |         break;
 544 |     case BF_OP_DIV:
 545 |         mpfr_ret = mpfr_div(r, a, b, rnd_mode);
 546 |         break;
 547 |     case BF_OP_FMOD:
 548 |         mpfr_ret = mpfr_fmod(r, a, b, rnd_mode);
 549 |         break;
 550 |     case BF_OP_REM:
 551 |         mpfr_ret = mpfr_remainder(r, a, b, rnd_mode);
 552 |         break;
 553 |     case BF_OP_SQRT:
 554 |         mpfr_ret = mpfr_sqrt(r, a, rnd_mode);
 555 |         break;
 556 |     case BF_OP_OR:
 557 |     case BF_OP_XOR:
 558 |     case BF_OP_AND:
 559 |         {
 560 |             mpz_t ai, bi;
 561 | 
 562 |             mpz_init(ai);
 563 |             mpz_init(bi);
 564 |             mpfr_get_z(ai, a, MPFR_RNDZ);
 565 |             mpfr_get_z(bi, b, MPFR_RNDZ);
 566 |             switch(op) {
 567 |             case BF_OP_OR:
 568 |                 mpz_ior(ai, ai, bi);
 569 |                 break;
 570 |             case BF_OP_XOR:
 571 |                 mpz_xor(ai, ai, bi);
 572 |                 break;
 573 |             case BF_OP_AND:
 574 |                 mpz_and(ai, ai, bi);
 575 |                 break;
 576 |             default:
 577 |                 break;
 578 |             }
 579 |             mpfr_set_z(r, ai, MPFR_RNDZ);
 580 |             mpz_clear(ai);
 581 |             mpz_clear(bi);
 582 |         }
 583 |         break;
 584 |     case BF_OP_EXP:
 585 |         mpfr_ret = mpfr_exp(r, a, rnd_mode);
 586 |         break;
 587 |     case BF_OP_LOG:
 588 |         mpfr_ret = mpfr_log(r, a, rnd_mode);
 589 |         break;
 590 |     case BF_OP_COS:
 591 |         mpfr_ret = mpfr_cos(r, a, rnd_mode);
 592 |         break;
 593 |     case BF_OP_SIN:
 594 |         mpfr_ret = mpfr_sin(r, a, rnd_mode);
 595 |         break;
 596 |     case BF_OP_TAN:
 597 |         mpfr_ret = mpfr_tan(r, a, rnd_mode);
 598 |         break;
 599 |     case BF_OP_ATAN:
 600 |         mpfr_ret = mpfr_atan(r, a, rnd_mode);
 601 |         break;
 602 |     case BF_OP_ATAN2:
 603 |         mpfr_ret = mpfr_atan2(r, a, b, rnd_mode);
 604 |         break;
 605 |     case BF_OP_ASIN:
 606 |         mpfr_ret = mpfr_asin(r, a, rnd_mode);
 607 |         break;
 608 |     case BF_OP_ACOS:
 609 |         mpfr_ret = mpfr_acos(r, a, rnd_mode);
 610 |         break;
 611 |     case BF_OP_POW:
 612 |         mpfr_ret = mpfr_pow(r, a, b, rnd_mode);
 613 |         break;
 614 |     default:
 615 |         abort();
 616 |     }
 617 |     *pcycles += get_cycles();
 618 |     if (mpfr_ret != 0)
 619 |         ret |= BF_ST_INEXACT;
 620 |     mpfr_to_bf(r1, r);
 621 |     mpfr_clear(a);
 622 |     mpfr_clear(b);
 623 |     mpfr_clear(r);
 624 |     return ret;
 625 | }
 626 | 
 627 | int mpfr_exec_setstr(bf_t *r, const char *str, int radix,
 628 |                      int64_t prec, int rnd_mode)
 629 | {
 630 |     mpfr_t r1;
 631 |     int mpfr_ret, ret;
 632 |     mpfr_init2(r1, prec);
 633 |     mpfr_ret = mpfr_strtofr(r1, str, NULL, radix, mpfr_get_rnd_mode(rnd_mode));
 634 |     ret = 0;
 635 |     if (mpfr_ret != 0)
 636 |         ret |= BF_ST_INEXACT;
 637 |     mpfr_to_bf(r, r1);
 638 |     mpfr_clear(r1);
 639 |     return ret;
 640 | }
 641 | 
 642 | static int softfp_get_rnd_mode(bf_rnd_t rnd_mode)
 643 | {
 644 |     switch(rnd_mode) {
 645 |     case BF_RNDN:
 646 |         return RM_RNE;
 647 |     case BF_RNDZ:
 648 |         return RM_RTZ;
 649 |     case BF_RNDU:
 650 |         return RM_RUP;
 651 |     case BF_RNDD:
 652 |         return RM_RDN;
 653 |     case BF_RNDNA:
 654 |         return RM_RMM;
 655 |     default:
 656 |         abort();
 657 |     }
 658 | }
 659 | 
 660 | static int softfp_set_status(uint32_t fflags)
 661 | {
 662 |     int ret = 0;
 663 |     if (fflags & FFLAG_INVALID_OP)
 664 |         ret |= BF_ST_INVALID_OP;
 665 |     if (fflags & FFLAG_DIVIDE_ZERO)
 666 |         ret |= BF_ST_DIVIDE_ZERO;
 667 |     if (fflags & FFLAG_OVERFLOW)
 668 |         ret |= BF_ST_OVERFLOW;
 669 |     if (fflags & FFLAG_UNDERFLOW)
 670 |         ret |= BF_ST_UNDERFLOW;
 671 |     if (fflags & FFLAG_INEXACT)
 672 |         ret |= BF_ST_INEXACT;
 673 |     return ret;
 674 | }
 675 | 
 676 | typedef union {
 677 |     double d;
 678 |     sfloat64 u;
 679 | } Float64Union;
 680 | 
 681 | int softfp_exec_op(MPFTestOPEnum op, bf_t *r1, bf_t *a1, bf_t *b1,
 682 |                    limb_t prec, bf_rnd_t rnd_mode, int64_t *pcycles)
 683 | {
 684 |     sfloat64 r, a, b;
 685 |     int ret = 0;
 686 |     uint32_t fflags, rm;
 687 |     Float64Union u;
 688 |     
 689 |     *pcycles -= get_cycles();
 690 |     /* Note: the inputs must already be float64 */
 691 |     bf_get_float64(a1, &u.d, BF_RNDZ);
 692 |     //    printf("ad=%a\n", u.d);
 693 |     a = u.u;
 694 |     /* Note: the inputs must already be float64 */
 695 |     bf_get_float64(b1, &u.d, BF_RNDZ);
 696 |     //    printf("bd=%a\n", u.d);
 697 |     b = u.u;
 698 |     
 699 |     rm = softfp_get_rnd_mode(rnd_mode);
 700 |     fflags = 0;
 701 |     switch(op) {
 702 |     case BF_OP_MUL:
 703 |         r = mul_sf64(a, b, rm, &fflags);
 704 |         ret = softfp_set_status(fflags);
 705 |         break;
 706 |     case BF_OP_ADD:
 707 |         r = add_sf64(a, b, rm, &fflags);
 708 |         ret = softfp_set_status(fflags);
 709 |         break;
 710 |     case BF_OP_SUB:
 711 |         r = sub_sf64(a, b, rm, &fflags);
 712 |         ret = softfp_set_status(fflags);
 713 |         break;
 714 |     case BF_OP_CMP_EQ:
 715 |         r = 0;
 716 |         ret = eq_quiet_sf64(a, b, &fflags);
 717 |         break;
 718 |     case BF_OP_CMP_LT:
 719 |         r = 0;
 720 |         ret = lt_sf64(a, b, &fflags);
 721 |         break;
 722 |     case BF_OP_CMP_LE:
 723 |         r = 0;
 724 |         ret = le_sf64(a, b, &fflags);
 725 |         break;
 726 |     case BF_OP_DIV:
 727 |         r = div_sf64(a, b, rm, &fflags);
 728 |         ret = softfp_set_status(fflags);
 729 |         break;
 730 |     case BF_OP_SQRT:
 731 |         r = sqrt_sf64(a, rm, &fflags);
 732 |         ret = softfp_set_status(fflags);
 733 |         break;
 734 |         //    case BF_OP_RINT:
 735 |         //    case BF_OP_OR:
 736 |         //    case BF_OP_XOR:
 737 |         //    case BF_OP_AND:
 738 |     default:
 739 |         abort();
 740 |     }
 741 |     /* Note: the inputs must already be float64 */
 742 |     u.u = r;
 743 |     //    printf("rd=%a\n", u.d);
 744 |     bf_set_float64(r1, u.d);
 745 |     *pcycles += get_cycles();
 746 |     return ret;
 747 | }
 748 | 
 749 | mpd_context_t mpd_ctx;
 750 | 
 751 | static void bfdec_to_mpd(mpd_t *a1, const bfdec_t *a)
 752 | {
 753 |     char *a_str;
 754 |     a_str = bfdec_ftoa(NULL, a, BF_PREC_INF, BF_RNDZ | BF_FTOA_FORMAT_FREE);
 755 |     //    printf("a_str=%s\n", a_str);
 756 |     mpd_qsetprec(&mpd_ctx, a->len * LIMB_DIGITS);
 757 |     mpd_set_string(a1, a_str, &mpd_ctx);
 758 |     free(a_str);
 759 | }
 760 | 
 761 | static void mpd_to_bfdec(bfdec_t *r, const mpd_t *r1)
 762 | {
 763 |     char *r1_str;
 764 |     r1_str = mpd_to_sci(r1, 0);
 765 |     //    printf("r1_str=%s\n", r1_str);
 766 |     bfdec_atof(r, r1_str, NULL, BF_PREC_INF, BF_RNDZ);
 767 |     //    bfdec_print_str("ref", r);
 768 |     free(r1_str);
 769 | }
 770 | 
 771 | int mpdecimal_exec_op(MPFTestOPEnum op, bfdec_t *r, bfdec_t *a, bfdec_t *b,
 772 |                       limb_t prec, bf_rnd_t rnd_mode, int64_t *pcycles)
 773 | {
 774 |     mpd_t *a1, *b1, *r1;
 775 |     uint32_t status;
 776 |     int ret;
 777 |     
 778 |     a1 = mpd_new(&mpd_ctx);
 779 |     b1 = mpd_new(&mpd_ctx);
 780 |     r1 = mpd_new(&mpd_ctx);
 781 |     
 782 |     bfdec_to_mpd(a1, a);
 783 |     bfdec_to_mpd(b1, b);
 784 |     
 785 |     mpd_qsetprec(&mpd_ctx, prec);
 786 |     
 787 |     //    printf("rnd_mode1=%d\n", rnd_mode);
 788 |     switch(rnd_mode) {
 789 |     case BF_RNDN:
 790 |         mpd_qsetround(&mpd_ctx, MPD_ROUND_HALF_EVEN);
 791 |         break;
 792 |     case BF_RNDZ:
 793 |         mpd_qsetround(&mpd_ctx, MPD_ROUND_DOWN);
 794 |         break;
 795 |     case BF_RNDU:
 796 |         mpd_qsetround(&mpd_ctx, MPD_ROUND_CEILING);
 797 |         break;
 798 |     case BF_RNDD:
 799 |         mpd_qsetround(&mpd_ctx, MPD_ROUND_FLOOR);
 800 |         break;
 801 |     case BF_RNDNA:
 802 |         mpd_qsetround(&mpd_ctx, MPD_ROUND_HALF_UP);
 803 |         break;
 804 |     case BF_RNDA:
 805 |         mpd_qsetround(&mpd_ctx, MPD_ROUND_UP);
 806 |         break;
 807 |     default:
 808 |         abort();
 809 |     }
 810 | 
 811 |     *pcycles -= get_cycles();
 812 | 
 813 |     status = 0;
 814 |     switch(op) {
 815 |     case BF_OP_ADD_DEC:
 816 |         mpd_qadd(r1, a1, b1, &mpd_ctx, &status);
 817 |         break;
 818 |     case BF_OP_MUL_DEC:
 819 |         mpd_qmul(r1, a1, b1, &mpd_ctx, &status);
 820 |         break;
 821 |     case BF_OP_DIV_DEC:
 822 |         mpd_qdiv(r1, a1, b1, &mpd_ctx, &status);
 823 |         break;
 824 |     case BF_OP_SQRT_DEC:
 825 |         mpd_qsqrt(r1, a1, &mpd_ctx, &status);
 826 |         break;
 827 |     case BF_OP_FMOD_DEC:
 828 |         mpd_qrem(r1, a1, b1, &mpd_ctx, &status);
 829 |         break;
 830 |     case BF_OP_RINT_DEC:
 831 |         mpd_qround_to_intx(r1, a1, &mpd_ctx, &status);
 832 |         break;
 833 |     default:
 834 |         abort();
 835 |     }
 836 | 
 837 |     *pcycles += get_cycles();
 838 | 
 839 |     ret = 0;
 840 |     if (status & MPD_Inexact)
 841 |         ret |= BF_ST_INEXACT;
 842 |     if (status & MPD_Overflow)
 843 |         ret |= BF_ST_OVERFLOW;
 844 |     if (status & MPD_Underflow)
 845 |         ret |= BF_ST_UNDERFLOW;
 846 |     if (status & MPD_Invalid_operation)
 847 |         ret |= BF_ST_INVALID_OP;
 848 | 
 849 |     mpd_to_bfdec(r, r1);
 850 | 
 851 |     mpd_del(a1);
 852 |     mpd_del(b1);
 853 |     mpd_del(r1);
 854 |     
 855 |     return ret;
 856 | }
 857 | 
 858 | 
 859 | int bf_exec_op(MPFTestOPEnum op, bf_t *r, bf_t *a, bf_t *b,
 860 |                limb_t prec, bf_flags_t flags, int64_t *pcycles)
 861 | {
 862 |     int ret = 0;
 863 | 
 864 |     *pcycles -= get_cycles();
 865 |     switch(op) {
 866 |     case BF_OP_MUL:
 867 |         ret = bf_mul(r, a, b, prec, flags);
 868 |         break;
 869 |     case BF_OP_ADD:
 870 |         ret = bf_add(r, a, b, prec, flags);
 871 |         break;
 872 |     case BF_OP_SUB:
 873 |         ret = bf_sub(r, a, b, prec, flags);
 874 |         break;
 875 |     case BF_OP_RINT:
 876 |         bf_set(r, a);
 877 |         ret = bf_rint(r, flags);
 878 |         break;
 879 |     case BF_OP_ROUND:
 880 |         bf_set(r, a);
 881 |         ret = bf_round(r, prec, flags);
 882 |         break;
 883 |     case BF_OP_CMP_EQ:
 884 |         ret = bf_cmp_eq(a, b);
 885 |         break;
 886 |     case BF_OP_CMP_LT:
 887 |         ret = bf_cmp_lt(a, b);
 888 |         break;
 889 |     case BF_OP_CMP_LE:
 890 |         ret = bf_cmp_le(a, b);
 891 |         break;
 892 |     case BF_OP_DIV:
 893 |         ret = bf_div(r, a, b, prec, flags);
 894 |         break;
 895 |     case BF_OP_FMOD:
 896 |         ret = bf_rem(r, a, b, prec, flags, BF_RNDZ);
 897 |         break;
 898 |     case BF_OP_REM:
 899 |         ret = bf_rem(r, a, b, prec, flags, BF_RNDN);
 900 |         break;
 901 |     case BF_OP_SQRT:
 902 |         ret = bf_sqrt(r, a, prec, flags);
 903 |         break;
 904 |     case BF_OP_OR:
 905 |         bf_logic_or(r, a, b);
 906 |         break;
 907 |     case BF_OP_XOR:
 908 |         bf_logic_xor(r, a, b);
 909 |         break;
 910 |     case BF_OP_AND:
 911 |         bf_logic_and(r, a, b);
 912 |         break;
 913 |     case BF_OP_EXP:
 914 |         ret = bf_exp(r, a, prec, flags);
 915 |         break;
 916 |     case BF_OP_LOG:
 917 |         ret = bf_log(r, a, prec, flags);
 918 |         break;
 919 |     case BF_OP_COS:
 920 |         ret = bf_cos(r, a, prec, flags);
 921 |         break;
 922 |     case BF_OP_SIN:
 923 |         ret = bf_sin(r, a, prec, flags);
 924 |         break;
 925 |     case BF_OP_TAN:
 926 |         ret = bf_tan(r, a, prec, flags);
 927 |         break;
 928 |     case BF_OP_ATAN:
 929 |         ret = bf_atan(r, a, prec, flags);
 930 |         break;
 931 |     case BF_OP_ATAN2:
 932 |         ret = bf_atan2(r, a, b, prec, flags);
 933 |         break;
 934 |     case BF_OP_ASIN:
 935 |         ret = bf_asin(r, a, prec, flags);
 936 |         break;
 937 |     case BF_OP_ACOS:
 938 |         ret = bf_acos(r, a, prec, flags);
 939 |         break;
 940 |     case BF_OP_POW:
 941 |         ret = bf_pow(r, a, b, prec, flags);
 942 |         break;
 943 |     default:
 944 |         abort();
 945 |     }
 946 |     *pcycles += get_cycles();
 947 |     return ret;
 948 | }
 949 | 
 950 | int bfdec_exec_op(MPFTestOPEnum op, bfdec_t *r,
 951 |                   const bfdec_t *a, const bfdec_t *b,
 952 |                   limb_t prec, bf_flags_t flags, int64_t *pcycles)
 953 | {
 954 |     int ret;
 955 |     
 956 |     *pcycles -= get_cycles();
 957 |     switch(op) {
 958 |     case BF_OP_ADD_DEC:
 959 |         ret = bfdec_add(r, a, b, prec, flags);
 960 |         break;
 961 |     case BF_OP_MUL_DEC:
 962 |         ret = bfdec_mul(r, a, b, prec, flags);
 963 |         break;
 964 |     case BF_OP_DIV_DEC:
 965 |         ret = bfdec_div(r, a, b, prec, flags);
 966 |         break;
 967 |     case BF_OP_SQRT_DEC:
 968 |         ret = bfdec_sqrt(r, a, prec, flags);
 969 |         break;
 970 |     case BF_OP_FMOD_DEC:
 971 |         ret = bfdec_rem(r, a, b, prec, flags, BF_RNDZ);
 972 |         break;
 973 |     case BF_OP_RINT_DEC:
 974 |         bfdec_set(r, a);
 975 |         ret = bfdec_rint(r, flags);
 976 |         break;
 977 |     default:
 978 |         abort();
 979 |     }
 980 |     *pcycles += get_cycles();
 981 |     return ret;
 982 | }
 983 | 
 984 | void print_status(int status)
 985 | {
 986 |     printf("%c%c%c%c%c",
 987 |            (status & BF_ST_INVALID_OP) ? 'I' : '-',
 988 |            (status & BF_ST_DIVIDE_ZERO) ? 'Z' : '-',
 989 |            (status & BF_ST_OVERFLOW) ? 'O' : '-',
 990 |            (status & BF_ST_UNDERFLOW) ? 'U' : '-',
 991 |            (status & BF_ST_INEXACT) ? 'X' : '-');
 992 | }
 993 | 
 994 | static BOOL bf_is_same(const bf_t *a, const bf_t *b)
 995 | {
 996 |     return a->sign == b->sign && bf_cmpu(a, b) == 0;
 997 | }
 998 | 
 999 | void test_atof(limb_t prec, int duration_ms,
1000 |                int exp_bits, bf_rnd_t rnd_mode, int seed)
1001 | {
1002 |     DynBuf dbuf;
1003 |     int radix, it, c, e, status, ref_status, err, rnd_mode1, test_loop;
1004 |     mp_randstate_t rnd_state;
1005 |     slimb_t n_digits, prec1, i;
1006 |     char *str;
1007 |     bf_t r, r_ref;
1008 |     int64_t ti, ti_ref, nb_limbs, start_time;
1009 |     
1010 |     mp_randinit(&rnd_state, seed);
1011 | 
1012 |     bf_init(&bf_ctx, &r);
1013 |     bf_init(&bf_ctx, &r_ref);
1014 |     ti = 0;
1015 |     ti_ref = 0;
1016 |     start_time = get_clock_msec();
1017 |     test_loop = 1;
1018 |     it = 0;
1019 |     for(;;) {
1020 |         /* build a random string representing a number */
1021 |         if (mp_random64(&rnd_state) & 1)
1022 |             radix = (mp_random64(&rnd_state) % 35) + 2;
1023 |         else
1024 |             radix = 10;
1025 |         prec1 = (limb_t)ceil(prec / log2(radix));
1026 |         n_digits = mp_random64(&rnd_state) % (prec1 * 3) + 1;
1027 |         dbuf_init(&dbuf);
1028 |         if (mp_random64(&rnd_state) & 1)
1029 |             dbuf_putc(&dbuf, '-');
1030 | 
1031 |         for(i = 0; i < n_digits; i++) {
1032 |             c = mp_random64(&rnd_state) % radix;
1033 |             if (c < 10)
1034 |                 c += '0';
1035 |             else
1036 |                 c += 'a' - 10;
1037 |             dbuf_putc(&dbuf, c);
1038 |         }
1039 |         if (radix == 10)
1040 |             dbuf_putc(&dbuf, 'e');
1041 |         else
1042 |             dbuf_putc(&dbuf, '@');
1043 |         e = prec1 * 20;
1044 |         e = (mp_random64(&rnd_state) % (2 * e + 1)) - e;
1045 |         dbuf_printf(&dbuf, "%d", e);
1046 |         dbuf_putc(&dbuf, '\0');
1047 |         str = (char *)dbuf.buf;
1048 | 
1049 |         ti -= get_cycles();
1050 |         status = bf_atof(&r, str, NULL, radix, prec, rnd_mode) &
1051 |             BF_ST_INEXACT;
1052 |         ti += get_cycles();
1053 |         rnd_mode1 = rnd_mode;
1054 |         if (rnd_mode == BF_RNDF)
1055 |             rnd_mode1 = BF_RNDD;
1056 | 
1057 |         ti_ref -= get_cycles();
1058 |         ref_status = mpfr_exec_setstr(&r_ref, str, radix, prec, rnd_mode1);
1059 |         ti_ref += get_cycles();
1060 |         
1061 |         if (rnd_mode == BF_RNDF) {
1062 |             err = !bf_is_same(&r, &r_ref);
1063 |             if (err && rnd_mode == BF_RNDF) {
1064 |                 ref_status = mpfr_exec_setstr(&r_ref, str, radix, prec, BF_RNDU);
1065 |                 err = !bf_is_same(&r, &r_ref);
1066 |             }
1067 |         } else {
1068 |             err = !bf_is_same(&r, &r_ref) || status != ref_status;
1069 |         }
1070 |         
1071 |         if (err) {
1072 |             printf("\nERROR (%d):\n", it);
1073 |             printf("radix=%d\n", radix);
1074 |             printf("str=%s\n", str);
1075 |             bf_print_str("r  ", &r);
1076 |             bf_print_str("ref", &r_ref);
1077 |             printf("st    ="); print_status(status); printf("\n");
1078 |             printf("ref_st="); print_status(ref_status); printf("\n");
1079 |             exit(1);
1080 |         }
1081 |         free(str);
1082 |         it++;
1083 |         if ((it & (test_loop - 1)) == 0) {
1084 |             if ((get_clock_msec() - start_time) >= duration_ms)
1085 |                 break;
1086 |             test_loop *= 2;
1087 |         }
1088 |     }
1089 |     bf_delete(&r);
1090 |     bf_delete(&r_ref);
1091 | 
1092 |     nb_limbs = (prec + 63) / 64;
1093 |     printf(" %8u %8.1f %8.1f\n",
1094 |            it,
1095 |            (double)ti / it / nb_limbs,
1096 |            (double)ti_ref / it / nb_limbs);
1097 | }
1098 | 
1099 | void test_ftoa(limb_t prec, int duration_ms,
1100 |                int exp_bits, bf_rnd_t rnd_mode, int seed)
1101 | {
1102 |     int radix, it, e, test_loop;
1103 |     mp_randstate_t rnd_state;
1104 |     slimb_t n_digits, prec1, nb_limbs;
1105 |     char *r_str, *r_ref_str;
1106 |     bf_t a;
1107 |     int64_t ti, ti_ref, start_time;
1108 |     
1109 |     mp_randinit(&rnd_state, seed);
1110 |     bf_init(&bf_ctx, &a);
1111 |     ti_ref = 0;
1112 |     ti = 0;
1113 |     start_time = get_clock_msec();
1114 |     test_loop = 1;
1115 |     it = 0;
1116 |     for(;;) {
1117 |         /* build a random string representing a number */
1118 |         if ((mp_random64(&rnd_state) & 1) && 0)
1119 |             radix = (mp_random64(&rnd_state) % 35) + 2;
1120 |         else
1121 |             radix = 10;
1122 |         n_digits = (limb_t)ceil(prec / log2(radix));
1123 |         prec1 = mp_random64(&rnd_state) % (3 * prec) + 2;
1124 |         bf_rrandom(&a, prec1, &rnd_state);
1125 |         e = prec * 20;
1126 |         if (a.expn != BF_EXP_ZERO)
1127 |             a.expn += (mp_random64(&rnd_state) % (2 * e + 1)) - e;
1128 |         ti -= get_cycles();
1129 |         r_str = bf_ftoa(NULL, &a, radix, n_digits, rnd_mode |
1130 |                         BF_FTOA_FORMAT_FIXED | BF_FTOA_FORCE_EXP);
1131 |         ti += get_cycles();
1132 |         {
1133 |             mpfr_t a1;
1134 |             mpfr_exp_t expn;
1135 |             DynBuf s_s, *s = &s_s;
1136 |             char *str, *p;
1137 |             slimb_t i;
1138 |             BOOL is_zero;
1139 |             
1140 |             mpfr_init2(a1, bf_max(a.len, 1) * LIMB_BITS);
1141 |             bf_to_mpfr(a1, &a);
1142 |             ti_ref -= get_cycles();
1143 |             str = mpfr_get_str(NULL, &expn, radix, n_digits, a1,
1144 |                                mpfr_get_rnd_mode(rnd_mode));
1145 |             ti_ref += get_cycles();
1146 |             /* add the decimal point and exponent */
1147 |             is_zero = TRUE;
1148 |             for(i = 0; i < n_digits; i++) {
1149 |                 if (str[i] != '0') {
1150 |                     is_zero = FALSE;
1151 |                     break;
1152 |                 }
1153 |             }
1154 |             dbuf_init(s);
1155 |             p = str;
1156 |             if (*p == '-')
1157 |                 dbuf_putc(s, *p++);
1158 |             dbuf_putc(s, *p++);
1159 |             if (n_digits > 1) {
1160 |                 dbuf_putc(s, '.');
1161 |                 for(i = 1; i < n_digits; i++) {
1162 |                     dbuf_putc(s, *p++);
1163 |                 }
1164 |             }
1165 |             if (!is_zero)
1166 |                 expn--;
1167 |             if ((radix & (radix - 1)) == 0 && radix <= 16) {
1168 |                 int radix_bits = 1;
1169 |                 while ((1 << radix_bits) != radix)
1170 |                     radix_bits++;
1171 |                 dbuf_printf(s, "p%" PRId64 , (int64_t)(expn * radix_bits));
1172 |             } else {
1173 |                 dbuf_printf(s, "%c%" PRId64 , radix <= 10 ? 'e' : '@', (int64_t)expn);
1174 |             }
1175 |             dbuf_putc(s, '\0');
1176 |             
1177 |             r_ref_str = (char *)s->buf;
1178 |             mpfr_clear(a1);
1179 |             mpfr_free_str(str);
1180 |         }
1181 |         
1182 |         if (strcmp(r_ref_str, r_str) != 0) {
1183 |             printf("\nERROR (%d):\n", it);
1184 |             printf("radix=%d\n", radix);
1185 |             bf_print_str("a  ", &a);
1186 |             printf("r  =%s\n", r_str);
1187 |             printf("ref=%s\n", r_ref_str);
1188 |             exit(1);
1189 |         }
1190 |         free(r_str);
1191 |         free(r_ref_str);
1192 |         it++;
1193 |         if ((it & (test_loop - 1)) == 0) {
1194 |             if ((get_clock_msec() - start_time) >= duration_ms)
1195 |                 break;
1196 |             test_loop *= 2;
1197 |         }
1198 |     }
1199 |     bf_delete(&a);
1200 | 
1201 |     nb_limbs = (prec + 63) / 64;
1202 |     printf(" %8u %8.1f %8.1f\n",
1203 |            it,
1204 |            (double)ti / it / nb_limbs,
1205 |            (double)ti_ref / it / nb_limbs);
1206 | }
1207 | 
1208 | void test_can_round(limb_t prec, int duration_ms, bf_rnd_t rnd_mode, int seed)
1209 | {
1210 |     mp_randstate_t rnd_state;
1211 |     bf_t a, b, a_rounded, c;
1212 |     limb_t prec1, k;
1213 |     int res, it, i, res1, test_loop;
1214 |     int64_t start_time;
1215 |     
1216 |     mp_randinit(&rnd_state, seed);
1217 |     bf_init(&bf_ctx, &a);
1218 |     bf_init(&bf_ctx, &a_rounded);
1219 |     bf_init(&bf_ctx, &b);
1220 |     bf_init(&bf_ctx, &c);
1221 |     start_time = get_clock_msec();
1222 |     test_loop = 1;
1223 |     it = 0;
1224 |     for(;;) {
1225 |         prec1 = mp_random64(&rnd_state) % (3 * prec) + 2;
1226 |         bf_rrandom(&a, prec1, &rnd_state);
1227 |         a.sign = mp_random64(&rnd_state) & 1;
1228 | 
1229 |         k = prec + (mp_random64(&rnd_state) % 10);
1230 |         bf_set(&a_rounded, &a);
1231 |         bf_round(&a_rounded, prec, rnd_mode);
1232 |         res = bf_can_round(&a, prec, rnd_mode, k);
1233 |         if (res) {
1234 |             for(i = 0; i < 100; i++) {
1235 |                 bf_rrandom(&c, prec1, &rnd_state);
1236 |                 c.sign = mp_random64(&rnd_state) & 1;
1237 |                 if (c.expn != BF_EXP_ZERO)
1238 |                     c.expn += a.expn - k;
1239 |                 
1240 |                 bf_add(&b, &a, &c, BF_PREC_INF, BF_RNDZ);
1241 |                 bf_round(&b, prec, rnd_mode);
1242 |                 res1 = !bf_is_same(&b, &a_rounded);
1243 |                 if (res1) {
1244 |                     printf("\nERROR (%d):\n", it);
1245 |                     printf("k=%" PRId64 "\n", (int64_t)k);
1246 |                     bf_print_str("a    ", &a);
1247 |                     bf_print_str("a_rnd", &a_rounded);
1248 |                     bf_print_str("e    ", &c);
1249 |                     bf_print_str("b    ", &b);
1250 |                     exit(1);
1251 |                 }
1252 |             }
1253 |         }
1254 |         it++;
1255 |         if ((it & (test_loop - 1)) == 0) {
1256 |             if ((get_clock_msec() - start_time) >= duration_ms)
1257 |                 break;
1258 |             test_loop *= 2;
1259 |         }
1260 |     }
1261 |     bf_delete(&a);
1262 |     bf_delete(&a_rounded);
1263 |     bf_delete(&b);
1264 |     bf_delete(&c);
1265 |     printf(" %8u\n", it);
1266 | }
1267 | 
1268 | void test_mul_log2(int duration_ms, BOOL is_inv, BOOL is_ceil, int seed)
1269 | {
1270 |     mp_randstate_t rnd_state;
1271 |     int it, radix, err, test_loop;
1272 |     slimb_t a, v_max, r, r_ref, prec, d;
1273 |     mpfr_t a1, log2_radix[BF_RADIX_MAX - 1];
1274 |     int64_t start_time;
1275 |     
1276 |     mp_randinit(&rnd_state, seed);
1277 |     prec = 256;
1278 |     mpfr_init2(a1, prec);
1279 | 
1280 |     for(radix = 2; radix <= BF_RADIX_MAX; radix++) {
1281 |         mpfr_init2(log2_radix[radix - 2], prec);
1282 |         mpfr_set_ui(a1, radix, MPFR_RNDN);
1283 |         mpfr_log2(log2_radix[radix - 2], a1, MPFR_RNDN);
1284 |     }
1285 | 
1286 |     if (is_inv)
1287 |         v_max = BF_PREC_MAX;
1288 |     else
1289 |         v_max = BF_PREC_MAX / 6;
1290 |     start_time = get_clock_msec();
1291 |     test_loop = 1;
1292 |     it = 0;
1293 |     for(;;) {
1294 |         for(radix = 2; radix <= BF_RADIX_MAX; radix++) {
1295 |             a = (mp_random64(&rnd_state) % (2 * v_max + 1)) - v_max;
1296 |             r = bf_mul_log2_radix(a, radix, is_inv, is_ceil);
1297 |             
1298 |             mpfr_set_si(a1, a, MPFR_RNDN);
1299 |             if (is_inv)
1300 |                 mpfr_div(a1, a1, log2_radix[radix - 2], MPFR_RNDN);
1301 |             else
1302 |                 mpfr_mul(a1, a1, log2_radix[radix - 2], MPFR_RNDN);
1303 |             if (is_ceil)
1304 |                 mpfr_ceil(a1, a1);
1305 |             else
1306 |                 mpfr_floor(a1, a1);
1307 |             r_ref = mpfr_get_si(a1, MPFR_RNDN);
1308 |             if (is_inv) {
1309 |                 err = (r != r_ref);
1310 |             } else {
1311 |                 d = r - r_ref;
1312 |                 err = (d > 1 || d < -1);
1313 |             }
1314 |             if (err) {
1315 |                 printf("\nERROR (%d):\n", it);
1316 |                 printf("a=%" PRId64 " radix=%d inv=%d ceil=%d res=%" PRId64 " ref=%" PRId64 "\n",
1317 |                        (int64_t)a, radix, is_inv, is_ceil,
1318 |                        (int64_t)r, (int64_t)r_ref);
1319 |                 exit(1);
1320 |             }
1321 |         }
1322 |         it++;
1323 |         if ((it & (test_loop - 1)) == 0) {
1324 |             if ((get_clock_msec() - start_time) >= duration_ms)
1325 |                 break;
1326 |             test_loop *= 2;
1327 |         }
1328 |     }
1329 | 
1330 |     for(radix = 2; radix <= BF_RADIX_MAX; radix++)
1331 |         mpfr_clear(log2_radix[radix - 2]);
1332 |     mpfr_clear(a1);
1333 |     printf(" %8u\n", it);
1334 | }
1335 | 
/*
 * Randomized test of one decimal (bfdec) operation for one rounding mode.
 *
 * op          operation to exercise
 * rprec       precision passed to the operation; 0 selects a new random
 *             precision in [24, 1023] at every iteration
 * duration_ms minimum wall-clock duration of the run
 * exp_bits    exponent size, folded into the flags with bf_set_exp_bits()
 * rnd_mode    rounding mode under test
 * seed        seed of the pseudo-random operand generator
 *
 * BF_OP_DIVREM_DEC is self-checked (q * b + r must equal a); every other
 * operation is compared against mpdecimal_exec_op(). The first mismatch is
 * printed and the process exits with status 1. On success one line with the
 * iteration count and the two accumulated timings (divided by the iteration
 * count and by nb_limbs) is printed.
 */
void test_op_rm_dec(MPFTestOPEnum op, limb_t rprec, int duration_ms,
                    int exp_bits, bf_rnd_t rnd_mode, int seed)
{
    bfdec_t a, b, r, r_ref;
    uint32_t status, ref_status;
    int op_count, test_loop, it;
    int  nb_limbs;
    int64_t ti, ti_ref;
    mp_randstate_t rnd_state;
    BOOL res;
    bf_rnd_t rnd_mode1;
    bf_flags_t bf_flags;
    int64_t start_time;
    limb_t prec;
    
    bf_flags = rnd_mode | bf_set_exp_bits(exp_bits);
    
    mp_randinit(&rnd_state, seed);
    bfdec_init(&bf_ctx, &a);
    bfdec_init(&bf_ctx, &b);
    bfdec_init(&bf_ctx, &r);
    bfdec_init(&bf_ctx, &r_ref);
    bfdec_set_ui(&b, 0);
    bfdec_set_ui(&r, 0);
    bfdec_set_ui(&r_ref, 0);

    /* ti / ti_ref are accumulated by the *_exec_op() helpers
       (NOTE(review): presumably CPU cycles - confirm in the helpers) */
    ti = 0;
    ti_ref = 0;
    start_time = get_clock_msec();
    test_loop = 1;
    it = 0;
    for(;;) {
        if (rprec == 0) {
            prec = (mp_random64(&rnd_state) % 1000) + 24;
        } else {
            prec = rprec;
        }
        /* number of input operands of the operation */
        switch(op) {
        case BF_OP_RINT_DEC:
        case BF_OP_SQRT_DEC:
            op_count = 1;
            break;
        default:
            op_count = 2;
            break;
        }
        if (op_count == 1) {
            /* the first SPECIAL_COUNT iterations use the special values */
            if (it < SPECIAL_COUNT) {
                set_special_dec(&a, it);
            } else {
                limb_t prec1;
                
                prec1 = mp_random64(&rnd_state) % (3 * prec) + 1;
                bfdec_rrandom(&a, prec1, &rnd_state);
                if (a.expn != BF_EXP_ZERO)
                    a.expn += prec1 / 2;
                /* the square root is only tested with a cleared sign */
                if (op == BF_OP_SQRT_DEC) {
                    a.sign = 0;
                } else {
                    a.sign = mp_random64(&rnd_state) & 1;
                }
            }
        } else {
            /* first run through every pair of special values */
            if (it < SPECIAL_COUNT * SPECIAL_COUNT) {
                set_special_dec(&a, it % SPECIAL_COUNT);
                set_special_dec(&b, it / SPECIAL_COUNT);
            } else {
                bfdec_rrandom_large(&a, prec, &rnd_state);
                bfdec_rrandom_large(&b, prec, &rnd_state);
            }
        }

        if (op == BF_OP_DIVREM_DEC) {
            /* no reference library here: verify a == q * b + r with exact
               arithmetic when a, b and r are all finite */
            bfdec_t q, a_ref;
            bfdec_init(&bf_ctx, &q);
            bfdec_init(&bf_ctx, &a_ref);
            bfdec_divrem(&q, &r, &a, &b, BF_PREC_INF, BF_RNDZ, rnd_mode);
            if (bf_is_finite((bf_t *)&r) &&
                bf_is_finite((bf_t *)&a) &&
                bf_is_finite((bf_t *)&b)) {
                bfdec_mul(&a_ref, &q, &b, BF_PREC_INF, BF_RNDZ);
                bfdec_add(&a_ref, &a_ref, &r, BF_PREC_INF, BF_RNDZ);
                res = !bfdec_cmp_eq(&a, &a_ref);
                if (res) {
                    printf("\nERROR (%d):\n", it);
                    bfdec_print_str("a  ", &a);
                    bfdec_print_str("b  ", &b);
                    bfdec_print_str("q  ", &q);
                    bfdec_print_str("r  ", &r);
                    bfdec_print_str("a_ref", &a_ref);
                    exit(1);
                }
            }
            bfdec_delete(&q);
            bfdec_delete(&a_ref);
        } else {
            //        bfdec_print_str("a", &a);
            //        bfdec_print_str("b", &b);
            status = bfdec_exec_op(op, &r, &a, &b, prec, bf_flags, &ti);
            //        bfdec_print_str("r", &r);
            
            rnd_mode1 = rnd_mode;
            ref_status = mpdecimal_exec_op(op, &r_ref, &a, &b, prec, rnd_mode1,
                                           &ti_ref);
            
            /* comparisons are checked through the returned status only;
               other operations must give the same value and the same
               inexact flag */
            if (op == BF_OP_CMP_EQ ||
                op == BF_OP_CMP_LE ||
                op == BF_OP_CMP_LT) {
                res = (status != ref_status);
            } else {
                res = (bfdec_cmp_full(&r, &r_ref) != 0);
                if ((status & BF_ST_INEXACT) !=
                    (ref_status & BF_ST_INEXACT))
                    res = 1;
            }
            
            if (res) {
                printf("\nERROR (%d):\n", it);
                
                bfdec_print_str("a  ", &a);
                if (op_count > 1) {
                    bfdec_print_str("b  ", &b);
                }
                bfdec_print_str("r  ", &r);
                bfdec_print_str("ref", &r_ref);
                printf("st    ="); print_status(status); printf("\n");
                printf("ref_st="); print_status(ref_status); printf("\n");
                exit(1);
            }
        }

        it++;
        /* test_loop is a power of two that doubles after each poll, so the
           clock is only read when 'it' is a multiple of test_loop */
        if ((it & (test_loop - 1)) == 0) {
            if ((get_clock_msec() - start_time) >= duration_ms)
                break;
            test_loop *= 2;
        }
    }

    /* uses the precision of the last iteration */
    nb_limbs = (prec + 63) / 64;
    printf(" %8u %8.1f %8.1f\n",
           it,
           (double)ti / it / nb_limbs,
           (double)ti_ref / it / nb_limbs);

    bfdec_delete(&a);
    bfdec_delete(&b);
    bfdec_delete(&r);
    bfdec_delete(&r_ref);
}
1486 | 
1487 | static void test_mp_sqrtrem(limb_t rprec, int duration_ms, int seed)
1488 | {
1489 |     int it, test_loop;
1490 |     int64_t start_time, ti;
1491 |     limb_t *tabs, *tabr, *taba, *tabb, c;
1492 |     slimb_t n, i, n_max;
1493 |     mp_randstate_t rnd_state;
1494 | 
1495 |     n_max = rprec;
1496 |     
1497 |     mp_randinit(&rnd_state, seed);
1498 |     taba = malloc(2 * n_max * sizeof(limb_t));
1499 |     tabb = malloc(2 * n_max * sizeof(limb_t));
1500 |     tabs = malloc(n_max * sizeof(limb_t));
1501 |     tabr = malloc(2 * n_max * sizeof(limb_t));
1502 | 
1503 |     test_loop = 1;
1504 |     it = 0;
1505 |     start_time = get_clock_msec();
1506 |     ti = 0;
1507 |     for(;;) {
1508 |         n = (mp_random64(&rnd_state) % n_max) + 1;
1509 | 
1510 |         mp_rrandom(taba, 2 * n * LIMB_BITS, &rnd_state);
1511 |         taba[2 * n - 1] |= (limb_t)1 << (LIMB_BITS - 2);
1512 |         
1513 |         for(i = 0; i < n * 2; i++)
1514 |             tabr[i] = taba[i];
1515 |         ti -= get_cycles();
1516 |         mp_sqrtrem(&bf_ctx, tabs, tabr, n);
1517 |         ti += get_cycles();
1518 | 
1519 |         /* check the result */
1520 |         mp_mul(&bf_ctx, tabb, tabs, n, tabs, n);
1521 |         c = mp_add(tabb, tabb, tabr, n + 1, 0);
1522 |         c = mp_add_ui(tabb + n + 1, c, n - 1);
1523 |         if (mp_cmp(taba, n * 2, tabb, n * 2) != 0)
1524 |             goto error;
1525 |         tabb[n] = mp_add(tabb, tabs, tabs, n, 0);
1526 |         if (mp_cmp(tabr, n + 1, tabb, n + 1) > 0) {
1527 |         error:
1528 |             printf("ERROR %d\n", it);
1529 |             mp_print_str("a", taba, n * 2);
1530 |             mp_print_str("s", tabs, n);
1531 |             mp_print_str("r", tabr, n + 1);
1532 |             exit(1);
1533 |         }
1534 | 
1535 |         it++;
1536 |         if (it == test_loop) {
1537 |             if ((get_clock_msec() - start_time) >= duration_ms)
1538 |                 break;
1539 |             test_loop *= 2;
1540 |         }
1541 |     }
1542 |     printf(" %8u %8.1f\n",
1543 |            it,
1544 |            (double)ti / it / n);
1545 |     free(taba);
1546 |     free(tabb);
1547 |     free(tabr);
1548 |     free(tabs);
1549 | }
1550 | 
1551 | static void test_mp_recip(limb_t rprec, int duration_ms, int seed)
1552 | {
1553 |     int it, test_loop, incr;
1554 |     int64_t start_time, ti;
1555 |     limb_t *tabr, *taba, *tabb, *tabc;
1556 |     slimb_t n, n_max, i;
1557 |     mp_randstate_t rnd_state;
1558 | 
1559 |     n_max = rprec;
1560 |     
1561 |     mp_randinit(&rnd_state, seed);
1562 |     taba = malloc(n_max * sizeof(limb_t));
1563 |     tabb = malloc((2 * n_max + 1) * sizeof(limb_t));
1564 |     tabc = malloc((n_max + 1) * sizeof(limb_t));
1565 |     tabr = malloc((n_max + 1) * sizeof(limb_t));
1566 | 
1567 |     test_loop = 1;
1568 |     it = 0;
1569 |     start_time = get_clock_msec();
1570 |     ti = 0;
1571 |     for(;;) {
1572 |         n = (mp_random64(&rnd_state) % n_max) + 1;
1573 | 
1574 |         mp_rrandom(taba, n * LIMB_BITS, &rnd_state);
1575 |         taba[n - 1] |= (limb_t)1 << (LIMB_BITS - 1);
1576 |         
1577 |         ti -= get_cycles();
1578 |         mp_recip(&bf_ctx, tabr, taba, n);
1579 |         ti += get_cycles();
1580 | 
1581 |         /* check the result */
1582 |         mp_mul(&bf_ctx, tabb, tabr, n + 1, taba, n);
1583 |         incr = 0;
1584 |         if (tabb[2 * n] >= 1)
1585 |             goto error;
1586 | 
1587 |         for(i = 0; i < n + 1; i++)
1588 |             tabc[i] = tabr[i];
1589 |         mp_add_ui(tabc, 2, n + 1);
1590 |         mp_mul(&bf_ctx, tabb, tabc, n + 1, taba, n);
1591 | 
1592 |         incr = 2;
1593 |         if (tabb[2 * n] < 1) {
1594 |         error:
1595 |             printf("ERROR %d\n", it);
1596 |             printf("n=%d incr=%d\n", (int)n, incr);
1597 |             mp_print_str("a", taba, n);
1598 |             mp_print_str("r", tabr, n + 1);
1599 |             mp_print_str("b", tabb, 2 * n + 1);
1600 |             exit(1);
1601 |         }
1602 | 
1603 |         it++;
1604 |         if (it == test_loop) {
1605 |             if ((get_clock_msec() - start_time) >= duration_ms)
1606 |                 break;
1607 |             test_loop *= 2;
1608 |         }
1609 |     }
1610 |     printf(" %8u %8.1f\n",
1611 |            it,
1612 |            (double)ti / it / n);
1613 |     free(taba);
1614 |     free(tabb);
1615 |     free(tabr);
1616 |     free(tabc);
1617 | }
1618 | 
/*
 * Randomized test of one operation for one rounding mode.
 *
 * op          operation to exercise
 * rprec       precision passed to the operation; 0 selects a new random
 *             precision in [24, 1023] at every iteration
 * duration_ms minimum wall-clock duration of the run
 * exp_bits    exponent size, folded into the flags with bf_set_exp_bits()
 * rnd_mode    rounding mode under test
 * seed        seed of the pseudo-random operand generator
 *
 * A header describing the run is printed first. Operations that have a
 * dedicated tester (mp_* primitives, atof/ftoa, can_round, the radix
 * helpers and all decimal operations) are forwarded to it. Every other
 * operation is run on special values and then on random operands, and is
 * compared against softfp_exec_op() when rprec == 53 and exp_bits == 11,
 * or against mpfr_exec_op() otherwise. The first mismatch is printed and
 * the process exits with status 1.
 */
void test_op_rm(MPFTestOPEnum op, limb_t rprec, int duration_ms,
                int exp_bits, bf_rnd_t rnd_mode, int seed)
{
    bf_t a, b, r, r_ref;
    int op_count, status, ref_status, test_loop, it, it_perf;
    int  nb_limbs;
    int64_t ti, ti_ref, ti_dummy;
    mp_randstate_t rnd_state;
    BOOL res, use_float64_ref;
    bf_rnd_t rnd_mode1;
    bf_flags_t bf_flags;
    int64_t start_time;
    limb_t prec;
    
    /* no newline: the tester completes the line with its statistics */
    printf("%-20s %5d %3d %3s %5d", op_str[op], (int)rprec, exp_bits,
           rnd_str[rnd_mode], seed);
    fflush(stdout);
    
    /* operations with a dedicated test routine */
    switch(op) {
    case BF_OP_MP_SQRTREM:
        test_mp_sqrtrem(rprec, duration_ms, seed);
        return;
    case BF_OP_MP_RECIP:
        test_mp_recip(rprec, duration_ms, seed);
        return;
    case BF_OP_ATOF:
        test_atof(rprec, duration_ms, exp_bits, rnd_mode, seed);
        return;
    case BF_OP_FTOA:
        test_ftoa(rprec, duration_ms, exp_bits, rnd_mode, seed);
        return;
    case BF_OP_CAN_ROUND:
        test_can_round(rprec, duration_ms, rnd_mode, seed);
        return;
    case BF_OP_MUL_L2RADIX:
    case BF_OP_DIV_L2RADIX:
        test_mul_log2(duration_ms, (op == BF_OP_DIV_L2RADIX), rnd_mode == BF_RNDU, seed);
        return;
    case BF_OP_ADD_DEC:
    case BF_OP_MUL_DEC:
    case BF_OP_DIV_DEC:
    case BF_OP_SQRT_DEC:
    case BF_OP_FMOD_DEC:
    case BF_OP_DIVREM_DEC:
    case BF_OP_RINT_DEC:
        test_op_rm_dec(op, rprec, duration_ms, exp_bits, rnd_mode, seed);
        return;
    default:
        break;
    }
    
    /* with a 53 bit precision and an 11 bit exponent the reference is
       softfp_exec_op() and subnormal numbers are enabled */
    use_float64_ref = (rprec == 53 && exp_bits == 11);
    bf_flags = rnd_mode | bf_set_exp_bits(exp_bits);
    if (use_float64_ref)
        bf_flags |= BF_FLAG_SUBNORMAL;
    
    mp_randinit(&rnd_state, seed);
    bf_init(&bf_ctx, &a);
    bf_init(&bf_ctx, &b);
    bf_init(&bf_ctx, &r);
    bf_init(&bf_ctx, &r_ref);
    bf_set_ui(&b, 0);
    bf_set_ui(&r, 0);
    bf_set_ui(&r_ref, 0);
    ti = 0;
    ti_ref = 0;
    ti_dummy = 0;
    start_time = get_clock_msec();
    test_loop = 1;
    it = 0;
    it_perf = 0;
    for(;;) {
        if (rprec == 0) {
            prec = (mp_random64(&rnd_state) % 1000) + 24;
        } else {
            prec = rprec;
        }
        /* number of input operands of the operation */
        switch(op) {
        case BF_OP_RINT:
        case BF_OP_SQRT:
        case BF_OP_EXP:
        case BF_OP_LOG:
        case BF_OP_COS:
        case BF_OP_SIN:
        case BF_OP_TAN:
        case BF_OP_ATAN:
        case BF_OP_ASIN:
        case BF_OP_ACOS:
            op_count = 1;
            break;
        default:
            op_count = 2;
            break;
        }

        if (op_count == 1) {
            /* the first SPECIAL_COUNT iterations use the special values */
            if (it < SPECIAL_COUNT) {
                set_special(&a, it);
            } else {
                limb_t prec1;
                
                if (use_float64_ref) {
                    Float64Union u;
                    u.u = rrandom_sf64(&rnd_state);
                    bf_set_float64(&a, u.d);
                } else {
                    prec1 = mp_random64(&rnd_state) % (3 * prec) + 1;
                    bf_rrandom(&a, prec1, &rnd_state);
                    if (op == BF_OP_COS || op == BF_OP_SIN || op == BF_OP_TAN) {
                        /* add k * pi / 2 with k in [-1000, 999] to the
                           random value */
                        int k;
                        bf_t c_s, *c = &c_s;
                        if (a.expn != BF_EXP_ZERO)
                            a.expn++;
                        k = (mp_random64(&rnd_state) % 2000) - 1000;
                        bf_init(&bf_ctx, c);
                        bf_const_pi(c, prec1 + 1, BF_RNDN);
                        c->expn--; /* pi/2 */
                        bf_mul_si(c, c, k, prec1 + 1, BF_RNDN);
                        bf_add(&a, &a, c, prec1, BF_RNDN);
                        bf_delete(c);
                    } else if (op == BF_OP_ACOS || op == BF_OP_ASIN) {
                        /* the exponent produced by bf_rrandom() is kept */
                    } else {
                        if (a.expn != BF_EXP_ZERO)
                            a.expn += prec1 / 2;
                    }
                }
                /* sqrt and log are only tested with a cleared sign */
                if (op == BF_OP_SQRT || op == BF_OP_LOG) {
                    a.sign = 0;
                } else {
                    a.sign = mp_random64(&rnd_state) & 1;
                }
            }
        } else if (op == BF_OP_OR ||
                   op == BF_OP_XOR ||
                   op == BF_OP_AND) {
            /* logical operations: random integers only, no special values */
            bf_rrandom_int(&a, prec, &rnd_state);
            bf_rrandom_int(&b, prec, &rnd_state);
        } else {
            /* first run through every pair of special values */
            if (it < SPECIAL_COUNT * SPECIAL_COUNT) {
                set_special(&a, it % SPECIAL_COUNT);
                set_special(&b, it / SPECIAL_COUNT);
            } else {
                if (op == BF_OP_POW) {
                    bf_rrandom_large(&a, prec, &rnd_state);
                    /* one exponent out of ten is a random 32 bit integer */
                    if ((it % 10) == 0) {
                        bf_set_si(&b, (int32_t)mp_random64(&rnd_state));
                    } else {
                        bf_rrandom_large(&b, prec, &rnd_state);
                    }
                } else if (use_float64_ref) {
                    Float64Union u;
                    u.u = rrandom_sf64(&rnd_state);
                    bf_set_float64(&a, u.d);
                    u.u = rrandom_sf64(&rnd_state);
                    bf_set_float64(&b, u.d);
                } else {
                    bf_rrandom_large(&a, prec, &rnd_state);
                    bf_rrandom_large(&b, prec, &rnd_state);
                }
            }
        }
        
        status = bf_exec_op(op, &r, &a, &b, prec, bf_flags, &ti);
        //        bf_print_str("r", &r);
        
        /* for BF_RNDF the reference is first computed with BF_RNDD */
        rnd_mode1 = rnd_mode;
        if (rnd_mode == BF_RNDF)
            rnd_mode1 = BF_RNDD;
        if (use_float64_ref) {
            ref_status = softfp_exec_op(op, &r_ref, &a, &b, prec, rnd_mode1, &ti_ref);
        } else {
            ref_status = mpfr_exec_op(op, &r_ref, &a, &b, prec, rnd_mode1, &ti_ref);
        }
        //        bf_print_str("r_ref", &r_ref);

        /* comparisons are checked through the returned status only */
        if (op == BF_OP_CMP_EQ ||
            op == BF_OP_CMP_LE ||
            op == BF_OP_CMP_LT) {
            res = (status != ref_status);
        } else {
            res = !bf_is_same(&r, &r_ref);
            if (rnd_mode == BF_RNDF) {
                /* BF_RNDF: the result is accepted if it matches either the
                   BF_RNDD or the BF_RNDU reference; the inexact flag is not
                   compared in this mode */
                if (res) {
                    if (use_float64_ref) {
                        softfp_exec_op(op, &r_ref, &a, &b, prec, BF_RNDU, &ti_dummy);
                    } else {
                        mpfr_exec_op(op, &r_ref, &a, &b, prec, BF_RNDU, &ti_dummy);
                    }
                    res = !bf_is_same(&r, &r_ref);
                }
            } else {
                /* other modes: the inexact flag must match too */
                if ((status & BF_ST_INEXACT) !=
                    (ref_status & BF_ST_INEXACT))
                    res = 1;
            }
        }
        
        if (res) {
            printf("\nERROR (%d):\n", it);
            
            bf_print_str("a  ", &a);
            if (op_count > 1) {
                bf_print_str("b  ", &b);
            }
            bf_print_str("r  ", &r);
            bf_print_str("ref", &r_ref);
            printf("st    ="); print_status(status); printf("\n");
            printf("ref_st="); print_status(ref_status); printf("\n");
            exit(1);
        }
        /* excluding special value from CPU time */
        if ((op_count == 1 && it < SPECIAL_COUNT) ||
            (op_count == 2 && it < SPECIAL_COUNT * SPECIAL_COUNT)) {
            ti = 0;
            ti_ref = 0;
        } else {
            it_perf++;
        }

        it++;
        /* test_loop is a power of two that doubles after each poll, so the
           clock is only read when 'it' is a multiple of test_loop */
        if ((it & (test_loop - 1)) == 0) {
            if ((get_clock_msec() - start_time) >= duration_ms)
                break;
            test_loop *= 2;
        }
    }

    /* uses the precision of the last iteration.
       NOTE(review): it_perf is still 0 when the run ends before the first
       non-special iteration; the two averages are then not finite numbers */
    nb_limbs = (prec + 63) / 64;
    printf(" %8u %8.1f %8.1f\n",
           it,
           (double)ti / it_perf / nb_limbs,
           (double)ti_ref / it_perf / nb_limbs);

    bf_delete(&a);
    bf_delete(&b);
    bf_delete(&r);
    bf_delete(&r_ref);
}
1857 | 
1858 | void test_op(MPFTestOPEnum op, limb_t prec, int duration_ms, int exp_bits,
1859 |              int seed)
1860 | {
1861 |     BOOL use_float64_ref;
1862 |     uint8_t rm_allowed[BF_RNDF + 1];
1863 |     bf_rnd_t rnd_mode;
1864 | 
1865 |     use_float64_ref = (prec == 53 && exp_bits == 11);
1866 |     memset(rm_allowed, 0, sizeof(rm_allowed));
1867 |     if (use_float64_ref) {
1868 |         rm_allowed[BF_RNDN] = 1;
1869 |         rm_allowed[BF_RNDZ] = 1;
1870 |         rm_allowed[BF_RNDU] = 1;
1871 |         rm_allowed[BF_RNDD] = 1;
1872 |         rm_allowed[BF_RNDNA] = 1;
1873 |     } else {
1874 |         switch(op) {
1875 |         case BF_OP_ADD:
1876 |         case BF_OP_MUL:
1877 |         case BF_OP_DIV:
1878 |         case BF_OP_FMOD:
1879 |         case BF_OP_REM:
1880 |         case BF_OP_RINT:
1881 |         case BF_OP_ROUND:
1882 |         case BF_OP_SQRT:
1883 |         case BF_OP_ATOF:
1884 |         case BF_OP_EXP:
1885 |         case BF_OP_LOG:
1886 |         case BF_OP_COS:
1887 |         case BF_OP_SIN:
1888 |         case BF_OP_TAN:
1889 |         case BF_OP_ATAN:
1890 |         case BF_OP_ATAN2:
1891 |         case BF_OP_ASIN:
1892 |         case BF_OP_ACOS:
1893 |         case BF_OP_POW:
1894 |             rm_allowed[BF_RNDN] = 1;
1895 |             rm_allowed[BF_RNDZ] = 1;
1896 |             rm_allowed[BF_RNDU] = 1;
1897 |             rm_allowed[BF_RNDD] = 1;
1898 |             rm_allowed[BF_RNDF] = 1;
1899 |             break;
1900 |         case BF_OP_CAN_ROUND:
1901 |             rm_allowed[BF_RNDN] = 1;
1902 |             rm_allowed[BF_RNDZ] = 1;
1903 |             rm_allowed[BF_RNDU] = 1;
1904 |             rm_allowed[BF_RNDD] = 1;
1905 |             rm_allowed[BF_RNDA] = 1;
1906 |             rm_allowed[BF_RNDNA] = 1;
1907 |             break;
1908 |         case BF_OP_FTOA:
1909 |             rm_allowed[BF_RNDN] = 1;
1910 |             rm_allowed[BF_RNDZ] = 1;
1911 |             rm_allowed[BF_RNDU] = 1;
1912 |             rm_allowed[BF_RNDD] = 1;
1913 |             rm_allowed[BF_RNDA] = 1;
1914 |             break;
1915 |         case BF_OP_SUB:
1916 |             /* minimal test for SUB which is like ADD */
1917 |             rm_allowed[BF_RNDN] = 1;
1918 |             break;
1919 |         case BF_OP_MUL_L2RADIX:
1920 |         case BF_OP_DIV_L2RADIX:
1921 |             rm_allowed[BF_RNDU] = 1;
1922 |             rm_allowed[BF_RNDD] = 1;
1923 |             break;
1924 |         case BF_OP_ADD_DEC:
1925 |         case BF_OP_MUL_DEC:
1926 |         case BF_OP_DIV_DEC:
1927 |         case BF_OP_RINT_DEC:
1928 |             rm_allowed[BF_RNDN] = 1;
1929 |             rm_allowed[BF_RNDZ] = 1;
1930 |             rm_allowed[BF_RNDU] = 1;
1931 |             rm_allowed[BF_RNDD] = 1;
1932 |             rm_allowed[BF_RNDA] = 1;
1933 |             rm_allowed[BF_RNDNA] = 1;
1934 |             break;
1935 |         case BF_OP_SQRT_DEC:
1936 |             rm_allowed[BF_RNDN] = 1;
1937 |             //* bug in mpd_qsqrt() */
1938 |             //            rm_allowed[BF_RNDZ] = 1;
1939 |             //            rm_allowed[BF_RNDU] = 1;
1940 |             //            rm_allowed[BF_RNDD] = 1;
1941 |             break;
1942 |         case BF_OP_FMOD_DEC:
1943 |             break; /* bug in mpd_qrem() */
1944 |         case BF_OP_DIVREM_DEC:
1945 |             rm_allowed[BF_RNDZ] = 1;
1946 |             rm_allowed[BF_RNDN] = 1;
1947 |             break;
1948 |         default:
1949 |             rm_allowed[BF_RNDZ] = 1;
1950 |             break;
1951 |         }
1952 |     }
1953 |     for(rnd_mode = 0; rnd_mode < countof(rm_allowed); rnd_mode++) {
1954 |         if (rm_allowed[rnd_mode]) {
1955 |             test_op_rm(op, prec, duration_ms, exp_bits, rnd_mode, seed);
1956 |         }
1957 |     }
1958 | }
1959 | 
1960 | static MPFTestOPEnum get_op_from_str(const char *str)
1961 | {
1962 |     MPFTestOPEnum op;
1963 |     for(op = 0; op < BF_OP_COUNT; op++) {
1964 |         if (!strcmp(str, op_str[op]))
1965 |             break;
1966 |         }
1967 |     if (op == BF_OP_COUNT) {
1968 |         fprintf(stderr, "Unknown operation: %s\n", str);
1969 |         exit(1);
1970 |     }
1971 |     return op;
1972 | }
1973 | 
/* Print the command line usage on stdout, then exit with status 1. */
void help(void)
{
    static const char usage_text[] =
        "usage: bftest [options] [first_op [last_op]]\n"
        "\n"
        "Options:\n"
        "-h         this help\n"
        "-s seed    set the initial seed\n"
        "-S         single iteration of tests\n"
        "-p prec    force precision\n";

    fputs(usage_text, stdout);
    exit(1);
}
1986 | 
1987 | int main(int argc, char **argv)
1988 | {
1989 |     int seed, duration_ms, c;
1990 |     limb_t prec;
1991 |     MPFTestOPEnum op, op_start, op_last;
1992 |     BOOL short_test = FALSE;
1993 |     
1994 |     seed = 1234;
1995 |     duration_ms = 100;
1996 |     prec = 0;
1997 |     for(;;) {
1998 |         c = getopt(argc, argv, "hs:Sp:");
1999 |         if (c == -1)
2000 |             break;
2001 |         switch(c) {
2002 |         case 'h':
2003 |             help();
2004 |         case 's':
2005 |             seed = strtoul(optarg, NULL, 0);
2006 |             duration_ms = 1000;
2007 |             break;
2008 |         case 'S':
2009 |             short_test = TRUE;
2010 |             break;
2011 |         case 'p':
2012 |             prec = (limb_t)strtod(optarg, NULL);
2013 |             break;
2014 |         default:
2015 |             exit(1);
2016 |         }
2017 |     }
2018 | 
2019 |     op_start = 0;
2020 |     op_last = BF_OP_COUNT - 1;
2021 |     if (optind < argc)
2022 |         op_start = get_op_from_str(argv[optind++]);
2023 |     if (optind < argc)
2024 |         op_last = get_op_from_str(argv[optind++]);
2025 | 
2026 |     mpfr_exec_init();
2027 |     bf_context_init(&bf_ctx, my_bf_realloc, NULL);
2028 |     mpd_init(&mpd_ctx, 16);
2029 |     
2030 |     printf("%-20s %5s %3s %3s %5s %8s %8s %8s\n", "OP", "PREC", "EXP", "RND", "SEED", "CNT", "c/64bit", "ref");
2031 | 
2032 |     for(;;) {
2033 |         for(op = op_start; op <= op_last; op++) {
2034 |             if (prec != 0) {
2035 |                 test_op(op, prec, duration_ms, BF_EXP_BITS_MAX, seed);
2036 |             } else {
2037 |                 if (op == BF_OP_MUL_L2RADIX || op == BF_OP_DIV_L2RADIX) {
2038 |                     test_op(op, LIMB_BITS, duration_ms, 0, seed);
2039 |                 } else if (op == BF_OP_CAN_ROUND) {
2040 |                     test_op(op, 8, duration_ms, BF_EXP_BITS_MAX, seed);
2041 |                     test_op(op, 53, duration_ms, BF_EXP_BITS_MAX, seed);
2042 |                     test_op(op, 256, duration_ms, BF_EXP_BITS_MAX, seed);
2043 |                 } else if (op >= BF_OP_ADD_DEC && op <= BF_OP_RINT_DEC) {
2044 |                     test_op(op, 16, duration_ms, BF_EXP_BITS_MAX, seed);
2045 |                     test_op(op, 100, duration_ms, BF_EXP_BITS_MAX, seed);
2046 |                 } else if (op == BF_OP_MP_SQRTREM ||
2047 |                            op == BF_OP_MP_RECIP) {
2048 |                     test_op(op, 100, duration_ms, BF_EXP_BITS_MAX, seed);
2049 |                 } else {
2050 |                     if (op == BF_OP_MUL ||
2051 |                         op == BF_OP_ADD ||
2052 |                         op == BF_OP_DIV ||
2053 |                         op == BF_OP_SQRT ||
2054 |                         op == BF_OP_CMP_EQ ||
2055 |                         op == BF_OP_CMP_LT ||
2056 |                         op == BF_OP_CMP_LE) {
2057 |                         test_op(op, 53, duration_ms, 11, seed);
2058 |                     }
2059 |                     test_op(op, 53, duration_ms, BF_EXP_BITS_MAX, seed);
2060 |                     test_op(op, 112, duration_ms, BF_EXP_BITS_MAX, seed);
2061 |                     /* mpfr bug ? */
2062 |                     if (op !=  BF_OP_SQRT)
2063 |                         test_op(op, 256, duration_ms, BF_EXP_BITS_MAX, seed);
2064 |                     test_op(op, 3000, duration_ms, BF_EXP_BITS_MAX, seed);
2065 |                 }
2066 |             }
2067 |         }
2068 |         seed++;
2069 |         duration_ms = 1000;
2070 |         if (short_test)
2071 |             break;
2072 |     }
2073 |     return 0;
2074 | }
2075 | 


--------------------------------------------------------------------------------
/cutils.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * C utilities
  3 |  *
  4 |  * Copyright (c) 2017 Fabrice Bellard
  5 |  * Copyright (c) 2018 Charlie Gordon
  6 |  *
  7 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  8 |  * of this software and associated documentation files (the "Software"), to deal
  9 |  * in the Software without restriction, including without limitation the rights
 10 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the Software is
 12 |  * furnished to do so, subject to the following conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be included in
 15 |  * all copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 20 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 |  * THE SOFTWARE.
 24 |  */
 25 | #ifndef HAVE_CONFIG_H
 26 | #include "quickjs-config.h"
 27 | #else
 28 | #include "config.h"
 29 | #endif
 30 | 
 31 | #include <stdlib.h>
 32 | #include <stdio.h>
 33 | #include <stdarg.h>
 34 | #include <string.h>
 35 | 
 36 | #include "quickjs.h"
 37 | #include "cutils.h"
 38 | 
 39 | void qjs_assert(const char* msg, const char* file, int line)
 40 | {
 41 | 	fprintf(stderr, "\nAssertion failed (%s, %d): %s\n", file, line, msg);
 42 | 	fflush(stderr);
 43 | 	if (IsDebuggerPresent())
 44 | 		DebugBreak();
 45 | 
 46 | 	fprintf(stderr, "Triggering SEH exception\n");
 47 | 	fflush(stderr);
 48 | 	volatile int* pInt = 0x00000000;
 49 | 	*pInt = 20;
 50 | #if 0
 51 | 	abort();
 52 | #endif
 53 | }
 54 | 
 55 | 
 56 | void pstrcpy(char *buf, int buf_size, const char *str)
 57 | {
 58 |     int c;
 59 |     char *q = buf;
 60 | 
 61 |     if (buf_size <= 0)
 62 |         return;
 63 | 
 64 |     for(;;) {
 65 |         c = *str++;
 66 |         if (c == 0 || q >= buf + buf_size - 1)
 67 |             break;
 68 |         *q++ = c;
 69 |     }
 70 |     *q = '\0';
 71 | }
 72 | 
 73 | /* strcat and truncate. */
 74 | char *pstrcat(char *buf, int buf_size, const char *s)
 75 | {
 76 |     int len;
 77 |     len = strlen(buf);
 78 |     if (len < buf_size)
 79 |         pstrcpy(buf + len, buf_size - len, s);
 80 |     return buf;
 81 | }
 82 | 
 83 | int strstart(const char *str, const char *val, const char **ptr)
 84 | {
 85 |     const char *p, *q;
 86 |     p = str;
 87 |     q = val;
 88 |     while (*q != '\0') {
 89 |         if (*p != *q)
 90 |             return 0;
 91 |         p++;
 92 |         q++;
 93 |     }
 94 |     if (ptr)
 95 |         *ptr = p;
 96 |     return 1;
 97 | }
 98 | 
 99 | int has_suffix(const char *str, const char *suffix)
100 | {
101 |     size_t len = strlen(str);
102 |     size_t slen = strlen(suffix);
103 |     return (len >= slen && !memcmp(str + len - slen, suffix, slen));
104 | }
105 | 
106 | /* Dynamic buffer package */
107 | 
/*
 * Default allocator of a DynBuf, backed by the C library.
 *
 * opaque  unused
 * ptr     block to resize or release, may be NULL
 * size    requested size in bytes; 0 releases 'ptr'
 *
 * Returns the (possibly moved) block, or NULL on allocation failure or
 * when size is 0.
 */
static void *dbuf_default_realloc(void *opaque, void *ptr, size_t size)
{
    (void)opaque;
    /* dbuf_free() releases the buffer with a size of 0. The result of
       realloc(ptr, 0) is not portable, so free explicitly. */
    if (size == 0) {
        free(ptr);
        return NULL;
    }
    return realloc(ptr, size);
}
112 | 
113 | void dbuf_init2(DynBuf *s, void *opaque, DynBufReallocFunc *realloc_func)
114 | {
115 |     memset(s, 0, sizeof(*s));
116 |     if (!realloc_func)
117 |         realloc_func = dbuf_default_realloc;
118 |     s->opaque = opaque;
119 |     s->realloc_func = realloc_func;
120 | }
121 | 
122 | void dbuf_init(DynBuf *s)
123 | {
124 |     dbuf_init2(s, NULL, NULL);
125 | }
126 | 
127 | /* return < 0 if error */
128 | int dbuf_realloc(DynBuf *s, size_t new_size)
129 | {
130 |     size_t size;
131 |     uint8_t *new_buf;
132 |     if (new_size > s->allocated_size) {
133 |         if (s->error)
134 |             return -1;
135 |         size = s->allocated_size * 3 / 2;
136 |         if (size > new_size)
137 |             new_size = size;
138 |         new_buf = s->realloc_func(s->opaque, s->buf, new_size);
139 |         if (!new_buf) {
140 |             s->error = TRUE;
141 |             return -1;
142 |         }
143 |         s->buf = new_buf;
144 |         s->allocated_size = new_size;
145 |     }
146 |     return 0;
147 | }
148 | 
149 | int dbuf_write(DynBuf *s, size_t offset, const uint8_t *data, size_t len)
150 | {
151 |     size_t end;
152 |     end = offset + len;
153 |     if (dbuf_realloc(s, end))
154 |         return -1;
155 |     memcpy(s->buf + offset, data, len);
156 |     if (end > s->size)
157 |         s->size = end;
158 |     return 0;
159 | }
160 | 
161 | int dbuf_put(DynBuf *s, const uint8_t *data, size_t len)
162 | {
163 |     if (unlikely((s->size + len) > s->allocated_size)) {
164 |         if (dbuf_realloc(s, s->size + len))
165 |             return -1;
166 |     }
167 |     memcpy(s->buf + s->size, data, len);
168 |     s->size += len;
169 |     return 0;
170 | }
171 | 
172 | int dbuf_put_self(DynBuf *s, size_t offset, size_t len)
173 | {
174 |     if (unlikely((s->size + len) > s->allocated_size)) {
175 |         if (dbuf_realloc(s, s->size + len))
176 |             return -1;
177 |     }
178 |     memcpy(s->buf + s->size, s->buf + offset, len);
179 |     s->size += len;
180 |     return 0;
181 | }
182 | 
183 | int dbuf_putc(DynBuf *s, uint8_t c)
184 | {
185 |     return dbuf_put(s, &c, 1);
186 | }
187 | 
188 | int dbuf_putstr(DynBuf *s, const char *str)
189 | {
190 |     return dbuf_put(s, (const uint8_t *)str, strlen(str));
191 | }
192 | 
193 | //  __attribute__((format(printf, 2, 3)))
194 | int __js_printf_like(2, 3) dbuf_printf(DynBuf* s, const char* fmt, ...)
195 | {
196 |     va_list ap;
197 |     char buf[128];
198 |     int len;
199 | 
200 |     va_start(ap, fmt);
201 |     len = vsnprintf(buf, sizeof(buf), fmt, ap);
202 |     va_end(ap);
203 |     if (len < sizeof(buf)) {
204 |         /* fast case */
205 |         return dbuf_put(s, (uint8_t *)buf, len);
206 |     } else {
207 |         va_start(ap, fmt);
208 |         int real_len = vsnprintf(0, 0, fmt, ap);
209 |         va_end(ap);
210 | 
211 |         if (dbuf_realloc(s, s->size + real_len + 1))
212 |             return -1;
213 | 
214 |         va_start(ap, fmt);
215 |         vsnprintf((char *)(s->buf + s->size), s->allocated_size - s->size,
216 |                   fmt, ap);
217 |         va_end(ap);
218 |         s->size += real_len;
219 |     }
220 |     return 0;
221 | }
222 | 
223 | void dbuf_free(DynBuf *s)
224 | {
225 |     /* we test s->buf as a fail safe to avoid crashing if dbuf_free()
226 |        is called twice */
227 |     if (s->buf) {
228 |         s->realloc_func(s->opaque, s->buf, 0);
229 |     }
230 |     memset(s, 0, sizeof(*s));
231 | }
232 | 
/* Encode 'c' as an (extended) UTF-8 byte sequence into 'buf' and return
   the number of bytes written, between 1 and 6. Values of up to 31
   bits are encoded; for larger values nothing is written and 0 is
   returned. */
int unicode_to_utf8(uint8_t *buf, unsigned int c)
{
    /* marker bits of the first byte, indexed by sequence length */
    static const uint8_t lead_marker[7] = {
        0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc,
    };
    int nbytes, k;

    if (c < 0x80)
        nbytes = 1;
    else if (c < 0x800)
        nbytes = 2;
    else if (c < 0x10000)
        nbytes = 3;
    else if (c < 0x00200000)
        nbytes = 4;
    else if (c < 0x04000000)
        nbytes = 5;
    else if (c < 0x80000000)
        nbytes = 6;
    else
        return 0;

    /* first byte: marker plus the most significant payload bits */
    buf[0] = (uint8_t)((c >> (6 * (nbytes - 1))) | lead_marker[nbytes]);
    /* continuation bytes: 6 payload bits each, most significant first */
    for (k = 1; k < nbytes; k++)
        buf[k] = (uint8_t)(((c >> (6 * (nbytes - 1 - k))) & 0x3f) | 0x80);
    return nbytes;
}
269 | 
/* smallest code point that legitimately needs 1..5 continuation bytes;
   anything below is an overlong encoding */
static const unsigned int utf8_min_code[5] = {
    0x80, 0x800, 0x10000, 0x00200000, 0x04000000,
};

/* payload bits of the first byte, indexed by (continuation bytes - 1) */
static const unsigned char utf8_first_code_mask[5] = {
    0x1f, 0xf, 0x7, 0x3, 0x1,
};

/* Decode one (extended) UTF-8 sequence of at most 6 bytes starting at
   'p', of which 'max_len' bytes (>= 1) may be read. On success the code
   point is returned and *pp is set just past the sequence. On error
   (invalid first byte, sequence longer than max_len, bad continuation
   byte or overlong encoding) -1 is returned and *pp is left untouched. */
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp)
{
    const uint8_t *cur = p;
    int lead, ntrail, code, k;

    lead = *cur++;
    if (lead < 0x80) {
        /* plain ASCII */
        *pp = cur;
        return lead;
    }
    /* 0x80..0xbf are continuation bytes, 0xfe/0xff are never valid */
    if (lead < 0xc0 || lead > 0xfd)
        return -1;
    if (lead < 0xe0)
        ntrail = 1;
    else if (lead < 0xf0)
        ntrail = 2;
    else if (lead < 0xf8)
        ntrail = 3;
    else if (lead < 0xfc)
        ntrail = 4;
    else
        ntrail = 5;

    /* check that we have enough characters */
    if (max_len - 1 < ntrail)
        return -1;

    code = lead & utf8_first_code_mask[ntrail - 1];
    for (k = 0; k < ntrail; k++) {
        int trail = cur[k];
        /* continuation bytes look like 10xxxxxx */
        if ((trail & 0xc0) != 0x80)
            return -1;
        code = (code << 6) | (trail & 0x3f);
    }
    if (code < (int)utf8_min_code[ntrail - 1])
        return -1;
    *pp = cur + ntrail;
    return code;
}
334 | 
335 | #if 0
336 | 
337 | #if defined(EMSCRIPTEN) || defined(__ANDROID__)
338 | 
/* qsort() hands no user pointer to its comparator, so the 3-argument
   comparator and its context are kept in file-scope variables while
   the sort runs. */
static void *rqsort_arg;
static int (*rqsort_cmp)(const void *, const void *, void *);

/* 2-argument adapter given to qsort(): forwards to the stored
   comparator together with the stored context */
static int rqsort_cmp2(const void *p1, const void *p2)
{
    const int order = rqsort_cmp(p1, p2, rqsort_arg);

    return order;
}

/* not reentrant, but not needed with emscripten */
void rqsort(void *base, size_t nmemb, size_t size,
            int (*cmp)(const void *, const void *, void *),
            void *arg)
{
    rqsort_cmp = cmp;
    rqsort_arg = arg;
    qsort(base, nmemb, size, rqsort_cmp2);
}
356 | 
357 | #endif
358 | 
359 | #else
360 | 
361 | typedef void (*exchange_f)(void *a, void *b, size_t size);
362 | typedef int (*cmp_f)(const void *, const void *, void *opaque);
363 | 
/* Swap 'size' bytes between 'a' and 'b', one byte at a time. */
static void exchange_bytes(void *a, void *b, size_t size) {
    uint8_t *lhs = (uint8_t *)a;
    uint8_t *rhs = (uint8_t *)b;
    size_t k;

    for (k = 0; k < size; k++) {
        uint8_t saved = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = saved;
    }
}

/* Swap exactly one byte; 'size' is ignored and only present so that the
   function matches the exchange_f signature. */
static void exchange_one_byte(void *a, void *b, size_t size) {
    uint8_t *lhs = (uint8_t *)a;
    uint8_t *rhs = (uint8_t *)b;
    uint8_t saved;

    (void)size;
    saved = lhs[0];
    lhs[0] = rhs[0];
    rhs[0] = saved;
}
382 | 
/* Swap 'size' bytes between 'a' and 'b' in 16-bit units; a trailing odd
   byte, if any, is not touched. */
static void exchange_int16s(void *a, void *b, size_t size) {
    uint16_t *lhs = (uint16_t *)a;
    uint16_t *rhs = (uint16_t *)b;
    const size_t count = size / sizeof(uint16_t);
    size_t k;

    for (k = 0; k < count; k++) {
        uint16_t saved = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = saved;
    }
}

/* Swap exactly one 16-bit value; 'size' is ignored and only present so
   that the function matches the exchange_f signature. */
static void exchange_one_int16(void *a, void *b, size_t size) {
    uint16_t *lhs = (uint16_t *)a;
    uint16_t *rhs = (uint16_t *)b;
    uint16_t saved;

    (void)size;
    saved = lhs[0];
    lhs[0] = rhs[0];
    rhs[0] = saved;
}
401 | 
/* Swap 'size' bytes between 'a' and 'b' in 32-bit units; trailing bytes
   that do not fill a whole unit are not touched. */
static void exchange_int32s(void *a, void *b, size_t size) {
    uint32_t *lhs = (uint32_t *)a;
    uint32_t *rhs = (uint32_t *)b;
    const size_t count = size / sizeof(uint32_t);
    size_t k;

    for (k = 0; k < count; k++) {
        uint32_t saved = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = saved;
    }
}

/* Swap exactly one 32-bit value; 'size' is ignored and only present so
   that the function matches the exchange_f signature. */
static void exchange_one_int32(void *a, void *b, size_t size) {
    uint32_t *lhs = (uint32_t *)a;
    uint32_t *rhs = (uint32_t *)b;
    uint32_t saved;

    (void)size;
    saved = lhs[0];
    lhs[0] = rhs[0];
    rhs[0] = saved;
}
420 | 
/* Swap 'size' bytes between 'a' and 'b' in 64-bit units; trailing bytes
   that do not fill a whole unit are not touched. */
static void exchange_int64s(void *a, void *b, size_t size) {
    uint64_t *lhs = (uint64_t *)a;
    uint64_t *rhs = (uint64_t *)b;
    const size_t count = size / sizeof(uint64_t);
    size_t k;

    for (k = 0; k < count; k++) {
        uint64_t saved = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = saved;
    }
}

/* Swap exactly one 64-bit value; 'size' is ignored and only present so
   that the function matches the exchange_f signature. */
static void exchange_one_int64(void *a, void *b, size_t size) {
    uint64_t *lhs = (uint64_t *)a;
    uint64_t *rhs = (uint64_t *)b;
    uint64_t saved;

    (void)size;
    saved = lhs[0];
    lhs[0] = rhs[0];
    rhs[0] = saved;
}
439 | 
/* Swap 'size' bytes between 'a' and 'b' in 128-bit units, each handled
   as a pair of 64-bit words; trailing bytes that do not fill a whole
   unit are not touched. */
static void exchange_int128s(void *a, void *b, size_t size) {
    uint64_t *lhs = (uint64_t *)a;
    uint64_t *rhs = (uint64_t *)b;
    size_t units = size / (sizeof(uint64_t) * 2);

    while (units-- != 0) {
        uint64_t lo = lhs[0];
        uint64_t hi = lhs[1];
        lhs[0] = rhs[0];
        lhs[1] = rhs[1];
        rhs[0] = lo;
        rhs[1] = hi;
        lhs += 2;
        rhs += 2;
    }
}

/* Swap exactly one 128-bit value (two 64-bit words); 'size' is ignored
   and only present so that the function matches the exchange_f
   signature. */
static void exchange_one_int128(void *a, void *b, size_t size) {
    uint64_t *lhs = (uint64_t *)a;
    uint64_t *rhs = (uint64_t *)b;
    uint64_t lo, hi;

    (void)size;
    lo = lhs[0];
    hi = lhs[1];
    lhs[0] = rhs[0];
    lhs[1] = rhs[1];
    rhs[0] = lo;
    rhs[1] = hi;
}
464 | 
465 | static inline exchange_f exchange_func(const void *base, size_t size) {
466 |     switch (((uintptr_t)base | (uintptr_t)size) & 15) {
467 |     case 0:
468 |         if (size == sizeof(uint64_t) * 2)
469 |             return exchange_one_int128;
470 |         else
471 |             return exchange_int128s;
472 |     case 8:
473 |         if (size == sizeof(uint64_t))
474 |             return exchange_one_int64;
475 |         else
476 |             return exchange_int64s;
477 |     case 4:
478 |     case 12:
479 |         if (size == sizeof(uint32_t))
480 |             return exchange_one_int32;
481 |         else
482 |             return exchange_int32s;
483 |     case 2:
484 |     case 6:
485 |     case 10:
486 |     case 14:
487 |         if (size == sizeof(uint16_t))
488 |             return exchange_one_int16;
489 |         else
490 |             return exchange_int16s;
491 |     default:
492 |         if (size == 1)
493 |             return exchange_one_byte;
494 |         else
495 |             return exchange_bytes;
496 |     }
497 | }
498 | 
499 | static void heapsortx(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque)
500 | {
501 |     uint8_t *basep = (uint8_t *)base;
502 |     size_t i, n, c, r;
503 |     exchange_f swap = exchange_func(base, size);
504 | 
505 |     if (nmemb > 1) {
506 |         i = (nmemb / 2) * size;
507 |         n = nmemb * size;
508 | 
509 |         while (i > 0) {
510 |             i -= size;
511 |             for (r = i; (c = r * 2 + size) < n; r = c) {
512 |                 if (c < n - size && cmp(basep + c, basep + c + size, opaque) <= 0)
513 |                     c += size;
514 |                 if (cmp(basep + r, basep + c, opaque) > 0)
515 |                     break;
516 |                 swap(basep + r, basep + c, size);
517 |             }
518 |         }
519 |         for (i = n - size; i > 0; i -= size) {
520 |             swap(basep, basep + i, size);
521 | 
522 |             for (r = 0; (c = r * 2 + size) < i; r = c) {
523 |                 if (c < i - size && cmp(basep + c, basep + c + size, opaque) <= 0)
524 |                     c += size;
525 |                 if (cmp(basep + r, basep + c, opaque) > 0)
526 |                     break;
527 |                 swap(basep + r, basep + c, size);
528 |             }
529 |         }
530 |     }
531 | }
532 | 
533 | static inline void *med3(void *a, void *b, void *c, cmp_f cmp, void *opaque)
534 | {
535 |     return cmp(a, b, opaque) < 0 ?
536 |         (cmp(b, c, opaque) < 0 ? b : (cmp(a, c, opaque) < 0 ? c : a )) :
537 |         (cmp(b, c, opaque) > 0 ? b : (cmp(a, c, opaque) < 0 ? a : c ));
538 | }
539 | 
/* pointer based version with local stack and insertion sort threshold */
/* Sort 'nmemb' elements of 'size' bytes each, starting at 'base',
   ordered by cmp(x, y, opaque).
   The implementation is an iterative quicksort with a 3-way partition
   (elements equal to the pivot are gathered in the middle), an explicit
   stack of pending segments instead of recursion, a fallback to
   heapsortx() once a segment has been partitioned more than 50 times,
   and insertion sort for segments of 6 elements or fewer. */
void rqsort(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque)
{
    /* pending segments: start address, element count and the number of
       partitioning steps already applied to reach the segment */
    struct { uint8_t *base; size_t count; int depth; } stack[50], *sp = stack;
    uint8_t *ptr, *pi, *pj, *plt, *pgt, *top, *m;
    size_t m4, i, lt, gt, span, span2;
    int c, depth;
    /* single element swap */
    exchange_f swap = exchange_func(base, size);
    /* swap of a run of elements: size | 128 keeps the low 4 bits used
       for the alignment test but never equals a single unit size, so
       exchange_func() returns a looping variant */
    exchange_f swap_block = exchange_func(base, size | 128);

    if (nmemb < 2 || size <= 0)
        return;

    /* start with the whole array as the only pending segment */
    sp->base = (uint8_t *)base;
    sp->count = nmemb;
    sp->depth = 0;
    sp++;

    while (sp > stack) {
        sp--;
        ptr = sp->base;
        nmemb = sp->count;
        depth = sp->depth;

        while (nmemb > 6) {
            if (++depth > 50) {
                /* depth check to ensure worst case logarithmic time */
                heapsortx(ptr, nmemb, size, cmp, opaque);
                /* nothing left for the insertion sort below */
                nmemb = 0;
                break;
            }
            /* select median of 3 from 1/4, 1/2, 3/4 positions */
            /* should use median of 5 or 9? */
            m4 = (nmemb >> 2) * size;
            m = med3(ptr + m4, ptr + 2 * m4, ptr + 3 * m4, cmp, opaque);
            swap(ptr, m, size);  /* move the pivot to the start of the array */
            /* i/pi: scan position from the left, lt/plt: end of the
               leading run of elements equal to the pivot */
            i = lt = 1;
            pi = plt = ptr + size;
            /* pj: scan position from the right, gt/pgt: start of the
               trailing run of elements equal to the pivot */
            gt = nmemb;
            pj = pgt = top = ptr + nmemb * size;
            for (;;) {
                /* advance from the left over elements <= pivot, moving
                   the equal ones to the leading run */
                while (pi < pj && (c = cmp(ptr, pi, opaque)) >= 0) {
                    if (c == 0) {
                        swap(plt, pi, size);
                        lt++;
                        plt += size;
                    }
                    i++;
                    pi += size;
                }
                /* advance from the right over elements >= pivot, moving
                   the equal ones to the trailing run */
                while (pi < (pj -= size) && (c = cmp(ptr, pj, opaque)) <= 0) {
                    if (c == 0) {
                        gt--;
                        pgt -= size;
                        swap(pgt, pj, size);
                    }
                }
                if (pi >= pj)
                    break;
                /* *pi is greater and *pj is smaller than the pivot:
                   exchange them and continue */
                swap(pi, pj, size);
                i++;
                pi += size;
            }
            /* array has 4 parts:
             * from 0 to lt excluded: elements identical to pivot
             * from lt to pi excluded: elements smaller than pivot
             * from pi to gt excluded: elements greater than pivot
             * from gt to n excluded: elements identical to pivot
             */
            /* move elements identical to pivot in the middle of the array: */
            /* swap values in ranges [0..lt[ and [i-lt..i[
               swapping the smallest span between lt and i-lt is sufficient
             */
            span = plt - ptr;
            span2 = pi - plt;
            /* lt now counts the elements smaller than the pivot */
            lt = i - lt;
            if (span > span2)
                span = span2;
            swap_block(ptr, pi - span, span);
            /* swap values in ranges [gt..top[ and [i..top-(top-gt)[
               swapping the smallest span between top-gt and gt-i is sufficient
             */
            span = top - pgt;
            span2 = pgt - pi;
            /* pgt/gt now mark the start of the elements greater than
               the pivot */
            pgt = top - span2;
            gt = nmemb - (gt - i);
            if (span > span2)
                span = span2;
            swap_block(pi, top - span, span);

            /* now array has 3 parts:
             * from 0 to lt excluded: elements smaller than pivot
             * from lt to gt excluded: elements identical to pivot
             * from gt to n excluded: elements greater than pivot
             */
            /* stack the larger segment and keep processing the smaller one
               to minimize stack use for pathological distributions */
            if (lt > nmemb - gt) {
                sp->base = ptr;
                sp->count = lt;
                sp->depth = depth;
                sp++;
                ptr = pgt;
                nmemb -= gt;
            } else {
                sp->base = pgt;
                sp->count = nmemb - gt;
                sp->depth = depth;
                sp++;
                nmemb = lt;
            }
        }
        /* Use insertion sort for small fragments */
        for (pi = ptr + size, top = ptr + nmemb * size; pi < top; pi += size) {
            for (pj = pi; pj > ptr && cmp(pj - size, pj, opaque) > 0; pj -= size)
                swap(pj, pj - size, size);
        }
    }
}
659 | 
660 | #endif
661 | 


--------------------------------------------------------------------------------
/cutils.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * C utilities
  3 |  *
  4 |  * Copyright (c) 2017 Fabrice Bellard
  5 |  * Copyright (c) 2018 Charlie Gordon
  6 |  *
  7 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  8 |  * of this software and associated documentation files (the "Software"), to deal
  9 |  * in the Software without restriction, including without limitation the rights
 10 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the Software is
 12 |  * furnished to do so, subject to the following conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be included in
 15 |  * all copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 20 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 |  * THE SOFTWARE.
 24 |  */
 25 | #ifndef CUTILS_H
 26 | #define CUTILS_H
 27 | 
 28 | #include <stdlib.h>
 29 | #include <inttypes.h>
 30 | 
 31 | #include "quickjs.h"
 32 | 
 33 | #ifdef _MSC_VER
 34 | #include <windows.h>
 35 | #include <intrin.h>
 36 | #include <malloc.h>
 37 | #else
 38 | #include <alloca.h>
 39 | #endif
 40 | 
 41 | 
 42 | /* set if CPU is big endian */
 43 | #undef WORDS_BIGENDIAN
 44 | 
 45 | #if !defined(__GNUC__) && !defined(__clang__)
 46 | #undef __attribute__
 47 | #define __attribute__(x)
 48 | 
 49 | #undef __builtin_expect
 50 | #define __builtin_expect(cond, m)	(cond)
 51 | #endif
 52 | 
 53 | #if defined(_MSC_VER)
 54 | #define likely(x)    (x)
 55 | #define unlikely(x)  (x)
 56 | #define force_inline __forceinline
 57 | #define no_inline __declspec(noinline)
 58 | #define __maybe_unused
 59 | #define __js_printf_like(a, b)
 60 | #define __attribute__(x)
 61 | #define __attribute(x)
 62 | typedef intptr_t ssize_t;
 63 | #else
 64 | #define likely(x)       __builtin_expect(!!(x), 1)
 65 | #define unlikely(x)     __builtin_expect(!!(x), 0)
 66 | #define force_inline inline __attribute__((always_inline))
 67 | #define no_inline __attribute__((noinline))
 68 | #define __maybe_unused __attribute__((unused))
 69 | #define __js_printf_like(f, a)   __attribute__((format(printf, f, a)))
 70 | #endif
 71 | 
 72 | #define xglue(x, y) x ## y
 73 | #define glue(x, y) xglue(x, y)
 74 | #define stringify(s)    tostring(s)
 75 | #define tostring(s)     #s
 76 | 
 77 | #ifndef offsetof
 78 | #define offsetof(type, field) ((size_t) &((type *)0)->field)
 79 | #endif
 80 | #ifndef countof
 81 | #define countof(x) (sizeof(x) / sizeof((x)[0]))
 82 | #endif
 83 | 
 84 | typedef int BOOL;
 85 | 
 86 | #ifndef FALSE
 87 | enum {
 88 |     FALSE = 0,
 89 |     TRUE = 1,
 90 | };
 91 | #endif
 92 | 
 93 | #ifndef no_return
 94 | #if defined(__GNUC__) || defined(__clang__)
 95 | #define no_return __attribute__ ((noreturn))
 96 | #elif defined(_MSC_VER)
 97 | #define no_return __declspec(noreturn)
 98 | #else
 99 | #define no_return
100 | #endif
101 | #endif
102 | 
103 | void pstrcpy(char *buf, int buf_size, const char *str);
104 | char *pstrcat(char *buf, int buf_size, const char *s);
105 | int strstart(const char *str, const char *val, const char **ptr);
106 | int has_suffix(const char *str, const char *suffix);
107 | 
/* larger of two ints ('b' when they are equal) */
static inline int max_int(int a, int b)
{
    return (a > b) ? a : b;
}

/* smaller of two ints ('b' when they are equal) */
static inline int min_int(int a, int b)
{
    return (a < b) ? a : b;
}
123 | 
/* larger of two unsigned 32-bit values */
static inline uint32_t max_uint32(uint32_t a, uint32_t b)
{
    return (a > b) ? a : b;
}

/* smaller of two unsigned 32-bit values */
static inline uint32_t min_uint32(uint32_t a, uint32_t b)
{
    return (a < b) ? a : b;
}
139 | 
/* larger of two signed 64-bit values */
static inline int64_t max_int64(int64_t a, int64_t b)
{
    return (a > b) ? a : b;
}

/* smaller of two signed 64-bit values */
static inline int64_t min_int64(int64_t a, int64_t b)
{
    return (a < b) ? a : b;
}
155 | 
156 | 
157 | // this chunk ripped from https://github.com/llvm-mirror/libcxx/blob/9dcbb46826fd4d29b1485f25e8986d36019a6dca/include/support/win32/support.h#L106-L182
158 | #if defined(_MSC_VER)
159 | 
160 | // Bit builtin's make these assumptions when calling _BitScanForward/Reverse
161 | // etc. These assumptions are expected to be true for Win32/Win64 which this
162 | // file supports.
163 | static_assert(sizeof(unsigned long long) == 8, "");
164 | static_assert(sizeof(unsigned long) == 4, "");
165 | static_assert(sizeof(unsigned int) == 4, "");
166 | 
// Number of set bits in x, computed by summing bit counts in
// progressively wider fields (2, 4, then 8 bits) and finally adding the
// four byte counts with a single multiplication.
static inline int __builtin_popcount(unsigned int x)
{
	// Each 2-bit field now holds the count of its own two bits.
	x -= (x >> 1) & 0x55555555u;
	// Each 4-bit field holds the count of its four bits.
	x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
	// Each byte holds the count of its eight bits.
	x = (x + (x >> 4)) & 0x0f0f0f0fu;
	// Multiplying by 0x01010101 accumulates all four byte counts into
	// the top byte.
	return (x * 0x01010101u) >> 24;
}

// unsigned long flavour: forwards the value to the 32-bit
// implementation above.
static inline int __builtin_popcountl(unsigned long x)
{
	const int narrowed = (int)(x);

	return __builtin_popcount(narrowed);
}
191 | 
// Number of set bits in a 64-bit value, computed by summing bit counts
// in progressively wider fields (2, 4, then 8 bits) and finally adding
// the eight byte counts with a single multiplication.
static inline int __builtin_popcountll(unsigned long long x)
{
	// Each 2-bit field now holds the count of its own two bits.
	x -= (x >> 1) & 0x5555555555555555ULL;
	// Each 4-bit field holds the count of its four bits.
	x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
	// Each byte holds the count of its eight bits.
	x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
	// Multiplying by 0x0101...01 accumulates all eight byte counts into
	// the top byte.
	return (int)((x * 0x0101010101010101ULL) >> 56);
}
211 | 
// Count of trailing zero bits of a 64-bit value, i.e. the index of its
// lowest set bit. The builtin's result is undefined for 0; this
// replacement returns 64 in that case.
static inline int __builtin_ctzll(unsigned long long mask)
{
	unsigned long bit_index;
	// The scan intrinsics search from LSB to MSB and return zero when
	// no bit is set.
#if INTPTR_MAX >= INT64_MAX // 64-bit
	if (_BitScanForward64(&bit_index, mask))
		return (int)(bit_index);
#else
	// Win32 has no _BitScanForward64: scan the two 32-bit halves, low
	// word first.
	const unsigned long low_word = (unsigned long)(mask);
	const unsigned long high_word = (unsigned long)(mask >> 32);

	if (_BitScanForward(&bit_index, low_word))
		return (int)(bit_index);
	if (_BitScanForward(&bit_index, high_word))
		return (int)(bit_index + 32); // offset of the high word
#endif
	return 64;
}
233 | 
// Count of trailing zero bits of a 32-bit unsigned long; returns 32 when
// no bit is set.
static inline int __builtin_ctzl(unsigned long mask)
{
	unsigned long bit_index;

	// _BitScanForward searches from LSB to MSB and returns zero when no
	// set bit is found.
	return _BitScanForward(&bit_index, mask) ? (int)(bit_index) : 32;
}

// unsigned int flavour: forwards to __builtin_ctzl().
static inline int __builtin_ctz(unsigned int mask)
{
	// Both types must be 32 bits wide for the forwarding to be exact
	// (true on Win32 and Win64).
	static_assert(sizeof(mask) == 4, "");
	static_assert(sizeof(unsigned long) == 4, "");
	const unsigned long widened = (unsigned long)(mask);

	return __builtin_ctzl(widened);
}
251 | 
// Count of leading zero bits of a 64-bit value, i.e. 63 minus the index
// of its highest set bit. The builtin's result is undefined for 0; this
// replacement returns 64 in that case.
static inline int __builtin_clzll(unsigned long long mask)
{
	unsigned long bit_index;
	// The reverse scan intrinsics search from MSB to LSB and return 0
	// when no bit is set.
#if INTPTR_MAX >= INT64_MAX // 64-bit
	if (_BitScanReverse64(&bit_index, mask))
		return (int)(63 - bit_index);
#else
	// No 64-bit scan here: look at the high 32 bits first, then the low
	// 32 bits.
	const unsigned long high_word = (unsigned long)(mask >> 32);
	const unsigned long low_word = (unsigned long)(mask);

	if (_BitScanReverse(&bit_index, high_word))
		return (int)(31 - bit_index); // same as 63 - (bit_index + 32)
	if (_BitScanReverse(&bit_index, low_word))
		return (int)(63 - bit_index);
#endif
	return 64; // Undefined Behavior.
}
273 | 
// Count of leading zero bits of a 32-bit unsigned long; returns 32 when
// no bit is set (the builtin's result is undefined in that case).
static inline int __builtin_clzl(unsigned long mask)
{
	unsigned long bit_index;

	// _BitScanReverse searches from MSB to LSB for the first set bit
	// and returns zero when none is found.
	return _BitScanReverse(&bit_index, mask) ? (int)(31 - bit_index) : 32;
}

// unsigned int flavour: forwards to __builtin_clzl().
static inline int __builtin_clz(unsigned int x)
{
	const unsigned long widened = x;

	return __builtin_clzl(widened);
}
288 | 
#endif // _MSC_VER
290 | 
/* Number of leading zero bits of 'a'; forwards to __builtin_clz(). */
/* WARNING: undefined if a = 0 */
static inline int clz32(unsigned int a)
{
    return __builtin_clz(a);
}

/* Number of leading zero bits of the 64-bit value 'a'; forwards to
   __builtin_clzll(). */
/* WARNING: undefined if a = 0 */
static inline int clz64(uint64_t a)
{
  return __builtin_clzll(a);
}

/* Number of trailing zero bits of 'a'; forwards to __builtin_ctz(). */
/* WARNING: undefined if a = 0 */
static inline int ctz32(unsigned int a)
{
    return __builtin_ctz(a);
}

/* Number of trailing zero bits of the 64-bit value 'a'; forwards to
   __builtin_ctzll(). */
/* WARNING: undefined if a = 0 */
static inline int ctz64(uint64_t a)
{
  return __builtin_ctzll(a);
}
314 | 
/* Single-member structures declared with 1-byte packing. The get_xxx()
   and put_xxx() helpers in this header cast a byte pointer to one of
   these types to read or write a 16, 32 or 64-bit value at that
   address. MSVC expresses the packing with #pragma pack, other
   compilers with the 'packed' attribute. */
#ifdef _MSC_VER
#pragma pack(push, 1)
struct packed_u64 {
    uint64_t v;
};

struct packed_u32 {
    uint32_t v;
};

struct packed_u16 {
    uint16_t v;
};
#pragma pack(pop)
#else
struct __attribute__((packed)) packed_u64 {
    uint64_t v;
};

struct __attribute__((packed)) packed_u32 {
    uint32_t v;
};

struct __attribute__((packed)) packed_u16 {
    uint16_t v;
};
#endif
342 | 
343 | static inline uint64_t get_u64(const uint8_t *tab)
344 | {
345 |     return ((const struct packed_u64 *)tab)->v;
346 | }
347 | 
348 | static inline int64_t get_i64(const uint8_t *tab)
349 | {
350 |     return (int64_t)((const struct packed_u64 *)tab)->v;
351 | }
352 | 
353 | static inline void put_u64(uint8_t *tab, uint64_t val)
354 | {
355 |     ((struct packed_u64 *)tab)->v = val;
356 | }
357 | 
358 | static inline uint32_t get_u32(const uint8_t *tab)
359 | {
360 |     return ((const struct packed_u32 *)tab)->v;
361 | }
362 | 
363 | static inline int32_t get_i32(const uint8_t *tab)
364 | {
365 |     return (int32_t)((const struct packed_u32 *)tab)->v;
366 | }
367 | 
368 | static inline void put_u32(uint8_t *tab, uint32_t val)
369 | {
370 |     ((struct packed_u32 *)tab)->v = val;
371 | }
372 | 
373 | static inline uint32_t get_u16(const uint8_t *tab)
374 | {
375 |     return ((const struct packed_u16 *)tab)->v;
376 | }
377 | 
378 | static inline int32_t get_i16(const uint8_t *tab)
379 | {
380 |     return (int16_t)((const struct packed_u16 *)tab)->v;
381 | }
382 | 
383 | static inline void put_u16(uint8_t *tab, uint16_t val)
384 | {
385 |     ((struct packed_u16 *)tab)->v = val;
386 | }
387 | 
/* read the byte at 'tab', zero-extended to 32 bits */
static inline uint32_t get_u8(const uint8_t *tab)
{
    return tab[0];
}

/* read the byte at 'tab' as a signed value, sign-extended to 32 bits */
static inline int32_t get_i8(const uint8_t *tab)
{
    const int8_t as_signed = (int8_t)tab[0];

    return as_signed;
}

/* store the byte 'val' at 'tab' */
static inline void put_u8(uint8_t *tab, uint8_t val)
{
    tab[0] = val;
}
402 | 
/* exchange the two bytes of a 16-bit value */
static inline uint16_t bswap16(uint16_t x)
{
    /* the operands are promoted to int; the cast keeps the low 16 bits */
    return (uint16_t)((x << 8) | (x >> 8));
}
407 | 
/* reverse the order of the four bytes of a 32-bit value */
static inline uint32_t bswap32(uint32_t v)
{
    const uint32_t byte3_to_0 = v >> 24;
    const uint32_t byte2_to_1 = (v >> 8) & 0x0000ff00;
    const uint32_t byte1_to_2 = (v << 8) & 0x00ff0000;
    const uint32_t byte0_to_3 = v << 24;

    return byte3_to_0 | byte2_to_1 | byte1_to_2 | byte0_to_3;
}
413 | 
/* reverse the order of the eight bytes of a 64-bit value */
static inline uint64_t bswap64(uint64_t v)
{
    uint64_t reversed = 0;
    int k;

    /* peel bytes off the low end of 'v' and push them into 'reversed'
       from the low end, so the first byte taken ends up highest */
    for (k = 0; k < 8; k++) {
        reversed = (reversed << 8) | (v & 0xff);
        v >>= 8;
    }
    return reversed;
}
425 | 
/* Allocation callback used by DynBuf. It is called as
   realloc_func(opaque, ptr, size) to resize the buffer storage and with
   size 0 by dbuf_free() to release it; a NULL result from a resize
   request is treated as an allocation failure. */
/* XXX: should take an extra argument to pass slack information to the caller */
typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t size);

/* Growable byte buffer manipulated through the dbuf_xxx() functions. */
typedef struct DynBuf {
    uint8_t *buf;           /* storage, obtained from realloc_func */
    size_t size;            /* number of bytes currently in use */
    size_t allocated_size;  /* capacity of 'buf' in bytes */
    BOOL error; /* true if a memory allocation error occurred */
    DynBufReallocFunc *realloc_func;
    void *opaque; /* for realloc_func */
} DynBuf;
437 | 
438 | void dbuf_init(DynBuf *s);
439 | void dbuf_init2(DynBuf *s, void *opaque, DynBufReallocFunc *realloc_func);
440 | int dbuf_realloc(DynBuf *s, size_t new_size);
441 | int dbuf_write(DynBuf *s, size_t offset, const uint8_t *data, size_t len);
442 | int dbuf_put(DynBuf *s, const uint8_t *data, size_t len);
443 | int dbuf_put_self(DynBuf *s, size_t offset, size_t len);
444 | int dbuf_putc(DynBuf *s, uint8_t c);
445 | int dbuf_putstr(DynBuf *s, const char *str);
446 | 
447 | static inline int dbuf_put_u16(DynBuf *s, uint16_t val)
448 | {
449 |     return dbuf_put(s, (uint8_t *)&val, 2);
450 | }
451 | static inline int dbuf_put_u32(DynBuf *s, uint32_t val)
452 | {
453 |     return dbuf_put(s, (uint8_t *)&val, 4);
454 | }
455 | static inline int dbuf_put_u64(DynBuf *s, uint64_t val)
456 | {
457 |     return dbuf_put(s, (uint8_t *)&val, 8);
458 | }
459 | int __js_printf_like(2, 3) dbuf_printf(DynBuf* s, const char* fmt, ...);
460 | void dbuf_free(DynBuf *s);
/* Return the error flag of 's' (set after a failed allocation or by
   dbuf_set_error()). */
static inline BOOL dbuf_error(DynBuf *s) {
    return s->error;
}
/* Put 's' in the error state; the flag is only cleared when the
   structure is reset, e.g. by dbuf_free(). */
static inline void dbuf_set_error(DynBuf *s)
{
    s->error = TRUE;
}
468 | 
469 | #define UTF8_CHAR_LEN_MAX 6
470 | 
471 | int unicode_to_utf8(uint8_t *buf, unsigned int c);
472 | int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);
473 | 
/* Value (0..15) of the hexadecimal digit character 'c', accepting both
   upper and lower case letters; -1 if 'c' is not a hexadecimal digit. */
static inline int from_hex(int c)
{
    if ('0' <= c && c <= '9')
        return c - '0';
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    return -1;
}
485 | 
486 | void rqsort(void *base, size_t nmemb, size_t size,
487 |             int (*cmp)(const void *, const void *, void *),
488 |             void *arg);
489 | 
490 | #endif  /* CUTILS_H */
491 | 


--------------------------------------------------------------------------------
/libbf.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Tiny arbitrary precision floating point library
  3 |  *
  4 |  * Copyright (c) 2017-2021 Fabrice Bellard
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | #ifndef LIBBF_H
 25 | #define LIBBF_H
 26 | 
 27 | #include <stddef.h>
 28 | #include <stdint.h>
 29 | 
 30 | #if (INTPTR_MAX >= INT64_MAX) && !defined(_MSC_VER) /* MSVC has no __int128 in stdC mode */
 31 | #define LIMB_LOG2_BITS 6
 32 | #else
 33 | #define LIMB_LOG2_BITS 5
 34 | #endif
 35 | 
 36 | #define LIMB_BITS (1 << LIMB_LOG2_BITS)
 37 | 
 38 | #if (LIMB_BITS == 64)
 39 | 
 40 | typedef __int128 int128_t;
 41 | typedef unsigned __int128 uint128_t;
 42 | typedef int64_t slimb_t;
 43 | typedef uint64_t limb_t;
 44 | typedef uint128_t dlimb_t;
 45 | #define BF_RAW_EXP_MIN INT64_MIN
 46 | #define BF_RAW_EXP_MAX INT64_MAX
 47 | 
 48 | #define LIMB_DIGITS 19
 49 | #define BF_DEC_BASE UINT64_C(10000000000000000000)
 50 | 
 51 | #else
 52 | 
 53 | typedef int32_t slimb_t;
 54 | typedef uint32_t limb_t;
 55 | typedef uint64_t dlimb_t;
 56 | #define BF_RAW_EXP_MIN INT32_MIN
 57 | #define BF_RAW_EXP_MAX INT32_MAX
 58 | 
 59 | #define LIMB_DIGITS 9
 60 | #define BF_DEC_BASE 1000000000U
 61 | 
 62 | #endif
 63 | 
 64 | /* in bits */
 65 | /* minimum number of bits for the exponent */
 66 | #define BF_EXP_BITS_MIN 3
 67 | /* maximum number of bits for the exponent */
 68 | #define BF_EXP_BITS_MAX (LIMB_BITS - 3)
 69 | /* extended range for exponent, used internally */
 70 | #define BF_EXT_EXP_BITS_MAX (BF_EXP_BITS_MAX + 1)
 71 | /* minimum possible precision */
 72 | #define BF_PREC_MIN 2
 73 | /* maximum possible precision */
 74 | #define BF_PREC_MAX (((limb_t)1 << (LIMB_BITS - 2)) - 2)
 75 | /* some operations support infinite precision */
 76 | #define BF_PREC_INF (BF_PREC_MAX + 1) /* infinite precision */
 77 | 
 78 | #if LIMB_BITS == 64
 79 | #define BF_CHKSUM_MOD (UINT64_C(975620677) * UINT64_C(9795002197))
 80 | #else
 81 | #define BF_CHKSUM_MOD 975620677U
 82 | #endif
 83 | 
 84 | #define BF_EXP_ZERO BF_RAW_EXP_MIN
 85 | #define BF_EXP_INF (BF_RAW_EXP_MAX - 1)
 86 | #define BF_EXP_NAN BF_RAW_EXP_MAX
 87 | 
 88 | /* +/-zero is represented with expn = BF_EXP_ZERO and len = 0,
 89 |    +/-infinity is represented with expn = BF_EXP_INF and len = 0,
 90 |    NaN is represented with expn = BF_EXP_NAN and len = 0 (sign is ignored)
 91 |  */
 92 | typedef struct {
 93 |     struct bf_context_t *ctx; /* context used for allocation; bf_delete() frees 'tab' through it */
 94 |     int sign; /* sign flag, toggled with "^= 1" by bf_neg() */
 95 |     slimb_t expn; /* exponent, or BF_EXP_ZERO / BF_EXP_INF / BF_EXP_NAN for the special values */
 96 |     limb_t len; /* 0 for zero, infinity and NaN (see the comment above); presumably the number of limbs in 'tab' otherwise */
 97 |     limb_t *tab; /* limb array, released with bf_realloc(ctx, tab, 0) in bf_delete() */
 98 | } bf_t;
 99 | 
100 | typedef struct {
101 |     /* must be kept identical to bf_t */
102 |     struct bf_context_t *ctx;
103 |     int sign;
104 |     slimb_t expn;
105 |     limb_t len;
106 |     limb_t *tab;
107 | } bfdec_t;
108 | 
109 | typedef enum {
110 |     BF_RNDN, /* round to nearest, ties to even */
111 |     BF_RNDZ, /* round to zero */
112 |     BF_RNDD, /* round to -inf (the code relies on (BF_RNDD xor BF_RNDU) = 1) */
113 |     BF_RNDU, /* round to +inf */
114 |     BF_RNDNA, /* round to nearest, ties away from zero */
115 |     BF_RNDA, /* round away from zero */
116 |     BF_RNDF, /* faithful rounding (nondeterministic, either RNDD or RNDU,
117 |                 inexact flag is always set)  */
118 | } bf_rnd_t;
119 | 
120 | /* allow subnormal numbers. Only available if the number of exponent
121 |    bits is <= BF_EXP_BITS_USER_MAX and prec != BF_PREC_INF. */
122 | #define BF_FLAG_SUBNORMAL (1 << 3)
123 | /* 'prec' is the precision after the radix point instead of the whole
124 |    mantissa. Can only be used with bf_round() and
125 |    bfdec_[add|sub|mul|div|sqrt|round](). */
126 | #define BF_FLAG_RADPNT_PREC (1 << 4)
127 | 
128 | #define BF_RND_MASK 0x7
129 | #define BF_EXP_BITS_SHIFT 5
130 | #define BF_EXP_BITS_MASK 0x3f
131 | 
132 | /* shortcut for bf_set_exp_bits(BF_EXT_EXP_BITS_MAX) */
133 | #define BF_FLAG_EXT_EXP (BF_EXP_BITS_MASK << BF_EXP_BITS_SHIFT)
134 | 
135 | /* contains the rounding mode and number of exponent bits */
136 | typedef uint32_t bf_flags_t;
137 | 
138 | typedef void *bf_realloc_func_t(void *opaque, void *ptr, size_t size);
139 | 
140 | typedef struct {
141 |     bf_t val;
142 |     limb_t prec;
143 | } BFConstCache;
144 | 
145 | typedef struct bf_context_t {
146 |     void *realloc_opaque;
147 |     bf_realloc_func_t *realloc_func;
148 |     BFConstCache log2_cache;
149 |     BFConstCache pi_cache;
150 |     struct BFNTTState *ntt_state;
151 | } bf_context_t;
152 | 
153 | /* Decode the number of exponent bits stored in 'flags'. The field holds
154 |    BF_EXP_BITS_MAX - n; the all-ones value selects BF_EXP_BITS_MAX + 1. */
155 | static inline int bf_get_exp_bits(bf_flags_t flags)
156 | {
157 |     int field = (flags >> BF_EXP_BITS_SHIFT) & BF_EXP_BITS_MASK;
158 | 
159 |     return (field == BF_EXP_BITS_MASK) ? BF_EXP_BITS_MAX + 1
160 |                                        : BF_EXP_BITS_MAX - field;
161 | }
162 | 
163 | /* encode 'n' exponent bits as the (BF_EXP_BITS_MAX - n) field of the flags */
164 | static inline bf_flags_t bf_set_exp_bits(int n) {
165 |     return ((BF_EXP_BITS_MAX - n) & BF_EXP_BITS_MASK) << BF_EXP_BITS_SHIFT;
166 | }
167 | 
168 | /* returned status */
169 | #define BF_ST_INVALID_OP  (1 << 0)
170 | #define BF_ST_DIVIDE_ZERO (1 << 1)
171 | #define BF_ST_OVERFLOW    (1 << 2)
172 | #define BF_ST_UNDERFLOW   (1 << 3)
173 | #define BF_ST_INEXACT     (1 << 4)
174 | /* indicate that a memory allocation error occurred. NaN is returned */
175 | #define BF_ST_MEM_ERROR   (1 << 5)
176 | 
177 | #define BF_RADIX_MAX 36 /* maximum radix for bf_atof() and bf_ftoa() */
178 | 
179 | /* Return the larger of the two signed limb values 'a' and 'b'
180 |    (on a tie both are equal, so either one is the answer). */
181 | static inline slimb_t bf_max(slimb_t a, slimb_t b)
182 | {
183 |     slimb_t larger = (a > b) ? a : b;
184 |     return larger;
185 | }
186 | 
187 | /* Return the smaller of the two signed limb values 'a' and 'b'
188 |    (on a tie both are equal, so either one is the answer). */
189 | static inline slimb_t bf_min(slimb_t a, slimb_t b)
190 | {
191 |     slimb_t smaller = (a < b) ? a : b;
192 |     return smaller;
193 | }
194 | 
195 | void bf_context_init(bf_context_t *s, bf_realloc_func_t *realloc_func,
196 |                      void *realloc_opaque);
197 | void bf_context_end(bf_context_t *s);
198 | /* free memory allocated for the bf cache data */
199 | void bf_clear_cache(bf_context_t *s);
200 | 
201 | /* forward to the allocator callback of the context with its opaque pointer */
202 | static inline void *bf_realloc(bf_context_t *s, void *ptr, size_t size) {
203 |     return (*s->realloc_func)(s->realloc_opaque, ptr, size);
204 | }
205 | 
206 | /* call the context allocator with a NULL pointer; 'size' must be != 0 */
207 | static inline void *bf_malloc(bf_context_t *s, size_t size)
208 | {
209 |     return s->realloc_func(s->realloc_opaque, NULL, size);
210 | }
211 | 
212 | /* release 'ptr' with a zero size reallocation; a NULL 'ptr' is ignored
213 |    because the call would otherwise be equivalent to malloc(0) */
214 | static inline void bf_free(bf_context_t *s, void *ptr) {
215 |     if (ptr != NULL)
216 |         s->realloc_func(s->realloc_opaque, ptr, 0);
217 | }
218 | 
219 | void bf_init(bf_context_t *s, bf_t *r);
220 | 
221 | /* Free the limb array of 'r'. A zeroed bf_t structure is accepted. */
222 | static inline void bf_delete(bf_t *r)
223 | {
224 |     /* nothing to release without a context or without a limb array */
225 |     if (r->ctx == NULL || r->tab == NULL)
226 |         return;
227 |     bf_realloc(r->ctx, r->tab, 0);
228 | }
229 | 
230 | /* flip the sign field of 'r' in place */
231 | static inline void bf_neg(bf_t *r) {
232 |     r->sign = r->sign ^ 1;
233 | }
234 | 
235 | /* non zero when 'a' is neither infinity nor NaN (zero counts as finite) */
236 | static inline int bf_is_finite(const bf_t *a) {
237 |     return a->expn < BF_EXP_INF;
238 | }
239 | 
240 | /* non zero when 'a' is NaN (only the exponent is examined) */
241 | static inline int bf_is_nan(const bf_t *a) {
242 |     return a->expn == BF_EXP_NAN;
243 | }
244 | 
245 | /* non zero when 'a' is zero (only the exponent is examined) */
246 | static inline int bf_is_zero(const bf_t *a) {
247 |     return a->expn == BF_EXP_ZERO;
248 | }
249 | 
250 | /* shallow structure copy: afterwards 'r' holds the same 'tab' pointer as 'a' */
251 | static inline void bf_memcpy(bf_t *r, const bf_t *a) {
252 |     *r = *a;
253 | }
254 | 
255 | int bf_set_ui(bf_t *r, uint64_t a);
256 | int bf_set_si(bf_t *r, int64_t a);
257 | void bf_set_nan(bf_t *r);
258 | void bf_set_zero(bf_t *r, int is_neg);
259 | void bf_set_inf(bf_t *r, int is_neg);
260 | int bf_set(bf_t *r, const bf_t *a);
261 | void bf_move(bf_t *r, bf_t *a);
262 | int bf_get_float64(const bf_t *a, double *pres, bf_rnd_t rnd_mode);
263 | int bf_set_float64(bf_t *a, double d);
264 | 
265 | int bf_cmpu(const bf_t *a, const bf_t *b);
266 | int bf_cmp_full(const bf_t *a, const bf_t *b);
267 | int bf_cmp(const bf_t *a, const bf_t *b);
268 | /* non zero when bf_cmp(a, b) reports equality (result 0) */
269 | static inline int bf_cmp_eq(const bf_t *a, const bf_t *b) {
270 |     return !bf_cmp(a, b);
271 | }
272 | 
273 | /* non zero when bf_cmp(a, b) is zero or negative */
274 | static inline int bf_cmp_le(const bf_t *a, const bf_t *b) {
275 |     return !(bf_cmp(a, b) > 0);
276 | }
277 | 
278 | /* non zero when bf_cmp(a, b) is negative */
279 | static inline int bf_cmp_lt(const bf_t *a, const bf_t *b) {
280 |     return 0 > bf_cmp(a, b);
281 | }
282 | 
283 | int bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags);
284 | int bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags);
285 | int bf_add_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec, bf_flags_t flags);
286 | int bf_mul(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags);
287 | int bf_mul_ui(bf_t *r, const bf_t *a, uint64_t b1, limb_t prec, bf_flags_t flags);
288 | int bf_mul_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec,
289 |               bf_flags_t flags);
290 | int bf_mul_2exp(bf_t *r, slimb_t e, limb_t prec, bf_flags_t flags);
291 | int bf_div(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags);
292 | #define BF_DIVREM_EUCLIDIAN BF_RNDF
293 | int bf_divrem(bf_t *q, bf_t *r, const bf_t *a, const bf_t *b,
294 |               limb_t prec, bf_flags_t flags, int rnd_mode);
295 | int bf_rem(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
296 |            bf_flags_t flags, int rnd_mode);
297 | int bf_remquo(slimb_t *pq, bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
298 |               bf_flags_t flags, int rnd_mode);
299 | /* round to integer with infinite precision */
300 | int bf_rint(bf_t *r, int rnd_mode);
301 | int bf_round(bf_t *r, limb_t prec, bf_flags_t flags);
302 | int bf_sqrtrem(bf_t *r, bf_t *rem1, const bf_t *a);
303 | int bf_sqrt(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
304 | slimb_t bf_get_exp_min(const bf_t *a);
305 | int bf_logic_or(bf_t *r, const bf_t *a, const bf_t *b);
306 | int bf_logic_xor(bf_t *r, const bf_t *a, const bf_t *b);
307 | int bf_logic_and(bf_t *r, const bf_t *a, const bf_t *b);
308 | 
309 | /* additional flags for bf_atof */
310 | /* do not accept hex radix prefix (0x or 0X) if radix = 0 or radix = 16 */
311 | #define BF_ATOF_NO_HEX       (1 << 16)
312 | /* accept binary (0b or 0B) or octal (0o or 0O) radix prefix if radix = 0 */
313 | #define BF_ATOF_BIN_OCT      (1 << 17)
314 | /* Do not parse NaN or Inf */
315 | #define BF_ATOF_NO_NAN_INF   (1 << 18)
316 | /* return the exponent separately */
317 | #define BF_ATOF_EXPONENT       (1 << 19)
318 | 
319 | int bf_atof(bf_t *a, const char *str, const char **pnext, int radix,
320 |             limb_t prec, bf_flags_t flags);
321 | /* this version accepts prec = BF_PREC_INF and returns the radix
322 |    exponent */
323 | int bf_atof2(bf_t *r, slimb_t *pexponent,
324 |              const char *str, const char **pnext, int radix,
325 |              limb_t prec, bf_flags_t flags);
326 | int bf_mul_pow_radix(bf_t *r, const bf_t *T, limb_t radix,
327 |                      slimb_t expn, limb_t prec, bf_flags_t flags);
328 | 
329 | 
330 | /* Conversion of floating point number to string. Return a null
331 |    terminated string or NULL if memory error. *plen contains its
332 |    length if plen != NULL.  The exponent letter is "e" for base 10,
333 |    "p" for bases 2, 8, 16 with a binary exponent and "@" for the other
334 |    bases. */
335 | 
336 | #define BF_FTOA_FORMAT_MASK (3 << 16)
337 | 
338 | /* fixed format: prec significant digits rounded with (flags &
339 |    BF_RND_MASK). Exponential notation is used if too many zeros are
340 |    needed.*/
341 | #define BF_FTOA_FORMAT_FIXED (0 << 16)
342 | /* fractional format: prec digits after the decimal point rounded with
343 |    (flags & BF_RND_MASK) */
344 | #define BF_FTOA_FORMAT_FRAC  (1 << 16)
345 | /* free format:
346 | 
347 |    For binary radices with bf_ftoa() and for bfdec_ftoa(): use the minimum
348 |    number of digits to represent 'a'. The precision and the rounding
349 |    mode are ignored.
350 | 
351 |    For the non binary radices with bf_ftoa(): use as many digits as
352 |    necessary so that bf_atof() return the same number when using
353 |    precision 'prec', rounding to nearest and the subnormal
354 |    configuration of 'flags'. The result is meaningful only if 'a' is
355 |    already rounded to 'prec' bits. If the subnormal flag is set, the
356 |    exponent in 'flags' must also be set to the desired exponent range.
357 | */
358 | #define BF_FTOA_FORMAT_FREE  (2 << 16)
359 | /* same as BF_FTOA_FORMAT_FREE but uses the minimum number of digits
360 |    (takes more computation time). Identical to BF_FTOA_FORMAT_FREE for
361 |    binary radices with bf_ftoa() and for bfdec_ftoa(). */
362 | #define BF_FTOA_FORMAT_FREE_MIN (3 << 16)
363 | 
364 | /* force exponential notation for fixed or free format */
365 | #define BF_FTOA_FORCE_EXP    (1 << 20)
366 | /* add 0x prefix for base 16, 0o prefix for base 8 or 0b prefix for
367 |    base 2 if non zero value */
368 | #define BF_FTOA_ADD_PREFIX   (1 << 21)
369 | /* return "Infinity" instead of "Inf" and add a "+" for positive
370 |    exponents */
371 | #define BF_FTOA_JS_QUIRKS    (1 << 22)
372 | 
373 | char *bf_ftoa(size_t *plen, const bf_t *a, int radix, limb_t prec,
374 |               bf_flags_t flags);
375 | 
376 | /* modulo 2^n instead of saturation. NaN and infinity return 0 */
377 | #define BF_GET_INT_MOD (1 << 0)
378 | int bf_get_int32(int *pres, const bf_t *a, int flags);
379 | int bf_get_int64(int64_t *pres, const bf_t *a, int flags);
380 | int bf_get_uint64(uint64_t *pres, const bf_t *a);
381 | 
382 | /* the following functions are exported for testing only. */
383 | void mp_print_str(const char *str, const limb_t *tab, limb_t n);
384 | void bf_print_str(const char *str, const bf_t *a);
385 | int bf_resize(bf_t *r, limb_t len);
386 | int bf_get_fft_size(int *pdpl, int *pnb_mods, limb_t len);
387 | int bf_normalize_and_round(bf_t *r, limb_t prec1, bf_flags_t flags);
388 | int bf_can_round(const bf_t *a, slimb_t prec, bf_rnd_t rnd_mode, slimb_t k);
389 | slimb_t bf_mul_log2_radix(slimb_t a1, unsigned int radix, int is_inv,
390 |                           int is_ceil1);
391 | int mp_mul(bf_context_t *s, limb_t *result,
392 |            const limb_t *op1, limb_t op1_size,
393 |            const limb_t *op2, limb_t op2_size);
394 | limb_t mp_add(limb_t *res, const limb_t *op1, const limb_t *op2,
395 |               limb_t n, limb_t carry);
396 | limb_t mp_add_ui(limb_t *tab, limb_t b, size_t n);
397 | int mp_sqrtrem(bf_context_t *s, limb_t *tabs, limb_t *taba, limb_t n);
398 | int mp_recip(bf_context_t *s, limb_t *tabr, const limb_t *taba, limb_t n);
399 | limb_t bf_isqrt(limb_t a);
400 | 
401 | /* transcendental functions */
402 | int bf_const_log2(bf_t *T, limb_t prec, bf_flags_t flags);
403 | int bf_const_pi(bf_t *T, limb_t prec, bf_flags_t flags);
404 | int bf_exp(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
405 | int bf_log(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
406 | #define BF_POW_JS_QUIRKS (1 << 16) /* (+/-1)^(+/-Inf) = NaN, 1^NaN = NaN */
407 | int bf_pow(bf_t *r, const bf_t *x, const bf_t *y, limb_t prec, bf_flags_t flags);
408 | int bf_cos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
409 | int bf_sin(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
410 | int bf_tan(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
411 | int bf_atan(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
412 | int bf_atan2(bf_t *r, const bf_t *y, const bf_t *x,
413 |              limb_t prec, bf_flags_t flags);
414 | int bf_asin(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
415 | int bf_acos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags);
416 | 
417 | /* decimal floating point */
418 | 
419 | /* initialise a decimal number with bf_init() (bfdec_t mirrors the bf_t layout) */
420 | static inline void bfdec_init(bf_context_t *s, bfdec_t *r) {
421 |     bf_init(s, (bf_t *)r);
422 | }
423 | /* release a decimal number through bf_delete() */
424 | static inline void bfdec_delete(bfdec_t *r) {
425 |     bf_delete((bf_t *)r);
426 | }
427 | 
428 | /* flip the sign field of 'r' in place */
429 | static inline void bfdec_neg(bfdec_t *r) {
430 |     r->sign = r->sign ^ 1;
431 | }
432 | 
433 | /* non zero when 'a' is neither infinity nor NaN (zero counts as finite) */
434 | static inline int bfdec_is_finite(const bfdec_t *a) {
435 |     return a->expn < BF_EXP_INF;
436 | }
437 | 
438 | /* non zero when 'a' is NaN (only the exponent is examined) */
439 | static inline int bfdec_is_nan(const bfdec_t *a) {
440 |     return a->expn == BF_EXP_NAN;
441 | }
442 | 
443 | /* non zero when 'a' is zero (only the exponent is examined) */
444 | static inline int bfdec_is_zero(const bfdec_t *a) {
445 |     return a->expn == BF_EXP_ZERO;
446 | }
447 | 
448 | /* shallow structure copy, delegated to bf_memcpy() */
449 | static inline void bfdec_memcpy(bfdec_t *r, const bfdec_t *a) {
450 |     bf_memcpy((bf_t *)r, (const bf_t *)a);
451 | }
452 | 
453 | int bfdec_set_ui(bfdec_t *r, uint64_t a);
454 | int bfdec_set_si(bfdec_t *r, int64_t a);
455 | 
456 | /* set 'r' to NaN through bf_set_nan() */
457 | static inline void bfdec_set_nan(bfdec_t *r) {
458 |     bf_set_nan((bf_t *)r);
459 | }
460 | /* set 'r' to zero through bf_set_zero(); 'is_neg' is passed unchanged */
461 | static inline void bfdec_set_zero(bfdec_t *r, int is_neg) {
462 |     bf_set_zero((bf_t *)r, is_neg);
463 | }
464 | /* set 'r' to infinity through bf_set_inf(); 'is_neg' is passed unchanged */
465 | static inline void bfdec_set_inf(bfdec_t *r, int is_neg) {
466 |     bf_set_inf((bf_t *)r, is_neg);
467 | }
468 | /* copy 'a' into 'r' with bf_set(); the cast keeps the const qualifier of 'a' */
469 | static inline int bfdec_set(bfdec_t *r, const bfdec_t *a) {
470 |     return bf_set((bf_t *)r, (const bf_t *)a);
471 | }
472 | /* decimal front end of bf_move(): both arguments are passed as bf_t */
473 | static inline void bfdec_move(bfdec_t *r, bfdec_t *a) {
474 |     bf_move((bf_t *)r, (bf_t *)a);
475 | }
476 | /* decimal front end of bf_cmpu(); the result is returned unchanged */
477 | static inline int bfdec_cmpu(const bfdec_t *a, const bfdec_t *b) {
478 |     return bf_cmpu((const bf_t *)a, (const bf_t *)b);
479 | }
480 | /* decimal front end of bf_cmp_full(); the result is returned unchanged */
481 | static inline int bfdec_cmp_full(const bfdec_t *a, const bfdec_t *b) {
482 |     return bf_cmp_full((const bf_t *)a, (const bf_t *)b);
483 | }
484 | /* decimal front end of bf_cmp(); the result is returned unchanged */
485 | static inline int bfdec_cmp(const bfdec_t *a, const bfdec_t *b) {
486 |     return bf_cmp((const bf_t *)a, (const bf_t *)b);
487 | }
488 | /* non zero when bfdec_cmp(a, b) reports equality (result 0) */
489 | static inline int bfdec_cmp_eq(const bfdec_t *a, const bfdec_t *b) {
490 |     return !bfdec_cmp(a, b);
491 | }
492 | /* non zero when bfdec_cmp(a, b) is zero or negative */
493 | static inline int bfdec_cmp_le(const bfdec_t *a, const bfdec_t *b) {
494 |     return !(bfdec_cmp(a, b) > 0);
495 | }
496 | /* non zero when bfdec_cmp(a, b) is negative */
497 | static inline int bfdec_cmp_lt(const bfdec_t *a, const bfdec_t *b) {
498 |     return 0 > bfdec_cmp(a, b);
499 | }
500 | 
501 | int bfdec_add(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec,
502 |               bf_flags_t flags);
503 | int bfdec_sub(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec,
504 |               bf_flags_t flags);
505 | int bfdec_add_si(bfdec_t *r, const bfdec_t *a, int64_t b1, limb_t prec,
506 |                  bf_flags_t flags);
507 | int bfdec_mul(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec,
508 |               bf_flags_t flags);
509 | int bfdec_mul_si(bfdec_t *r, const bfdec_t *a, int64_t b1, limb_t prec,
510 |                  bf_flags_t flags);
511 | int bfdec_div(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec,
512 |               bf_flags_t flags);
513 | int bfdec_divrem(bfdec_t *q, bfdec_t *r, const bfdec_t *a, const bfdec_t *b,
514 |                  limb_t prec, bf_flags_t flags, int rnd_mode);
515 | int bfdec_rem(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec,
516 |               bf_flags_t flags, int rnd_mode);
517 | int bfdec_rint(bfdec_t *r, int rnd_mode);
518 | int bfdec_sqrt(bfdec_t *r, const bfdec_t *a, limb_t prec, bf_flags_t flags);
519 | int bfdec_round(bfdec_t *r, limb_t prec, bf_flags_t flags);
520 | int bfdec_get_int32(int *pres, const bfdec_t *a);
521 | int bfdec_pow_ui(bfdec_t *r, const bfdec_t *a, limb_t b);
522 | 
523 | char *bfdec_ftoa(size_t *plen, const bfdec_t *a, limb_t prec, bf_flags_t flags);
524 | int bfdec_atof(bfdec_t *r, const char *str, const char **pnext,
525 |                limb_t prec, bf_flags_t flags);
526 | 
527 | /* the following functions are exported for testing only. */
528 | extern const limb_t mp_pow_dec[LIMB_DIGITS + 1];
529 | void bfdec_print_str(const char *str, const bfdec_t *a);
530 | /* decimal front end of bf_resize(); the result is returned unchanged */
531 | static inline int bfdec_resize(bfdec_t *r, limb_t len) {
532 |     return bf_resize((bf_t *)r, len);
533 | }
534 | int bfdec_normalize_and_round(bfdec_t *r, limb_t prec1, bf_flags_t flags);
535 | 
536 | #endif /* LIBBF_H */
537 | 


--------------------------------------------------------------------------------
/pi_1e5.sha1sum:
--------------------------------------------------------------------------------
1 | ddec8e3cd091057af4de7dd147bff14860c802e8  pi_1e5.txt
2 | 


--------------------------------------------------------------------------------
/pi_1e6.sha1sum:
--------------------------------------------------------------------------------
1 | ec497f9b8b0aad5fe967d0916bff266972081f50  pi_1e6.txt
2 | 


--------------------------------------------------------------------------------
/pi_1e7.sha1sum:
--------------------------------------------------------------------------------
1 | 056fe739ad2e3b427691e4e62eef8936ce2a88e4  pi_1e7.txt
2 | 


--------------------------------------------------------------------------------
/pi_1e8.sha1sum:
--------------------------------------------------------------------------------
1 | 23456396be72fb9a390e5f707c7bff7a1c3697f8  pi_1e8.txt
2 | 


--------------------------------------------------------------------------------
/pi_1e9.sha1sum:
--------------------------------------------------------------------------------
1 | 8ef30374165e5e4a11552a0d896e9e961ea13c33  pi_1e9.txt
2 | 


--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
  1 | Tiny Big Float library
  2 | ----------------------
  3 | 
  4 | Copyright (c) 2017-2020 Fabrice Bellard
  5 | 
  6 | LibBF is a small library to handle arbitrary precision binary or
  7 | decimal floating point numbers. Its compiled size is about 90 KB of
  8 | x86 code and has no dependency on other libraries. It is not the
  9 | fastest library nor the smallest but it tries to be simple while using
 10 | asymptotically optimal algorithms. The basic arithmetic operations
 11 | have a near linear running time.
 12 | 
 13 | The TinyPI example computes billions of digits of Pi using the
 14 | Chudnovsky formula.
 15 | 
 16 | 1) Features
 17 | -----------
 18 | 
 19 | - Arbitrary precision floating point numbers in base 2 using the IEEE
 20 |   754 semantics (including subnormal numbers, infinities and
 21 |   NaN).
 22 | - All operations are exactly rounded using the 5 IEEE 754 rounding
 23 |   modes (round to nearest with ties to even or away from zero, round
 24 |   to zero, -/+ infinity). The additional non-deterministic faithful
 25 |   rounding mode is supported when a lower or deterministic running
 26 |   time is necessary.
 27 | - Stateless API (each function takes as input the rounding mode,
 28 |   mantissa and exponent precisions in bits and returns the IEEE status
 29 |   flags).
 30 | - The basic arithmetic operations (addition, subtraction,
 31 |   multiplication, division, square root) have a near linear running
 32 |   time.
 33 | - Multiplication using a SIMD optimized Number Theoretic Transform.
 34 | - Exactly rounded floating point input and output in any base between
 35 |   2 and 36 with near linear running time. Floating point output can
 36 |   select the smallest amount of digits to get the required precision.
 37 | - Transcendental functions are supported (exp, log, pow, sin, cos, tan,
 38 |   asin, acos, atan, atan2).
 39 | - Operations on arbitrarily large integers are supported by using a
 40 |   special "infinite" precision. Integer division with remainder and
 41 |   logical operations (assuming two's complement binary representation)
 42 |   are implemented.
 43 | - Arbitrary precision floating point numbers in base 10 corresponding
 44 |   to the IEEE 754 2008 semantics with the limitation that the mantissa
 45 |   is always normalized. The basic arithmetic operations, output and
 46 |   input are supported with a quadratic running time.
 47 | - Easy to embed: a few C files need to be copied, the memory allocator
 48 |   can be redefined, the memory allocation failures are tested.
 49 | - MIT license.
 50 | 
 51 | 2) Compilation
 52 | --------------
 53 | 
 54 | Edit the top of the Makefile to select the build options. By default,
 55 | the MPFR library is used to compile the test tools (bftest and
 56 | bfbench) but it is not needed to build libbf. The included SoftFP code
 57 | (softfp* files) is only used by the bftest test tool.
 58 | 
 59 | TinyPI example: the "tinypi" executable uses the portable code. The
 60 | "tinypi-avx2" executable uses the AVX2 implementation. An x86 CPU of
 61 | at least the Intel Haswell generation is necessary for AVX2.
 62 | 
 63 | 3) Design principles
 64 | --------------------
 65 | 
 66 | - Base 2 and IEEE 754 semantics were chosen so that it is possible to
 67 |   get good performance and to compare the results with other libraries
 68 |   or hardware implementations. Moreover, base 2 arbitrary precision is
 69 |   easier to analyse and implement.
 70 | 
 71 | - The support of subnormal numbers and of a configurable number of
 72 |   bits for the exponent allows the exact emulation of IEEE 754
 73 |   floating point hardware.
 74 | 
 75 | - The stateless API ensures that there is no global state to save and
 76 |   restore between operations. The rounding mode, subnormal flag and
 77 |   number of exponent bits are ORed into a single "flags" parameter to
 78 |   limit the verbosity of the API. The number of exponent bits 'n' is
 79 |   specified as '(M-n)' where M is the maximum number of exponent bits
 80 |   so that '0' always indicates the maximum number of exponent bits.
 81 | 
 82 | - All the IEEE 754 status flags are returned by each operation. The
 83 |   user can easily OR them together when necessary.
 84 | 
 85 | - Unlike other libraries (such as MPFR [2]), the numbers have no
 86 |   attached precision. The general rule is that each operation is
 87 |   internally computed with infinite precision and then rounded with
 88 |   the precision and rounding mode specified for the operation.
 89 | 
 90 | - In many computations it is necessary to use arbitrarily large
 91 |   integers. LibBF supports them without adding another number type by
 92 |   providing a special "infinite" precision. There is a small overhead
 93 |   of course because they are manipulated as floating point numbers but
 94 |   there is no cost to convert between floating point numbers and
 95 |   integers.
 96 | 
 97 | - The faithful rounding mode (i.e. the result is rounded to - or
 98 |   +infinity non deterministically) is supported for all operations. It
 99 |   usually gives a faster and deterministic running time. The
100 |   transcendental functions, inverse or inverse square root are
101 |   internally implemented to give a faithful rounding. When a
102 |   non-faithful rounding is requested by the user, the Ziv rounding
103 |   algorithm is invoked.
104 | 
105 | 4) Implementation notes
106 | -----------------------
107 | 
108 | - The code was tested on a 64 bit x86 CPU. It should be portable to
109 |   other CPUs. The portable version handles numbers with up to 4*10^16
110 |   digits. The AVX2 version handles numbers with up to 8*10^12 digits.
111 | 
112 | - 32 bits: the code compiles on 32 bit architectures but it is not
113 |   designed to be efficient nor scalable in this case. The size of the
114 |   numbers is limited to about 10 million digits.
115 | 
116 | - The Number Theoretic Transform is not the fastest algorithm for
117 |   small to medium numbers (i.e. a few million digits), but it gets
118 |   better when the size of the numbers grows. There are no round-off
119 |   errors as with Fast Fourier Transform, the memory usage is much
120 |   smaller and it is potentially easier to parallelize. This code
121 |   contains an original SIMD (AVX2 on x86) implementation using 64 bit
122 |   floating point numbers. It relies on the fact that the fused
123 |   multiply accumulate (FMA) operation gives access to the full
124 |   precision of the product of two 64 bit floating point numbers. The
125 |   portable code relies on the fact that the C compiler supports a
126 |   double word integer type (i.e. 128 bit integers on 64 bit). The
127 |   modulo operations were replaced with multiplications which are
128 |   usually faster.
129 | 
130 | - Base conversion: the algorithm is not the fastest one but it is
131 |   simple and still gives a near linear running time.
132 | 
133 | - This library reuses some ideas from TachusPI (
134 |   http://bellard.org/pi/pi2700e9/tpi.html ) . It is about 4 times
135 |   slower to compute Pi but is much smaller and simpler.
136 | 
137 | 5) Known limitations
138 | --------------------
139 | 
140 | - In some operations (such as the transcendental ones), there is no
141 |   rigorous proof of the rounding error. We expect to improve it by
142 |   reusing ideas from the MPFR algorithms. Some unlikely
143 |   overflow/underflow cases are also not handled in exp or pow.
144 | 
145 | - The transcendental operations are not speed optimized and do not use
146 |   an asymptotically optimal algorithm (the running time is in
147 |   O(n^(1/2)*M(n)) where M(n) is the time to multiply two n bit
148 |   numbers). A possible solution would be to implement a binary
149 |   splitting algorithm for exp and sin/cos (see [1]) and to use a
150 |   Newton based inversion to get log and atan.
151 | 
152 | - Memory allocation errors are not always correctly reported for the
153 |   transcendental operations.
154 | 
155 | 6) References
156 | -------------
157 | 
158 | [1] Modern Computer Arithmetic, Richard Brent and Paul Zimmermann,
159 | Cambridge University Press, 2010
160 | (https://members.loria.fr/PZimmermann/mca/pub226.html).
161 | 
162 | [2] The GNU MPFR Library (http://www.mpfr.org/)
163 | 


--------------------------------------------------------------------------------
/softfp.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * SoftFP Library
 3 |  * 
 4 |  * Copyright (c) 2016 Fabrice Bellard
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in
14 |  * all copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 |  * THE SOFTWARE.
23 |  */
24 | #include <stdlib.h>
25 | #include <stdio.h>
26 | #include <assert.h>
27 | #include <string.h>
28 | 
29 | #include "cutils.h"
30 | #include "softfp.h"
31 | 
/* Count the leading zero bits of a 32 bit value.
   Returns 32 for a == 0, a case __builtin_clz() leaves undefined. */
static inline int clz_u32(uint32_t a)
{
    return (a != 0) ? __builtin_clz(a) : 32;
}
42 | 
/* Count the leading zero bits of a 64 bit value.
   Returns 64 for a == 0, a case __builtin_clzll() leaves undefined. */
static inline int clz_u64(uint64_t a)
{
    if (a == 0)
        return 64;
    return __builtin_clzll(a);
}
54 | 
#ifdef HAVE_INT128
/* Count the leading zero bits of a 128 bit value by examining its two
   64 bit halves. Returns 128 for a == 0. */
static inline int clz_u128(uint128_t a)
{
    const uint64_t hi = a >> 64;
    const uint64_t lo = a;

    if (hi != 0)
        return __builtin_clzll(hi);
    if (lo != 0)
        return __builtin_clzll(lo) + 64;
    return 128;
}
#endif
74 | 
/* Instantiate the soft float implementation once per supported width.
   softfp_template.h picks its integer types and field sizes from F_SIZE
   and appends F_SIZE to the names it defines (add_sf32, add_sf64, ...).
   NOTE(review): F_SIZE is redefined before each inclusion without an
   #undef here; presumably the template #undefs it at its end (not visible
   from this file) - confirm. */
#define F_SIZE 32
#include "softfp_template.h"

#define F_SIZE 64
#include "softfp_template.h"

/* the 128 bit instantiation needs a 128 bit integer type */
#ifdef HAVE_INT128

#define F_SIZE 128
#include "softfp_template.h"

#endif
87 | 
88 | 


--------------------------------------------------------------------------------
/softfp.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * SoftFP Library
  3 |  * 
  4 |  * Copyright (c) 2016 Fabrice Bellard
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | #ifndef SOFTFP_H
 25 | #define SOFTFP_H
 26 | 
 27 | #include <inttypes.h>
 28 | #include "cutils.h"
 29 | 
/* Rounding modes accepted by the rounding operations.
   The numeric values are significant: round_pack_sf() in
   softfp_template.h tests (rm & 1) to tell RM_RDN (2) from RM_RUP (3), so
   the enumerators must not be reordered or renumbered. */
typedef enum {
    RM_RNE, /* Round to Nearest, ties to Even */
    RM_RTZ, /* Round towards Zero */
    RM_RDN, /* Round Down */
    RM_RUP, /* Round Up */
    RM_RMM, /* Round to Nearest, ties to Max Magnitude */
} RoundingModeEnum;
 37 | 
/* Exception flags. The operations OR these bits into the word pointed to
   by their pfflags argument (see the `*pfflags |= ...` statements in
   softfp_template.h).
   NOTE(review): the bit assignment looks like the RISC-V fflags layout -
   confirm before relying on it. */
#define FFLAG_INVALID_OP  (1 << 4)
#define FFLAG_DIVIDE_ZERO (1 << 3)
#define FFLAG_OVERFLOW    (1 << 2)
#define FFLAG_UNDERFLOW   (1 << 1)
#define FFLAG_INEXACT     (1 << 0)

/* One bit per floating point class (N = negative, P = positive).
   Presumably the value returned by the fclass_sfNN() functions, whose
   implementation is not visible here - confirm. */
#define FCLASS_NINF       (1 << 0)
#define FCLASS_NNORMAL    (1 << 1)
#define FCLASS_NSUBNORMAL (1 << 2)
#define FCLASS_NZERO      (1 << 3)
#define FCLASS_PZERO      (1 << 4)
#define FCLASS_PSUBNORMAL (1 << 5)
#define FCLASS_PNORMAL    (1 << 6)
#define FCLASS_PINF       (1 << 7)
#define FCLASS_SNAN       (1 << 8)
#define FCLASS_QNAN       (1 << 9)
 54 | 
/* Soft floats are carried as raw bit patterns in unsigned integers of the
   same width (sign bit, exponent field, mantissa field). */
typedef uint32_t sfloat32;
typedef uint64_t sfloat64;
/* the 128 bit format is only available with a 128 bit integer type
   (uint128_t is not declared in this header; presumably it comes from
   cutils.h - confirm) */
#ifdef HAVE_INT128
typedef uint128_t sfloat128;
#endif
 60 | 
 61 | /* 32 bit floats */
 62 | 
 63 | #define FSIGN_MASK32 (1 << 31)
 64 | 
/* Arithmetic on raw sfloat32 encodings. `rm` selects the rounding mode;
   exception flags (FFLAG_xxx) are ORed into *pfflags. */
sfloat32 add_sf32(sfloat32 a, sfloat32 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat32 sub_sf32(sfloat32 a, sfloat32 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat32 mul_sf32(sfloat32 a, sfloat32 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat32 div_sf32(sfloat32 a, sfloat32 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat32 sqrt_sf32(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags);
/* fused multiply and add */
sfloat32 fma_sf32(sfloat32 a, sfloat32 b, sfloat32 c, RoundingModeEnum rm, uint32_t *pfflags);

/* Min/max, comparisons and classification.
   NOTE(review): implemented outside the code reviewed here; NaN handling
   and flag behaviour should be confirmed against the implementation. */
sfloat32 min_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags);
sfloat32 max_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags);
int eq_quiet_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags);
int le_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags);
int lt_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags);
uint32_t fclass_sf32(sfloat32 a);

/* Conversions between sfloat32 and sfloat64 / integer types. The widening
   float conversion takes no rounding mode. */
sfloat64 cvt_sf32_sf64(sfloat32 a, uint32_t *pfflags);
sfloat32 cvt_sf64_sf32(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
int32_t cvt_sf32_i32(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags);
uint32_t cvt_sf32_u32(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags);
int64_t cvt_sf32_i64(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags);
uint64_t cvt_sf32_u64(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags);
#ifdef HAVE_INT128
int128_t cvt_sf32_i128(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags);
uint128_t cvt_sf32_u128(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags);
#endif
sfloat32 cvt_i32_sf32(int32_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat32 cvt_u32_sf32(uint32_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat32 cvt_i64_sf32(int64_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat32 cvt_u64_sf32(uint64_t a, RoundingModeEnum rm, uint32_t *pfflags);
#ifdef HAVE_INT128
sfloat32 cvt_i128_sf32(int128_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat32 cvt_u128_sf32(uint128_t a, RoundingModeEnum rm, uint32_t *pfflags);
#endif
 97 | 
/* 64 bit floats */

/* sign bit of an sfloat64 */
#define FSIGN_MASK64 ((uint64_t)1 << 63)

/* Arithmetic on raw sfloat64 encodings. `rm` selects the rounding mode;
   exception flags (FFLAG_xxx) are ORed into *pfflags. */
sfloat64 add_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat64 sub_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat64 mul_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat64 div_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat64 sqrt_sf64(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
/* fused multiply and add */
sfloat64 fma_sf64(sfloat64 a, sfloat64 b, sfloat64 c, RoundingModeEnum rm, uint32_t *pfflags);

/* Min/max, comparisons and classification.
   NOTE(review): implemented outside the code reviewed here; NaN handling
   and flag behaviour should be confirmed against the implementation. */
sfloat64 min_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags);
sfloat64 max_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags);
int eq_quiet_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags);
int le_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags);
int lt_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags);
uint32_t fclass_sf64(sfloat64 a);

/* Conversions. cvt_sf32_sf64() and cvt_sf64_sf32() are already declared
   in the 32 bit section; the repeated declarations are identical and
   therefore harmless. */
sfloat64 cvt_sf32_sf64(sfloat32 a, uint32_t *pfflags);
sfloat32 cvt_sf64_sf32(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
int32_t cvt_sf64_i32(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
uint32_t cvt_sf64_u32(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
int64_t cvt_sf64_i64(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
uint64_t cvt_sf64_u64(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
#ifdef HAVE_INT128
int128_t cvt_sf64_i128(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
uint128_t cvt_sf64_u128(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
#endif
sfloat64 cvt_i32_sf64(int32_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat64 cvt_u32_sf64(uint32_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat64 cvt_i64_sf64(int64_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat64 cvt_u64_sf64(uint64_t a, RoundingModeEnum rm, uint32_t *pfflags);
#ifdef HAVE_INT128
sfloat64 cvt_i128_sf64(int128_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat64 cvt_u128_sf64(uint128_t a, RoundingModeEnum rm, uint32_t *pfflags);
#endif
134 | 
/* 128 bit floats */

/* the whole 128 bit API requires a 128 bit integer type */
#ifdef HAVE_INT128

/* sign bit of an sfloat128 */
#define FSIGN_MASK128 ((uint128_t)1 << 127)

/* Arithmetic on raw sfloat128 encodings. `rm` selects the rounding mode;
   exception flags (FFLAG_xxx) are ORed into *pfflags. */
sfloat128 add_sf128(sfloat128 a, sfloat128 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 sub_sf128(sfloat128 a, sfloat128 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 mul_sf128(sfloat128 a, sfloat128 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 div_sf128(sfloat128 a, sfloat128 b, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 sqrt_sf128(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);
/* fused multiply and add */
sfloat128 fma_sf128(sfloat128 a, sfloat128 b, sfloat128 c, RoundingModeEnum rm, uint32_t *pfflags);

/* Min/max, comparisons and classification.
   NOTE(review): implemented outside the code reviewed here; NaN handling
   and flag behaviour should be confirmed against the implementation. */
sfloat128 min_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags);
sfloat128 max_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags);
int eq_quiet_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags);
int le_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags);
int lt_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags);
uint32_t fclass_sf128(sfloat128 a);

/* Conversions from/to the narrower float formats. The widening
   conversions take no rounding mode. */
sfloat128 cvt_sf32_sf128(sfloat32 a, uint32_t *pfflags);
sfloat32 cvt_sf128_sf32(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 cvt_sf64_sf128(sfloat64 a, uint32_t *pfflags);
sfloat64 cvt_sf128_sf64(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);

/* Conversions from/to integer types. */
int32_t cvt_sf128_i32(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);
uint32_t cvt_sf128_u32(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);
int64_t cvt_sf128_i64(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);
uint64_t cvt_sf128_u64(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);
int128_t cvt_sf128_i128(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);
uint128_t cvt_sf128_u128(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 cvt_i32_sf128(int32_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 cvt_u32_sf128(uint32_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 cvt_i64_sf128(int64_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 cvt_u64_sf128(uint64_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 cvt_i128_sf128(int128_t a, RoundingModeEnum rm, uint32_t *pfflags);
sfloat128 cvt_u128_sf128(uint128_t a, RoundingModeEnum rm, uint32_t *pfflags);

#endif
174 | 
175 | #endif /* SOFTFP_H */
176 | 


--------------------------------------------------------------------------------
/softfp_template.h:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * SoftFP Library
   3 |  * 
   4 |  * Copyright (c) 2016 Fabrice Bellard
   5 |  *
   6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 |  * of this software and associated documentation files (the "Software"), to deal
   8 |  * in the Software without restriction, including without limitation the rights
   9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 |  * copies of the Software, and to permit persons to whom the Software is
  11 |  * furnished to do so, subject to the following conditions:
  12 |  *
  13 |  * The above copyright notice and this permission notice shall be included in
  14 |  * all copies or substantial portions of the Software.
  15 |  *
  16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 |  * THE SOFTWARE.
  23 |  */
/* Per-instantiation configuration, selected by F_SIZE (which must be
   defined by the includer before this file is included):
     F_UINT     unsigned type holding one encoded float
     F_ULONG    unsigned type twice as wide, when one is available; when
                defined it enables the direct mul_u()/divrem_u() versions
     F_UHALF    unsigned type half as wide, used by the fallback mul_u()
     MANT_SIZE  number of stored mantissa bits
     EXP_SIZE   number of exponent bits
   The field sizes are those of the IEEE 754 binary32, binary64 and
   binary128 formats. */
#if F_SIZE == 32
#define F_UINT uint32_t
#define F_ULONG uint64_t
#define MANT_SIZE 23
#define EXP_SIZE 8
#elif F_SIZE == 64
#define F_UHALF uint32_t
#define F_UINT uint64_t
/* a double-width type only exists with 128 bit integer support */
#ifdef HAVE_INT128
#define F_ULONG uint128_t
#endif
#define MANT_SIZE 52
#define EXP_SIZE 11
#elif F_SIZE == 128
/* no F_ULONG here: the half-word fallbacks are always used */
#define F_UHALF uint64_t
#define F_UINT uint128_t
#define MANT_SIZE 112
#define EXP_SIZE 15
#else
#error unsupported F_SIZE
#endif
  45 | 
/* all-ones exponent field value; marks infinities and NaNs */
#define EXP_MASK ((1 << EXP_SIZE) - 1)
/* mask of the stored mantissa bits */
#define MANT_MASK (((F_UINT)1 << MANT_SIZE) - 1)
/* sign bit of the encoding */
#define SIGN_MASK ((F_UINT)1 << (F_SIZE - 1))
#define IMANT_SIZE (F_SIZE - 2) /* internal mantissa size */
/* number of low-order bits of the internal mantissa that are rounded away */
#define RND_SIZE (IMANT_SIZE - MANT_SIZE)
/* most significant mantissa bit: set in quiet NaNs, clear in signaling ones */
#define QNAN_MASK ((F_UINT)1 << (MANT_SIZE - 1))
  52 | 
/* Width-specific names: each identifier below gets F_SIZE appended so
   that this template can be included several times in one translation
   unit (softfp.c includes it once per width).
   NOTE(review): glue() is not defined in this file; presumably it is the
   token-pasting helper from cutils.h - confirm. */
/* quiet NaN */
#define F_QNAN glue(F_QNAN, F_SIZE)
#define clz glue(clz_u, F_SIZE)
#define pack_sf glue(pack_sf, F_SIZE)
#define unpack_sf glue(unpack_sf, F_SIZE)
#define rshift_rnd glue(rshift_rnd, F_SIZE)
/* note: the generated name is roundpack_sfNN (no underscore after "round") */
#define round_pack_sf glue(roundpack_sf, F_SIZE)
#define normalize_sf glue(normalize_sf, F_SIZE)
#define normalize2_sf glue(normalize2_sf, F_SIZE)
#define issignan_sf glue(issignan_sf, F_SIZE)
#define isnan_sf glue(isnan_sf, F_SIZE)
#define add_sf glue(add_sf, F_SIZE)
#define mul_sf glue(mul_sf, F_SIZE)
#define fma_sf glue(fma_sf, F_SIZE)
#define div_sf glue(div_sf, F_SIZE)
#define sqrt_sf glue(sqrt_sf, F_SIZE)
#define normalize_subnormal_sf glue(normalize_subnormal_sf, F_SIZE)
#define divrem_u glue(divrem_u, F_SIZE)
#define sqrtrem_u glue(sqrtrem_u, F_SIZE)
#define mul_u glue(mul_u, F_SIZE)
#define cvt_sf32_sf glue(cvt_sf32_sf, F_SIZE)
#define cvt_sf64_sf glue(cvt_sf64_sf, F_SIZE)

/* NaN value returned by the operations below: sign 0, exponent all ones,
   only the most significant mantissa bit set */
static const F_UINT F_QNAN = (((F_UINT)EXP_MASK << MANT_SIZE) | ((F_UINT)1 << (MANT_SIZE - 1)));
  77 | 
  78 | static inline F_UINT pack_sf(uint32_t a_sign, uint32_t a_exp, F_UINT a_mant)
  79 | {
  80 |     return ((F_UINT)a_sign << (F_SIZE - 1)) |
  81 |         ((F_UINT)a_exp << MANT_SIZE) | 
  82 |         (a_mant & MANT_MASK);
  83 | }
  84 | 
  85 | static inline F_UINT unpack_sf(uint32_t *pa_sign, int32_t *pa_exp,
  86 |                                F_UINT a)
  87 | {
  88 |     *pa_sign = a >> (F_SIZE - 1);
  89 |     *pa_exp = (a >> MANT_SIZE) & EXP_MASK;
  90 |     return a & MANT_MASK;
  91 | } 
  92 | 
  93 | static F_UINT rshift_rnd(F_UINT a, int d)
  94 | {
  95 |     F_UINT mask;
  96 |     if (d != 0) {
  97 |         if (d >= F_SIZE) {
  98 |             a = (a != 0);
  99 |         } else {
 100 |             mask = ((F_UINT)1 << d) - 1;
 101 |             a = (a >> d) | ((a & mask) != 0);
 102 |         }
 103 |     }
 104 |     return a;
 105 | }
 106 | 
/* Round an intermediate result and encode it.

   a_sign   sign bit of the result
   a_exp    exponent of the result; values <= 0 take the subnormal path
   a_mant   mantissa with RND_SIZE extra low-order bits to be rounded away
   rm       rounding mode
   pfflags  FFLAG_INEXACT, FFLAG_UNDERFLOW and FFLAG_OVERFLOW are ORed
            into *pfflags as needed

   Returns the packed encoding (possibly a subnormal, the largest finite
   value or an infinity).

   a_mant is considered to have its MSB at F_SIZE - 2 bits */
static F_UINT round_pack_sf(uint32_t a_sign, int a_exp, F_UINT a_mant,
                            RoundingModeEnum rm, uint32_t *pfflags)
{
    int diff;
    uint32_t addend, rnd_bits;

    /* addend is the value added to the mantissa before its RND_SIZE low
       bits are dropped */
    switch(rm) {
    case RM_RNE:
    case RM_RMM:
        /* half of the last kept unit */
        addend = (1 << (RND_SIZE - 1));
        break;
    case RM_RTZ:
        /* truncate */
        addend = 0;
        break;
    default:
    case RM_RDN:
    case RM_RUP:
        //        printf("s=%d rm=%d m=%x\n", a_sign, rm, a_mant);
        /* (rm & 1) is 0 for RM_RDN and 1 for RM_RUP: the magnitude is
           rounded up for negative values with RM_RDN and for positive
           values with RM_RUP, and truncated otherwise */
        if (a_sign ^ (rm & 1))
            addend = (1 << RND_SIZE) - 1;
        else
            addend = 0;
        break;
    }

    /* potentially subnormal */
    if (a_exp <= 0) {
        BOOL is_subnormal;
        /* Note: we set the underflow flag if the rounded result
           is subnormal and inexact */
        /* with a_exp == 0 the result is not counted as subnormal when
           adding the rounding increment carries into bit F_SIZE - 1 */
        is_subnormal = (a_exp < 0 || 
                        (a_mant + addend) < ((F_UINT)1 << (F_SIZE - 1)));
        /* shift the mantissa so that it matches exponent 1; rshift_rnd()
           keeps a sticky bit for the bits shifted out */
        diff = 1 - a_exp;
        a_mant = rshift_rnd(a_mant, diff);
        rnd_bits = a_mant & ((1 << RND_SIZE ) - 1);
        if (is_subnormal && rnd_bits != 0) {
            *pfflags |= FFLAG_UNDERFLOW;
        }
        a_exp = 1;
    } else {
        rnd_bits = a_mant & ((1 << RND_SIZE ) - 1);
    }
    /* any non-zero discarded bit makes the result inexact */
    if (rnd_bits != 0)
        *pfflags |= FFLAG_INEXACT;
    a_mant = (a_mant + addend) >> RND_SIZE;
    /* half way: select even result */
    if (rm == RM_RNE && rnd_bits == (1 << (RND_SIZE - 1)))
        a_mant &= ~1;
    /* Note the rounding adds at least 1, so this is the maximum
       value */
    /* bump the exponent when rounding carried into bit MANT_SIZE + 1 */
    a_exp += a_mant >> (MANT_SIZE + 1);
    if (a_mant <= MANT_MASK) {
        /* denormalized or zero */
        a_exp = 0;
    } else if (a_exp >= EXP_MASK) {
        /* overflow */
        if (addend == 0) {
            /* rounding that truncates: saturate to the largest finite
               value */
            a_exp = EXP_MASK - 1;
            a_mant = MANT_MASK;
        } else {
            /* infinity */
            a_exp = EXP_MASK;
            a_mant = 0;
        }
        *pfflags |= FFLAG_OVERFLOW | FFLAG_INEXACT;
    }
    /* pack_sf() masks off the implicit leading one */
    return pack_sf(a_sign, a_exp, a_mant);
}
 176 | 
 177 | /* a_mant is considered to have at most F_SIZE - 1 bits */
 178 | static F_UINT normalize_sf(uint32_t a_sign, int a_exp, F_UINT a_mant,
 179 |                            RoundingModeEnum rm, uint32_t *pfflags)
 180 | {
 181 |     int shift;
 182 |     shift = clz(a_mant) - (F_SIZE - 1 - IMANT_SIZE);
 183 |     assert(shift >= 0);
 184 |     a_exp -= shift;
 185 |     a_mant <<= shift;
 186 |     return round_pack_sf(a_sign, a_exp, a_mant, rm, pfflags);
 187 | }
 188 | 
 189 | /* same as normalize_sf() but with a double word mantissa. a_mant1 is
 190 |    considered to have at most F_SIZE - 1 bits */
 191 | static F_UINT normalize2_sf(uint32_t a_sign, int a_exp, F_UINT a_mant1, F_UINT a_mant0,
 192 |                             RoundingModeEnum rm, uint32_t *pfflags)
 193 | {
 194 |     int l, shift;
 195 |     if (a_mant1 == 0) {
 196 |         l = F_SIZE + clz(a_mant0);
 197 |     } else {
 198 |         l = clz(a_mant1);
 199 |     }
 200 |     shift = l - (F_SIZE - 1 - IMANT_SIZE);
 201 |     assert(shift >= 0);
 202 |     a_exp -= shift;
 203 |     if (shift == 0) {
 204 |         a_mant1 |= (a_mant0 != 0);
 205 |     } else if (shift < F_SIZE) {
 206 |         a_mant1 = (a_mant1 << shift) | (a_mant0 >> (F_SIZE - shift));
 207 |         a_mant0 <<= shift;
 208 |         a_mant1 |= (a_mant0 != 0);
 209 |     } else {
 210 |         a_mant1 = a_mant0 << (shift - F_SIZE);
 211 |     }
 212 |     return round_pack_sf(a_sign, a_exp, a_mant1, rm, pfflags);
 213 | }
 214 | 
 215 | BOOL issignan_sf(F_UINT a)
 216 | {
 217 |     uint32_t a_exp1;
 218 |     F_UINT a_mant;
 219 |     a_exp1 = (a >> (MANT_SIZE - 1)) & ((1 << (EXP_SIZE + 1)) - 1);
 220 |     a_mant = a & MANT_MASK;
 221 |     return (a_exp1 == (2 * EXP_MASK) && a_mant != 0);
 222 | }
 223 | 
 224 | BOOL isnan_sf(F_UINT a)
 225 | {
 226 |     uint32_t a_exp;
 227 |     F_UINT a_mant;
 228 |     a_exp = (a >> MANT_SIZE) & EXP_MASK;
 229 |     a_mant = a & MANT_MASK;
 230 |     return (a_exp == EXP_MASK && a_mant != 0);
 231 | }
 232 | 
 233 | 
/* Add two encoded floats.

   a, b     operands
   rm       rounding mode
   pfflags  exception flags are ORed into *pfflags

   Returns the rounded sum. F_QNAN is returned when an operand is a NaN or
   when infinities of opposite signs are added; FFLAG_INVALID_OP is raised
   for a signaling NaN operand and for the opposite infinities case. */
F_UINT add_sf(F_UINT a, F_UINT b, RoundingModeEnum rm,
              uint32_t *pfflags)
{
    uint32_t a_sign, b_sign, a_exp, b_exp;
    F_UINT tmp, a_mant, b_mant;

    /* swap so that  abs(a) >= abs(b) */
    /* the encodings are compared with the sign bit cleared; a NaN operand
       therefore always ends up in a */
    if ((a & ~SIGN_MASK) < (b & ~SIGN_MASK)) {
        tmp = a;
        a = b;
        b = tmp;
    }
    a_sign = a >> (F_SIZE - 1);
    b_sign = b >> (F_SIZE - 1);
    a_exp = (a >> MANT_SIZE) & EXP_MASK;
    b_exp = (b >> MANT_SIZE) & EXP_MASK;
    /* keep 3 extra low-order bits for the alignment shift below */
    a_mant = (a & MANT_MASK) << 3;
    b_mant = (b & MANT_MASK) << 3;
    if (unlikely(a_exp == EXP_MASK)) {
        if (a_mant != 0) {
            /* NaN result */
            /* invalid if a has its quiet bit clear or if b is a
               signaling NaN */
            if (!(a_mant & (QNAN_MASK << 3)) || issignan_sf(b))
                *pfflags |= FFLAG_INVALID_OP;
            return F_QNAN;
        } else if (b_exp == EXP_MASK && a_sign != b_sign) {
            /* infinities of opposite signs */
            *pfflags |= FFLAG_INVALID_OP;
            return F_QNAN;
        } else {
            /* infinity */
            return a;
        }
    }
    /* zero exponent field: no implicit leading one and the exponent used
       is 1; otherwise add the implicit leading one */
    if (a_exp == 0) {
        a_exp = 1;
    } else {
        a_mant |= (F_UINT)1 << (MANT_SIZE + 3);
    }
    if (b_exp == 0) {
        b_exp = 1;
    } else {
        b_mant |= (F_UINT)1 << (MANT_SIZE + 3);
    }
    /* align b on the exponent of a, keeping a sticky bit */
    b_mant = rshift_rnd(b_mant, a_exp - b_exp);
    if (a_sign == b_sign) {
        /* same signs : add the absolute values  */
        a_mant += b_mant;
    } else {
        /* different signs : subtract the absolute values  */
        a_mant -= b_mant;
        if (a_mant == 0) {
            /* zero result : the sign needs a specific handling */
            a_sign = (rm == RM_RDN);
        }
    }
    /* the leading one sits at bit MANT_SIZE + 3 here while normalize_sf()
       moves it to bit IMANT_SIZE and lowers the exponent by the shift:
       compensate for those RND_SIZE - 3 positions */
    a_exp += (RND_SIZE - 3);
    return normalize_sf(a_sign, a_exp, a_mant, rm, pfflags);
}
 291 | 
 292 | F_UINT glue(sub_sf, F_SIZE)(F_UINT a, F_UINT b, RoundingModeEnum rm,
 293 |                             uint32_t *pfflags)
 294 | {
 295 |     return add_sf(a, b ^ SIGN_MASK, rm, pfflags);
 296 | }
 297 | 
 298 | static inline F_UINT normalize_subnormal_sf(int32_t *pa_exp, F_UINT a_mant)
 299 | {
 300 |     int shift;
 301 |     shift = MANT_SIZE - ((F_SIZE - 1 - clz(a_mant)));
 302 |     *pa_exp = 1 - shift;
 303 |     return a_mant << shift;
 304 | }
 305 | 
 306 | #ifdef F_ULONG
 307 | 
 308 | static F_UINT mul_u(F_UINT *plow, F_UINT a, F_UINT b)
 309 | {
 310 |     F_ULONG r;
 311 |     r = (F_ULONG)a * (F_ULONG)b;
 312 |     *plow = r;
 313 |     return r >> F_SIZE;
 314 | }
 315 | 
 316 | #else
 317 | 
 318 | #define FH_SIZE (F_SIZE / 2)
 319 | 
 320 | static F_UINT mul_u(F_UINT *plow, F_UINT a, F_UINT b)
 321 | {
 322 |     F_UHALF a0, a1, b0, b1, r0, r1, r2, r3;
 323 |     F_UINT r00, r01, r10, r11, c;
 324 |     a0 = a;
 325 |     a1 = a >> FH_SIZE;
 326 |     b0 = b;
 327 |     b1 = b >> FH_SIZE;
 328 | 
 329 |     r00 = (F_UINT)a0 * (F_UINT)b0;
 330 |     r01 = (F_UINT)a0 * (F_UINT)b1;
 331 |     r10 = (F_UINT)a1 * (F_UINT)b0;
 332 |     r11 = (F_UINT)a1 * (F_UINT)b1;
 333 |     
 334 |     r0 = r00;
 335 |     c = (r00 >> FH_SIZE) + (F_UHALF)r01 + (F_UHALF)r10;
 336 |     r1 = c;
 337 |     c = (c >> FH_SIZE) + (r01 >> FH_SIZE) + (r10 >> FH_SIZE) + (F_UHALF)r11;
 338 |     r2 = c;
 339 |     r3 = (c >> FH_SIZE) + (r11 >> FH_SIZE);
 340 | 
 341 |     *plow = ((F_UINT)r1 << FH_SIZE) | r0;
 342 |     return ((F_UINT)r3 << FH_SIZE) | r2;
 343 | }
 344 | 
 345 | #undef FH_SIZE
 346 | 
 347 | #endif
 348 | 
/* Multiply two encoded floats.

   a, b     operands
   rm       rounding mode
   pfflags  exception flags are ORed into *pfflags

   Returns the rounded product. F_QNAN is returned when an operand is a
   NaN or for infinity times zero; FFLAG_INVALID_OP is raised for a
   signaling NaN operand and for infinity times zero. */
F_UINT mul_sf(F_UINT a, F_UINT b, RoundingModeEnum rm,
              uint32_t *pfflags)
{
    uint32_t a_sign, b_sign, r_sign;
    int32_t a_exp, b_exp, r_exp;
    F_UINT a_mant, b_mant, r_mant, r_mant_low;

    a_sign = a >> (F_SIZE - 1);
    b_sign = b >> (F_SIZE - 1);
    /* the sign of the product is the XOR of the operand signs */
    r_sign = a_sign ^ b_sign;
    a_exp = (a >> MANT_SIZE) & EXP_MASK;
    b_exp = (b >> MANT_SIZE) & EXP_MASK;
    a_mant = a & MANT_MASK;
    b_mant = b & MANT_MASK;
    if (a_exp == EXP_MASK || b_exp == EXP_MASK) {
        if (isnan_sf(a) || isnan_sf(b)) {
            if (issignan_sf(a) || issignan_sf(b)) {
                *pfflags |= FFLAG_INVALID_OP;
            }
            return F_QNAN;
        } else {
            /* infinity */
            /* infinity * zero is invalid */
            if ((a_exp == EXP_MASK && (b_exp == 0 && b_mant == 0)) ||
                (b_exp == EXP_MASK && (a_exp == 0 && a_mant == 0))) {
                *pfflags |= FFLAG_INVALID_OP;
                return F_QNAN;
            } else {
                return pack_sf(r_sign, EXP_MASK, 0);
            }
        }
    }
    /* zero exponent field: zero gives a signed zero result, a subnormal is
       normalized; otherwise add the implicit leading one */
    if (a_exp == 0) {
        if (a_mant == 0)
            return pack_sf(r_sign, 0, 0); /* zero */
        a_mant = normalize_subnormal_sf(&a_exp, a_mant);
    } else {
        a_mant |= (F_UINT)1 << MANT_SIZE;
    }
    if (b_exp == 0) {
        if (b_mant == 0)
            return pack_sf(r_sign, 0, 0); /* zero */
        b_mant = normalize_subnormal_sf(&b_exp, b_mant);
    } else {
        b_mant |= (F_UINT)1 << MANT_SIZE;
    }
    /* sum of the exponents; the constant terms go with the mantissa
       alignment chosen below */
    r_exp = a_exp + b_exp - (1 << (EXP_SIZE - 1)) + 2;
    
    /* a is aligned with its MSB at bit F_SIZE - 2 and b at bit F_SIZE - 1,
       so the high word of the product has at most F_SIZE - 1 significant
       bits, as normalize_sf() requires */
    r_mant = mul_u(&r_mant_low,a_mant << RND_SIZE, b_mant << (RND_SIZE + 1));
    /* the low word only contributes a sticky bit */
    r_mant |= (r_mant_low != 0);
    return normalize_sf(r_sign, r_exp, r_mant, rm, pfflags);
}
 400 | 
/* fused multiply and add */
/* Computes a * b + c. The product is kept as a two word value and the
   sum is rounded once, by normalize_sf()/normalize2_sf().

   a, b, c  operands
   rm       rounding mode
   pfflags  exception flags are ORed into *pfflags

   F_QNAN is returned when an operand is a NaN, for infinity times zero
   and when an infinite product is added to an infinity of the opposite
   sign; FFLAG_INVALID_OP is raised for a signaling NaN operand and for
   the latter two cases. */
F_UINT fma_sf(F_UINT a, F_UINT b, F_UINT c, RoundingModeEnum rm,
              uint32_t *pfflags)
{
    uint32_t a_sign, b_sign, c_sign, r_sign;
    int32_t a_exp, b_exp, c_exp, r_exp, shift;
    F_UINT a_mant, b_mant, c_mant, r_mant1, r_mant0, c_mant1, c_mant0, mask;

    a_sign = a >> (F_SIZE - 1);
    b_sign = b >> (F_SIZE - 1);
    c_sign = c >> (F_SIZE - 1);
    /* sign of the product a * b */
    r_sign = a_sign ^ b_sign;
    a_exp = (a >> MANT_SIZE) & EXP_MASK;
    b_exp = (b >> MANT_SIZE) & EXP_MASK;
    c_exp = (c >> MANT_SIZE) & EXP_MASK;
    a_mant = a & MANT_MASK;
    b_mant = b & MANT_MASK;
    c_mant = c & MANT_MASK;
    if (a_exp == EXP_MASK || b_exp == EXP_MASK || c_exp == EXP_MASK) {
        if (isnan_sf(a) || isnan_sf(b) || isnan_sf(c)) {
            if (issignan_sf(a) || issignan_sf(b) || issignan_sf(c)) {
                *pfflags |= FFLAG_INVALID_OP;
            }
            return F_QNAN;
        } else {
            /* infinities */
            /* invalid: infinity * zero, or an infinite product added to
               an infinity of the opposite sign */
            if ((a_exp == EXP_MASK && (b_exp == 0 && b_mant == 0)) ||
                (b_exp == EXP_MASK && (a_exp == 0 && a_mant == 0)) ||
                ((a_exp == EXP_MASK || b_exp == EXP_MASK) &&
                 (c_exp == EXP_MASK && r_sign != c_sign))) {
                *pfflags |= FFLAG_INVALID_OP;
                return F_QNAN;
            } else if (c_exp == EXP_MASK) {
                return pack_sf(c_sign, EXP_MASK, 0);
            } else {
                return pack_sf(r_sign, EXP_MASK, 0);
            }
        }
    }
    if (a_exp == 0) {
        if (a_mant == 0)
            goto mul_zero;
        a_mant = normalize_subnormal_sf(&a_exp, a_mant);
    } else {
        a_mant |= (F_UINT)1 << MANT_SIZE;
    }
    if (b_exp == 0) {
        if (b_mant == 0) {
        mul_zero:
            /* the product is a zero: the result is c, except that for a
               zero c the sign of the sum of two zeros must be chosen */
            if (c_exp == 0 && c_mant == 0) {
                if (c_sign != r_sign)
                    r_sign = (rm == RM_RDN);
                return pack_sf(r_sign, 0, 0);
            } else {
                return c;
            }
        }
        b_mant = normalize_subnormal_sf(&b_exp, b_mant);
    } else {
        b_mant |= (F_UINT)1 << MANT_SIZE;
    }
    /* multiply */
    r_exp = a_exp + b_exp - (1 << (EXP_SIZE - 1)) + 3;
    
    /* both mantissas are aligned with their MSB at bit F_SIZE - 2; the
       product is the two word value r_mant1:r_mant0 */
    r_mant1 = mul_u(&r_mant0, a_mant << RND_SIZE, b_mant << RND_SIZE);
    /* normalize to F_SIZE - 3 */
    if (r_mant1 < ((F_UINT)1 << (F_SIZE - 3))) {
        r_mant1 = (r_mant1 << 1) | (r_mant0 >> (F_SIZE - 1));
        r_mant0 <<= 1;
        r_exp--;
    }

    /* add */
    if (c_exp == 0) {
        if (c_mant == 0) {
            /* add zero */
            /* only the product has to be rounded; the low word becomes a
               sticky bit */
            r_mant1 |= (r_mant0 != 0);
            return normalize_sf(r_sign, r_exp, r_mant1, rm, pfflags);
        }
        c_mant = normalize_subnormal_sf(&c_exp, c_mant);
    } else {
        c_mant |= (F_UINT)1 << MANT_SIZE;
    }
    /* give c the same layout as the product: two words with the MSB at
       bit F_SIZE - 3 of the high word */
    c_exp++;
    c_mant1 = c_mant << (RND_SIZE - 1);
    c_mant0 = 0;

    //    printf("r_s=%d r_exp=%d r_mant=%08x %08x\n", r_sign, r_exp, (uint32_t)r_mant1, (uint32_t)r_mant0);
    //    printf("c_s=%d c_exp=%d c_mant=%08x %08x\n", c_sign, c_exp, (uint32_t)c_mant1, (uint32_t)c_mant0);

    /* ensure that abs(r) >= abs(c) */
    if (!(r_exp > c_exp || (r_exp == c_exp && r_mant1 >= c_mant1))) {
        F_UINT tmp;
        int32_t c_tmp;
        /* swap */
        tmp = r_mant1; r_mant1 = c_mant1; c_mant1 = tmp;
        tmp = r_mant0; r_mant0 = c_mant0; c_mant0 = tmp;
        c_tmp = r_exp; r_exp = c_exp; c_exp = c_tmp;
        c_tmp = r_sign; r_sign = c_sign; c_sign = c_tmp;
    }
    /* right shift c_mant */
    /* c is aligned on the exponent of r; the cases below handle the
       different word-crossing situations of the two word shift */
    shift = r_exp - c_exp;
    if (shift >= 2 * F_SIZE) {
        /* everything is shifted out: only a sticky bit remains */
        c_mant0 = (c_mant0 | c_mant1) != 0;
        c_mant1 = 0;
    } else if (shift >= F_SIZE + 1) {
        /* the high word moves into the low word and is shifted further */
        c_mant0 = rshift_rnd(c_mant1, shift - F_SIZE);
        c_mant1 = 0;
    } else if (shift == F_SIZE) {
        /* the high word becomes the low word; the old low word is
           reduced to a sticky bit */
        c_mant0 = c_mant1 | (c_mant0 != 0);
        c_mant1 = 0;
    } else if (shift != 0) {
        /* shift inside the two words, with a sticky bit for the bits
           shifted out of the low word */
        mask = ((F_UINT)1 << shift) - 1;
        c_mant0 = (c_mant1 << (F_SIZE - shift)) | (c_mant0 >> shift) | ((c_mant0 & mask) != 0);
        c_mant1 = c_mant1 >> shift;
    }
    //    printf("  r_mant=%08x %08x\n", (uint32_t)r_mant1, (uint32_t)r_mant0);
    //    printf("  c_mant=%08x %08x\n", (uint32_t)c_mant1, (uint32_t)c_mant0);
    /* add or subtract */
    if (r_sign == c_sign) {
        /* two word addition; (r_mant0 < c_mant0) is the carry out of the
           low word */
        r_mant0 += c_mant0;
        r_mant1 += c_mant1 + (r_mant0 < c_mant0);
    } else {
        F_UINT tmp;
        /* two word subtraction; (r_mant0 > tmp) is the borrow out of the
           low word */
        tmp = r_mant0;
        r_mant0 -= c_mant0;
        r_mant1 = r_mant1 - c_mant1 - (r_mant0 > tmp);
        if ((r_mant0 | r_mant1) == 0) {
            /* zero result : the sign needs a specific handling */
            r_sign = (rm == RM_RDN);
        }
    }
    /* the #if 0 branch is a disabled alternative normalization; the
       compiled code uses normalize2_sf() */
#if 0
    //    printf("  r1_mant=%08x %08x\n", (uint32_t)r_mant1, (uint32_t)r_mant0);
    /* normalize */
    if (r_mant1 == 0) {
        r_mant1 = r_mant0;
        r_exp -= F_SIZE;
    } else {
        shift = clz(r_mant1) - (F_SIZE - 1 - IMANT_SIZE);
        if (shift != 0) {
            r_mant1 = (r_mant1 << shift) | (r_mant0 >> (F_SIZE - shift));
            r_mant0 <<= shift;
            r_exp -= shift;
        }
        r_mant1 |= (r_mant0 != 0);
    }
    return normalize_sf(r_sign, r_exp, r_mant1, rm, pfflags);
#else
    return normalize2_sf(r_sign, r_exp, r_mant1, r_mant0, rm, pfflags);
#endif
}
 553 | 
 554 | #ifdef F_ULONG
 555 | 
 556 | static F_UINT divrem_u(F_UINT *pr, F_UINT ah, F_UINT al, F_UINT b)
 557 | {
 558 |     F_ULONG a;
 559 |     a = ((F_ULONG)ah << F_SIZE) | al;
 560 |     *pr = a % b;
 561 |     return a / b;
 562 | }
 563 | 
 564 | #else
 565 | 
 566 | /* XXX: optimize */
 567 | static F_UINT divrem_u(F_UINT *pr, F_UINT a1, F_UINT a0, F_UINT b)
 568 | {
 569 |     int i, qb, ab;
 570 | 
 571 |     assert(a1 < b);
 572 |     for(i = 0; i < F_SIZE; i++) {
 573 |         ab = a1 >> (F_SIZE - 1);
 574 |         a1 = (a1 << 1) | (a0 >> (F_SIZE - 1));
 575 |         if (ab || a1 >= b) {
 576 |             a1 -= b;
 577 |             qb = 1;
 578 |         } else {
 579 |             qb = 0;
 580 |         }
 581 |         a0 = (a0 << 1) | qb;
 582 |     }
 583 |     *pr = a1;
 584 |     return a0;
 585 | }
 586 | 
 587 | #endif
 588 | 
 589 | F_UINT div_sf(F_UINT a, F_UINT b, RoundingModeEnum rm,
 590 |               uint32_t *pfflags)
 591 | {
 592 |     uint32_t a_sign, b_sign, r_sign;
 593 |     int32_t a_exp, b_exp, r_exp;
 594 |     F_UINT a_mant, b_mant, r_mant, r;
 595 | 
 596 |     a_sign = a >> (F_SIZE - 1);
 597 |     b_sign = b >> (F_SIZE - 1);
 598 |     r_sign = a_sign ^ b_sign;
 599 |     a_exp = (a >> MANT_SIZE) & EXP_MASK;
 600 |     b_exp = (b >> MANT_SIZE) & EXP_MASK;
 601 |     a_mant = a & MANT_MASK;
 602 |     b_mant = b & MANT_MASK;
 603 |     if (a_exp == EXP_MASK) {
 604 |         if (a_mant != 0 || isnan_sf(b)) {
 605 |             if (issignan_sf(a) || issignan_sf(b)) {
 606 |                 *pfflags |= FFLAG_INVALID_OP;
 607 |             }
 608 |             return F_QNAN;
 609 |         } else if (b_exp == EXP_MASK) {
 610 |             *pfflags |= FFLAG_INVALID_OP;
 611 |             return F_QNAN;
 612 |         } else {
 613 |             return pack_sf(r_sign, EXP_MASK, 0);
 614 |         }
 615 |     } else if (b_exp == EXP_MASK) {
 616 |         if (b_mant != 0) {
 617 |             if (issignan_sf(a) || issignan_sf(b)) {
 618 |                 *pfflags |= FFLAG_INVALID_OP;
 619 |             }
 620 |             return F_QNAN;
 621 |         } else {
 622 |             return pack_sf(r_sign, 0, 0);
 623 |         }
 624 |     }
 625 | 
 626 |     if (b_exp == 0) {
 627 |         if (b_mant == 0) { 
 628 |             /* zero */
 629 |             if (a_exp == 0 && a_mant == 0) {
 630 |                 *pfflags |= FFLAG_INVALID_OP;
 631 |                 return F_QNAN;
 632 |             } else {
 633 |                 *pfflags |= FFLAG_DIVIDE_ZERO;
 634 |                 return pack_sf(r_sign, EXP_MASK, 0);
 635 |             }
 636 |         }
 637 |         b_mant = normalize_subnormal_sf(&b_exp, b_mant);
 638 |     } else {
 639 |         b_mant |= (F_UINT)1 << MANT_SIZE;
 640 |     }
 641 |     if (a_exp == 0) {
 642 |         if (a_mant == 0)
 643 |             return pack_sf(r_sign, 0, 0); /* zero */
 644 |         a_mant = normalize_subnormal_sf(&a_exp, a_mant);
 645 |     } else {
 646 |         a_mant |= (F_UINT)1 << MANT_SIZE;
 647 |     }
 648 |     r_exp = a_exp - b_exp + (1 << (EXP_SIZE - 1)) - 1;
 649 |     r_mant = divrem_u(&r, a_mant, 0, b_mant << 2);
 650 |     if (r != 0)
 651 |         r_mant |= 1;
 652 |     return normalize_sf(r_sign, r_exp, r_mant, rm, pfflags);
 653 | }
 654 | 
 655 | #ifdef F_ULONG
 656 | 
 657 | /* compute sqrt(a) with a = ah*2^F_SIZE+al and a < 2^(F_SIZE - 2)
 658 |    return true if not exact square. */
 659 | static int sqrtrem_u(F_UINT *pr, F_UINT ah, F_UINT al)
 660 | {
 661 |     F_ULONG a, u, s;
 662 |     int l, inexact;
 663 | 
 664 |     /* 2^l >= a */
 665 |     if (ah != 0) {
 666 |         l = 2 * F_SIZE - clz(ah - 1);
 667 |     } else {
 668 |         if (al == 0) {
 669 |             *pr = 0;
 670 |             return 0;
 671 |         }
 672 |         l = F_SIZE - clz(al - 1);
 673 |     }
 674 |     a = ((F_ULONG)ah << F_SIZE) | al;
 675 |     u = (F_ULONG)1 << ((l + 1) / 2);
 676 |     for(;;) {
 677 |         s = u;
 678 |         u = ((a / s) + s) / 2;
 679 |         if (u >= s)
 680 |             break;
 681 |     }
 682 |     inexact = (a - s * s) != 0;
 683 |     *pr = s;
 684 |     return inexact;
 685 | }
 686 | 
 687 | #else
 688 | 
 689 | static int sqrtrem_u(F_UINT *pr, F_UINT a1, F_UINT a0)
 690 | {
 691 |     int l, inexact;
 692 |     F_UINT u, s, r, q, sq0, sq1;
 693 | 
 694 |     /* 2^l >= a */
 695 |     if (a1 != 0) {
 696 |         l = 2 * F_SIZE - clz(a1 - 1);
 697 |     } else {
 698 |         if (a0 == 0) {
 699 |             *pr = 0;
 700 |             return 0;
 701 |         }
 702 |         l = F_SIZE - clz(a0 - 1);
 703 |     }
 704 |     u = (F_UINT)1 << ((l + 1) / 2);
 705 |     for(;;) {
 706 |         s = u;
 707 |         q = divrem_u(&r, a1, a0, s);
 708 |         u = (q + s) / 2;
 709 |         if (u >= s)
 710 |             break;
 711 |     }
 712 |     sq1 = mul_u(&sq0, s, s);
 713 |     inexact = (sq0 != a0 || sq1 != a1);
 714 |     *pr = s;
 715 |     return inexact;
 716 | }
 717 | 
 718 | #endif
 719 | 
 720 | F_UINT sqrt_sf(F_UINT a, RoundingModeEnum rm,
 721 |                uint32_t *pfflags)
 722 | {
 723 |     uint32_t a_sign;
 724 |     int32_t a_exp;
 725 |     F_UINT a_mant;
 726 | 
 727 |     a_sign = a >> (F_SIZE - 1);
 728 |     a_exp = (a >> MANT_SIZE) & EXP_MASK;
 729 |     a_mant = a & MANT_MASK;
 730 |     if (a_exp == EXP_MASK) {
 731 |         if (a_mant != 0) {
 732 |             if (issignan_sf(a)) {
 733 |                 *pfflags |= FFLAG_INVALID_OP;
 734 |             }
 735 |             return F_QNAN;
 736 |         } else if (a_sign) {
 737 |             goto neg_error;
 738 |         } else {
 739 |             return a; /* +infinity */
 740 |         }
 741 |     }
 742 |     if (a_sign) {
 743 |         if (a_exp == 0 && a_mant == 0)
 744 |             return a; /* -zero */
 745 |     neg_error:
 746 |         *pfflags |= FFLAG_INVALID_OP;
 747 |         return F_QNAN;
 748 |     }
 749 |     if (a_exp == 0) {
 750 |         if (a_mant == 0)
 751 |             return pack_sf(0, 0, 0); /* zero */
 752 |         a_mant = normalize_subnormal_sf(&a_exp, a_mant);
 753 |     } else {
 754 |         a_mant |= (F_UINT)1 << MANT_SIZE;
 755 |     }
 756 |     a_exp -= EXP_MASK / 2;
 757 |     /* simpler to handle an even exponent */
 758 |     if (a_exp & 1) {
 759 |         a_exp--;
 760 |         a_mant <<= 1;
 761 |     }
 762 |     a_exp = (a_exp >> 1) + EXP_MASK / 2;
 763 |     a_mant <<= (F_SIZE - 4 - MANT_SIZE);
 764 |     if (sqrtrem_u(&a_mant, a_mant, 0))
 765 |         a_mant |= 1;
 766 |     return normalize_sf(a_sign, a_exp, a_mant, rm, pfflags);
 767 | }
 768 | 
 769 | /* comparisons */
 770 | 
 771 | F_UINT glue(min_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags)
 772 | {
 773 |     uint32_t a_sign, b_sign;
 774 | 
 775 |     if (isnan_sf(a) || isnan_sf(b)) {
 776 |         if (issignan_sf(a) || issignan_sf(b)) {
 777 |             *pfflags |= FFLAG_INVALID_OP;
 778 |             return F_QNAN;
 779 |         } else if (isnan_sf(a)) {
 780 |             if (isnan_sf(b)) 
 781 |                 return F_QNAN;
 782 |             else
 783 |                 return b;
 784 |         } else {
 785 |             return a;
 786 |         }
 787 |     }
 788 |     a_sign = a >> (F_SIZE - 1);
 789 |     b_sign = b >> (F_SIZE - 1);
 790 | 
 791 |     if (a_sign != b_sign) {
 792 |         if (a_sign)
 793 |             return a;
 794 |         else
 795 |             return b;
 796 |     } else {
 797 |         if ((a < b) ^ a_sign)
 798 |             return a;
 799 |         else
 800 |             return b;
 801 |     }
 802 | }
 803 | 
 804 | F_UINT glue(max_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags)
 805 | {
 806 |     uint32_t a_sign, b_sign;
 807 | 
 808 |     if (isnan_sf(a) || isnan_sf(b)) {
 809 |         if (issignan_sf(a) || issignan_sf(b)) {
 810 |             *pfflags |= FFLAG_INVALID_OP;
 811 |             return F_QNAN;
 812 |         } else if (isnan_sf(a)) {
 813 |             if (isnan_sf(b)) 
 814 |                 return F_QNAN;
 815 |             else
 816 |                 return b;
 817 |         } else {
 818 |             return a;
 819 |         }
 820 |     }
 821 |     a_sign = a >> (F_SIZE - 1);
 822 |     b_sign = b >> (F_SIZE - 1);
 823 | 
 824 |     if (a_sign != b_sign) {
 825 |         if (a_sign)
 826 |             return b;
 827 |         else
 828 |             return a;
 829 |     } else {
 830 |         if ((a < b) ^ a_sign)
 831 |             return b;
 832 |         else
 833 |             return a;
 834 |     }
 835 | }
 836 | 
 837 | int glue(eq_quiet_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags)
 838 | {
 839 |     if (isnan_sf(a) || isnan_sf(b)) {
 840 |         if (issignan_sf(a) || issignan_sf(b)) {
 841 |             *pfflags |= FFLAG_INVALID_OP;
 842 |         }
 843 |         return 0;
 844 |     }
 845 | 
 846 |     if ((F_UINT)((a | b) << 1) == 0)
 847 |         return 1; /* zero case */
 848 |     return (a == b);
 849 | }
 850 | 
 851 | int glue(le_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags)
 852 | {
 853 |     uint32_t a_sign, b_sign;
 854 | 
 855 |     if (isnan_sf(a) || isnan_sf(b)) {
 856 |         *pfflags |= FFLAG_INVALID_OP;
 857 |         return 0;
 858 |     }
 859 | 
 860 |     a_sign = a >> (F_SIZE - 1);
 861 |     b_sign = b >> (F_SIZE - 1);
 862 |     if (a_sign != b_sign) {
 863 |         return (a_sign || ((F_UINT)((a | b) << 1) == 0));
 864 |     } else {
 865 |         if (a_sign) {
 866 |             return (a >= b);
 867 |         } else {
 868 |             return (a <= b);
 869 |         }
 870 |     }
 871 | }
 872 | 
 873 | int glue(lt_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags)
 874 | {
 875 |     uint32_t a_sign, b_sign;
 876 | 
 877 |     if (isnan_sf(a) || isnan_sf(b)) {
 878 |         *pfflags |= FFLAG_INVALID_OP;
 879 |         return 0;
 880 |     }
 881 | 
 882 |     a_sign = a >> (F_SIZE - 1);
 883 |     b_sign = b >> (F_SIZE - 1);
 884 |     if (a_sign != b_sign) {
 885 |         return (a_sign && ((F_UINT)((a | b) << 1) != 0));
 886 |     } else {
 887 |         if (a_sign) {
 888 |             return (a > b);
 889 |         } else {
 890 |             return (a < b);
 891 |         }
 892 |     }
 893 | }
 894 | 
 895 | uint32_t glue(fclass_sf, F_SIZE)(F_UINT a)
 896 | {
 897 |     uint32_t a_sign;
 898 |     int32_t a_exp;
 899 |     F_UINT a_mant;
 900 |     uint32_t ret;
 901 | 
 902 |     a_sign = a >> (F_SIZE - 1);
 903 |     a_exp = (a >> MANT_SIZE) & EXP_MASK;
 904 |     a_mant = a & MANT_MASK;
 905 |     if (a_exp == EXP_MASK) {
 906 |         if (a_mant != 0) {
 907 |             if (a_mant & QNAN_MASK)
 908 |                 ret = FCLASS_QNAN;
 909 |             else
 910 |                 ret = FCLASS_SNAN;
 911 |         } else {
 912 |             if (a_sign)
 913 |                 ret = FCLASS_NINF;
 914 |             else
 915 |                 ret = FCLASS_PINF;
 916 |         }
 917 |     } else if (a_exp == 0) {
 918 |         if (a_mant == 0) {
 919 |             if (a_sign)
 920 |                 ret = FCLASS_NZERO;
 921 |             else
 922 |                 ret = FCLASS_PZERO;
 923 |         } else {
 924 |             if (a_sign)
 925 |                 ret = FCLASS_NSUBNORMAL;
 926 |             else
 927 |                 ret = FCLASS_PSUBNORMAL;
 928 |         }
 929 |     } else {
 930 |         if (a_sign)
 931 |             ret = FCLASS_NNORMAL;
 932 |         else
 933 |             ret = FCLASS_PNORMAL;
 934 |     }
 935 |     return ret;
 936 | }
 937 | 
 938 | /* conversions between floats */
 939 | 
 940 | #if F_SIZE >= 64
 941 | 
 942 | F_UINT cvt_sf32_sf(uint32_t a, uint32_t *pfflags)
 943 | {
 944 |     uint32_t a_sign;
 945 |     int32_t a_exp;
 946 |     F_UINT a_mant;
 947 | 
 948 |     a_mant = unpack_sf32(&a_sign, &a_exp, a);
 949 |     if (a_exp == 0xff) {
 950 |         if (a_mant != 0) {
 951 |             /* NaN */
 952 |             if (issignan_sf32(a)) {
 953 |                 *pfflags |= FFLAG_INVALID_OP;
 954 |             }
 955 |             return F_QNAN;
 956 |         } else {
 957 |             /* infinity */
 958 |             return pack_sf(a_sign, EXP_MASK, 0);
 959 |         }
 960 |     }
 961 |     if (a_exp == 0) {
 962 |         if (a_mant == 0)
 963 |             return pack_sf(a_sign, 0, 0); /* zero */
 964 |         a_mant = normalize_subnormal_sf32(&a_exp, a_mant);
 965 |     }
 966 |     /* convert the exponent value */
 967 |     a_exp = a_exp - 0x7f + (EXP_MASK / 2);
 968 |     /* shift the mantissa */
 969 |     a_mant <<= (MANT_SIZE - 23);
 970 |     /* We assume the target float is large enough to that no
 971 |        normalization is necessary */
 972 |     return pack_sf(a_sign, a_exp, a_mant);
 973 | }
 974 | 
 975 | uint32_t glue(glue(cvt_sf, F_SIZE), _sf32)(F_UINT a, RoundingModeEnum rm,
 976 |                                            uint32_t *pfflags)
 977 | {
 978 |     uint32_t a_sign;
 979 |     int32_t a_exp;
 980 |     F_UINT a_mant;
 981 | 
 982 |     a_mant = unpack_sf(&a_sign, &a_exp, a);
 983 |     if (a_exp == EXP_MASK) {
 984 |         if (a_mant != 0) {
 985 |             /* NaN */
 986 |             if (issignan_sf(a)) {
 987 |                 *pfflags |= FFLAG_INVALID_OP;
 988 |             }
 989 |             return F_QNAN32;
 990 |         } else {
 991 |             /* infinity */
 992 |             return pack_sf32(a_sign, 0xff, 0);
 993 |         }
 994 |     }
 995 |     if (a_exp == 0) {
 996 |         if (a_mant == 0)
 997 |             return pack_sf32(a_sign, 0, 0); /* zero */
 998 |         normalize_subnormal_sf(&a_exp, a_mant);
 999 |     } else {
1000 |         a_mant |= (F_UINT)1 << MANT_SIZE;
1001 |     }
1002 |     /* convert the exponent value */
1003 |     a_exp = a_exp - (EXP_MASK / 2) + 0x7f;
1004 |     /* shift the mantissa */
1005 |     a_mant = rshift_rnd(a_mant, MANT_SIZE - (32 - 2));
1006 |     return normalize_sf32(a_sign, a_exp, a_mant, rm, pfflags);
1007 | }
1008 | 
1009 | #endif
1010 | 
1011 | #if F_SIZE >= 128
1012 | 
1013 | F_UINT cvt_sf64_sf(uint64_t a, uint32_t *pfflags)
1014 | {
1015 |     uint32_t a_sign;
1016 |     int32_t a_exp;
1017 |     F_UINT a_mant;
1018 | 
1019 |     a_mant = unpack_sf64(&a_sign, &a_exp, a);
1020 | 
1021 |     if (a_exp == 0x7ff) {
1022 |         if (a_mant != 0) {
1023 |             /* NaN */
1024 |             if (issignan_sf64(a)) {
1025 |                 *pfflags |= FFLAG_INVALID_OP;
1026 |             }
1027 |             return F_QNAN;
1028 |         } else {
1029 |             /* infinity */
1030 |             return pack_sf(a_sign, EXP_MASK, 0);
1031 |         }
1032 |     }
1033 |     if (a_exp == 0) {
1034 |         if (a_mant == 0)
1035 |             return pack_sf(a_sign, 0, 0); /* zero */
1036 |         a_mant = normalize_subnormal_sf64(&a_exp, a_mant);
1037 |     }
1038 |     /* convert the exponent value */
1039 |     a_exp = a_exp - 0x3ff + (EXP_MASK / 2);
1040 |     /* shift the mantissa */
1041 |     a_mant <<= (MANT_SIZE - 52);
1042 |     return pack_sf(a_sign, a_exp, a_mant);
1043 | }
1044 | 
1045 | uint64_t glue(glue(cvt_sf, F_SIZE), _sf64)(F_UINT a, RoundingModeEnum rm,
1046 |                                                uint32_t *pfflags)
1047 | {
1048 |     uint32_t a_sign;
1049 |     int32_t a_exp;
1050 |     F_UINT a_mant;
1051 | 
1052 |     a_mant = unpack_sf(&a_sign, &a_exp, a);
1053 |     if (a_exp == EXP_MASK) {
1054 |         if (a_mant != 0) {
1055 |             /* NaN */
1056 |             if (issignan_sf(a)) {
1057 |                 *pfflags |= FFLAG_INVALID_OP;
1058 |             }
1059 |             return F_QNAN64;
1060 |         } else {
1061 |             /* infinity */
1062 |             return pack_sf64(a_sign, 0x7ff, 0);
1063 |         }
1064 |     }
1065 |     if (a_exp == 0) {
1066 |         if (a_mant == 0)
1067 |             return pack_sf64(a_sign, 0, 0); /* zero */
1068 |         normalize_subnormal_sf(&a_exp, a_mant);
1069 |     } else {
1070 |         a_mant |= (F_UINT)1 << MANT_SIZE;
1071 |     }
1072 |     /* convert the exponent value */
1073 |     a_exp = a_exp - (EXP_MASK / 2) + 0x3ff;
1074 |     /* shift the mantissa */
1075 |     a_mant = rshift_rnd(a_mant, MANT_SIZE - (64 - 2));
1076 |     return normalize_sf64(a_sign, a_exp, a_mant, rm, pfflags);
1077 | }
1078 | 
1079 | #endif
1080 | 
1081 | #undef clz
1082 | 
1083 | #define ICVT_SIZE 32
1084 | #include "softfp_template_icvt.h"
1085 | 
1086 | #define ICVT_SIZE 64
1087 | #include "softfp_template_icvt.h"
1088 | 
1089 | #ifdef HAVE_INT128
1090 | #define ICVT_SIZE 128
1091 | #include "softfp_template_icvt.h"
1092 | #endif
1093 | 
1094 | #undef F_SIZE
1095 | #undef F_UINT
1096 | #undef F_ULONG
1097 | #undef F_UHALF
1098 | #undef MANT_SIZE
1099 | #undef EXP_SIZE
1100 | #undef EXP_MASK
1101 | #undef MANT_MASK
1102 | #undef SIGN_MASK
1103 | #undef IMANT_SIZE
1104 | #undef RND_SIZE
1105 | #undef QNAN_MASK
1106 | #undef F_QNAN
1107 | 
1108 | #undef pack_sf
1109 | #undef unpack_sf
1110 | #undef rshift_rnd
1111 | #undef round_pack_sf
1112 | #undef normalize_sf
1113 | #undef normalize2_sf
1114 | #undef issignan_sf
1115 | #undef isnan_sf
1116 | #undef add_sf
1117 | #undef mul_sf
1118 | #undef fma_sf
1119 | #undef div_sf
1120 | #undef sqrt_sf
1121 | #undef normalize_subnormal_sf
1122 | #undef divrem_u
1123 | #undef sqrtrem_u
1124 | #undef mul_u
1125 | #undef cvt_sf32_sf
1126 | #undef cvt_sf64_sf
1127 | 


--------------------------------------------------------------------------------
/softfp_template_icvt.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * SoftFP Library
  3 |  * 
  4 |  * Copyright (c) 2016 Fabrice Bellard
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | #if ICVT_SIZE == 32
 25 | #define ICVT_UINT uint32_t
 26 | #define ICVT_INT int32_t
 27 | #elif ICVT_SIZE == 64
 28 | #define ICVT_UINT uint64_t
 29 | #define ICVT_INT int64_t
 30 | #elif ICVT_SIZE == 128
 31 | #define ICVT_UINT uint128_t
 32 | #define ICVT_INT int128_t
 33 | #else
 34 | #error unsupported icvt
 35 | #endif
 36 | 
 37 | /* conversions between float and integers */
 38 | static ICVT_INT glue(glue(glue(internal_cvt_sf, F_SIZE), _i), ICVT_SIZE)(F_UINT a, RoundingModeEnum rm,
 39 |                                                                          uint32_t *pfflags, BOOL is_unsigned)
 40 | {
 41 |     uint32_t a_sign, addend, rnd_bits;
 42 |     int32_t a_exp;
 43 |     F_UINT a_mant;
 44 |     ICVT_UINT r, r_max;
 45 | 
 46 |     a_sign = a >> (F_SIZE - 1);
 47 |     a_exp = (a >> MANT_SIZE) & EXP_MASK;
 48 |     a_mant = a & MANT_MASK;
 49 |     if (a_exp == EXP_MASK && a_mant != 0)
 50 |         a_sign = 0; /* NaN is like +infinity */
 51 |     if (a_exp == 0) {
 52 |         a_exp = 1;
 53 |     } else {
 54 |         a_mant |= (F_UINT)1 << MANT_SIZE;
 55 |     }
 56 |     a_mant <<= RND_SIZE;
 57 |     a_exp = a_exp - (EXP_MASK / 2) - MANT_SIZE;
 58 | 
 59 |     if (is_unsigned)
 60 |         r_max = (ICVT_UINT)a_sign - 1;
 61 |     else
 62 |         r_max = ((ICVT_UINT)1 << (ICVT_SIZE - 1)) - (ICVT_UINT)(a_sign ^ 1);
 63 |     if (a_exp >= 0) {
 64 |         if (a_exp <= (ICVT_SIZE - 1 - MANT_SIZE)) {
 65 |             r = (ICVT_UINT)(a_mant >> RND_SIZE) << a_exp;
 66 |             if (r > r_max)
 67 |                 goto overflow;
 68 |         } else {
 69 |         overflow:
 70 |             *pfflags |= FFLAG_INVALID_OP;
 71 |             return r_max;
 72 |         }
 73 |     } else {
 74 |         a_mant = rshift_rnd(a_mant, -a_exp);
 75 | 
 76 |         switch(rm) {
 77 |         case RM_RNE:
 78 |         case RM_RMM:
 79 |             addend = (1 << (RND_SIZE - 1));
 80 |             break;
 81 |         case RM_RTZ:
 82 |             addend = 0;
 83 |             break;
 84 |         default:
 85 |         case RM_RDN:
 86 |         case RM_RUP:
 87 |             if (a_sign ^ (rm & 1))
 88 |                 addend = (1 << RND_SIZE) - 1;
 89 |             else
 90 |                 addend = 0;
 91 |             break;
 92 |         }
 93 |         
 94 |         rnd_bits = a_mant & ((1 << RND_SIZE ) - 1);
 95 |         a_mant = (a_mant + addend) >> RND_SIZE;
 96 |         /* half way: select even result */
 97 |         if (rm == RM_RNE && rnd_bits == (1 << (RND_SIZE - 1)))
 98 |             a_mant &= ~1;
 99 |         if (a_mant > r_max)
100 |             goto overflow;
101 |         r = a_mant;
102 |         if (rnd_bits != 0)
103 |             *pfflags |= FFLAG_INEXACT;
104 |     }
105 |     if (a_sign)
106 |         r = -r;
107 |     return r;
108 | }
109 | 
110 | ICVT_INT glue(glue(glue(cvt_sf, F_SIZE), _i), ICVT_SIZE)(F_UINT a, RoundingModeEnum rm,
111 |                                                           uint32_t *pfflags)
112 | {
113 |     return glue(glue(glue(internal_cvt_sf, F_SIZE), _i), ICVT_SIZE)(a, rm, 
114 |                                                                     pfflags, FALSE);
115 | }
116 | 
117 | ICVT_UINT glue(glue(glue(cvt_sf, F_SIZE), _u), ICVT_SIZE)(F_UINT a, RoundingModeEnum rm,
118 |                                                           uint32_t *pfflags)
119 | {
120 |     return glue(glue(glue(internal_cvt_sf, F_SIZE), _i), ICVT_SIZE) (a, rm, 
121 |                                                                      pfflags, TRUE);
122 | }
123 | 
124 | /* conversions between float and integers */
125 | static F_UINT glue(glue(glue(internal_cvt_i, ICVT_SIZE), _sf), F_SIZE)(ICVT_INT a, 
126 |                                                                        RoundingModeEnum rm,
127 |                                                                        uint32_t *pfflags,
128 |                                                                        BOOL is_unsigned)
129 | {
130 |     uint32_t a_sign;
131 |     int32_t a_exp;
132 |     F_UINT a_mant;
133 |     ICVT_UINT r, mask;
134 |     int l;
135 | 
136 |     if (!is_unsigned && a < 0) {
137 |         a_sign = 1;
138 |         r = -a;
139 |     } else {
140 |         a_sign = 0;
141 |         r = a;
142 |     }
143 |     a_exp = (EXP_MASK / 2) + F_SIZE - 2;
144 |     /* need to reduce range before generic float normalization */
145 |     l = ICVT_SIZE - glue(clz, ICVT_SIZE)(r) - (F_SIZE - 1);
146 |     if (l > 0) {
147 |         mask = r & (((ICVT_UINT)1 << l) - 1);
148 |         r = (r >> l) | ((r & mask) != 0);
149 |         a_exp += l;
150 |     }
151 |     a_mant = r;
152 |     return normalize_sf(a_sign, a_exp, a_mant, rm, pfflags);
153 | }
154 | 
155 | F_UINT glue(glue(glue(cvt_i, ICVT_SIZE), _sf), F_SIZE)(ICVT_INT a, 
156 |                                                        RoundingModeEnum rm,
157 |                                                        uint32_t *pfflags)
158 | {
159 |     return glue(glue(glue(internal_cvt_i, ICVT_SIZE), _sf), F_SIZE)(a, rm, pfflags, FALSE);
160 | }
161 | 
162 | F_UINT glue(glue(glue(cvt_u, ICVT_SIZE), _sf), F_SIZE)(ICVT_UINT a, 
163 |                                                        RoundingModeEnum rm,
164 |                                                        uint32_t *pfflags)
165 | {
166 |     return glue(glue(glue(internal_cvt_i, ICVT_SIZE), _sf), F_SIZE)(a, rm, pfflags, TRUE);
167 | }
168 | 
169 | #undef ICVT_SIZE
170 | #undef ICVT_INT
171 | #undef ICVT_UINT
172 | 


--------------------------------------------------------------------------------
/tinypi.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Tiny PI computation
  3 |  * 
  4 |  * Copyright (c) 2017 Fabrice Bellard
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | #include <stdlib.h>
 25 | #include <stdio.h>
 26 | #include <inttypes.h>
 27 | #include <math.h>
 28 | #include <string.h>
 29 | #include <sys/time.h>
 30 | 
 31 | #include "libbf.h"
 32 | 
 33 | #define CHUD_A 13591409
 34 | #define CHUD_B 545140134
 35 | #define CHUD_C 640320
 36 | /* log2(C/12)*3 */
 37 | #define CHUD_BITS_PER_TERM 47.11041313821584202247
 38 | 
 39 | /* number of bits per base 10 digit */
 40 | #define BITS_PER_DIGIT 3.32192809488736234786
 41 | 
 42 | static bf_context_t bf_ctx;
 43 | 
 44 | static void *my_bf_realloc(void *opaque, void *ptr, size_t size)
 45 | {
 46 |     return realloc(ptr, size);
 47 | }
 48 | 
 49 | static void chud_bs(bf_t *P, bf_t *Q, bf_t *G, int64_t a, int64_t b, int need_g,
 50 |                     limb_t prec)
 51 | {
 52 |     int64_t c;
 53 | 
 54 |     if (a == (b - 1)) {
 55 |         bf_t T0, T1;
 56 |         
 57 |         bf_init(&bf_ctx, &T0);
 58 |         bf_init(&bf_ctx, &T1);
 59 |         bf_set_ui(G, 2 * b - 1);
 60 |         bf_mul_ui(G, G, 6 * b - 1, prec, BF_RNDN);
 61 |         bf_mul_ui(G, G, 6 * b - 5, prec, BF_RNDN);
 62 |         bf_set_ui(&T0, CHUD_B);
 63 |         bf_mul_ui(&T0, &T0, b, prec, BF_RNDN);
 64 |         bf_set_ui(&T1, CHUD_A);
 65 |         bf_add(&T0, &T0, &T1, prec, BF_RNDN);
 66 |         bf_mul(P, G, &T0, prec, BF_RNDN);
 67 |         P->sign = b & 1;
 68 | 
 69 |         bf_set_ui(Q, b);
 70 |         bf_mul_ui(Q, Q, b, prec, BF_RNDN);
 71 |         bf_mul_ui(Q, Q, b, prec, BF_RNDN);
 72 | #if LIMB_BITS == 64
 73 |         bf_mul_ui(Q, Q, (uint64_t)CHUD_C * CHUD_C * CHUD_C / 24, prec, BF_RNDN);
 74 | #else
 75 |         bf_mul_ui(Q, Q, CHUD_C, prec, BF_RNDN);
 76 |         bf_mul_ui(Q, Q, CHUD_C, prec, BF_RNDN);
 77 |         bf_mul_ui(Q, Q, CHUD_C / 24, prec, BF_RNDN);
 78 | #endif
 79 |         bf_delete(&T0);
 80 |         bf_delete(&T1);
 81 |     } else {
 82 |         bf_t P2, Q2, G2;
 83 |         
 84 |         bf_init(&bf_ctx, &P2);
 85 |         bf_init(&bf_ctx, &Q2);
 86 |         bf_init(&bf_ctx, &G2);
 87 | 
 88 |         c = (a + b) / 2;
 89 |         chud_bs(P, Q, G, a, c, 1, prec);
 90 |         chud_bs(&P2, &Q2, &G2, c, b, need_g, prec);
 91 |         
 92 |         /* Q = Q1 * Q2 */
 93 |         /* G = G1 * G2 */
 94 |         /* P = P1 * Q2 + P2 * G1 */
 95 |         bf_mul(&P2, &P2, G, prec, BF_RNDN);
 96 |         if (!need_g)
 97 |             bf_set_ui(G, 0);
 98 |         bf_mul(P, P, &Q2, prec, BF_RNDN);
 99 |         bf_add(P, P, &P2, prec, BF_RNDN);
100 |         bf_delete(&P2);
101 | 
102 |         bf_mul(Q, Q, &Q2, prec, BF_RNDN);
103 |         bf_delete(&Q2);
104 |         if (need_g)
105 |             bf_mul(G, G, &G2, prec, BF_RNDN);
106 |         bf_delete(&G2);
107 | #if 0
108 |         printf("%" PRId64 "-%" PRId64 " limbs: P=%" PRId64 " Q=%" PRId64 " G=%" PRId64 "\n",
109 |                a, b, P->len, Q->len, G->len);
110 | #endif
111 |     }
112 | }
113 | 
114 | static int64_t time_start;
115 | int verbose;
116 | 
/* Return the time reported by gettimeofday(), in milliseconds. */
static int64_t get_clock_msec(void)
{
    struct timeval now;
    int64_t msec;

    gettimeofday(&now, NULL);
    msec = now.tv_sec * 1000LL;
    msec += now.tv_usec / 1000;
    return msec;
}
123 | 
124 | static void step_start(const char *str)
125 | {
126 |     if (verbose) {
127 |         printf("%-20s", str);
128 |         fflush(stdout);
129 |         time_start = get_clock_msec();
130 |     }
131 | }
132 | 
133 | static void step_end(void)
134 | {
135 |     int64_t ti;
136 |     if (verbose) {
137 |         ti = get_clock_msec() - time_start;
138 |         printf("(%0.3f s)\n", ti / 1000.0);
139 |     }
140 | }
141 | 
142 | static void pi_chud(bf_t *Q, int64_t prec)
143 | {
144 |     int64_t n, prec1;
145 |     bf_t P, G;
146 | 
147 |     /* number of serie terms */
148 |     n = (int64_t)ceil(prec / CHUD_BITS_PER_TERM) + 10;
149 |     prec1 = prec + 32;
150 | 
151 |     bf_init(&bf_ctx, &P);
152 |     bf_init(&bf_ctx, &G);
153 | 
154 |     step_start("chud_bs");
155 |     chud_bs(&P, Q, &G, 0, n, 0, prec1);
156 |     
157 |     bf_mul_ui(&G, Q, CHUD_A, prec1, BF_RNDN);
158 |     bf_add(&P, &G, &P, prec1, BF_RNDN);
159 |     step_end();
160 |     
161 |     step_start("div");
162 |     bf_div(Q, Q, &P, prec1, BF_RNDF);
163 |     step_end();
164 |  
165 |     step_start("sqrt");
166 |     bf_set_ui(&P, CHUD_C);
167 |     bf_sqrt(&G, &P, prec1, BF_RNDF);
168 |     bf_mul_ui(&G, &G, (uint64_t)CHUD_C / 12, prec1, BF_RNDF);
169 |     step_end();
170 | 
171 |     step_start("final mul");
172 |     bf_mul(Q, Q, &G, prec, BF_RNDN);
173 |     step_end();
174 |     
175 |     bf_delete(&P);
176 |     bf_delete(&G);
177 | }
178 | 
179 | int main(int argc, char **argv)
180 | {
181 |     int64_t n_digits, prec, n_bits, ti_tot;
182 |     bf_t PI;
183 |     const char *output_filename;
184 |     FILE *f;
185 |     int arg_idx, dec_output;
186 |     char *digits;
187 |     size_t digits_len;
188 |     
189 |     dec_output = 1;
190 |     verbose = 0;
191 |     arg_idx = 1;
192 |     while (arg_idx < argc) {
193 |         if (!strcmp(argv[arg_idx], "-b")) {
194 |             dec_output = 0;
195 |             arg_idx++;
196 |         } else if (!strcmp(argv[arg_idx], "-v")) {
197 |             verbose = 1;
198 |             arg_idx++;
199 |         } else {
200 |             break;
201 |         }
202 |     }
203 |         
204 |     if (arg_idx >= argc) {
205 |         printf("usage: tinypi [options] n_digits [output_file]\n"
206 |                "\n"
207 |                "Options:\n"
208 |                "-b : output in binary (hexa) instead of base 10\n"
209 |                "-v : dump computation steps\n");
210 |         exit(1);
211 |     }
212 |     
213 |     n_digits = (int64_t)strtod(argv[arg_idx++], NULL);
214 |     output_filename = NULL;
215 |     if (arg_idx < argc)
216 |         output_filename = argv[arg_idx++];
217 |     
218 |     ti_tot = get_clock_msec();
219 |     n_digits = bf_max(n_digits, 50);
220 |     n_bits = (limb_t)ceil(n_digits * BITS_PER_DIGIT);
221 |     /* we add more bits to reduce the probability of bad rounding for
222 |        the last digits */
223 |     prec = n_bits + 32;
224 |     bf_context_init(&bf_ctx, my_bf_realloc, NULL);
225 |     bf_init(&bf_ctx, &PI);
226 | 
227 |     pi_chud(&PI, prec);
228 | 
229 |     if (dec_output) {
230 |         step_start("base conversion");
231 |         digits = bf_ftoa(&digits_len, &PI, 10, n_digits + 1,
232 |                              BF_FTOA_FORMAT_FIXED | BF_RNDZ);
233 |         step_end();
234 |     } else {
235 |         digits = bf_ftoa(&digits_len, &PI, 16, n_bits / 4,
236 |                          BF_FTOA_FORMAT_FIXED | BF_RNDZ);
237 |     }
238 |     ti_tot = get_clock_msec() - ti_tot;
239 |     if (verbose) {
240 |         printf("%-20s(%0.3f s)\n", "total", ti_tot / 1000.0);
241 |     }
242 |     
243 |     if (output_filename) {
244 |         f = fopen(output_filename, "wb");
245 |         if (!f) {
246 |             perror(output_filename);
247 |             exit(1);
248 |         }
249 |         fwrite(digits, 1, digits_len, f);
250 |         fclose(f);
251 |     }
252 |     free(digits);
253 |     bf_delete(&PI);
254 |     bf_context_end(&bf_ctx);
255 |     return 0;
256 | }
257 | 


--------------------------------------------------------------------------------