├── Makefile ├── VERSION ├── bfbench.c ├── bftest.c ├── cutils.c ├── cutils.h ├── libbf.c ├── libbf.h ├── pi_1e5.sha1sum ├── pi_1e6.sha1sum ├── pi_1e7.sha1sum ├── pi_1e8.sha1sum ├── pi_1e9.sha1sum ├── readme.txt ├── softfp.c ├── softfp.h ├── softfp_template.h ├── softfp_template_icvt.h └── tinypi.c /Makefile: -------------------------------------------------------------------------------- 1 | # Tiny arbitrary precision floating point library 2 | # 3 | # Copyright (c) 2017-2018 Fabrice Bellard 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | # Enable Windows compilation 24 | #CONFIG_WIN32=y 25 | # build AVX2 version 26 | CONFIG_AVX2=y 27 | # Enable profiling with gprof 28 | #CONFIG_PROFILE=y 29 | # compile the bftest utility to do regression tests and benchmarks. 
Must have 30 | # the MPFR and MPDecimal libraries 31 | #CONFIG_BFTEST=y 32 | # 32 bit compilation 33 | #CONFIG_M32=y 34 | 35 | #CONFIG_ASAN=y 36 | 37 | ifdef CONFIG_WIN32 38 | CROSS_PREFIX=x86_64-w64-mingw32- 39 | EXE:=.exe 40 | else 41 | EXE:= 42 | endif 43 | 44 | CC=$(CROSS_PREFIX)gcc 45 | CFLAGS=-Wall -g $(PROFILE) -MMD 46 | CFLAGS+=-O2 47 | CFLAGS+=-flto 48 | #CFLAGS+=-Os 49 | LDFLAGS= 50 | ifdef CONFIG_PROFILE 51 | CFLAGS+=-p 52 | LDFLAGS+=-p 53 | else 54 | #LDFLAGS+=-s # strip output 55 | endif 56 | ifdef CONFIG_ASAN 57 | CFLAGS+=-fsanitize=address 58 | LDFLAGS+=-fsanitize=address 59 | endif 60 | LIBS=-lm 61 | 62 | PROGS+=bfbench$(EXE) tinypi$(EXE) 63 | ifdef CONFIG_BFTEST 64 | PROGS+=bftest$(EXE) 65 | ifdef CONFIG_M32 66 | PROGS+=bftest32$(EXE) 67 | endif 68 | endif 69 | ifdef CONFIG_AVX2 70 | PROGS+=bfbench-avx2$(EXE) tinypi-avx2$(EXE) 71 | endif 72 | 73 | all: $(PROGS) 74 | 75 | tinypi$(EXE): tinypi.o libbf.o cutils.o 76 | $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) 77 | 78 | tinypi-avx2$(EXE): tinypi.avx2.o libbf.avx2.o cutils.avx2.o 79 | $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) 80 | 81 | BFTEST_LIBS:=$(LIBS) 82 | 83 | ifdef CONFIG_BFTEST 84 | BFTEST_LIBS:=-lmpfr -lgmp $(BFTEST_LIBS) 85 | bfbench.o bfbench.avx2.o: CFLAGS+=-DCONFIG_MPFR 86 | 87 | bftest$(EXE): bftest.o libbf.o cutils.o softfp.o 88 | $(CC) $(LDFLAGS) -o $@ $^ -lmpdec $(BFTEST_LIBS) 89 | 90 | ifdef CONFIG_M32 91 | bftest32$(EXE): bftest.m32.o libbf.m32.o cutils.m32.o softfp.m32.o 92 | $(CC) $(LDFLAGS) -m32 -o $@ $^ -lmpdec $(BFTEST_LIBS) 93 | endif 94 | endif 95 | 96 | bfbench$(EXE): bfbench.o libbf.o cutils.o 97 | $(CC) $(LDFLAGS) -o $@ $^ $(BFTEST_LIBS) 98 | 99 | bfbench-avx2$(EXE): bfbench.avx2.o libbf.avx2.o cutils.avx2.o 100 | $(CC) $(LDFLAGS) -o $@ $^ $(BFTEST_LIBS) 101 | 102 | test: all 103 | time ./tinypi 1e5 pi_1e5.txt 104 | sha1sum -c pi_1e5.sha1sum 105 | ifdef CONFIG_AVX2 106 | time ./tinypi-avx2 1e5 pi_1e5.txt 107 | sha1sum -c pi_1e5.sha1sum 108 | endif 109 | # 110 | time ./tinypi 1e6 pi_1e6.txt 
111 | sha1sum -c pi_1e6.sha1sum 112 | ifdef CONFIG_AVX2 113 | time ./tinypi-avx2 1e6 pi_1e6.txt 114 | sha1sum -c pi_1e6.sha1sum 115 | # 116 | time ./tinypi-avx2 1e7 pi_1e7.txt 117 | sha1sum -c pi_1e7.sha1sum 118 | # 119 | # time ./tinypi-avx2 1e8 pi_1e8.txt 120 | # sha1sum -c pi_1e8.sha1sum 121 | endif 122 | 123 | %.o: %.c 124 | $(CC) $(CFLAGS) -c -o $@ $< 125 | 126 | %.m32.o: %.c 127 | $(CC) -m32 $(CFLAGS) -c -o $@ $< 128 | 129 | %.avx2.o: %.c 130 | $(CC) $(CFLAGS) -mavx -mavx2 -mfma -mbmi2 -c -o $@ $< 131 | 132 | clean: 133 | rm -f $(PROGS) *.o *.d *~ 134 | 135 | -include $(wildcard *.d) 136 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 2021-03-27 2 | -------------------------------------------------------------------------------- /bfbench.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Big float tests 3 | * 4 | * Copyright (c) 2017 Fabrice Bellard 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | * THE SOFTWARE. 23 | */ 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #ifdef CONFIG_MPFR 32 | #include 33 | #endif 34 | 35 | #include "libbf.h" 36 | 37 | /* number of bits per base 10 digit */ 38 | #define BITS_PER_DIGIT 3.32192809488736234786 39 | 40 | static bf_context_t bf_ctx; 41 | 42 | static void *my_bf_realloc(void *opaque, void *ptr, size_t size) 43 | { 44 | return realloc(ptr, size); 45 | } 46 | 47 | static int64_t get_clock_msec(void) 48 | { 49 | struct timeval tv; 50 | gettimeofday(&tv, NULL); 51 | return tv.tv_sec * 1000LL + (tv.tv_usec / 1000); 52 | } 53 | 54 | /* we print at least 3 significant digits with at most 5 chars, except 55 | if larger than 9999T. The value is rounded to zero. 
*/ 56 | char *get_si_prefix(char *buf, int buf_size, uint64_t val) 57 | { 58 | static const char suffixes[4] = "kMGT"; 59 | uint64_t base; 60 | int i; 61 | 62 | if (val <= 999) { 63 | snprintf(buf, buf_size, "%" PRId64, val); 64 | } else { 65 | base = 1000; 66 | for(i=0;i<4;i++) { 67 | /* Note: we round to 0 */ 68 | if (val < base * 10) { 69 | snprintf(buf, buf_size, "%0.2f%c", 70 | floor((val * 100.0) / base) / 100.0, 71 | suffixes[i]); 72 | break; 73 | } else if (val < base * 100) { 74 | snprintf(buf, buf_size, "%0.1f%c", 75 | floor((val * 10.0) / base) / 10.0, 76 | suffixes[i]); 77 | break; 78 | } else if (val < base * 1000 || (i == 3)) { 79 | snprintf(buf, buf_size, 80 | "%" PRId64 "%c", 81 | val / base, 82 | suffixes[i]); 83 | break; 84 | } 85 | base = base * 1000; 86 | } 87 | } 88 | return buf; 89 | } 90 | 91 | static uint64_t mp_random64(uint64_t *pseed) 92 | { 93 | *pseed = *pseed * 6364136223846793005 + 1; 94 | return *pseed; 95 | } 96 | 97 | typedef enum { 98 | BF_OP_MUL, 99 | BF_OP_DIV, 100 | BF_OP_SQRT, 101 | 102 | BF_OP_COUNT, 103 | } BFOPEnum; 104 | 105 | const char *op_str[BF_OP_COUNT] = { 106 | "mul", 107 | "div", 108 | "sqrt", 109 | }; 110 | 111 | static BFOPEnum get_op_from_str(const char *str) 112 | { 113 | BFOPEnum op; 114 | for(op = 0; op < BF_OP_COUNT; op++) { 115 | if (!strcmp(str, op_str[op])) 116 | break; 117 | } 118 | if (op == BF_OP_COUNT) { 119 | fprintf(stderr, "Unknown operation: %s\n", str); 120 | exit(1); 121 | } 122 | return op; 123 | } 124 | 125 | #define K_STEPS 10 126 | 127 | static void bf_op_speed(double k_start1, double k_end1, 128 | const char *filename, int log_scale, BFOPEnum op) 129 | { 130 | int k, nb_its, it, dpl, fft_len_log2, nb_mods, k_end, k_start; 131 | bf_t A, B, C; 132 | limb_t n, i, prec; 133 | int64_t start_time, ti, n_digits; 134 | FILE *f; 135 | double tpl, K; 136 | char buf1[32], buf2[32]; 137 | uint64_t seed = 2; 138 | 139 | f = fopen(filename, "wb"); 140 | printf("%5s %5s %5s", "K", "BITS", "DIGIT"); 141 | 
if (op == BF_OP_MUL) { 142 | printf(" %3s %3s %2s", "FFT", "DPL", "M"); 143 | } 144 | printf(" %10s %10s\n", "ms", "ns/limb"); 145 | 146 | k_start = lrint(k_start1 * K_STEPS); 147 | k_end = lrint(k_end1 * K_STEPS); 148 | for(k = k_start; k <= k_end; k++) { 149 | K = (double)k / K_STEPS; 150 | n_digits = (int64_t)ceil(pow(10.0, K)); 151 | n = (limb_t)ceil(n_digits * BITS_PER_DIGIT / LIMB_BITS); 152 | prec = n * LIMB_BITS; 153 | fft_len_log2 = bf_get_fft_size(&dpl, &nb_mods, 2 * n); 154 | printf("%5.1f %5s %5s", 155 | K, 156 | get_si_prefix(buf1, sizeof(buf1), prec), 157 | get_si_prefix(buf2, sizeof(buf2), 158 | (int64_t)ceil(prec / BITS_PER_DIGIT))); 159 | if (op == BF_OP_MUL) { 160 | printf(" %3d %3d %2d", 161 | fft_len_log2, 162 | dpl, 163 | nb_mods); 164 | } 165 | fflush(stdout); 166 | bf_init(&bf_ctx, &A); 167 | bf_init(&bf_ctx, &B); 168 | bf_init(&bf_ctx, &C); 169 | bf_resize(&A, n); 170 | bf_resize(&B, n); 171 | A.expn = n * LIMB_BITS; 172 | B.expn = n * LIMB_BITS; 173 | for(i = 0; i < n; i++) { 174 | A.tab[i] = mp_random64(&seed); 175 | B.tab[i] = mp_random64(&seed); 176 | } 177 | /* normalize */ 178 | A.tab[n - 1] |= (limb_t)1 << (LIMB_BITS - 1); 179 | B.tab[n - 1] |= (limb_t)1 << (LIMB_BITS - 1); 180 | 181 | /* one multiplication to initialize the constants */ 182 | if (fft_len_log2 <= 20) { 183 | bf_mul(&C, &A, &B, n, BF_RNDN); 184 | bf_set_ui(&C, 0); 185 | } 186 | nb_its = 1; 187 | for(;;) { 188 | start_time = get_clock_msec(); 189 | switch(op) { 190 | case BF_OP_MUL: 191 | for(it = 0; it < nb_its; it++) { 192 | bf_mul(&C, &A, &B, prec, BF_RNDN); 193 | } 194 | break; 195 | case BF_OP_DIV: 196 | for(it = 0; it < nb_its; it++) { 197 | bf_div(&C, &A, &B, prec, BF_RNDF); 198 | } 199 | break; 200 | case BF_OP_SQRT: 201 | for(it = 0; it < nb_its; it++) { 202 | bf_sqrt(&C, &A, prec, BF_RNDF); 203 | } 204 | break; 205 | default: 206 | break; 207 | } 208 | ti = get_clock_msec() - start_time; 209 | if (ti >= 100) 210 | break; 211 | nb_its *= 2; 212 | } 213 | 
bf_delete(&A); 214 | bf_delete(&B); 215 | bf_delete(&C); 216 | tpl = (double)ti / nb_its / n * 1e6; 217 | printf(" %10.3f %10.1f\n", 218 | (double)ti / nb_its, 219 | tpl); 220 | if (log_scale) 221 | fprintf(f, "%f %f\n", K, tpl); 222 | else 223 | fprintf(f, "%" PRIu64 " %f\n", n_digits, tpl); 224 | fflush(f); 225 | } 226 | fclose(f); 227 | } 228 | 229 | #ifdef CONFIG_MPFR 230 | 231 | static void mpfr_mul_speed(double k_start1, double k_end1, 232 | const char *filename) 233 | { 234 | int k, nb_its, it, k_end, k_start; 235 | mpfr_t A, B, C; 236 | limb_t n, prec; 237 | int64_t start_time, ti, n_digits; 238 | FILE *f; 239 | double tpl, K; 240 | char buf1[32], buf2[32]; 241 | gmp_randstate_t rnd_state; 242 | 243 | gmp_randinit_mt(rnd_state); 244 | f = fopen(filename, "wb"); 245 | printf("%5s %5s %5s %10s %10s\n", "K", "BITS", "DIGIT", 246 | "ms", "ns/limb"); 247 | k_start = lrint(k_start1 * K_STEPS); 248 | k_end = lrint(k_end1 * K_STEPS); 249 | for(k = k_start; k <= k_end; k++) { 250 | K = (double)k / K_STEPS; 251 | n_digits = (int64_t)ceil(pow(10.0, K)); 252 | n = (limb_t)ceil(n_digits * BITS_PER_DIGIT / LIMB_BITS); 253 | printf("%5.1f %5s %5s", 254 | K, 255 | get_si_prefix(buf1, sizeof(buf1), n * LIMB_BITS), 256 | get_si_prefix(buf2, sizeof(buf2), 257 | (int64_t)ceil(n * LIMB_BITS / BITS_PER_DIGIT))); 258 | fflush(stdout); 259 | prec = n * LIMB_BITS; 260 | mpfr_init2(A, prec); 261 | mpfr_init2(B, prec); 262 | mpfr_init2(C, prec); 263 | mpfr_urandomb(A, rnd_state); 264 | mpfr_urandomb(B, rnd_state); 265 | nb_its = 1; 266 | for(;;) { 267 | start_time = get_clock_msec(); 268 | for(it = 0; it < nb_its; it++) { 269 | mpfr_mul(C, A, B, MPFR_RNDZ); 270 | } 271 | ti = get_clock_msec() - start_time; 272 | if (ti >= 100) 273 | break; 274 | nb_its *= 2; 275 | } 276 | mpfr_clear(A); 277 | mpfr_clear(B); 278 | mpfr_clear(C); 279 | tpl = (double)ti / nb_its / n * 1e6; 280 | printf(" %10.3f %10.1f\n", 281 | (double)ti / nb_its, 282 | tpl); 283 | fprintf(f, "%" PRIu64 " %f\n", 
n_digits, tpl); 284 | fflush(f); 285 | } 286 | fclose(f); 287 | gmp_randclear(rnd_state); 288 | } 289 | 290 | static void mpfr_bench(double k_start, double k_end, 291 | const char *output_filename) 292 | { 293 | FILE *f; 294 | const char *name; 295 | 296 | printf("LIBBF:\n"); 297 | bf_op_speed(k_start, k_end, "/tmp/bf_mul.txt", 0, BF_OP_MUL); 298 | printf("MPFR:\n"); 299 | mpfr_mul_speed(k_start, k_end, "/tmp/mpfr_mul.txt"); 300 | 301 | f = fopen("/tmp/gnuplot.cmd", "wb"); 302 | if (output_filename) { 303 | fprintf(f, "set terminal png\n" 304 | "set output \"%s\"\n", 305 | output_filename); 306 | } 307 | fprintf(f, "set xlabel \"Number of digits\"\n"); 308 | fprintf(f, "set ylabel \"ns/limb\"\n"); 309 | fprintf(f, "set logscale x 10\n"); 310 | fprintf(f, "plot "); 311 | #ifdef __AVX2__ 312 | name = "LIBBF(AVX2)"; 313 | #else 314 | name = "LIBBF"; 315 | #endif 316 | fprintf(f, "\"/tmp/bf_mul.txt\" with linespoints title \"%s\"," 317 | "\"/tmp/mpfr_mul.txt\" with linespoints title \"MPFR\"\n", name); 318 | if (!output_filename) { 319 | fprintf(f, "pause -1\n"); 320 | } 321 | fclose(f); 322 | 323 | system("gnuplot /tmp/gnuplot.cmd"); 324 | } 325 | 326 | #endif /* CONFIG_MPFR */ 327 | 328 | int main(int argc, char **argv) 329 | { 330 | const char *cmd; 331 | 332 | if (argc < 2) { 333 | printf("usage: bftest cmd [arguments...]\n" 334 | "cmd is:\n" 335 | "[mul|div|sqrt] [k_start] [k_end] test function on numbers of 10^k digits\n" 336 | #ifdef CONFIG_MPFR 337 | "mpfr_bench [k_start] [k_end] [png_file] benchmark with MPFR\n" 338 | #endif 339 | ); 340 | exit(1); 341 | } 342 | bf_context_init(&bf_ctx, my_bf_realloc, NULL); 343 | cmd = argv[1]; 344 | #ifdef CONFIG_MPFR 345 | if (!strcmp(cmd, "mpfr_bench")) { 346 | double k_start, k_end; 347 | const char *filename; 348 | k_start = 4; 349 | if (argc > 2) 350 | k_start = strtod(argv[2], NULL); 351 | k_end = k_start; 352 | if (argc > 3) 353 | k_end = strtod(argv[3], NULL); 354 | filename = NULL; 355 | if (argc > 4) 356 | filename 
= argv[4]; 357 | mpfr_bench(k_start, k_end, filename); 358 | } else 359 | #endif 360 | { 361 | double k_start, k_end; 362 | BFOPEnum op; 363 | op = get_op_from_str(cmd); 364 | k_start = 4; 365 | if (argc > 2) 366 | k_start = strtod(argv[2], NULL); 367 | k_end = k_start; 368 | if (argc > 3) 369 | k_end = strtod(argv[3], NULL); 370 | bf_op_speed(k_start, k_end, "/tmp/plot.txt", 1, op); 371 | } 372 | return 0; 373 | } 374 | -------------------------------------------------------------------------------- /bftest.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Tiny arbitrary precision floating point library tests 3 | * 4 | * Copyright (c) 2017 Fabrice Bellard 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | * THE SOFTWARE. 
23 | */ 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "libbf.h" 36 | #include "cutils.h" 37 | #include "softfp.h" 38 | #include "mpdecimal.h" 39 | 40 | typedef enum { 41 | /* low level operations */ 42 | BF_OP_MP_SQRTREM, 43 | BF_OP_MP_RECIP, 44 | 45 | /* binary floating point */ 46 | BF_OP_MUL, 47 | BF_OP_ADD, 48 | BF_OP_SUB, 49 | BF_OP_RINT, 50 | BF_OP_ROUND, 51 | BF_OP_CMP_EQ, 52 | BF_OP_CMP_LT, 53 | BF_OP_CMP_LE, 54 | BF_OP_DIV, 55 | BF_OP_FMOD, 56 | BF_OP_REM, 57 | BF_OP_SQRT, 58 | BF_OP_OR, 59 | BF_OP_XOR, 60 | BF_OP_AND, 61 | BF_OP_CAN_ROUND, 62 | BF_OP_MUL_L2RADIX, 63 | BF_OP_DIV_L2RADIX, 64 | BF_OP_ATOF, 65 | BF_OP_FTOA, 66 | BF_OP_EXP, 67 | BF_OP_LOG, 68 | BF_OP_COS, 69 | BF_OP_SIN, 70 | BF_OP_TAN, 71 | BF_OP_ATAN, 72 | BF_OP_ATAN2, 73 | BF_OP_ASIN, 74 | BF_OP_ACOS, 75 | BF_OP_POW, 76 | 77 | /* decimal floating point */ 78 | BF_OP_ADD_DEC, 79 | BF_OP_MUL_DEC, 80 | BF_OP_DIV_DEC, 81 | BF_OP_SQRT_DEC, 82 | BF_OP_FMOD_DEC, 83 | BF_OP_DIVREM_DEC, 84 | BF_OP_RINT_DEC, 85 | 86 | BF_OP_COUNT, 87 | } MPFTestOPEnum; 88 | 89 | const char *op_str[BF_OP_COUNT] = { 90 | "mp_sqrtrem", 91 | "mp_recip", 92 | "mul", 93 | "add", 94 | "sub", 95 | "rint", 96 | "round", 97 | "cmp_eq", 98 | "cmp_lt", 99 | "cmp_le", 100 | "div", 101 | "fmod", 102 | "rem", 103 | "sqrt", 104 | "or", 105 | "xor", 106 | "and", 107 | "can_round", 108 | "mul_l2radix", 109 | "div_l2radix", 110 | "atof", 111 | "ftoa", 112 | "exp", 113 | "log", 114 | "cos", 115 | "sin", 116 | "tan", 117 | "atan", 118 | "atan2", 119 | "asin", 120 | "acos", 121 | "pow", 122 | 123 | "add_dec", 124 | "mul_dec", 125 | "div_dec", 126 | "sqrt_dec", 127 | "fmod_dec", 128 | "divrem_dec", 129 | "rint_dec", 130 | }; 131 | 132 | const char *rnd_str[7] = { 133 | "N", 134 | "Z", 135 | "D", 136 | "U", 137 | "NA", 138 | "A", 139 | "F", 140 | }; 141 | 142 | #define SPECIAL_COUNT 7 143 | 144 | static bf_context_t bf_ctx; 145 | 
146 | static void *my_bf_realloc(void *opaque, void *ptr, size_t size) 147 | { 148 | return realloc(ptr, size); 149 | } 150 | 151 | int mp_cmp(const limb_t *taba, size_t na, const limb_t *tabb, size_t nb) 152 | { 153 | slimb_t n, i; 154 | limb_t a, b; 155 | 156 | n = na; 157 | if (nb > n) 158 | n = nb; 159 | for(i = n - 1; i >= 0; i--) { 160 | if (i < na) 161 | a = taba[i]; 162 | else 163 | a = 0; 164 | if (i < nb) 165 | b = tabb[i]; 166 | else 167 | b = 0; 168 | if (a != b) { 169 | if (a < b) 170 | return -1; 171 | else 172 | return 1; 173 | } 174 | } 175 | return 0; 176 | } 177 | 178 | static void set_special(bf_t *a, int idx) 179 | { 180 | switch(idx) { 181 | case 0: 182 | bf_set_zero(a, 0); 183 | break; 184 | case 1: 185 | bf_set_zero(a, 1); /* -0 */ 186 | break; 187 | case 2: 188 | bf_set_inf(a, 0); 189 | break; 190 | case 3: 191 | bf_set_inf(a, 1); 192 | break; 193 | case 4: 194 | bf_set_si(a, 1); 195 | break; 196 | case 5: 197 | bf_set_si(a, -1); 198 | break; 199 | case 6: 200 | bf_set_nan(a); 201 | break; 202 | default: 203 | abort(); 204 | } 205 | } 206 | 207 | static void set_special_dec(bfdec_t *a, int idx) 208 | { 209 | switch(idx) { 210 | case 0: 211 | bfdec_set_zero(a, 0); 212 | break; 213 | case 1: 214 | bfdec_set_zero(a, 1); /* -0 */ 215 | break; 216 | case 2: 217 | bfdec_set_inf(a, 0); 218 | break; 219 | case 3: 220 | bfdec_set_inf(a, 1); 221 | break; 222 | case 4: 223 | bfdec_set_si(a, 1); 224 | break; 225 | case 5: 226 | bfdec_set_si(a, -1); 227 | break; 228 | case 6: 229 | bfdec_set_nan(a); 230 | break; 231 | default: 232 | abort(); 233 | } 234 | } 235 | 236 | typedef struct mp_randstate_t { 237 | uint64_t val; 238 | } mp_randstate_t; 239 | 240 | void mp_randinit(mp_randstate_t *state, uint64_t seed) 241 | { 242 | state->val = seed; 243 | } 244 | 245 | static inline uint64_t mp_random64(mp_randstate_t *s) 246 | { 247 | s->val = s->val * 6364136223846793005 + 1; 248 | /* avoid bad modulo properties 249 | XXX: use mersenne twistter generator */ 
250 | return (s->val << 32) | (s->val >> 32); 251 | } 252 | 253 | /* random number between 0 and 1 with large sequences of identical bits */ 254 | static void mp_rrandom(limb_t *tab, limb_t prec, mp_randstate_t *state) 255 | { 256 | slimb_t n, max_run_len, cur_len, j, len, bit_index, nb_bits; 257 | int cur_state, m; 258 | 259 | n = (prec + LIMB_BITS - 1) / LIMB_BITS; 260 | /* same idea as GMP. It would be probably better to use a non 261 | uniform law */ 262 | m = mp_random64(state) % 4 + 1; 263 | max_run_len = bf_max(prec / m, 1); 264 | cur_state = mp_random64(state) & 1; 265 | cur_len = mp_random64(state) % max_run_len + 1; 266 | nb_bits = n * LIMB_BITS; 267 | 268 | memset(tab, 0, sizeof(limb_t) * n); 269 | bit_index = nb_bits - prec; 270 | while (bit_index < nb_bits) { 271 | len = bf_min(cur_len, nb_bits - bit_index); 272 | if (cur_state) { 273 | /* XXX: inefficient */ 274 | for(j = 0; j < len; j++) { 275 | tab[bit_index >> LIMB_LOG2_BITS] |= (limb_t)1 << (bit_index & (LIMB_BITS - 1)); 276 | bit_index++; 277 | } 278 | } 279 | bit_index += len; 280 | cur_len -= len; 281 | if (cur_len == 0) { 282 | cur_len = mp_random64(state) % max_run_len + 1; 283 | cur_state ^= 1; 284 | } 285 | } 286 | } 287 | 288 | static void bf_rrandom(bf_t *a, limb_t prec, mp_randstate_t *state) 289 | { 290 | slimb_t n; 291 | 292 | n = (prec + LIMB_BITS - 1) / LIMB_BITS; 293 | bf_resize(a, n); 294 | mp_rrandom(a->tab, prec, state); 295 | a->sign = 0; 296 | a->expn = 0; 297 | bf_normalize_and_round(a, prec, BF_RNDZ); 298 | } 299 | 300 | static void bf_rrandom_large(bf_t *a, limb_t prec, mp_randstate_t *s) 301 | { 302 | limb_t prec1; 303 | prec1 = mp_random64(s) % (2 * prec) + 1; 304 | bf_rrandom(a, prec1, s); 305 | a->sign = mp_random64(s) & 1; 306 | } 307 | 308 | /* random number between 0 and 1 with large sequences zeros, nines or 309 | random digits */ 310 | static void bfdec_rrandom(bfdec_t *a, limb_t prec, mp_randstate_t *state) 311 | { 312 | slimb_t n, max_run_len, cur_len, j, len, 
digit_index, nb_digits; 313 | int cur_state, m; 314 | 315 | n = (prec + LIMB_DIGITS - 1) / LIMB_DIGITS; 316 | bfdec_resize(a, n); 317 | 318 | /* same idea as GMP. It would be probably better to use a non 319 | uniform law */ 320 | m = mp_random64(state) % 4 + 1; 321 | max_run_len = bf_max(prec / m, 1); 322 | cur_state = mp_random64(state) % 3; 323 | cur_len = mp_random64(state) % max_run_len + 1; 324 | nb_digits = n * LIMB_DIGITS; 325 | 326 | memset(a->tab, 0, sizeof(limb_t) * n); 327 | digit_index = nb_digits - prec; 328 | while (digit_index < nb_digits) { 329 | len = bf_min(cur_len, nb_digits - digit_index); 330 | switch(cur_state) { 331 | case 0: 332 | /* zeros */ 333 | break; 334 | case 1: 335 | /* nines */ 336 | for(j = 0; j < len; j++) { 337 | a->tab[digit_index / LIMB_DIGITS] += 338 | 9 * mp_pow_dec[digit_index % LIMB_DIGITS]; 339 | digit_index++; 340 | } 341 | break; 342 | case 2: 343 | /* random */ 344 | for(j = 0; j < len; j++) { 345 | a->tab[digit_index / LIMB_DIGITS] += 346 | (mp_random64(state) % 10) * 347 | mp_pow_dec[digit_index % LIMB_DIGITS]; 348 | digit_index++; 349 | } 350 | break; 351 | } 352 | digit_index += len; 353 | cur_len -= len; 354 | if (cur_len == 0) { 355 | cur_len = mp_random64(state) % max_run_len + 1; 356 | cur_state ^= 1; 357 | } 358 | } 359 | a->sign = 0; 360 | a->expn = 0; 361 | bfdec_normalize_and_round(a, prec, BF_RNDZ); 362 | } 363 | 364 | static void bfdec_rrandom_large(bfdec_t *a, limb_t prec, mp_randstate_t *s) 365 | { 366 | limb_t prec1; 367 | 368 | prec1 = mp_random64(s) % (2 * prec) + 1; 369 | bfdec_rrandom(a, prec1, s); 370 | a->sign = mp_random64(s) & 1; 371 | } 372 | 373 | /* random integer with 0 to prec bits */ 374 | static void bf_rrandom_int(bf_t *a, limb_t prec, mp_randstate_t *rnd_state) 375 | { 376 | limb_t prec1; 377 | prec1 = mp_random64(rnd_state) % prec + 1; 378 | bf_rrandom(a, prec1, rnd_state); 379 | if (a->expn != BF_EXP_ZERO) 380 | a->expn += prec1; 381 | a->sign = mp_random64(rnd_state) & 1; 382 | } 
383 | 384 | /* random integer with long sequences of '0' and '1' */ 385 | uint64_t rrandom_u(int len, mp_randstate_t *s) 386 | { 387 | int bit, pos, n, end; 388 | uint64_t a; 389 | 390 | bit = mp_random64(s) & 1; 391 | pos = 0; 392 | a = 0; 393 | for(;;) { 394 | n = (mp_random64(s) % len) + 1; 395 | end = pos + n; 396 | if (end > len) 397 | end = len; 398 | if (bit) { 399 | n = end - pos; 400 | a |= ((uint64_t)(1 << n) - 1) << pos; 401 | } 402 | if (end >= len) 403 | break; 404 | pos = end; 405 | bit ^= 1; 406 | } 407 | return a; 408 | } 409 | 410 | #define F64_MANT_SIZE 52 411 | #define F64_EXP_MASK ((1 << 11) - 1) 412 | 413 | uint64_t rrandom_sf64(mp_randstate_t *s) 414 | { 415 | uint32_t a_exp, a_sign; 416 | uint64_t a_mant; 417 | a_sign = mp_random64(s) & 1; 418 | 419 | /* generate exponent close to the min/max more often than random */ 420 | switch(mp_random64(s) & 15) { 421 | case 0: 422 | a_exp = (mp_random64(s) % (2 * F64_MANT_SIZE)) & F64_EXP_MASK; 423 | break; 424 | case 1: 425 | a_exp = (F64_EXP_MASK - (mp_random64(s) % (2 * F64_MANT_SIZE))) & F64_EXP_MASK; 426 | break; 427 | default: 428 | a_exp = mp_random64(s) & F64_EXP_MASK; 429 | break; 430 | } 431 | a_mant = rrandom_u(F64_MANT_SIZE, s); 432 | return ((uint64_t)a_sign << 63) | ((uint64_t)a_exp << F64_MANT_SIZE) | a_mant; 433 | } 434 | 435 | static int64_t get_clock_msec(void) 436 | { 437 | struct timeval tv; 438 | gettimeofday(&tv, NULL); 439 | return tv.tv_sec * 1000LL + (tv.tv_usec / 1000); 440 | } 441 | 442 | static inline uint64_t get_cycles(void) 443 | { 444 | uint32_t low,high; 445 | uint64_t val; 446 | asm volatile("rdtsc" : "=a" (low), "=d" (high)); 447 | val = high; 448 | val <<= 32; 449 | val |= low; 450 | return val; 451 | } 452 | 453 | static mpfr_rnd_t mpfr_get_rnd_mode(bf_rnd_t rnd_mode) 454 | { 455 | const mpfr_rnd_t rnd_mode_tab[] = { 456 | MPFR_RNDN, 457 | MPFR_RNDZ, 458 | MPFR_RNDD, 459 | MPFR_RNDU, 460 | MPFR_RNDNA, 461 | MPFR_RNDA, 462 | }; 463 | return rnd_mode_tab[rnd_mode]; 
464 | } 465 | 466 | static void mpfr_to_bf(bf_t *r1, mpfr_t r) 467 | { 468 | char *str; 469 | mpfr_asprintf(&str, "%Ra", r); 470 | // printf("mpfr r=%s\n", str); 471 | assert(bf_atof(r1, str, NULL, 16, BF_PREC_INF, BF_RNDZ) == 0); 472 | mpfr_free_str(str); 473 | } 474 | 475 | static void bf_to_mpfr(mpfr_t a, const bf_t *a1) 476 | { 477 | char *str; 478 | // bf_print_str("a", a1); 479 | str = bf_ftoa(NULL, a1, 16, BF_PREC_INF, BF_RNDZ | BF_FTOA_FORMAT_FREE | 480 | BF_FTOA_ADD_PREFIX); 481 | // printf("mpfr a=%s\n", str); 482 | mpfr_set_str(a, str, 0, MPFR_RNDZ); 483 | free(str); 484 | } 485 | 486 | void mpfr_exec_init(void) 487 | { 488 | slimb_t e_max, e_min; 489 | e_max = (limb_t)1 << (BF_EXP_BITS_MAX - 1); 490 | e_min = -e_max + 3; 491 | mpfr_set_emin(e_min); 492 | mpfr_set_emax(e_max); 493 | } 494 | 495 | int mpfr_exec_op(MPFTestOPEnum op, bf_t *r1, bf_t *a1, bf_t *b1, 496 | int64_t prec, int rnd_mode1, int64_t *pcycles) 497 | { 498 | mpfr_t a, b, r; 499 | mpfr_rnd_t rnd_mode; 500 | int ret, mpfr_ret; 501 | 502 | mpfr_init2(a, bf_max(a1->len, 1) * LIMB_BITS); 503 | mpfr_init2(b, bf_max(b1->len, 1) * LIMB_BITS); 504 | if (op == BF_OP_RINT) { 505 | /* infinite precision for rint */ 506 | mpfr_init2(r, bf_max(a1->len, 1) * LIMB_BITS); 507 | } else { 508 | mpfr_init2(r, prec); 509 | } 510 | 511 | bf_to_mpfr(a, a1); 512 | bf_to_mpfr(b, b1); 513 | 514 | rnd_mode = mpfr_get_rnd_mode(rnd_mode1); 515 | 516 | ret = 0; 517 | mpfr_ret = 0; 518 | *pcycles -= get_cycles(); 519 | switch(op) { 520 | case BF_OP_MUL: 521 | mpfr_ret = mpfr_mul(r, a, b, rnd_mode); 522 | break; 523 | case BF_OP_ADD: 524 | mpfr_ret = mpfr_add(r, a, b, rnd_mode); 525 | break; 526 | case BF_OP_SUB: 527 | mpfr_ret = mpfr_sub(r, a, b, rnd_mode); 528 | break; 529 | case BF_OP_RINT: 530 | mpfr_ret = mpfr_rint(r, a, rnd_mode); 531 | break; 532 | case BF_OP_ROUND: 533 | mpfr_ret = mpfr_set(r, a, rnd_mode); 534 | break; 535 | case BF_OP_CMP_EQ: 536 | ret = mpfr_equal_p(a, b); 537 | break; 538 | case 
BF_OP_CMP_LT: 539 | ret = mpfr_less_p(a, b); 540 | break; 541 | case BF_OP_CMP_LE: 542 | ret = mpfr_lessequal_p(a, b); 543 | break; 544 | case BF_OP_DIV: 545 | mpfr_ret = mpfr_div(r, a, b, rnd_mode); 546 | break; 547 | case BF_OP_FMOD: 548 | mpfr_ret = mpfr_fmod(r, a, b, rnd_mode); 549 | break; 550 | case BF_OP_REM: 551 | mpfr_ret = mpfr_remainder(r, a, b, rnd_mode); 552 | break; 553 | case BF_OP_SQRT: 554 | mpfr_ret = mpfr_sqrt(r, a, rnd_mode); 555 | break; 556 | case BF_OP_OR: 557 | case BF_OP_XOR: 558 | case BF_OP_AND: 559 | { 560 | mpz_t ai, bi; 561 | 562 | mpz_init(ai); 563 | mpz_init(bi); 564 | mpfr_get_z(ai, a, MPFR_RNDZ); 565 | mpfr_get_z(bi, b, MPFR_RNDZ); 566 | switch(op) { 567 | case BF_OP_OR: 568 | mpz_ior(ai, ai, bi); 569 | break; 570 | case BF_OP_XOR: 571 | mpz_xor(ai, ai, bi); 572 | break; 573 | case BF_OP_AND: 574 | mpz_and(ai, ai, bi); 575 | break; 576 | default: 577 | break; 578 | } 579 | mpfr_set_z(r, ai, MPFR_RNDZ); 580 | mpz_clear(ai); 581 | mpz_clear(bi); 582 | } 583 | break; 584 | case BF_OP_EXP: 585 | mpfr_ret = mpfr_exp(r, a, rnd_mode); 586 | break; 587 | case BF_OP_LOG: 588 | mpfr_ret = mpfr_log(r, a, rnd_mode); 589 | break; 590 | case BF_OP_COS: 591 | mpfr_ret = mpfr_cos(r, a, rnd_mode); 592 | break; 593 | case BF_OP_SIN: 594 | mpfr_ret = mpfr_sin(r, a, rnd_mode); 595 | break; 596 | case BF_OP_TAN: 597 | mpfr_ret = mpfr_tan(r, a, rnd_mode); 598 | break; 599 | case BF_OP_ATAN: 600 | mpfr_ret = mpfr_atan(r, a, rnd_mode); 601 | break; 602 | case BF_OP_ATAN2: 603 | mpfr_ret = mpfr_atan2(r, a, b, rnd_mode); 604 | break; 605 | case BF_OP_ASIN: 606 | mpfr_ret = mpfr_asin(r, a, rnd_mode); 607 | break; 608 | case BF_OP_ACOS: 609 | mpfr_ret = mpfr_acos(r, a, rnd_mode); 610 | break; 611 | case BF_OP_POW: 612 | mpfr_ret = mpfr_pow(r, a, b, rnd_mode); 613 | break; 614 | default: 615 | abort(); 616 | } 617 | *pcycles += get_cycles(); 618 | if (mpfr_ret != 0) 619 | ret |= BF_ST_INEXACT; 620 | mpfr_to_bf(r1, r); 621 | mpfr_clear(a); 622 | mpfr_clear(b); 
623 | mpfr_clear(r); 624 | return ret; 625 | } 626 | 627 | int mpfr_exec_setstr(bf_t *r, const char *str, int radix, 628 | int64_t prec, int rnd_mode) 629 | { 630 | mpfr_t r1; 631 | int mpfr_ret, ret; 632 | mpfr_init2(r1, prec); 633 | mpfr_ret = mpfr_strtofr(r1, str, NULL, radix, mpfr_get_rnd_mode(rnd_mode)); 634 | ret = 0; 635 | if (mpfr_ret != 0) 636 | ret |= BF_ST_INEXACT; 637 | mpfr_to_bf(r, r1); 638 | mpfr_clear(r1); 639 | return ret; 640 | } 641 | 642 | static int softfp_get_rnd_mode(bf_rnd_t rnd_mode) 643 | { 644 | switch(rnd_mode) { 645 | case BF_RNDN: 646 | return RM_RNE; 647 | case BF_RNDZ: 648 | return RM_RTZ; 649 | case BF_RNDU: 650 | return RM_RUP; 651 | case BF_RNDD: 652 | return RM_RDN; 653 | case BF_RNDNA: 654 | return RM_RMM; 655 | default: 656 | abort(); 657 | } 658 | } 659 | 660 | static int softfp_set_status(uint32_t fflags) 661 | { 662 | int ret = 0; 663 | if (fflags & FFLAG_INVALID_OP) 664 | ret |= BF_ST_INVALID_OP; 665 | if (fflags & FFLAG_DIVIDE_ZERO) 666 | ret |= BF_ST_DIVIDE_ZERO; 667 | if (fflags & FFLAG_OVERFLOW) 668 | ret |= BF_ST_OVERFLOW; 669 | if (fflags & FFLAG_UNDERFLOW) 670 | ret |= BF_ST_UNDERFLOW; 671 | if (fflags & FFLAG_INEXACT) 672 | ret |= BF_ST_INEXACT; 673 | return ret; 674 | } 675 | 676 | typedef union { 677 | double d; 678 | sfloat64 u; 679 | } Float64Union; 680 | 681 | int softfp_exec_op(MPFTestOPEnum op, bf_t *r1, bf_t *a1, bf_t *b1, 682 | limb_t prec, bf_rnd_t rnd_mode, int64_t *pcycles) 683 | { 684 | sfloat64 r, a, b; 685 | int ret = 0; 686 | uint32_t fflags, rm; 687 | Float64Union u; 688 | 689 | *pcycles -= get_cycles(); 690 | /* Note: the inputs must already be float64 */ 691 | bf_get_float64(a1, &u.d, BF_RNDZ); 692 | // printf("ad=%a\n", u.d); 693 | a = u.u; 694 | /* Note: the inputs must already be float64 */ 695 | bf_get_float64(b1, &u.d, BF_RNDZ); 696 | // printf("bd=%a\n", u.d); 697 | b = u.u; 698 | 699 | rm = softfp_get_rnd_mode(rnd_mode); 700 | fflags = 0; 701 | switch(op) { 702 | case BF_OP_MUL: 703 | r = 
mul_sf64(a, b, rm, &fflags); 704 | ret = softfp_set_status(fflags); 705 | break; 706 | case BF_OP_ADD: 707 | r = add_sf64(a, b, rm, &fflags); 708 | ret = softfp_set_status(fflags); 709 | break; 710 | case BF_OP_SUB: 711 | r = sub_sf64(a, b, rm, &fflags); 712 | ret = softfp_set_status(fflags); 713 | break; 714 | case BF_OP_CMP_EQ: 715 | r = 0; 716 | ret = eq_quiet_sf64(a, b, &fflags); 717 | break; 718 | case BF_OP_CMP_LT: 719 | r = 0; 720 | ret = lt_sf64(a, b, &fflags); 721 | break; 722 | case BF_OP_CMP_LE: 723 | r = 0; 724 | ret = le_sf64(a, b, &fflags); 725 | break; 726 | case BF_OP_DIV: 727 | r = div_sf64(a, b, rm, &fflags); 728 | ret = softfp_set_status(fflags); 729 | break; 730 | case BF_OP_SQRT: 731 | r = sqrt_sf64(a, rm, &fflags); 732 | ret = softfp_set_status(fflags); 733 | break; 734 | // case BF_OP_RINT: 735 | // case BF_OP_OR: 736 | // case BF_OP_XOR: 737 | // case BF_OP_AND: 738 | default: 739 | abort(); 740 | } 741 | /* Note: the inputs must already be float64 */ 742 | u.u = r; 743 | // printf("rd=%a\n", u.d); 744 | bf_set_float64(r1, u.d); 745 | *pcycles += get_cycles(); 746 | return ret; 747 | } 748 | 749 | mpd_context_t mpd_ctx; 750 | 751 | static void bfdec_to_mpd(mpd_t *a1, const bfdec_t *a) 752 | { 753 | char *a_str; 754 | a_str = bfdec_ftoa(NULL, a, BF_PREC_INF, BF_RNDZ | BF_FTOA_FORMAT_FREE); 755 | // printf("a_str=%s\n", a_str); 756 | mpd_qsetprec(&mpd_ctx, a->len * LIMB_DIGITS); 757 | mpd_set_string(a1, a_str, &mpd_ctx); 758 | free(a_str); 759 | } 760 | 761 | static void mpd_to_bfdec(bfdec_t *r, const mpd_t *r1) 762 | { 763 | char *r1_str; 764 | r1_str = mpd_to_sci(r1, 0); 765 | // printf("r1_str=%s\n", r1_str); 766 | bfdec_atof(r, r1_str, NULL, BF_PREC_INF, BF_RNDZ); 767 | // bfdec_print_str("ref", r); 768 | free(r1_str); 769 | } 770 | 771 | int mpdecimal_exec_op(MPFTestOPEnum op, bfdec_t *r, bfdec_t *a, bfdec_t *b, 772 | limb_t prec, bf_rnd_t rnd_mode, int64_t *pcycles) 773 | { 774 | mpd_t *a1, *b1, *r1; 775 | uint32_t status; 776 | int ret; 
777 | 778 | a1 = mpd_new(&mpd_ctx); 779 | b1 = mpd_new(&mpd_ctx); 780 | r1 = mpd_new(&mpd_ctx); 781 | 782 | bfdec_to_mpd(a1, a); 783 | bfdec_to_mpd(b1, b); 784 | 785 | mpd_qsetprec(&mpd_ctx, prec); 786 | 787 | // printf("rnd_mode1=%d\n", rnd_mode); 788 | switch(rnd_mode) { 789 | case BF_RNDN: 790 | mpd_qsetround(&mpd_ctx, MPD_ROUND_HALF_EVEN); 791 | break; 792 | case BF_RNDZ: 793 | mpd_qsetround(&mpd_ctx, MPD_ROUND_DOWN); 794 | break; 795 | case BF_RNDU: 796 | mpd_qsetround(&mpd_ctx, MPD_ROUND_CEILING); 797 | break; 798 | case BF_RNDD: 799 | mpd_qsetround(&mpd_ctx, MPD_ROUND_FLOOR); 800 | break; 801 | case BF_RNDNA: 802 | mpd_qsetround(&mpd_ctx, MPD_ROUND_HALF_UP); 803 | break; 804 | case BF_RNDA: 805 | mpd_qsetround(&mpd_ctx, MPD_ROUND_UP); 806 | break; 807 | default: 808 | abort(); 809 | } 810 | 811 | *pcycles -= get_cycles(); 812 | 813 | status = 0; 814 | switch(op) { 815 | case BF_OP_ADD_DEC: 816 | mpd_qadd(r1, a1, b1, &mpd_ctx, &status); 817 | break; 818 | case BF_OP_MUL_DEC: 819 | mpd_qmul(r1, a1, b1, &mpd_ctx, &status); 820 | break; 821 | case BF_OP_DIV_DEC: 822 | mpd_qdiv(r1, a1, b1, &mpd_ctx, &status); 823 | break; 824 | case BF_OP_SQRT_DEC: 825 | mpd_qsqrt(r1, a1, &mpd_ctx, &status); 826 | break; 827 | case BF_OP_FMOD_DEC: 828 | mpd_qrem(r1, a1, b1, &mpd_ctx, &status); 829 | break; 830 | case BF_OP_RINT_DEC: 831 | mpd_qround_to_intx(r1, a1, &mpd_ctx, &status); 832 | break; 833 | default: 834 | abort(); 835 | } 836 | 837 | *pcycles += get_cycles(); 838 | 839 | ret = 0; 840 | if (status & MPD_Inexact) 841 | ret |= BF_ST_INEXACT; 842 | if (status & MPD_Overflow) 843 | ret |= BF_ST_OVERFLOW; 844 | if (status & MPD_Underflow) 845 | ret |= BF_ST_UNDERFLOW; 846 | if (status & MPD_Invalid_operation) 847 | ret |= BF_ST_INVALID_OP; 848 | 849 | mpd_to_bfdec(r, r1); 850 | 851 | mpd_del(a1); 852 | mpd_del(b1); 853 | mpd_del(r1); 854 | 855 | return ret; 856 | } 857 | 858 | 859 | int bf_exec_op(MPFTestOPEnum op, bf_t *r, bf_t *a, bf_t *b, 860 | limb_t prec, bf_flags_t 
flags, int64_t *pcycles) 861 | { 862 | int ret = 0; 863 | 864 | *pcycles -= get_cycles(); 865 | switch(op) { 866 | case BF_OP_MUL: 867 | ret = bf_mul(r, a, b, prec, flags); 868 | break; 869 | case BF_OP_ADD: 870 | ret = bf_add(r, a, b, prec, flags); 871 | break; 872 | case BF_OP_SUB: 873 | ret = bf_sub(r, a, b, prec, flags); 874 | break; 875 | case BF_OP_RINT: 876 | bf_set(r, a); 877 | ret = bf_rint(r, flags); 878 | break; 879 | case BF_OP_ROUND: 880 | bf_set(r, a); 881 | ret = bf_round(r, prec, flags); 882 | break; 883 | case BF_OP_CMP_EQ: 884 | ret = bf_cmp_eq(a, b); 885 | break; 886 | case BF_OP_CMP_LT: 887 | ret = bf_cmp_lt(a, b); 888 | break; 889 | case BF_OP_CMP_LE: 890 | ret = bf_cmp_le(a, b); 891 | break; 892 | case BF_OP_DIV: 893 | ret = bf_div(r, a, b, prec, flags); 894 | break; 895 | case BF_OP_FMOD: 896 | ret = bf_rem(r, a, b, prec, flags, BF_RNDZ); 897 | break; 898 | case BF_OP_REM: 899 | ret = bf_rem(r, a, b, prec, flags, BF_RNDN); 900 | break; 901 | case BF_OP_SQRT: 902 | ret = bf_sqrt(r, a, prec, flags); 903 | break; 904 | case BF_OP_OR: 905 | bf_logic_or(r, a, b); 906 | break; 907 | case BF_OP_XOR: 908 | bf_logic_xor(r, a, b); 909 | break; 910 | case BF_OP_AND: 911 | bf_logic_and(r, a, b); 912 | break; 913 | case BF_OP_EXP: 914 | ret = bf_exp(r, a, prec, flags); 915 | break; 916 | case BF_OP_LOG: 917 | ret = bf_log(r, a, prec, flags); 918 | break; 919 | case BF_OP_COS: 920 | ret = bf_cos(r, a, prec, flags); 921 | break; 922 | case BF_OP_SIN: 923 | ret = bf_sin(r, a, prec, flags); 924 | break; 925 | case BF_OP_TAN: 926 | ret = bf_tan(r, a, prec, flags); 927 | break; 928 | case BF_OP_ATAN: 929 | ret = bf_atan(r, a, prec, flags); 930 | break; 931 | case BF_OP_ATAN2: 932 | ret = bf_atan2(r, a, b, prec, flags); 933 | break; 934 | case BF_OP_ASIN: 935 | ret = bf_asin(r, a, prec, flags); 936 | break; 937 | case BF_OP_ACOS: 938 | ret = bf_acos(r, a, prec, flags); 939 | break; 940 | case BF_OP_POW: 941 | ret = bf_pow(r, a, b, prec, flags); 942 | break; 943 
| default: 944 | abort(); 945 | } 946 | *pcycles += get_cycles(); 947 | return ret; 948 | } 949 | 950 | int bfdec_exec_op(MPFTestOPEnum op, bfdec_t *r, 951 | const bfdec_t *a, const bfdec_t *b, 952 | limb_t prec, bf_flags_t flags, int64_t *pcycles) 953 | { 954 | int ret; 955 | 956 | *pcycles -= get_cycles(); 957 | switch(op) { 958 | case BF_OP_ADD_DEC: 959 | ret = bfdec_add(r, a, b, prec, flags); 960 | break; 961 | case BF_OP_MUL_DEC: 962 | ret = bfdec_mul(r, a, b, prec, flags); 963 | break; 964 | case BF_OP_DIV_DEC: 965 | ret = bfdec_div(r, a, b, prec, flags); 966 | break; 967 | case BF_OP_SQRT_DEC: 968 | ret = bfdec_sqrt(r, a, prec, flags); 969 | break; 970 | case BF_OP_FMOD_DEC: 971 | ret = bfdec_rem(r, a, b, prec, flags, BF_RNDZ); 972 | break; 973 | case BF_OP_RINT_DEC: 974 | bfdec_set(r, a); 975 | ret = bfdec_rint(r, flags); 976 | break; 977 | default: 978 | abort(); 979 | } 980 | *pcycles += get_cycles(); 981 | return ret; 982 | } 983 | 984 | void print_status(int status) 985 | { 986 | printf("%c%c%c%c%c", 987 | (status & BF_ST_INVALID_OP) ? 'I' : '-', 988 | (status & BF_ST_DIVIDE_ZERO) ? 'Z' : '-', 989 | (status & BF_ST_OVERFLOW) ? 'O' : '-', 990 | (status & BF_ST_UNDERFLOW) ? 'U' : '-', 991 | (status & BF_ST_INEXACT) ? 
'X' : '-'); 992 | } 993 | 994 | static BOOL bf_is_same(const bf_t *a, const bf_t *b) 995 | { 996 | return a->sign == b->sign && bf_cmpu(a, b) == 0; 997 | } 998 | 999 | void test_atof(limb_t prec, int duration_ms, 1000 | int exp_bits, bf_rnd_t rnd_mode, int seed) 1001 | { 1002 | DynBuf dbuf; 1003 | int radix, it, c, e, status, ref_status, err, rnd_mode1, test_loop; 1004 | mp_randstate_t rnd_state; 1005 | slimb_t n_digits, prec1, i; 1006 | char *str; 1007 | bf_t r, r_ref; 1008 | int64_t ti, ti_ref, nb_limbs, start_time; 1009 | 1010 | mp_randinit(&rnd_state, seed); 1011 | 1012 | bf_init(&bf_ctx, &r); 1013 | bf_init(&bf_ctx, &r_ref); 1014 | ti = 0; 1015 | ti_ref = 0; 1016 | start_time = get_clock_msec(); 1017 | test_loop = 1; 1018 | it = 0; 1019 | for(;;) { 1020 | /* build a random string representing a number */ 1021 | if (mp_random64(&rnd_state) & 1) 1022 | radix = (mp_random64(&rnd_state) % 35) + 2; 1023 | else 1024 | radix = 10; 1025 | prec1 = (limb_t)ceil(prec / log2(radix)); 1026 | n_digits = mp_random64(&rnd_state) % (prec1 * 3) + 1; 1027 | dbuf_init(&dbuf); 1028 | if (mp_random64(&rnd_state) & 1) 1029 | dbuf_putc(&dbuf, '-'); 1030 | 1031 | for(i = 0; i < n_digits; i++) { 1032 | c = mp_random64(&rnd_state) % radix; 1033 | if (c < 10) 1034 | c += '0'; 1035 | else 1036 | c += 'a' - 10; 1037 | dbuf_putc(&dbuf, c); 1038 | } 1039 | if (radix == 10) 1040 | dbuf_putc(&dbuf, 'e'); 1041 | else 1042 | dbuf_putc(&dbuf, '@'); 1043 | e = prec1 * 20; 1044 | e = (mp_random64(&rnd_state) % (2 * e + 1)) - e; 1045 | dbuf_printf(&dbuf, "%d", e); 1046 | dbuf_putc(&dbuf, '\0'); 1047 | str = (char *)dbuf.buf; 1048 | 1049 | ti -= get_cycles(); 1050 | status = bf_atof(&r, str, NULL, radix, prec, rnd_mode) & 1051 | BF_ST_INEXACT; 1052 | ti += get_cycles(); 1053 | rnd_mode1 = rnd_mode; 1054 | if (rnd_mode == BF_RNDF) 1055 | rnd_mode1 = BF_RNDD; 1056 | 1057 | ti_ref -= get_cycles(); 1058 | ref_status = mpfr_exec_setstr(&r_ref, str, radix, prec, rnd_mode1); 1059 | ti_ref += get_cycles(); 
/* Randomized test of bf_ftoa() against a reference string built from
 * mpfr_get_str().
 *
 * prec        : precision in bits; it sets the number of requested digits
 *               and bounds the size and exponent of the random inputs
 * duration_ms : the loop stops once this much time has elapsed
 * exp_bits    : not used by this function
 * rnd_mode    : rounding mode given to both implementations
 * seed        : seed of the random generator
 *
 * Any mismatch between the two strings prints a report and exits with
 * status 1. On success one line is printed: the iteration count followed
 * by the cycles per iteration and per 64-bit limb for libbf and for MPFR.
 */
void test_ftoa(limb_t prec, int duration_ms,
               int exp_bits, bf_rnd_t rnd_mode, int seed)
{
    int radix, it, e, test_loop;
    mp_randstate_t rnd_state;
    slimb_t n_digits, prec1, nb_limbs;
    char *r_str, *r_ref_str;
    bf_t a;
    int64_t ti, ti_ref, start_time;

    mp_randinit(&rnd_state, seed);
    bf_init(&bf_ctx, &a);
    ti_ref = 0;
    ti = 0;
    start_time = get_clock_msec();
    test_loop = 1;
    it = 0;
    for(;;) {
        /* build a random string representing a number */
        /* The '&& 0' disables the random radix, so radix is always 10.
           mp_random64() is the left operand and is therefore still
           called: one random value is consumed per iteration. */
        if ((mp_random64(&rnd_state) & 1) && 0)
            radix = (mp_random64(&rnd_state) % 35) + 2;
        else
            radix = 10;
        /* number of digits requested from both implementations */
        n_digits = (limb_t)ceil(prec / log2(radix));
        /* the input has a random size of 2 to 3 * prec + 1 bits
           (assuming the second argument of bf_rrandom() is a bit
           count -- TODO confirm) */
        prec1 = mp_random64(&rnd_state) % (3 * prec) + 2;
        bf_rrandom(&a, prec1, &rnd_state);
        /* shift the exponent by a random amount in [-e, e], except for
           zero */
        e = prec * 20;
        if (a.expn != BF_EXP_ZERO)
            a.expn += (mp_random64(&rnd_state) % (2 * e + 1)) - e;
        /* only the conversion itself is timed */
        ti -= get_cycles();
        r_str = bf_ftoa(NULL, &a, radix, n_digits, rnd_mode |
                        BF_FTOA_FORMAT_FIXED | BF_FTOA_FORCE_EXP);
        ti += get_cycles();
        {
            mpfr_t a1;
            mpfr_exp_t expn;
            DynBuf s_s, *s = &s_s;
            char *str, *p;
            slimb_t i;
            BOOL is_zero;

            /* at least one limb so that a zero 'a' still gets a valid
               MPFR precision */
            mpfr_init2(a1, bf_max(a.len, 1) * LIMB_BITS);
            bf_to_mpfr(a1, &a);
            ti_ref -= get_cycles();
            str = mpfr_get_str(NULL, &expn, radix, n_digits, a1,
                               mpfr_get_rnd_mode(rnd_mode));
            ti_ref += get_cycles();
            /* add the decimal point and exponent */
            /* the value counts as zero when the first n_digits
               characters of the MPFR string are all '0' */
            is_zero = TRUE;
            for(i = 0; i < n_digits; i++) {
                if (str[i] != '0') {
                    is_zero = FALSE;
                    break;
                }
            }
            dbuf_init(s);
            p = str;
            /* copy the optional sign, then the leading digit */
            if (*p == '-')
                dbuf_putc(s, *p++);
            dbuf_putc(s, *p++);
            if (n_digits > 1) {
                dbuf_putc(s, '.');
                for(i = 1; i < n_digits; i++) {
                    dbuf_putc(s, *p++);
                }
            }
            /* mpfr_get_str() places the radix point before the first
               digit whereas the string built here places it after, hence
               the exponent is one less (unless the value is zero) */
            if (!is_zero)
                expn--;
            /* power-of-two radices up to 16 get a binary exponent
               introduced by 'p'; the others use 'e' (radix <= 10)
               or '@' */
            if ((radix & (radix - 1)) == 0 && radix <= 16) {
                int radix_bits = 1;
                while ((1 << radix_bits) != radix)
                    radix_bits++;
                dbuf_printf(s, "p%" PRId64 , (int64_t)(expn * radix_bits));
            } else {
                dbuf_printf(s, "%c%" PRId64 , radix <= 10 ? 'e' : '@', (int64_t)expn);
            }
            dbuf_putc(s, '\0');

            /* the DynBuf storage becomes the reference string; it is
               released with free() below */
            r_ref_str = (char *)s->buf;
            mpfr_clear(a1);
            mpfr_free_str(str);
        }

        if (strcmp(r_ref_str, r_str) != 0) {
            printf("\nERROR (%d):\n", it);
            printf("radix=%d\n", radix);
            bf_print_str("a ", &a);
            printf("r =%s\n", r_str);
            printf("ref=%s\n", r_ref_str);
            exit(1);
        }
        free(r_str);
        free(r_ref_str);
        it++;
        /* test_loop is a power of two that doubles after each check, so
           the clock is only read when 'it' is a multiple of it */
        if ((it & (test_loop - 1)) == 0) {
            if ((get_clock_msec() - start_time) >= duration_ms)
                break;
            test_loop *= 2;
        }
    }
    bf_delete(&a);

    nb_limbs = (prec + 63) / 64;
    printf(" %8u %8.1f %8.1f\n",
           it,
           (double)ti / it / nb_limbs,
           (double)ti_ref / it / nb_limbs);
}
&a_rounded); 1243 | if (res1) { 1244 | printf("\nERROR (%d):\n", it); 1245 | printf("k=%" PRId64 "\n", (int64_t)k); 1246 | bf_print_str("a ", &a); 1247 | bf_print_str("a_rnd", &a_rounded); 1248 | bf_print_str("e ", &c); 1249 | bf_print_str("b ", &b); 1250 | exit(1); 1251 | } 1252 | } 1253 | } 1254 | it++; 1255 | if ((it & (test_loop - 1)) == 0) { 1256 | if ((get_clock_msec() - start_time) >= duration_ms) 1257 | break; 1258 | test_loop *= 2; 1259 | } 1260 | } 1261 | bf_delete(&a); 1262 | bf_delete(&a_rounded); 1263 | bf_delete(&b); 1264 | bf_delete(&c); 1265 | printf(" %8u\n", it); 1266 | } 1267 | 1268 | void test_mul_log2(int duration_ms, BOOL is_inv, BOOL is_ceil, int seed) 1269 | { 1270 | mp_randstate_t rnd_state; 1271 | int it, radix, err, test_loop; 1272 | slimb_t a, v_max, r, r_ref, prec, d; 1273 | mpfr_t a1, log2_radix[BF_RADIX_MAX - 1]; 1274 | int64_t start_time; 1275 | 1276 | mp_randinit(&rnd_state, seed); 1277 | prec = 256; 1278 | mpfr_init2(a1, prec); 1279 | 1280 | for(radix = 2; radix <= BF_RADIX_MAX; radix++) { 1281 | mpfr_init2(log2_radix[radix - 2], prec); 1282 | mpfr_set_ui(a1, radix, MPFR_RNDN); 1283 | mpfr_log2(log2_radix[radix - 2], a1, MPFR_RNDN); 1284 | } 1285 | 1286 | if (is_inv) 1287 | v_max = BF_PREC_MAX; 1288 | else 1289 | v_max = BF_PREC_MAX / 6; 1290 | start_time = get_clock_msec(); 1291 | test_loop = 1; 1292 | it = 0; 1293 | for(;;) { 1294 | for(radix = 2; radix <= BF_RADIX_MAX; radix++) { 1295 | a = (mp_random64(&rnd_state) % (2 * v_max + 1)) - v_max; 1296 | r = bf_mul_log2_radix(a, radix, is_inv, is_ceil); 1297 | 1298 | mpfr_set_si(a1, a, MPFR_RNDN); 1299 | if (is_inv) 1300 | mpfr_div(a1, a1, log2_radix[radix - 2], MPFR_RNDN); 1301 | else 1302 | mpfr_mul(a1, a1, log2_radix[radix - 2], MPFR_RNDN); 1303 | if (is_ceil) 1304 | mpfr_ceil(a1, a1); 1305 | else 1306 | mpfr_floor(a1, a1); 1307 | r_ref = mpfr_get_si(a1, MPFR_RNDN); 1308 | if (is_inv) { 1309 | err = (r != r_ref); 1310 | } else { 1311 | d = r - r_ref; 1312 | err = (d > 1 || d < 
-1); 1313 | } 1314 | if (err) { 1315 | printf("\nERROR (%d):\n", it); 1316 | printf("a=%" PRId64 " radix=%d inv=%d ceil=%d res=%" PRId64 " ref=%" PRId64 "\n", 1317 | (int64_t)a, radix, is_inv, is_ceil, 1318 | (int64_t)r, (int64_t)r_ref); 1319 | exit(1); 1320 | } 1321 | } 1322 | it++; 1323 | if ((it & (test_loop - 1)) == 0) { 1324 | if ((get_clock_msec() - start_time) >= duration_ms) 1325 | break; 1326 | test_loop *= 2; 1327 | } 1328 | } 1329 | 1330 | for(radix = 2; radix <= BF_RADIX_MAX; radix++) 1331 | mpfr_clear(log2_radix[radix - 2]); 1332 | mpfr_clear(a1); 1333 | printf(" %8u\n", it); 1334 | } 1335 | 1336 | void test_op_rm_dec(MPFTestOPEnum op, limb_t rprec, int duration_ms, 1337 | int exp_bits, bf_rnd_t rnd_mode, int seed) 1338 | { 1339 | bfdec_t a, b, r, r_ref; 1340 | uint32_t status, ref_status; 1341 | int op_count, test_loop, it; 1342 | int nb_limbs; 1343 | int64_t ti, ti_ref; 1344 | mp_randstate_t rnd_state; 1345 | BOOL res; 1346 | bf_rnd_t rnd_mode1; 1347 | bf_flags_t bf_flags; 1348 | int64_t start_time; 1349 | limb_t prec; 1350 | 1351 | bf_flags = rnd_mode | bf_set_exp_bits(exp_bits); 1352 | 1353 | mp_randinit(&rnd_state, seed); 1354 | bfdec_init(&bf_ctx, &a); 1355 | bfdec_init(&bf_ctx, &b); 1356 | bfdec_init(&bf_ctx, &r); 1357 | bfdec_init(&bf_ctx, &r_ref); 1358 | bfdec_set_ui(&b, 0); 1359 | bfdec_set_ui(&r, 0); 1360 | bfdec_set_ui(&r_ref, 0); 1361 | 1362 | ti = 0; 1363 | ti_ref = 0; 1364 | start_time = get_clock_msec(); 1365 | test_loop = 1; 1366 | it = 0; 1367 | for(;;) { 1368 | if (rprec == 0) { 1369 | prec = (mp_random64(&rnd_state) % 1000) + 24; 1370 | } else { 1371 | prec = rprec; 1372 | } 1373 | switch(op) { 1374 | case BF_OP_RINT_DEC: 1375 | case BF_OP_SQRT_DEC: 1376 | op_count = 1; 1377 | break; 1378 | default: 1379 | op_count = 2; 1380 | break; 1381 | } 1382 | if (op_count == 1) { 1383 | if (it < SPECIAL_COUNT) { 1384 | set_special_dec(&a, it); 1385 | } else { 1386 | limb_t prec1; 1387 | 1388 | prec1 = mp_random64(&rnd_state) % (3 * prec) + 1; 
1389 | bfdec_rrandom(&a, prec1, &rnd_state); 1390 | if (a.expn != BF_EXP_ZERO) 1391 | a.expn += prec1 / 2; 1392 | if (op == BF_OP_SQRT_DEC) { 1393 | a.sign = 0; 1394 | } else { 1395 | a.sign = mp_random64(&rnd_state) & 1; 1396 | } 1397 | } 1398 | } else { 1399 | if (it < SPECIAL_COUNT * SPECIAL_COUNT) { 1400 | set_special_dec(&a, it % SPECIAL_COUNT); 1401 | set_special_dec(&b, it / SPECIAL_COUNT); 1402 | } else { 1403 | bfdec_rrandom_large(&a, prec, &rnd_state); 1404 | bfdec_rrandom_large(&b, prec, &rnd_state); 1405 | } 1406 | } 1407 | 1408 | if (op == BF_OP_DIVREM_DEC) { 1409 | bfdec_t q, a_ref; 1410 | bfdec_init(&bf_ctx, &q); 1411 | bfdec_init(&bf_ctx, &a_ref); 1412 | bfdec_divrem(&q, &r, &a, &b, BF_PREC_INF, BF_RNDZ, rnd_mode); 1413 | if (bf_is_finite((bf_t *)&r) && 1414 | bf_is_finite((bf_t *)&a) && 1415 | bf_is_finite((bf_t *)&b)) { 1416 | bfdec_mul(&a_ref, &q, &b, BF_PREC_INF, BF_RNDZ); 1417 | bfdec_add(&a_ref, &a_ref, &r, BF_PREC_INF, BF_RNDZ); 1418 | res = !bfdec_cmp_eq(&a, &a_ref); 1419 | if (res) { 1420 | printf("\nERROR (%d):\n", it); 1421 | bfdec_print_str("a ", &a); 1422 | bfdec_print_str("b ", &b); 1423 | bfdec_print_str("q ", &q); 1424 | bfdec_print_str("r ", &r); 1425 | bfdec_print_str("a_ref", &a_ref); 1426 | exit(1); 1427 | } 1428 | } 1429 | bfdec_delete(&q); 1430 | bfdec_delete(&a_ref); 1431 | } else { 1432 | // bfdec_print_str("a", &a); 1433 | // bfdec_print_str("b", &b); 1434 | status = bfdec_exec_op(op, &r, &a, &b, prec, bf_flags, &ti); 1435 | // bfdec_print_str("r", &r); 1436 | 1437 | rnd_mode1 = rnd_mode; 1438 | ref_status = mpdecimal_exec_op(op, &r_ref, &a, &b, prec, rnd_mode1, 1439 | &ti_ref); 1440 | 1441 | if (op == BF_OP_CMP_EQ || 1442 | op == BF_OP_CMP_LE || 1443 | op == BF_OP_CMP_LT) { 1444 | res = (status != ref_status); 1445 | } else { 1446 | res = (bfdec_cmp_full(&r, &r_ref) != 0); 1447 | if ((status & BF_ST_INEXACT) != 1448 | (ref_status & BF_ST_INEXACT)) 1449 | res = 1; 1450 | } 1451 | 1452 | if (res) { 1453 | printf("\nERROR 
(%d):\n", it); 1454 | 1455 | bfdec_print_str("a ", &a); 1456 | if (op_count > 1) { 1457 | bfdec_print_str("b ", &b); 1458 | } 1459 | bfdec_print_str("r ", &r); 1460 | bfdec_print_str("ref", &r_ref); 1461 | printf("st ="); print_status(status); printf("\n"); 1462 | printf("ref_st="); print_status(ref_status); printf("\n"); 1463 | exit(1); 1464 | } 1465 | } 1466 | 1467 | it++; 1468 | if ((it & (test_loop - 1)) == 0) { 1469 | if ((get_clock_msec() - start_time) >= duration_ms) 1470 | break; 1471 | test_loop *= 2; 1472 | } 1473 | } 1474 | 1475 | nb_limbs = (prec + 63) / 64; 1476 | printf(" %8u %8.1f %8.1f\n", 1477 | it, 1478 | (double)ti / it / nb_limbs, 1479 | (double)ti_ref / it / nb_limbs); 1480 | 1481 | bfdec_delete(&a); 1482 | bfdec_delete(&b); 1483 | bfdec_delete(&r); 1484 | bfdec_delete(&r_ref); 1485 | } 1486 | 1487 | static void test_mp_sqrtrem(limb_t rprec, int duration_ms, int seed) 1488 | { 1489 | int it, test_loop; 1490 | int64_t start_time, ti; 1491 | limb_t *tabs, *tabr, *taba, *tabb, c; 1492 | slimb_t n, i, n_max; 1493 | mp_randstate_t rnd_state; 1494 | 1495 | n_max = rprec; 1496 | 1497 | mp_randinit(&rnd_state, seed); 1498 | taba = malloc(2 * n_max * sizeof(limb_t)); 1499 | tabb = malloc(2 * n_max * sizeof(limb_t)); 1500 | tabs = malloc(n_max * sizeof(limb_t)); 1501 | tabr = malloc(2 * n_max * sizeof(limb_t)); 1502 | 1503 | test_loop = 1; 1504 | it = 0; 1505 | start_time = get_clock_msec(); 1506 | ti = 0; 1507 | for(;;) { 1508 | n = (mp_random64(&rnd_state) % n_max) + 1; 1509 | 1510 | mp_rrandom(taba, 2 * n * LIMB_BITS, &rnd_state); 1511 | taba[2 * n - 1] |= (limb_t)1 << (LIMB_BITS - 2); 1512 | 1513 | for(i = 0; i < n * 2; i++) 1514 | tabr[i] = taba[i]; 1515 | ti -= get_cycles(); 1516 | mp_sqrtrem(&bf_ctx, tabs, tabr, n); 1517 | ti += get_cycles(); 1518 | 1519 | /* check the result */ 1520 | mp_mul(&bf_ctx, tabb, tabs, n, tabs, n); 1521 | c = mp_add(tabb, tabb, tabr, n + 1, 0); 1522 | c = mp_add_ui(tabb + n + 1, c, n - 1); 1523 | if (mp_cmp(taba, n * 2, 
tabb, n * 2) != 0) 1524 | goto error; 1525 | tabb[n] = mp_add(tabb, tabs, tabs, n, 0); 1526 | if (mp_cmp(tabr, n + 1, tabb, n + 1) > 0) { 1527 | error: 1528 | printf("ERROR %d\n", it); 1529 | mp_print_str("a", taba, n * 2); 1530 | mp_print_str("s", tabs, n); 1531 | mp_print_str("r", tabr, n + 1); 1532 | exit(1); 1533 | } 1534 | 1535 | it++; 1536 | if (it == test_loop) { 1537 | if ((get_clock_msec() - start_time) >= duration_ms) 1538 | break; 1539 | test_loop *= 2; 1540 | } 1541 | } 1542 | printf(" %8u %8.1f\n", 1543 | it, 1544 | (double)ti / it / n); 1545 | free(taba); 1546 | free(tabb); 1547 | free(tabr); 1548 | free(tabs); 1549 | } 1550 | 1551 | static void test_mp_recip(limb_t rprec, int duration_ms, int seed) 1552 | { 1553 | int it, test_loop, incr; 1554 | int64_t start_time, ti; 1555 | limb_t *tabr, *taba, *tabb, *tabc; 1556 | slimb_t n, n_max, i; 1557 | mp_randstate_t rnd_state; 1558 | 1559 | n_max = rprec; 1560 | 1561 | mp_randinit(&rnd_state, seed); 1562 | taba = malloc(n_max * sizeof(limb_t)); 1563 | tabb = malloc((2 * n_max + 1) * sizeof(limb_t)); 1564 | tabc = malloc((n_max + 1) * sizeof(limb_t)); 1565 | tabr = malloc((n_max + 1) * sizeof(limb_t)); 1566 | 1567 | test_loop = 1; 1568 | it = 0; 1569 | start_time = get_clock_msec(); 1570 | ti = 0; 1571 | for(;;) { 1572 | n = (mp_random64(&rnd_state) % n_max) + 1; 1573 | 1574 | mp_rrandom(taba, n * LIMB_BITS, &rnd_state); 1575 | taba[n - 1] |= (limb_t)1 << (LIMB_BITS - 1); 1576 | 1577 | ti -= get_cycles(); 1578 | mp_recip(&bf_ctx, tabr, taba, n); 1579 | ti += get_cycles(); 1580 | 1581 | /* check the result */ 1582 | mp_mul(&bf_ctx, tabb, tabr, n + 1, taba, n); 1583 | incr = 0; 1584 | if (tabb[2 * n] >= 1) 1585 | goto error; 1586 | 1587 | for(i = 0; i < n + 1; i++) 1588 | tabc[i] = tabr[i]; 1589 | mp_add_ui(tabc, 2, n + 1); 1590 | mp_mul(&bf_ctx, tabb, tabc, n + 1, taba, n); 1591 | 1592 | incr = 2; 1593 | if (tabb[2 * n] < 1) { 1594 | error: 1595 | printf("ERROR %d\n", it); 1596 | printf("n=%d incr=%d\n", 
/* Test one operation with one rounding mode.
 *
 * op          : operation to test
 * rprec       : precision; 0 selects a new random precision at each
 *               iteration of the generic loop
 * duration_ms : the generic loop stops once this much time has elapsed
 * exp_bits    : number of exponent bits, stored in the libbf flags
 * rnd_mode    : rounding mode under test
 * seed        : seed of the random generator
 *
 * The start of the result line (operation name, precision, exponent bits,
 * rounding mode, seed) is printed first. The operations which have their
 * own test function are forwarded to it, and that function prints the
 * rest of the line. The other operations go through the generic loop:
 * libbf (bf_exec_op) is compared with the softfp reference when the
 * format is rprec == 53 and exp_bits == 11, and with the MPFR reference
 * otherwise. A mismatch prints a report and exits with status 1.
 */
void test_op_rm(MPFTestOPEnum op, limb_t rprec, int duration_ms,
                int exp_bits, bf_rnd_t rnd_mode, int seed)
{
    bf_t a, b, r, r_ref;
    int op_count, status, ref_status, test_loop, it, it_perf;
    int nb_limbs;
    int64_t ti, ti_ref, ti_dummy;
    mp_randstate_t rnd_state;
    BOOL res, use_float64_ref;
    bf_rnd_t rnd_mode1;
    bf_flags_t bf_flags;
    int64_t start_time;
    limb_t prec;

    printf("%-20s %5d %3d %3s %5d", op_str[op], (int)rprec, exp_bits,
           rnd_str[rnd_mode], seed);
    /* no newline yet: make the partial line visible while the test runs */
    fflush(stdout);

    /* operations with a dedicated test function */
    switch(op) {
    case BF_OP_MP_SQRTREM:
        test_mp_sqrtrem(rprec, duration_ms, seed);
        return;
    case BF_OP_MP_RECIP:
        test_mp_recip(rprec, duration_ms, seed);
        return;
    case BF_OP_ATOF:
        test_atof(rprec, duration_ms, exp_bits, rnd_mode, seed);
        return;
    case BF_OP_FTOA:
        test_ftoa(rprec, duration_ms, exp_bits, rnd_mode, seed);
        return;
    case BF_OP_CAN_ROUND:
        test_can_round(rprec, duration_ms, rnd_mode, seed);
        return;
    case BF_OP_MUL_L2RADIX:
    case BF_OP_DIV_L2RADIX:
        /* the rounding mode only selects ceil (BF_RNDU) or floor */
        test_mul_log2(duration_ms, (op == BF_OP_DIV_L2RADIX), rnd_mode == BF_RNDU, seed);
        return;
    case BF_OP_ADD_DEC:
    case BF_OP_MUL_DEC:
    case BF_OP_DIV_DEC:
    case BF_OP_SQRT_DEC:
    case BF_OP_FMOD_DEC:
    case BF_OP_DIVREM_DEC:
    case BF_OP_RINT_DEC:
        test_op_rm_dec(op, rprec, duration_ms, exp_bits, rnd_mode, seed);
        return;
    default:
        break;
    }

    /* the softfp reference replaces MPFR for this format, and subnormal
       numbers are then enabled in libbf */
    use_float64_ref = (rprec == 53 && exp_bits == 11);
    bf_flags = rnd_mode | bf_set_exp_bits(exp_bits);
    if (use_float64_ref)
        bf_flags |= BF_FLAG_SUBNORMAL;

    mp_randinit(&rnd_state, seed);
    bf_init(&bf_ctx, &a);
    bf_init(&bf_ctx, &b);
    bf_init(&bf_ctx, &r);
    bf_init(&bf_ctx, &r_ref);
    /* b and the results are given a defined value because some
       operations leave them untouched */
    bf_set_ui(&b, 0);
    bf_set_ui(&r, 0);
    bf_set_ui(&r_ref, 0);
    ti = 0;
    ti_ref = 0;
    ti_dummy = 0;
    start_time = get_clock_msec();
    test_loop = 1;
    it = 0;
    it_perf = 0;
    for(;;) {
        if (rprec == 0) {
            /* random precision between 24 and 1023 */
            prec = (mp_random64(&rnd_state) % 1000) + 24;
        } else {
            prec = rprec;
        }
        /* number of operands of the operation */
        switch(op) {
        case BF_OP_RINT:
        case BF_OP_SQRT:
        case BF_OP_EXP:
        case BF_OP_LOG:
        case BF_OP_COS:
        case BF_OP_SIN:
        case BF_OP_TAN:
        case BF_OP_ATAN:
        case BF_OP_ASIN:
        case BF_OP_ACOS:
            op_count = 1;
            break;
        default:
            op_count = 2;
            break;
        }

        if (op_count == 1) {
            /* the first SPECIAL_COUNT iterations go through the special
               values, the following ones use random operands */
            if (it < SPECIAL_COUNT) {
                set_special(&a, it);
            } else {
                limb_t prec1;

                if (use_float64_ref) {
                    Float64Union u;
                    u.u = rrandom_sf64(&rnd_state);
                    bf_set_float64(&a, u.d);
                } else {
                    prec1 = mp_random64(&rnd_state) % (3 * prec) + 1;
                    bf_rrandom(&a, prec1, &rnd_state);
                    if (op == BF_OP_COS || op == BF_OP_SIN || op == BF_OP_TAN) {
                        /* a = a + k * pi / 2 with k in [-1000, 999] */
                        int k;
                        bf_t c_s, *c = &c_s;
                        if (a.expn != BF_EXP_ZERO)
                            a.expn++;
                        k = (mp_random64(&rnd_state) % 2000) - 1000;
                        bf_init(&bf_ctx, c);
                        bf_const_pi(c, prec1 + 1, BF_RNDN);
                        c->expn--; /* pi/2 */
                        bf_mul_si(c, c, k, prec1 + 1, BF_RNDN);
                        bf_add(&a, &a, c, prec1, BF_RNDN);
                        bf_delete(c);
                    } else if (op == BF_OP_ACOS || op == BF_OP_ASIN) {
                        /* the exponent is left unchanged for these two
                           operations */
                    } else {
                        if (a.expn != BF_EXP_ZERO)
                            a.expn += prec1 / 2;
                    }
                }
                /* SQRT and LOG are only tested with a cleared sign bit */
                if (op == BF_OP_SQRT || op == BF_OP_LOG) {
                    a.sign = 0;
                } else {
                    a.sign = mp_random64(&rnd_state) & 1;
                }
            }
        } else if (op == BF_OP_OR ||
                   op == BF_OP_XOR ||
                   op == BF_OP_AND) {
            /* logic operations: random integers, no special values */
            bf_rrandom_int(&a, prec, &rnd_state);
            bf_rrandom_int(&b, prec, &rnd_state);
        } else {
            /* two operands: every pair of special values comes first */
            if (it < SPECIAL_COUNT * SPECIAL_COUNT) {
                set_special(&a, it % SPECIAL_COUNT);
                set_special(&b, it / SPECIAL_COUNT);
            } else {
                if (op == BF_OP_POW) {
                    bf_rrandom_large(&a, prec, &rnd_state);
                    /* one iteration out of ten uses an integer exponent */
                    if ((it % 10) == 0) {
                        bf_set_si(&b, (int32_t)mp_random64(&rnd_state));
                    } else {
                        bf_rrandom_large(&b, prec, &rnd_state);
                    }
                } else if (use_float64_ref) {
                    Float64Union u;
                    u.u = rrandom_sf64(&rnd_state);
                    bf_set_float64(&a, u.d);
                    u.u = rrandom_sf64(&rnd_state);
                    bf_set_float64(&b, u.d);
                } else {
                    bf_rrandom_large(&a, prec, &rnd_state);
                    bf_rrandom_large(&b, prec, &rnd_state);
                }
            }
        }

        status = bf_exec_op(op, &r, &a, &b, prec, bf_flags, &ti);
        // bf_print_str("r", &r);

        /* for BF_RNDF the reference is first computed with BF_RNDD */
        rnd_mode1 = rnd_mode;
        if (rnd_mode == BF_RNDF)
            rnd_mode1 = BF_RNDD;
        if (use_float64_ref) {
            ref_status = softfp_exec_op(op, &r_ref, &a, &b, prec, rnd_mode1, &ti_ref);
        } else {
            ref_status = mpfr_exec_op(op, &r_ref, &a, &b, prec, rnd_mode1, &ti_ref);
        }
        // bf_print_str("r_ref", &r_ref);

        if (op == BF_OP_CMP_EQ ||
            op == BF_OP_CMP_LE ||
            op == BF_OP_CMP_LT) {
            /* for the comparisons the returned value is the result */
            res = (status != ref_status);
        } else {
            res = !bf_is_same(&r, &r_ref);
            if (rnd_mode == BF_RNDF) {
                /* BF_RNDF: the BF_RNDU reference is accepted as well and
                   the inexact flag is not compared. The cycles of this
                   second reference run go to ti_dummy. */
                if (res) {
                    if (use_float64_ref) {
                        softfp_exec_op(op, &r_ref, &a, &b, prec, BF_RNDU, &ti_dummy);
                    } else {
                        mpfr_exec_op(op, &r_ref, &a, &b, prec, BF_RNDU, &ti_dummy);
                    }
                    res = !bf_is_same(&r, &r_ref);
                }
            } else {
                /* besides the value, only the inexact flag is compared */
                if ((status & BF_ST_INEXACT) !=
                    (ref_status & BF_ST_INEXACT))
                    res = 1;
            }
        }

        if (res) {
            printf("\nERROR (%d):\n", it);

            bf_print_str("a ", &a);
            if (op_count > 1) {
                bf_print_str("b ", &b);
            }
            bf_print_str("r ", &r);
            bf_print_str("ref", &r_ref);
            printf("st ="); print_status(status); printf("\n");
            printf("ref_st="); print_status(ref_status); printf("\n");
            exit(1);
        }
        /* excluding special value from CPU time */
        /* the cycle counters are reset while the special values are
           running; it_perf counts the remaining iterations */
        if ((op_count == 1 && it < SPECIAL_COUNT) ||
            (op_count == 2 && it < SPECIAL_COUNT * SPECIAL_COUNT)) {
            ti = 0;
            ti_ref = 0;
        } else {
            it_perf++;
        }

        it++;
        /* test_loop is a power of two that doubles after each check, so
           the clock is only read when 'it' is a multiple of it */
        if ((it & (test_loop - 1)) == 0) {
            if ((get_clock_msec() - start_time) >= duration_ms)
                break;
            test_loop *= 2;
        }
    }

    /* 'prec' is the precision of the last iteration, which matters when
       the precision is random */
    nb_limbs = (prec + 63) / 64;
    printf(" %8u %8.1f %8.1f\n",
           it,
           (double)ti / it_perf / nb_limbs,
           (double)ti_ref / it_perf / nb_limbs);

    bf_delete(&a);
    bf_delete(&b);
    bf_delete(&r);
    bf_delete(&r_ref);
}
1880 | case BF_OP_RINT: 1881 | case BF_OP_ROUND: 1882 | case BF_OP_SQRT: 1883 | case BF_OP_ATOF: 1884 | case BF_OP_EXP: 1885 | case BF_OP_LOG: 1886 | case BF_OP_COS: 1887 | case BF_OP_SIN: 1888 | case BF_OP_TAN: 1889 | case BF_OP_ATAN: 1890 | case BF_OP_ATAN2: 1891 | case BF_OP_ASIN: 1892 | case BF_OP_ACOS: 1893 | case BF_OP_POW: 1894 | rm_allowed[BF_RNDN] = 1; 1895 | rm_allowed[BF_RNDZ] = 1; 1896 | rm_allowed[BF_RNDU] = 1; 1897 | rm_allowed[BF_RNDD] = 1; 1898 | rm_allowed[BF_RNDF] = 1; 1899 | break; 1900 | case BF_OP_CAN_ROUND: 1901 | rm_allowed[BF_RNDN] = 1; 1902 | rm_allowed[BF_RNDZ] = 1; 1903 | rm_allowed[BF_RNDU] = 1; 1904 | rm_allowed[BF_RNDD] = 1; 1905 | rm_allowed[BF_RNDA] = 1; 1906 | rm_allowed[BF_RNDNA] = 1; 1907 | break; 1908 | case BF_OP_FTOA: 1909 | rm_allowed[BF_RNDN] = 1; 1910 | rm_allowed[BF_RNDZ] = 1; 1911 | rm_allowed[BF_RNDU] = 1; 1912 | rm_allowed[BF_RNDD] = 1; 1913 | rm_allowed[BF_RNDA] = 1; 1914 | break; 1915 | case BF_OP_SUB: 1916 | /* minimal test for SUB which is like ADD */ 1917 | rm_allowed[BF_RNDN] = 1; 1918 | break; 1919 | case BF_OP_MUL_L2RADIX: 1920 | case BF_OP_DIV_L2RADIX: 1921 | rm_allowed[BF_RNDU] = 1; 1922 | rm_allowed[BF_RNDD] = 1; 1923 | break; 1924 | case BF_OP_ADD_DEC: 1925 | case BF_OP_MUL_DEC: 1926 | case BF_OP_DIV_DEC: 1927 | case BF_OP_RINT_DEC: 1928 | rm_allowed[BF_RNDN] = 1; 1929 | rm_allowed[BF_RNDZ] = 1; 1930 | rm_allowed[BF_RNDU] = 1; 1931 | rm_allowed[BF_RNDD] = 1; 1932 | rm_allowed[BF_RNDA] = 1; 1933 | rm_allowed[BF_RNDNA] = 1; 1934 | break; 1935 | case BF_OP_SQRT_DEC: 1936 | rm_allowed[BF_RNDN] = 1; 1937 | //* bug in mpd_qsqrt() */ 1938 | // rm_allowed[BF_RNDZ] = 1; 1939 | // rm_allowed[BF_RNDU] = 1; 1940 | // rm_allowed[BF_RNDD] = 1; 1941 | break; 1942 | case BF_OP_FMOD_DEC: 1943 | break; /* bug in mpd_qrem() */ 1944 | case BF_OP_DIVREM_DEC: 1945 | rm_allowed[BF_RNDZ] = 1; 1946 | rm_allowed[BF_RNDN] = 1; 1947 | break; 1948 | default: 1949 | rm_allowed[BF_RNDZ] = 1; 1950 | break; 1951 | } 1952 | } 1953 | 
for(rnd_mode = 0; rnd_mode < countof(rm_allowed); rnd_mode++) { 1954 | if (rm_allowed[rnd_mode]) { 1955 | test_op_rm(op, prec, duration_ms, exp_bits, rnd_mode, seed); 1956 | } 1957 | } 1958 | } 1959 | /* Return the operation whose entry in op_str[] equals str. When no entry matches, an error naming str is written to stderr and the process exits with status 1. */ 1960 | static MPFTestOPEnum get_op_from_str(const char *str) 1961 | { 1962 | MPFTestOPEnum op; 1963 | for(op = 0; op < BF_OP_COUNT; op++) { 1964 | if (!strcmp(str, op_str[op])) 1965 | break; 1966 | } 1967 | if (op == BF_OP_COUNT) { 1968 | fprintf(stderr, "Unknown operation: %s\n", str); 1969 | exit(1); 1970 | } 1971 | return op; 1972 | } 1973 | /* Print the command-line usage text on stdout, then terminate with exit status 1. This function does not return. */ 1974 | void help(void) 1975 | { 1976 | printf("usage: bftest [options] [first_op [last_op]]\n" 1977 | "\n" 1978 | "Options:\n" 1979 | "-h this help\n" 1980 | "-s seed set the initial seed\n" 1981 | "-S single iteration of tests\n" 1982 | "-p prec force precision\n" 1983 | ); 1984 | exit(1); 1985 | } 1986 | /* Program entry point: parses the -h, -s, -S and -p options with getopt, takes up to two operation names (first and last) from the remaining arguments, calls mpfr_exec_init(), bf_context_init() and mpd_init(), and prints the header of the result table. */ 1987 | int main(int argc, char **argv) 1988 | { 1989 | int seed, duration_ms, c; 1990 | limb_t prec; 1991 | MPFTestOPEnum op, op_start, op_last; 1992 | BOOL short_test = FALSE; 1993 | /* defaults: fixed seed, 100 ms per test, prec == 0 means no precision was forced with -p */ 1994 | seed = 1234; 1995 | duration_ms = 100; 1996 | prec = 0; 1997 | for(;;) { 1998 | c = getopt(argc, argv, "hs:Sp:"); 1999 | if (c == -1) 2000 | break; 2001 | switch(c) { 2002 | case 'h': 2003 | help(); /* help() ends with exit(1), so control never falls through into the 's' case */ 2004 | case 's': 2005 | seed = strtoul(optarg, NULL, 0); 2006 | duration_ms = 1000; /* giving a seed also raises the per-test duration to 1000 ms */ 2007 | break; 2008 | case 'S': 2009 | short_test = TRUE; 2010 | break; 2011 | case 'p': 2012 | prec = (limb_t)strtod(optarg, NULL); /* strtod is used, so exponent notation such as 1e3 parses */ 2013 | break; 2014 | default: 2015 | exit(1); 2016 | } 2017 | } 2018 | /* without positional arguments the whole range of operations is selected */ 2019 | op_start = 0; 2020 | op_last = BF_OP_COUNT - 1; 2021 | if (optind < argc) 2022 | op_start = get_op_from_str(argv[optind++]); 2023 | if (optind < argc) 2024 | op_last = get_op_from_str(argv[optind++]); 2025 | 2026 | mpfr_exec_init(); 2027 | bf_context_init(&bf_ctx, my_bf_realloc, NULL); 2028 | mpd_init(&mpd_ctx, 16); 2029 | 2030 | printf("%-20s %5s %3s %3s %5s %8s %8s %8s\n", "OP", "PREC", "EXP", "RND", "SEED", "CNT", "c/64bit", "ref"); 2031 | 2032 | 
for(;;) { 2033 | for(op = op_start; op <= op_last; op++) { 2034 | if (prec != 0) { 2035 | test_op(op, prec, duration_ms, BF_EXP_BITS_MAX, seed); 2036 | } else { 2037 | if (op == BF_OP_MUL_L2RADIX || op == BF_OP_DIV_L2RADIX) { 2038 | test_op(op, LIMB_BITS, duration_ms, 0, seed); 2039 | } else if (op == BF_OP_CAN_ROUND) { 2040 | test_op(op, 8, duration_ms, BF_EXP_BITS_MAX, seed); 2041 | test_op(op, 53, duration_ms, BF_EXP_BITS_MAX, seed); 2042 | test_op(op, 256, duration_ms, BF_EXP_BITS_MAX, seed); 2043 | } else if (op >= BF_OP_ADD_DEC && op <= BF_OP_RINT_DEC) { 2044 | test_op(op, 16, duration_ms, BF_EXP_BITS_MAX, seed); 2045 | test_op(op, 100, duration_ms, BF_EXP_BITS_MAX, seed); 2046 | } else if (op == BF_OP_MP_SQRTREM || 2047 | op == BF_OP_MP_RECIP) { 2048 | test_op(op, 100, duration_ms, BF_EXP_BITS_MAX, seed); 2049 | } else { 2050 | if (op == BF_OP_MUL || 2051 | op == BF_OP_ADD || 2052 | op == BF_OP_DIV || 2053 | op == BF_OP_SQRT || 2054 | op == BF_OP_CMP_EQ || 2055 | op == BF_OP_CMP_LT || 2056 | op == BF_OP_CMP_LE) { 2057 | test_op(op, 53, duration_ms, 11, seed); 2058 | } 2059 | test_op(op, 53, duration_ms, BF_EXP_BITS_MAX, seed); 2060 | test_op(op, 112, duration_ms, BF_EXP_BITS_MAX, seed); 2061 | /* mpfr bug ? 
*/ 2062 | if (op != BF_OP_SQRT) 2063 | test_op(op, 256, duration_ms, BF_EXP_BITS_MAX, seed); 2064 | test_op(op, 3000, duration_ms, BF_EXP_BITS_MAX, seed); 2065 | } 2066 | } 2067 | } 2068 | seed++; 2069 | duration_ms = 1000; 2070 | if (short_test) 2071 | break; 2072 | } 2073 | return 0; 2074 | } 2075 | -------------------------------------------------------------------------------- /cutils.c: -------------------------------------------------------------------------------- 1 | /* 2 | * C utilities 3 | * 4 | * Copyright (c) 2017 Fabrice Bellard 5 | * Copyright (c) 2018 Charlie Gordon 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 
24 | */ 25 | #ifndef HAVE_CONFIG_H 26 | #include "quickjs-config.h" 27 | #else 28 | #include "config.h" 29 | #endif 30 | /* The header names of the next four directives were missing; they are restored from what this file uses: realloc (stdlib.h), fprintf and vsnprintf (stdio.h), va_list (stdarg.h), strlen, memset, memcpy and memcmp (string.h). */ 31 | #include <stdlib.h> 32 | #include <stdio.h> 33 | #include <stdarg.h> 34 | #include <string.h> 35 | 36 | #include "quickjs.h" 37 | #include "cutils.h" 38 | /* Report a failed assertion (message, file and line) on stderr, break into the debugger when IsDebuggerPresent() reports one, then deliberately store through a null pointer to raise a fault. The abort() fallback is compiled out with #if 0. */ 39 | void qjs_assert(const char* msg, const char* file, int line) 40 | { 41 | fprintf(stderr, "\nAssertion failed (%s, %d): %s\n", file, line, msg); 42 | fflush(stderr); 43 | if (IsDebuggerPresent()) 44 | DebugBreak(); 45 | 46 | fprintf(stderr, "Triggering SEH exception\n"); 47 | fflush(stderr); 48 | volatile int* pInt = 0x00000000; 49 | *pInt = 20; 50 | #if 0 51 | abort(); 52 | #endif 53 | } 54 | 55 | /* Copy str into buf, storing at most buf_size - 1 characters and always writing a terminating NUL. Nothing is written when buf_size <= 0. */ 56 | void pstrcpy(char *buf, int buf_size, const char *str) 57 | { 58 | int c; 59 | char *q = buf; 60 | 61 | if (buf_size <= 0) 62 | return; 63 | 64 | for(;;) { 65 | c = *str++; 66 | if (c == 0 || q >= buf + buf_size - 1) 67 | break; 68 | *q++ = c; 69 | } 70 | *q = '\0'; 71 | } 72 | 73 | /* strcat and truncate: append s to the string already in buf so that the result, terminator included, never exceeds buf_size bytes. Returns buf. */ 74 | char *pstrcat(char *buf, int buf_size, const char *s) 75 | { 76 | int len; 77 | len = strlen(buf); 78 | if (len < buf_size) 79 | pstrcpy(buf + len, buf_size - len, s); 80 | return buf; 81 | } 82 | /* Return 1 if str begins with val, 0 otherwise. On a match, and when ptr is not NULL, *ptr is set to the first character of str after the prefix. */ 83 | int strstart(const char *str, const char *val, const char **ptr) 84 | { 85 | const char *p, *q; 86 | p = str; 87 | q = val; 88 | while (*q != '\0') { 89 | if (*p != *q) 90 | return 0; 91 | p++; 92 | q++; 93 | } 94 | if (ptr) 95 | *ptr = p; 96 | return 1; 97 | } 98 | /* Return non-zero if str ends with suffix (byte-wise comparison of the last strlen(suffix) bytes). */ 99 | int has_suffix(const char *str, const char *suffix) 100 | { 101 | size_t len = strlen(str); 102 | size_t slen = strlen(suffix); 103 | return (len >= slen && !memcmp(str + len - slen, suffix, slen)); 104 | } 105 | 106 | /* Dynamic buffer package */ 107 | /* Allocator used when the caller supplies none: forwards to realloc() and ignores opaque. */ 108 | static void *dbuf_default_realloc(void *opaque, void *ptr, size_t size) 109 | { 110 | return realloc(ptr, size); 111 | } 112 | 113 | void dbuf_init2(DynBuf *s, void *opaque, DynBufReallocFunc *realloc_func) 114 | { 115 | memset(s, 0, sizeof(*s)); 116 | if (!realloc_func) 117 | realloc_func = 
dbuf_default_realloc; 118 | s->opaque = opaque; 119 | s->realloc_func = realloc_func; 120 | } 121 | /* Initialise s as an empty buffer; the NULL realloc_func passed to dbuf_init2() selects dbuf_default_realloc. */ 122 | void dbuf_init(DynBuf *s) 123 | { 124 | dbuf_init2(s, NULL, NULL); 125 | } 126 | /* Make sure at least new_size bytes are allocated. The buffer grows to the larger of new_size and 1.5 times the current allocation. After one allocation failure the error flag stays set and every later growth request fails as well. */ 127 | /* return < 0 if error */ 128 | int dbuf_realloc(DynBuf *s, size_t new_size) 129 | { 130 | size_t size; 131 | uint8_t *new_buf; 132 | if (new_size > s->allocated_size) { 133 | if (s->error) 134 | return -1; 135 | size = s->allocated_size * 3 / 2; 136 | if (size > new_size) 137 | new_size = size; 138 | new_buf = s->realloc_func(s->opaque, s->buf, new_size); 139 | if (!new_buf) { 140 | s->error = TRUE; 141 | return -1; 142 | } 143 | s->buf = new_buf; 144 | s->allocated_size = new_size; 145 | } 146 | return 0; 147 | } 148 | /* Copy len bytes from data to position offset, growing the buffer when needed. size is raised only if the write ends past the current end. Returns 0, or -1 if growing failed. NOTE(review): offset + len is not checked for wrap-around. */ 149 | int dbuf_write(DynBuf *s, size_t offset, const uint8_t *data, size_t len) 150 | { 151 | size_t end; 152 | end = offset + len; 153 | if (dbuf_realloc(s, end)) 154 | return -1; 155 | memcpy(s->buf + offset, data, len); 156 | if (end > s->size) 157 | s->size = end; 158 | return 0; 159 | } 160 | /* Append len bytes from data at the end of the buffer. Returns 0, or -1 if growing failed. */ 161 | int dbuf_put(DynBuf *s, const uint8_t *data, size_t len) 162 | { 163 | if (unlikely((s->size + len) > s->allocated_size)) { 164 | if (dbuf_realloc(s, s->size + len)) 165 | return -1; 166 | } 167 | memcpy(s->buf + s->size, data, len); 168 | s->size += len; 169 | return 0; 170 | } 171 | /* Append len bytes taken from the buffer itself, starting at offset. The source address is computed from s->buf after any reallocation, so it remains valid if the buffer moved. Returns 0, or -1 if growing failed. */ 172 | int dbuf_put_self(DynBuf *s, size_t offset, size_t len) 173 | { 174 | if (unlikely((s->size + len) > s->allocated_size)) { 175 | if (dbuf_realloc(s, s->size + len)) 176 | return -1; 177 | } 178 | memcpy(s->buf + s->size, s->buf + offset, len); 179 | s->size += len; 180 | return 0; 181 | } 182 | /* Append the single byte c. */ 183 | int dbuf_putc(DynBuf *s, uint8_t c) 184 | { 185 | return dbuf_put(s, &c, 1); 186 | } 187 | /* Append the characters of str, without its terminating NUL. */ 188 | int dbuf_putstr(DynBuf *s, const char *str) 189 | { 190 | return dbuf_put(s, (const uint8_t *)str, strlen(str)); 191 | } 192 | 193 | // __attribute__((format(printf, 2, 3))) 194 | int __js_printf_like(2, 3) dbuf_printf(DynBuf* s, const char* fmt, ...) 
195 | { /* Format into a 128-byte stack buffer first. If the text fits it is appended with dbuf_put(); otherwise the required length is measured and the text is formatted straight into the grown buffer. Returns 0 on success and -1 on allocation or formatting failure. */ 196 | va_list ap; 197 | char buf[128]; 198 | int len; 199 | 200 | va_start(ap, fmt); 201 | len = vsnprintf(buf, sizeof(buf), fmt, ap); 202 | va_end(ap); /* a negative len still takes the slow path, exactly as the former signed/unsigned comparison did: some C runtimes report truncation with a negative value */ 203 | if (len >= 0 && (size_t)len < sizeof(buf)) { 204 | /* fast case */ 205 | return dbuf_put(s, (uint8_t *)buf, len); 206 | } else { 207 | va_start(ap, fmt); 208 | int real_len = vsnprintf(0, 0, fmt, ap); 209 | va_end(ap); 210 | /* the measuring call failed: without this check the negative value would be added to s->size below */ if (real_len < 0) return -1; 211 | if (dbuf_realloc(s, s->size + real_len + 1)) 212 | return -1; 213 | 214 | va_start(ap, fmt); 215 | vsnprintf((char *)(s->buf + s->size), s->allocated_size - s->size, 216 | fmt, ap); 217 | va_end(ap); 218 | s->size += real_len; 219 | } 220 | return 0; 221 | } 222 | /* Release the storage by asking the configured allocator for size 0, then clear every field of s. */ 223 | void dbuf_free(DynBuf *s) 224 | { 225 | /* we test s->buf as a fail safe to avoid crashing if dbuf_free() 226 | is called twice */ 227 | if (s->buf) { 228 | s->realloc_func(s->opaque, s->buf, 0); 229 | } 230 | memset(s, 0, sizeof(*s)); 231 | } 232 | /* Encode code point c at buf and return the number of bytes written, or 0 when c >= 0x80000000. */ 233 | /* Note: at most 31 bits are encoded. At most UTF8_CHAR_LEN_MAX bytes 234 | are output. */ 235 | int unicode_to_utf8(uint8_t *buf, unsigned int c) 236 | { 237 | uint8_t *q = buf; 238 | 239 | if (c < 0x80) { 240 | *q++ = c; 241 | } else { /* the nested branches emit the lead byte for the sequence length; the continuation bytes are then appended on the way out of each nesting level */ 242 | if (c < 0x800) { 243 | *q++ = (c >> 6) | 0xc0; 244 | } else { 245 | if (c < 0x10000) { 246 | *q++ = (c >> 12) | 0xe0; 247 | } else { 248 | if (c < 0x00200000) { 249 | *q++ = (c >> 18) | 0xf0; 250 | } else { 251 | if (c < 0x04000000) { 252 | *q++ = (c >> 24) | 0xf8; 253 | } else if (c < 0x80000000) { 254 | *q++ = (c >> 30) | 0xfc; 255 | *q++ = ((c >> 24) & 0x3f) | 0x80; 256 | } else { 257 | return 0; 258 | } 259 | *q++ = ((c >> 18) & 0x3f) | 0x80; 260 | } 261 | *q++ = ((c >> 12) & 0x3f) | 0x80; 262 | } 263 | *q++ = ((c >> 6) & 0x3f) | 0x80; 264 | } 265 | *q++ = (c & 0x3f) | 0x80; 266 | } 267 | return (int)(q - buf); 268 | } 269 | /* smallest code point accepted for a sequence with 1, 2, 3, 4 or 5 continuation bytes (index = continuation bytes - 1) */ 270 | static const unsigned int utf8_min_code[5] = { 271 | 0x80, 0x800, 0x10000, 0x00200000, 0x04000000, 272 | }; 273 | /* mask selecting the payload bits of the lead byte, with the same indexing */ 274 | static const unsigned char utf8_first_code_mask[5] = { 275 | 0x1f, 0xf, 0x7, 0x3, 0x1, 
276 | }; 277 | 278 | /* return -1 if error. *pp is not updated in this case. max_len must 279 | be >= 1. The maximum length for a UTF8 byte sequence is 6 bytes. */ 280 | int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp) 281 | { 282 | int l, c, b, i; 283 | 284 | c = *p++; 285 | if (c < 0x80) { 286 | *pp = p; 287 | return c; 288 | } 289 | switch(c) { 290 | case 0xc0: case 0xc1: case 0xc2: case 0xc3: 291 | case 0xc4: case 0xc5: case 0xc6: case 0xc7: 292 | case 0xc8: case 0xc9: case 0xca: case 0xcb: 293 | case 0xcc: case 0xcd: case 0xce: case 0xcf: 294 | case 0xd0: case 0xd1: case 0xd2: case 0xd3: 295 | case 0xd4: case 0xd5: case 0xd6: case 0xd7: 296 | case 0xd8: case 0xd9: case 0xda: case 0xdb: 297 | case 0xdc: case 0xdd: case 0xde: case 0xdf: 298 | l = 1; 299 | break; 300 | case 0xe0: case 0xe1: case 0xe2: case 0xe3: 301 | case 0xe4: case 0xe5: case 0xe6: case 0xe7: 302 | case 0xe8: case 0xe9: case 0xea: case 0xeb: 303 | case 0xec: case 0xed: case 0xee: case 0xef: 304 | l = 2; 305 | break; 306 | case 0xf0: case 0xf1: case 0xf2: case 0xf3: 307 | case 0xf4: case 0xf5: case 0xf6: case 0xf7: 308 | l = 3; 309 | break; 310 | case 0xf8: case 0xf9: case 0xfa: case 0xfb: 311 | l = 4; 312 | break; 313 | case 0xfc: case 0xfd: 314 | l = 5; 315 | break; 316 | default: 317 | return -1; 318 | } 319 | /* check that we have enough characters */ 320 | if (l > (max_len - 1)) 321 | return -1; 322 | c &= utf8_first_code_mask[l - 1]; 323 | for(i = 0; i < l; i++) { 324 | b = *p++; 325 | if (b < 0x80 || b >= 0xc0) 326 | return -1; 327 | c = (c << 6) | (b & 0x3f); 328 | } 329 | if (c < (int)utf8_min_code[l - 1]) 330 | return -1; 331 | *pp = p; 332 | return c; 333 | } 334 | 335 | #if 0 336 | 337 | #if defined(EMSCRIPTEN) || defined(__ANDROID__) 338 | 339 | static void *rqsort_arg; 340 | static int (*rqsort_cmp)(const void *, const void *, void *); 341 | 342 | static int rqsort_cmp2(const void *p1, const void *p2) 343 | { 344 | return rqsort_cmp(p1, p2, rqsort_arg); 345 
| } 346 | 347 | /* not reentrant, but not needed with emscripten */ 348 | void rqsort(void *base, size_t nmemb, size_t size, 349 | int (*cmp)(const void *, const void *, void *), 350 | void *arg) 351 | { 352 | rqsort_arg = arg; 353 | rqsort_cmp = cmp; 354 | qsort(base, nmemb, size, rqsort_cmp2); 355 | } 356 | 357 | #endif 358 | 359 | #else 360 | 361 | typedef void (*exchange_f)(void *a, void *b, size_t size); 362 | typedef int (*cmp_f)(const void *, const void *, void *opaque); 363 | 364 | static void exchange_bytes(void *a, void *b, size_t size) { 365 | uint8_t *ap = (uint8_t *)a; 366 | uint8_t *bp = (uint8_t *)b; 367 | 368 | while (size-- != 0) { 369 | uint8_t t = *ap; 370 | *ap++ = *bp; 371 | *bp++ = t; 372 | } 373 | } 374 | 375 | static void exchange_one_byte(void *a, void *b, size_t size) { 376 | uint8_t *ap = (uint8_t *)a; 377 | uint8_t *bp = (uint8_t *)b; 378 | uint8_t t = *ap; 379 | *ap = *bp; 380 | *bp = t; 381 | } 382 | 383 | static void exchange_int16s(void *a, void *b, size_t size) { 384 | uint16_t *ap = (uint16_t *)a; 385 | uint16_t *bp = (uint16_t *)b; 386 | 387 | for (size /= sizeof(uint16_t); size-- != 0;) { 388 | uint16_t t = *ap; 389 | *ap++ = *bp; 390 | *bp++ = t; 391 | } 392 | } 393 | 394 | static void exchange_one_int16(void *a, void *b, size_t size) { 395 | uint16_t *ap = (uint16_t *)a; 396 | uint16_t *bp = (uint16_t *)b; 397 | uint16_t t = *ap; 398 | *ap = *bp; 399 | *bp = t; 400 | } 401 | 402 | static void exchange_int32s(void *a, void *b, size_t size) { 403 | uint32_t *ap = (uint32_t *)a; 404 | uint32_t *bp = (uint32_t *)b; 405 | 406 | for (size /= sizeof(uint32_t); size-- != 0;) { 407 | uint32_t t = *ap; 408 | *ap++ = *bp; 409 | *bp++ = t; 410 | } 411 | } 412 | 413 | static void exchange_one_int32(void *a, void *b, size_t size) { 414 | uint32_t *ap = (uint32_t *)a; 415 | uint32_t *bp = (uint32_t *)b; 416 | uint32_t t = *ap; 417 | *ap = *bp; 418 | *bp = t; 419 | } 420 | 421 | static void exchange_int64s(void *a, void *b, size_t size) { 422 
| uint64_t *ap = (uint64_t *)a; 423 | uint64_t *bp = (uint64_t *)b; 424 | 425 | for (size /= sizeof(uint64_t); size-- != 0;) { 426 | uint64_t t = *ap; 427 | *ap++ = *bp; 428 | *bp++ = t; 429 | } 430 | } 431 | 432 | static void exchange_one_int64(void *a, void *b, size_t size) { 433 | uint64_t *ap = (uint64_t *)a; 434 | uint64_t *bp = (uint64_t *)b; 435 | uint64_t t = *ap; 436 | *ap = *bp; 437 | *bp = t; 438 | } 439 | 440 | static void exchange_int128s(void *a, void *b, size_t size) { 441 | uint64_t *ap = (uint64_t *)a; 442 | uint64_t *bp = (uint64_t *)b; 443 | 444 | for (size /= sizeof(uint64_t) * 2; size-- != 0; ap += 2, bp += 2) { 445 | uint64_t t = ap[0]; 446 | uint64_t u = ap[1]; 447 | ap[0] = bp[0]; 448 | ap[1] = bp[1]; 449 | bp[0] = t; 450 | bp[1] = u; 451 | } 452 | } 453 | 454 | static void exchange_one_int128(void *a, void *b, size_t size) { 455 | uint64_t *ap = (uint64_t *)a; 456 | uint64_t *bp = (uint64_t *)b; 457 | uint64_t t = ap[0]; 458 | uint64_t u = ap[1]; 459 | ap[0] = bp[0]; 460 | ap[1] = bp[1]; 461 | bp[0] = t; 462 | bp[1] = u; 463 | } 464 | 465 | static inline exchange_f exchange_func(const void *base, size_t size) { 466 | switch (((uintptr_t)base | (uintptr_t)size) & 15) { 467 | case 0: 468 | if (size == sizeof(uint64_t) * 2) 469 | return exchange_one_int128; 470 | else 471 | return exchange_int128s; 472 | case 8: 473 | if (size == sizeof(uint64_t)) 474 | return exchange_one_int64; 475 | else 476 | return exchange_int64s; 477 | case 4: 478 | case 12: 479 | if (size == sizeof(uint32_t)) 480 | return exchange_one_int32; 481 | else 482 | return exchange_int32s; 483 | case 2: 484 | case 6: 485 | case 10: 486 | case 14: 487 | if (size == sizeof(uint16_t)) 488 | return exchange_one_int16; 489 | else 490 | return exchange_int16s; 491 | default: 492 | if (size == 1) 493 | return exchange_one_byte; 494 | else 495 | return exchange_bytes; 496 | } 497 | } 498 | 499 | static void heapsortx(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque) 500 
| { 501 | uint8_t *basep = (uint8_t *)base; 502 | size_t i, n, c, r; 503 | exchange_f swap = exchange_func(base, size); 504 | 505 | if (nmemb > 1) { 506 | i = (nmemb / 2) * size; 507 | n = nmemb * size; 508 | 509 | while (i > 0) { 510 | i -= size; 511 | for (r = i; (c = r * 2 + size) < n; r = c) { 512 | if (c < n - size && cmp(basep + c, basep + c + size, opaque) <= 0) 513 | c += size; 514 | if (cmp(basep + r, basep + c, opaque) > 0) 515 | break; 516 | swap(basep + r, basep + c, size); 517 | } 518 | } 519 | for (i = n - size; i > 0; i -= size) { 520 | swap(basep, basep + i, size); 521 | 522 | for (r = 0; (c = r * 2 + size) < i; r = c) { 523 | if (c < i - size && cmp(basep + c, basep + c + size, opaque) <= 0) 524 | c += size; 525 | if (cmp(basep + r, basep + c, opaque) > 0) 526 | break; 527 | swap(basep + r, basep + c, size); 528 | } 529 | } 530 | } 531 | } 532 | 533 | static inline void *med3(void *a, void *b, void *c, cmp_f cmp, void *opaque) 534 | { 535 | return cmp(a, b, opaque) < 0 ? 536 | (cmp(b, c, opaque) < 0 ? b : (cmp(a, c, opaque) < 0 ? c : a )) : 537 | (cmp(b, c, opaque) > 0 ? b : (cmp(a, c, opaque) < 0 ? 
a : c )); 538 | } 539 | 540 | /* pointer based version with local stack and insertion sort threshold */ 541 | void rqsort(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque) 542 | { 543 | struct { uint8_t *base; size_t count; int depth; } stack[50], *sp = stack; 544 | uint8_t *ptr, *pi, *pj, *plt, *pgt, *top, *m; 545 | size_t m4, i, lt, gt, span, span2; 546 | int c, depth; 547 | exchange_f swap = exchange_func(base, size); 548 | exchange_f swap_block = exchange_func(base, size | 128); 549 | 550 | if (nmemb < 2 || size <= 0) 551 | return; 552 | 553 | sp->base = (uint8_t *)base; 554 | sp->count = nmemb; 555 | sp->depth = 0; 556 | sp++; 557 | 558 | while (sp > stack) { 559 | sp--; 560 | ptr = sp->base; 561 | nmemb = sp->count; 562 | depth = sp->depth; 563 | 564 | while (nmemb > 6) { 565 | if (++depth > 50) { 566 | /* depth check to ensure worst case logarithmic time */ 567 | heapsortx(ptr, nmemb, size, cmp, opaque); 568 | nmemb = 0; 569 | break; 570 | } 571 | /* select median of 3 from 1/4, 1/2, 3/4 positions */ 572 | /* should use median of 5 or 9? 
*/ 573 | m4 = (nmemb >> 2) * size; 574 | m = med3(ptr + m4, ptr + 2 * m4, ptr + 3 * m4, cmp, opaque); 575 | swap(ptr, m, size); /* move the pivot to the start of the array */ /* pi (index i) scans upward from the element after the pivot and pj scans downward from the end; plt (count lt) and pgt (index gt) mark where elements equal to the pivot are parked at the front and at the back */ 576 | i = lt = 1; 577 | pi = plt = ptr + size; 578 | gt = nmemb; 579 | pj = pgt = top = ptr + nmemb * size; 580 | for (;;) { /* advance pi over elements not greater than the pivot, moving equal ones to the front block */ 581 | while (pi < pj && (c = cmp(ptr, pi, opaque)) >= 0) { 582 | if (c == 0) { 583 | swap(plt, pi, size); 584 | lt++; 585 | plt += size; 586 | } 587 | i++; 588 | pi += size; 589 | } /* retreat pj over elements not smaller than the pivot, moving equal ones to the back block */ 590 | while (pi < (pj -= size) && (c = cmp(ptr, pj, opaque)) <= 0) { 591 | if (c == 0) { 592 | gt--; 593 | pgt -= size; 594 | swap(pgt, pj, size); 595 | } 596 | } 597 | if (pi >= pj) 598 | break; 599 | swap(pi, pj, size); 600 | i++; 601 | pi += size; 602 | } 603 | /* array has 4 parts: 604 | * from 0 to lt excluded: elements identical to pivot 605 | * from lt to pi excluded: elements smaller than pivot 606 | * from pi to gt excluded: elements greater than pivot 607 | * from gt to n excluded: elements identical to pivot 608 | */ 609 | /* move elements identical to pivot in the middle of the array: */ 610 | /* swap values in ranges [0..lt[ and [i-lt..i[ 611 | swapping the smallest span between lt and i-lt is sufficient 612 | */ 613 | span = plt - ptr; 614 | span2 = pi - plt; 615 | lt = i - lt; 616 | if (span > span2) 617 | span = span2; 618 | swap_block(ptr, pi - span, span); 619 | /* swap values in ranges [gt..top[ and [i..top-(top-gt)[ 620 | swapping the smallest span between top-gt and gt-i is sufficient 621 | */ 622 | span = top - pgt; 623 | span2 = pgt - pi; 624 | pgt = top - span2; 625 | gt = nmemb - (gt - i); 626 | if (span > span2) 627 | span = span2; 628 | swap_block(pi, top - span, span); 629 | 630 | /* now array has 3 parts: 631 | * from 0 to lt excluded: elements smaller than pivot 632 | * from lt to gt excluded: elements identical to pivot 633 | * from gt to n excluded: elements greater than pivot 634 | */ 635 | /* stack the larger segment and keep processing the smaller one 636 | 
to minimize stack use for pathological distributions */ 637 | if (lt > nmemb - gt) { 638 | sp->base = ptr; 639 | sp->count = lt; 640 | sp->depth = depth; 641 | sp++; 642 | ptr = pgt; 643 | nmemb -= gt; 644 | } else { 645 | sp->base = pgt; 646 | sp->count = nmemb - gt; 647 | sp->depth = depth; 648 | sp++; 649 | nmemb = lt; 650 | } 651 | } 652 | /* Use insertion sort for small fragments */ 653 | for (pi = ptr + size, top = ptr + nmemb * size; pi < top; pi += size) { 654 | for (pj = pi; pj > ptr && cmp(pj - size, pj, opaque) > 0; pj -= size) 655 | swap(pj, pj - size, size); 656 | } 657 | } 658 | } 659 | 660 | #endif 661 | -------------------------------------------------------------------------------- /cutils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * C utilities 3 | * 4 | * Copyright (c) 2017 Fabrice Bellard 5 | * Copyright (c) 2018 Charlie Gordon 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | #ifndef CUTILS_H 26 | #define CUTILS_H 27 | 28 | #include 29 | #include 30 | 31 | #include "quickjs.h" 32 | 33 | #ifdef _MSC_VER 34 | #include 35 | #include 36 | #include 37 | #else 38 | #include 39 | #endif 40 | 41 | 42 | /* set if CPU is big endian */ 43 | #undef WORDS_BIGENDIAN 44 | 45 | #if !defined(__GNUC__) && !defined(__clang__) 46 | #undef __attribute__ 47 | #define __attribute__(x) 48 | 49 | #undef __builtin_expect 50 | #define __builtin_expect(cond, m) (cond) 51 | #endif 52 | 53 | #if defined(_MSC_VER) 54 | #define likely(x) (x) 55 | #define unlikely(x) (x) 56 | #define force_inline __forceinline 57 | #define no_inline __declspec(noinline) 58 | #define __maybe_unused 59 | #define __js_printf_like(a, b) 60 | #define __attribute__(x) 61 | #define __attribute(x) 62 | typedef intptr_t ssize_t; 63 | #else 64 | #define likely(x) __builtin_expect(!!(x), 1) 65 | #define unlikely(x) __builtin_expect(!!(x), 0) 66 | #define force_inline inline __attribute__((always_inline)) 67 | #define no_inline __attribute__((noinline)) 68 | #define __maybe_unused __attribute__((unused)) 69 | #define __js_printf_like(f, a) __attribute__((format(printf, f, a))) 70 | #endif 71 | 72 | #define xglue(x, y) x ## y 73 | #define glue(x, y) xglue(x, y) 74 | #define stringify(s) tostring(s) 75 | #define tostring(s) #s 76 | 77 | #ifndef offsetof 78 | #define offsetof(type, field) ((size_t) &((type *)0)->field) 79 | #endif 80 | #ifndef countof 81 | #define countof(x) (sizeof(x) / sizeof((x)[0])) 82 | #endif 83 | 84 | typedef int BOOL; 85 | 86 | #ifndef FALSE 87 | enum { 88 | FALSE = 0, 89 | TRUE = 1, 90 | }; 91 | #endif 92 | 93 | #ifndef no_return 94 | #if defined(__GNUC__) || defined(__clang__) 95 
| #define no_return __attribute__ ((noreturn)) 96 | #elif defined(_MSC_VER) 97 | #define no_return __declspec(noreturn) 98 | #else 99 | #define no_return 100 | #endif 101 | #endif 102 | 103 | void pstrcpy(char *buf, int buf_size, const char *str); 104 | char *pstrcat(char *buf, int buf_size, const char *s); 105 | int strstart(const char *str, const char *val, const char **ptr); 106 | int has_suffix(const char *str, const char *suffix); 107 | 108 | static inline int max_int(int a, int b) 109 | { 110 | if (a > b) 111 | return a; 112 | else 113 | return b; 114 | } 115 | 116 | static inline int min_int(int a, int b) 117 | { 118 | if (a < b) 119 | return a; 120 | else 121 | return b; 122 | } 123 | 124 | static inline uint32_t max_uint32(uint32_t a, uint32_t b) 125 | { 126 | if (a > b) 127 | return a; 128 | else 129 | return b; 130 | } 131 | 132 | static inline uint32_t min_uint32(uint32_t a, uint32_t b) 133 | { 134 | if (a < b) 135 | return a; 136 | else 137 | return b; 138 | } 139 | 140 | static inline int64_t max_int64(int64_t a, int64_t b) 141 | { 142 | if (a > b) 143 | return a; 144 | else 145 | return b; 146 | } 147 | 148 | static inline int64_t min_int64(int64_t a, int64_t b) 149 | { 150 | if (a < b) 151 | return a; 152 | else 153 | return b; 154 | } 155 | 156 | 157 | // this chunk ripped from https://github.com/llvm-mirror/libcxx/blob/9dcbb46826fd4d29b1485f25e8986d36019a6dca/include/support/win32/support.h#L106-L182 158 | #if defined(_MSC_VER) 159 | 160 | // Bit builtin's make these assumptions when calling _BitScanForward/Reverse 161 | // etc. These assumptions are expected to be true for Win32/Win64 which this 162 | // file supports. 163 | static_assert(sizeof(unsigned long long) == 8, ""); 164 | static_assert(sizeof(unsigned long) == 4, ""); 165 | static_assert(sizeof(unsigned int) == 4, ""); 166 | 167 | static inline int __builtin_popcount(unsigned int x) 168 | { 169 | // Binary: 0101... 
170 | static const unsigned int m1 = 0x55555555; 171 | // Binary: 00110011.. 172 | static const unsigned int m2 = 0x33333333; 173 | // Binary: 4 zeros, 4 ones ... 174 | static const unsigned int m4 = 0x0f0f0f0f; 175 | // The sum of 256 to the power of 0,1,2,3... 176 | static const unsigned int h01 = 0x01010101; 177 | // Put count of each 2 bits into those 2 bits. 178 | x -= (x >> 1) & m1; 179 | // Put count of each 4 bits into those 4 bits. 180 | x = (x & m2) + ((x >> 2) & m2); 181 | // Put count of each 8 bits into those 8 bits. 182 | x = (x + (x >> 4)) & m4; 183 | // Returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24). 184 | return (x * h01) >> 24; 185 | } 186 | 187 | static inline int __builtin_popcountl(unsigned long x) 188 | { 189 | return __builtin_popcount((int)(x)); 190 | } 191 | 192 | static inline int __builtin_popcountll(unsigned long long x) 193 | { 194 | // Binary: 0101... 195 | static const unsigned long long m1 = 0x5555555555555555; 196 | // Binary: 00110011.. 197 | static const unsigned long long m2 = 0x3333333333333333; 198 | // Binary: 4 zeros, 4 ones ... 199 | static const unsigned long long m4 = 0x0f0f0f0f0f0f0f0f; 200 | // The sum of 256 to the power of 0,1,2,3... 201 | static const unsigned long long h01 = 0x0101010101010101; 202 | // Put count of each 2 bits into those 2 bits. 203 | x -= (x >> 1) & m1; 204 | // Put count of each 4 bits into those 4 bits. 205 | x = (x & m2) + ((x >> 2) & m2); 206 | // Put count of each 8 bits into those 8 bits. 207 | x = (x + (x >> 4)) & m4; 208 | // Returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... 209 | return (int)((x * h01) >> 56); 210 | } 211 | 212 | // Returns the number of trailing 0-bits in x, starting at the least significant 213 | // bit position. If x is 0, the result is undefined. 214 | static inline int __builtin_ctzll(unsigned long long mask) 215 | { 216 | unsigned long where; 217 | // Search from LSB to MSB for first set bit. 218 | // Returns zero if no set bit is found. 
219 | #if INTPTR_MAX >= INT64_MAX // 64-bit 220 | if (_BitScanForward64(&where, mask)) 221 | return (int)(where); 222 | #else 223 | // Win32 doesn't have _BitScanForward64 so emulate it with two 32 bit calls. 224 | // Scan the Low Word. 225 | if (_BitScanForward(&where, (unsigned long)(mask))) 226 | return (int)(where); 227 | // Scan the High Word. 228 | if (_BitScanForward(&where, (unsigned long)(mask >> 32))) 229 | return (int)(where + 32); // Create a bit offset from the LSB. 230 | #endif 231 | return 64; 232 | } 233 | 234 | static inline int __builtin_ctzl(unsigned long mask) 235 | { 236 | unsigned long where; 237 | // Search from LSB to MSB for first set bit. 238 | // Returns zero if no set bit is found. 239 | if (_BitScanForward(&where, mask)) 240 | return (int)(where); 241 | return 32; 242 | } 243 | 244 | static inline int __builtin_ctz(unsigned int mask) 245 | { 246 | // Win32 and Win64 expectations. 247 | static_assert(sizeof(mask) == 4, ""); 248 | static_assert(sizeof(unsigned long) == 4, ""); 249 | return __builtin_ctzl((unsigned long)(mask)); 250 | } 251 | 252 | // Returns the number of leading 0-bits in x, starting at the most significant 253 | // bit position. If x is 0, the result is undefined. 254 | static inline int __builtin_clzll(unsigned long long mask) 255 | { 256 | unsigned long where; 257 | // BitScanReverse scans from MSB to LSB for first set bit. 258 | // Returns 0 if no set bit is found. 259 | #if INTPTR_MAX >= INT64_MAX // 64-bit 260 | if (_BitScanReverse64(&where, mask)) 261 | return (int)(63 - where); 262 | #else 263 | // Scan the high 32 bits. 264 | if (_BitScanReverse(&where, (unsigned long)(mask >> 32))) 265 | return (int)(63 - 266 | (where + 32)); // Create a bit offset from the MSB. 267 | // Scan the low 32 bits. 268 | if (_BitScanReverse(&where, (unsigned long)(mask))) 269 | return (int)(63 - where); 270 | #endif 271 | return 64; // Undefined Behavior. 
272 | } 273 | /* MSVC shim for __builtin_clzl: returns 31 - (index of highest set bit), or 32 when mask is 0; presumably relies on unsigned long being 32 bits wide on Windows -- TODO confirm */ 274 | static inline int __builtin_clzl(unsigned long mask) 275 | { 276 | unsigned long where; 277 | // BitScanReverse scans from MSB to LSB for first set bit. 278 | // Returns zero if no set bit is found. 279 | if (_BitScanReverse(&where, mask)) 280 | return (int)(31 - where); 281 | return 32; // Undefined Behavior. 282 | } 283 | /* MSVC shim for __builtin_clz: forwards to the unsigned long variant */ 284 | static inline int __builtin_clz(unsigned int x) 285 | { 286 | return __builtin_clzl(x); 287 | } 288 | 289 | #endif // _LIBCPP_MSVC 290 | 291 | /* WARNING: undefined if a = 0 */ 292 | static inline int clz32(unsigned int a) 293 | { 294 | return __builtin_clz(a); 295 | } 296 | 297 | /* WARNING: undefined if a = 0 */ 298 | static inline int clz64(uint64_t a) 299 | { 300 | return __builtin_clzll(a); 301 | } 302 | 303 | /* WARNING: undefined if a = 0 */ 304 | static inline int ctz32(unsigned int a) 305 | { 306 | return __builtin_ctz(a); 307 | } 308 | 309 | /* WARNING: undefined if a = 0 */ 310 | static inline int ctz64(uint64_t a) 311 | { 312 | return __builtin_ctzll(a); 313 | } 314 | /* One-member structs packed to 1-byte alignment (pragma pack on MSVC, the packed attribute elsewhere) */ 315 | #ifdef _MSC_VER 316 | #pragma pack(push, 1) 317 | struct packed_u64 { 318 | uint64_t v; 319 | }; 320 | 321 | struct packed_u32 { 322 | uint32_t v; 323 | }; 324 | 325 | struct packed_u16 { 326 | uint16_t v; 327 | }; 328 | #pragma pack(pop) 329 | #else 330 | struct __attribute__((packed)) packed_u64 { 331 | uint64_t v; 332 | }; 333 | 334 | struct __attribute__((packed)) packed_u32 { 335 | uint32_t v; 336 | }; 337 | 338 | struct __attribute__((packed)) packed_u16 { 339 | uint16_t v; 340 | }; 341 | #endif 342 | /* The get_/put_ accessors read or write the value through the packed structs; no byte swapping is performed */ 343 | static inline uint64_t get_u64(const uint8_t *tab) 344 | { 345 | return ((const struct packed_u64 *)tab)->v; 346 | } 347 | 348 | static inline int64_t get_i64(const uint8_t *tab) 349 | { 350 | return (int64_t)((const struct packed_u64 *)tab)->v; 351 | } 352 | 353 | static inline void put_u64(uint8_t *tab, uint64_t val) 354 | { 355 | ((struct packed_u64 *)tab)->v = val; 356 | } 357 | 358 | static inline uint32_t get_u32(const uint8_t *tab) 359
| { 360 | return ((const struct packed_u32 *)tab)->v; 361 | } 362 | 363 | static inline int32_t get_i32(const uint8_t *tab) 364 | { 365 | return (int32_t)((const struct packed_u32 *)tab)->v; 366 | } 367 | 368 | static inline void put_u32(uint8_t *tab, uint32_t val) 369 | { 370 | ((struct packed_u32 *)tab)->v = val; 371 | } 372 | 373 | static inline uint32_t get_u16(const uint8_t *tab) 374 | { 375 | return ((const struct packed_u16 *)tab)->v; 376 | } 377 | 378 | static inline int32_t get_i16(const uint8_t *tab) 379 | { 380 | return (int16_t)((const struct packed_u16 *)tab)->v; 381 | } 382 | 383 | static inline void put_u16(uint8_t *tab, uint16_t val) 384 | { 385 | ((struct packed_u16 *)tab)->v = val; 386 | } 387 | 388 | static inline uint32_t get_u8(const uint8_t *tab) 389 | { 390 | return *tab; 391 | } 392 | 393 | static inline int32_t get_i8(const uint8_t *tab) 394 | { 395 | return (int8_t)*tab; 396 | } 397 | 398 | static inline void put_u8(uint8_t *tab, uint8_t val) 399 | { 400 | *tab = val; 401 | } 402 | 403 | static inline uint16_t bswap16(uint16_t x) 404 | { 405 | return (x >> 8) | (x << 8); 406 | } 407 | 408 | static inline uint32_t bswap32(uint32_t v) 409 | { 410 | return ((v & 0xff000000) >> 24) | ((v & 0x00ff0000) >> 8) | 411 | ((v & 0x0000ff00) << 8) | ((v & 0x000000ff) << 24); 412 | } 413 | 414 | static inline uint64_t bswap64(uint64_t v) 415 | { 416 | return ((v & ((uint64_t)0xff << (7 * 8))) >> (7 * 8)) | 417 | ((v & ((uint64_t)0xff << (6 * 8))) >> (5 * 8)) | 418 | ((v & ((uint64_t)0xff << (5 * 8))) >> (3 * 8)) | 419 | ((v & ((uint64_t)0xff << (4 * 8))) >> (1 * 8)) | 420 | ((v & ((uint64_t)0xff << (3 * 8))) << (1 * 8)) | 421 | ((v & ((uint64_t)0xff << (2 * 8))) << (3 * 8)) | 422 | ((v & ((uint64_t)0xff << (1 * 8))) << (5 * 8)) | 423 | ((v & ((uint64_t)0xff << (0 * 8))) << (7 * 8)); 424 | } 425 | 426 | /* XXX: should take an extra argument to pass slack information to the caller */ 427 | typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t 
size); 428 | 429 | typedef struct DynBuf { 430 | uint8_t *buf; 431 | size_t size; 432 | size_t allocated_size; 433 | BOOL error; /* true if a memory allocation error occurred */ 434 | DynBufReallocFunc *realloc_func; 435 | void *opaque; /* for realloc_func */ 436 | } DynBuf; 437 | 438 | void dbuf_init(DynBuf *s); 439 | void dbuf_init2(DynBuf *s, void *opaque, DynBufReallocFunc *realloc_func); 440 | int dbuf_realloc(DynBuf *s, size_t new_size); 441 | int dbuf_write(DynBuf *s, size_t offset, const uint8_t *data, size_t len); 442 | int dbuf_put(DynBuf *s, const uint8_t *data, size_t len); 443 | int dbuf_put_self(DynBuf *s, size_t offset, size_t len); 444 | int dbuf_putc(DynBuf *s, uint8_t c); 445 | int dbuf_putstr(DynBuf *s, const char *str); 446 | 447 | static inline int dbuf_put_u16(DynBuf *s, uint16_t val) 448 | { 449 | return dbuf_put(s, (uint8_t *)&val, 2); 450 | } 451 | static inline int dbuf_put_u32(DynBuf *s, uint32_t val) 452 | { 453 | return dbuf_put(s, (uint8_t *)&val, 4); 454 | } 455 | static inline int dbuf_put_u64(DynBuf *s, uint64_t val) 456 | { 457 | return dbuf_put(s, (uint8_t *)&val, 8); 458 | } 459 | int __js_printf_like(2, 3) dbuf_printf(DynBuf* s, const char* fmt, ...); 460 | void dbuf_free(DynBuf *s); 461 | static inline BOOL dbuf_error(DynBuf *s) { 462 | return s->error; 463 | } 464 | static inline void dbuf_set_error(DynBuf *s) 465 | { 466 | s->error = TRUE; 467 | } 468 | 469 | #define UTF8_CHAR_LEN_MAX 6 470 | 471 | int unicode_to_utf8(uint8_t *buf, unsigned int c); 472 | int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp); 473 | 474 | static inline int from_hex(int c) 475 | { 476 | if (c >= '0' && c <= '9') 477 | return c - '0'; 478 | else if (c >= 'A' && c <= 'F') 479 | return c - 'A' + 10; 480 | else if (c >= 'a' && c <= 'f') 481 | return c - 'a' + 10; 482 | else 483 | return -1; 484 | } 485 | 486 | void rqsort(void *base, size_t nmemb, size_t size, 487 | int (*cmp)(const void *, const void *, void *), 488 | void *arg); 
489 | 490 | #endif /* CUTILS_H */ 491 | -------------------------------------------------------------------------------- /libbf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Tiny arbitrary precision floating point library 3 | * 4 | * Copyright (c) 2017-2021 Fabrice Bellard 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | * THE SOFTWARE. 
23 | */ 24 | #ifndef LIBBF_H 25 | #define LIBBF_H 26 | 27 | #include <stddef.h> 28 | #include <stdint.h> 29 | 30 | #if (INTPTR_MAX >= INT64_MAX) && !defined(_MSC_VER) /* MSVC has no __int128 in stdC mode */ 31 | #define LIMB_LOG2_BITS 6 32 | #else 33 | #define LIMB_LOG2_BITS 5 34 | #endif 35 | 36 | #define LIMB_BITS (1 << LIMB_LOG2_BITS) 37 | /* limb, signed limb and double-limb types for the selected limb width */ 38 | #if (LIMB_BITS == 64) 39 | 40 | typedef __int128 int128_t; 41 | typedef unsigned __int128 uint128_t; 42 | typedef int64_t slimb_t; 43 | typedef uint64_t limb_t; 44 | typedef uint128_t dlimb_t; 45 | #define BF_RAW_EXP_MIN INT64_MIN 46 | #define BF_RAW_EXP_MAX INT64_MAX 47 | 48 | #define LIMB_DIGITS 19 49 | #define BF_DEC_BASE UINT64_C(10000000000000000000) 50 | 51 | #else 52 | 53 | typedef int32_t slimb_t; 54 | typedef uint32_t limb_t; 55 | typedef uint64_t dlimb_t; 56 | #define BF_RAW_EXP_MIN INT32_MIN 57 | #define BF_RAW_EXP_MAX INT32_MAX 58 | 59 | #define LIMB_DIGITS 9 60 | #define BF_DEC_BASE 1000000000U 61 | 62 | #endif 63 | 64 | /* in bits */ 65 | /* minimum number of bits for the exponent */ 66 | #define BF_EXP_BITS_MIN 3 67 | /* maximum number of bits for the exponent */ 68 | #define BF_EXP_BITS_MAX (LIMB_BITS - 3) 69 | /* extended range for exponent, used internally */ 70 | #define BF_EXT_EXP_BITS_MAX (BF_EXP_BITS_MAX + 1) 71 | /* minimum possible precision */ 72 | #define BF_PREC_MIN 2 73 | /* maximum possible precision */ 74 | #define BF_PREC_MAX (((limb_t)1 << (LIMB_BITS - 2)) - 2) 75 | /* some operations support infinite precision */ 76 | #define BF_PREC_INF (BF_PREC_MAX + 1) /* infinite precision */ 77 | 78 | #if LIMB_BITS == 64 79 | #define BF_CHKSUM_MOD (UINT64_C(975620677) * UINT64_C(9795002197)) 80 | #else 81 | #define BF_CHKSUM_MOD 975620677U 82 | #endif 83 | 84 | #define BF_EXP_ZERO BF_RAW_EXP_MIN 85 | #define BF_EXP_INF (BF_RAW_EXP_MAX - 1) 86 | #define BF_EXP_NAN BF_RAW_EXP_MAX 87 | 88 | /* +/-zero is represented with expn = BF_EXP_ZERO and len = 0, 89 | +/-infinity is represented with expn = BF_EXP_INF and len = 0, 90 | 
NaN is represented with expn = BF_EXP_NAN and len = 0 (sign is ignored) 91 | */ 92 | typedef struct { 93 | struct bf_context_t *ctx; 94 | int sign; 95 | slimb_t expn; 96 | limb_t len; 97 | limb_t *tab; 98 | } bf_t; 99 | 100 | typedef struct { 101 | /* must be kept identical to bf_t */ 102 | struct bf_context_t *ctx; 103 | int sign; 104 | slimb_t expn; 105 | limb_t len; 106 | limb_t *tab; 107 | } bfdec_t; 108 | 109 | typedef enum { 110 | BF_RNDN, /* round to nearest, ties to even */ 111 | BF_RNDZ, /* round to zero */ 112 | BF_RNDD, /* round to -inf (the code relies on (BF_RNDD xor BF_RNDU) = 1) */ 113 | BF_RNDU, /* round to +inf */ 114 | BF_RNDNA, /* round to nearest, ties away from zero */ 115 | BF_RNDA, /* round away from zero */ 116 | BF_RNDF, /* faithful rounding (nondeterministic, either RNDD or RNDU, 117 | inexact flag is always set) */ 118 | } bf_rnd_t; 119 | 120 | /* allow subnormal numbers. Only available if the number of exponent 121 | bits is <= BF_EXP_BITS_USER_MAX and prec != BF_PREC_INF. */ 122 | #define BF_FLAG_SUBNORMAL (1 << 3) 123 | /* 'prec' is the precision after the radix point instead of the whole 124 | mantissa. Can only be used with bf_round() and 125 | bfdec_[add|sub|mul|div|sqrt|round](). 
*/ 126 | #define BF_FLAG_RADPNT_PREC (1 << 4) 127 | 128 | #define BF_RND_MASK 0x7 129 | #define BF_EXP_BITS_SHIFT 5 130 | #define BF_EXP_BITS_MASK 0x3f 131 | 132 | /* shortcut for bf_set_exp_bits(BF_EXT_EXP_BITS_MAX) */ 133 | #define BF_FLAG_EXT_EXP (BF_EXP_BITS_MASK << BF_EXP_BITS_SHIFT) 134 | 135 | /* contains the rounding mode and number of exponent bits */ 136 | typedef uint32_t bf_flags_t; 137 | 138 | typedef void *bf_realloc_func_t(void *opaque, void *ptr, size_t size); 139 | 140 | typedef struct { 141 | bf_t val; 142 | limb_t prec; 143 | } BFConstCache; 144 | 145 | typedef struct bf_context_t { 146 | void *realloc_opaque; 147 | bf_realloc_func_t *realloc_func; 148 | BFConstCache log2_cache; 149 | BFConstCache pi_cache; 150 | struct BFNTTState *ntt_state; 151 | } bf_context_t; 152 | /* Decode the exponent-size field of 'flags': the field holds (BF_EXP_BITS_MAX - n); the all-ones value selects BF_EXP_BITS_MAX + 1 bits */ 153 | static inline int bf_get_exp_bits(bf_flags_t flags) 154 | { 155 | int e; 156 | e = (flags >> BF_EXP_BITS_SHIFT) & BF_EXP_BITS_MASK; 157 | if (e == BF_EXP_BITS_MASK) 158 | return BF_EXP_BITS_MAX + 1; 159 | else 160 | return BF_EXP_BITS_MAX - e; 161 | } 162 | /* Encode an exponent size of n bits into the flags field read by bf_get_exp_bits() */ 163 | static inline bf_flags_t bf_set_exp_bits(int n) 164 | { 165 | return ((BF_EXP_BITS_MAX - n) & BF_EXP_BITS_MASK) << BF_EXP_BITS_SHIFT; 166 | } 167 | 168 | /* returned status */ 169 | #define BF_ST_INVALID_OP (1 << 0) 170 | #define BF_ST_DIVIDE_ZERO (1 << 1) 171 | #define BF_ST_OVERFLOW (1 << 2) 172 | #define BF_ST_UNDERFLOW (1 << 3) 173 | #define BF_ST_INEXACT (1 << 4) 174 | /* indicate that a memory allocation error occurred. 
NaN is returned */ 175 | #define BF_ST_MEM_ERROR (1 << 5) 176 | 177 | #define BF_RADIX_MAX 36 /* maximum radix for bf_atof() and bf_ftoa() */ 178 | 179 | static inline slimb_t bf_max(slimb_t a, slimb_t b) 180 | { 181 | if (a > b) 182 | return a; 183 | else 184 | return b; 185 | } 186 | 187 | static inline slimb_t bf_min(slimb_t a, slimb_t b) 188 | { 189 | if (a < b) 190 | return a; 191 | else 192 | return b; 193 | } 194 | 195 | void bf_context_init(bf_context_t *s, bf_realloc_func_t *realloc_func, 196 | void *realloc_opaque); 197 | void bf_context_end(bf_context_t *s); 198 | /* free memory allocated for the bf cache data */ 199 | void bf_clear_cache(bf_context_t *s); 200 | 201 | static inline void *bf_realloc(bf_context_t *s, void *ptr, size_t size) 202 | { 203 | return s->realloc_func(s->realloc_opaque, ptr, size); 204 | } 205 | 206 | /* 'size' must be != 0 */ 207 | static inline void *bf_malloc(bf_context_t *s, size_t size) 208 | { 209 | return bf_realloc(s, NULL, size); 210 | } 211 | 212 | static inline void bf_free(bf_context_t *s, void *ptr) 213 | { 214 | /* must test ptr otherwise equivalent to malloc(0) */ 215 | if (ptr) 216 | bf_realloc(s, ptr, 0); 217 | } 218 | 219 | void bf_init(bf_context_t *s, bf_t *r); 220 | 221 | static inline void bf_delete(bf_t *r) 222 | { 223 | bf_context_t *s = r->ctx; 224 | /* we accept to delete a zeroed bf_t structure */ 225 | if (s && r->tab) { 226 | bf_realloc(s, r->tab, 0); 227 | } 228 | } 229 | 230 | static inline void bf_neg(bf_t *r) 231 | { 232 | r->sign ^= 1; 233 | } 234 | 235 | static inline int bf_is_finite(const bf_t *a) 236 | { 237 | return (a->expn < BF_EXP_INF); 238 | } 239 | 240 | static inline int bf_is_nan(const bf_t *a) 241 | { 242 | return (a->expn == BF_EXP_NAN); 243 | } 244 | 245 | static inline int bf_is_zero(const bf_t *a) 246 | { 247 | return (a->expn == BF_EXP_ZERO); 248 | } 249 | 250 | static inline void bf_memcpy(bf_t *r, const bf_t *a) 251 | { 252 | *r = *a; 253 | } 254 | 255 | int bf_set_ui(bf_t *r, 
uint64_t a); 256 | int bf_set_si(bf_t *r, int64_t a); 257 | void bf_set_nan(bf_t *r); 258 | void bf_set_zero(bf_t *r, int is_neg); 259 | void bf_set_inf(bf_t *r, int is_neg); 260 | int bf_set(bf_t *r, const bf_t *a); 261 | void bf_move(bf_t *r, bf_t *a); 262 | int bf_get_float64(const bf_t *a, double *pres, bf_rnd_t rnd_mode); 263 | int bf_set_float64(bf_t *a, double d); 264 | 265 | int bf_cmpu(const bf_t *a, const bf_t *b); 266 | int bf_cmp_full(const bf_t *a, const bf_t *b); 267 | int bf_cmp(const bf_t *a, const bf_t *b); 268 | static inline int bf_cmp_eq(const bf_t *a, const bf_t *b) 269 | { 270 | return bf_cmp(a, b) == 0; 271 | } 272 | 273 | static inline int bf_cmp_le(const bf_t *a, const bf_t *b) 274 | { 275 | return bf_cmp(a, b) <= 0; 276 | } 277 | 278 | static inline int bf_cmp_lt(const bf_t *a, const bf_t *b) 279 | { 280 | return bf_cmp(a, b) < 0; 281 | } 282 | 283 | int bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags); 284 | int bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags); 285 | int bf_add_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec, bf_flags_t flags); 286 | int bf_mul(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags); 287 | int bf_mul_ui(bf_t *r, const bf_t *a, uint64_t b1, limb_t prec, bf_flags_t flags); 288 | int bf_mul_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec, 289 | bf_flags_t flags); 290 | int bf_mul_2exp(bf_t *r, slimb_t e, limb_t prec, bf_flags_t flags); 291 | int bf_div(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags); 292 | #define BF_DIVREM_EUCLIDIAN BF_RNDF 293 | int bf_divrem(bf_t *q, bf_t *r, const bf_t *a, const bf_t *b, 294 | limb_t prec, bf_flags_t flags, int rnd_mode); 295 | int bf_rem(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, 296 | bf_flags_t flags, int rnd_mode); 297 | int bf_remquo(slimb_t *pq, bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, 298 | bf_flags_t flags, int rnd_mode); 299 | /* round to integer 
with infinite precision */ 300 | int bf_rint(bf_t *r, int rnd_mode); 301 | int bf_round(bf_t *r, limb_t prec, bf_flags_t flags); 302 | int bf_sqrtrem(bf_t *r, bf_t *rem1, const bf_t *a); 303 | int bf_sqrt(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); 304 | slimb_t bf_get_exp_min(const bf_t *a); 305 | int bf_logic_or(bf_t *r, const bf_t *a, const bf_t *b); 306 | int bf_logic_xor(bf_t *r, const bf_t *a, const bf_t *b); 307 | int bf_logic_and(bf_t *r, const bf_t *a, const bf_t *b); 308 | 309 | /* additional flags for bf_atof */ 310 | /* do not accept hex radix prefix (0x or 0X) if radix = 0 or radix = 16 */ 311 | #define BF_ATOF_NO_HEX (1 << 16) 312 | /* accept binary (0b or 0B) or octal (0o or 0O) radix prefix if radix = 0 */ 313 | #define BF_ATOF_BIN_OCT (1 << 17) 314 | /* Do not parse NaN or Inf */ 315 | #define BF_ATOF_NO_NAN_INF (1 << 18) 316 | /* return the exponent separately */ 317 | #define BF_ATOF_EXPONENT (1 << 19) 318 | 319 | int bf_atof(bf_t *a, const char *str, const char **pnext, int radix, 320 | limb_t prec, bf_flags_t flags); 321 | /* this version accepts prec = BF_PREC_INF and returns the radix 322 | exponent */ 323 | int bf_atof2(bf_t *r, slimb_t *pexponent, 324 | const char *str, const char **pnext, int radix, 325 | limb_t prec, bf_flags_t flags); 326 | int bf_mul_pow_radix(bf_t *r, const bf_t *T, limb_t radix, 327 | slimb_t expn, limb_t prec, bf_flags_t flags); 328 | 329 | 330 | /* Conversion of floating point number to string. Return a null 331 | terminated string or NULL if memory error. *plen contains its 332 | length if plen != NULL. The exponent letter is "e" for base 10, 333 | "p" for bases 2, 8, 16 with a binary exponent and "@" for the other 334 | bases. */ 335 | 336 | #define BF_FTOA_FORMAT_MASK (3 << 16) 337 | 338 | /* fixed format: prec significant digits rounded with (flags & 339 | BF_RND_MASK). 
Exponential notation is used if too many zeros are 340 | needed.*/ 341 | #define BF_FTOA_FORMAT_FIXED (0 << 16) 342 | /* fractional format: prec digits after the decimal point rounded with 343 | (flags & BF_RND_MASK) */ 344 | #define BF_FTOA_FORMAT_FRAC (1 << 16) 345 | /* free format: 346 | 347 | For binary radices with bf_ftoa() and for bfdec_ftoa(): use the minimum 348 | number of digits to represent 'a'. The precision and the rounding 349 | mode are ignored. 350 | 351 | For the non binary radices with bf_ftoa(): use as many digits as 352 | necessary so that bf_atof() return the same number when using 353 | precision 'prec', rounding to nearest and the subnormal 354 | configuration of 'flags'. The result is meaningful only if 'a' is 355 | already rounded to 'prec' bits. If the subnormal flag is set, the 356 | exponent in 'flags' must also be set to the desired exponent range. 357 | */ 358 | #define BF_FTOA_FORMAT_FREE (2 << 16) 359 | /* same as BF_FTOA_FORMAT_FREE but uses the minimum number of digits 360 | (takes more computation time). Identical to BF_FTOA_FORMAT_FREE for 361 | binary radices with bf_ftoa() and for bfdec_ftoa(). */ 362 | #define BF_FTOA_FORMAT_FREE_MIN (3 << 16) 363 | 364 | /* force exponential notation for fixed or free format */ 365 | #define BF_FTOA_FORCE_EXP (1 << 20) 366 | /* add 0x prefix for base 16, 0o prefix for base 8 or 0b prefix for 367 | base 2 if non zero value */ 368 | #define BF_FTOA_ADD_PREFIX (1 << 21) 369 | /* return "Infinity" instead of "Inf" and add a "+" for positive 370 | exponents */ 371 | #define BF_FTOA_JS_QUIRKS (1 << 22) 372 | 373 | char *bf_ftoa(size_t *plen, const bf_t *a, int radix, limb_t prec, 374 | bf_flags_t flags); 375 | 376 | /* modulo 2^n instead of saturation. 
NaN and infinity return 0 */ 377 | #define BF_GET_INT_MOD (1 << 0) 378 | int bf_get_int32(int *pres, const bf_t *a, int flags); 379 | int bf_get_int64(int64_t *pres, const bf_t *a, int flags); 380 | int bf_get_uint64(uint64_t *pres, const bf_t *a); 381 | 382 | /* the following functions are exported for testing only. */ 383 | void mp_print_str(const char *str, const limb_t *tab, limb_t n); 384 | void bf_print_str(const char *str, const bf_t *a); 385 | int bf_resize(bf_t *r, limb_t len); 386 | int bf_get_fft_size(int *pdpl, int *pnb_mods, limb_t len); 387 | int bf_normalize_and_round(bf_t *r, limb_t prec1, bf_flags_t flags); 388 | int bf_can_round(const bf_t *a, slimb_t prec, bf_rnd_t rnd_mode, slimb_t k); 389 | slimb_t bf_mul_log2_radix(slimb_t a1, unsigned int radix, int is_inv, 390 | int is_ceil1); 391 | int mp_mul(bf_context_t *s, limb_t *result, 392 | const limb_t *op1, limb_t op1_size, 393 | const limb_t *op2, limb_t op2_size); 394 | limb_t mp_add(limb_t *res, const limb_t *op1, const limb_t *op2, 395 | limb_t n, limb_t carry); 396 | limb_t mp_add_ui(limb_t *tab, limb_t b, size_t n); 397 | int mp_sqrtrem(bf_context_t *s, limb_t *tabs, limb_t *taba, limb_t n); 398 | int mp_recip(bf_context_t *s, limb_t *tabr, const limb_t *taba, limb_t n); 399 | limb_t bf_isqrt(limb_t a); 400 | 401 | /* transcendental functions */ 402 | int bf_const_log2(bf_t *T, limb_t prec, bf_flags_t flags); 403 | int bf_const_pi(bf_t *T, limb_t prec, bf_flags_t flags); 404 | int bf_exp(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); 405 | int bf_log(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); 406 | #define BF_POW_JS_QUIRKS (1 << 16) /* (+/-1)^(+/-Inf) = NaN, 1^NaN = NaN */ 407 | int bf_pow(bf_t *r, const bf_t *x, const bf_t *y, limb_t prec, bf_flags_t flags); 408 | int bf_cos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); 409 | int bf_sin(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); 410 | int bf_tan(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t 
flags); 411 | int bf_atan(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); 412 | int bf_atan2(bf_t *r, const bf_t *y, const bf_t *x, 413 | limb_t prec, bf_flags_t flags); 414 | int bf_asin(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); 415 | int bf_acos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); 416 | 417 | /* decimal floating point */ 418 | 419 | static inline void bfdec_init(bf_context_t *s, bfdec_t *r) 420 | { 421 | bf_init(s, (bf_t *)r); 422 | } 423 | static inline void bfdec_delete(bfdec_t *r) 424 | { 425 | bf_delete((bf_t *)r); 426 | } 427 | 428 | static inline void bfdec_neg(bfdec_t *r) 429 | { 430 | r->sign ^= 1; 431 | } 432 | 433 | static inline int bfdec_is_finite(const bfdec_t *a) 434 | { 435 | return (a->expn < BF_EXP_INF); 436 | } 437 | 438 | static inline int bfdec_is_nan(const bfdec_t *a) 439 | { 440 | return (a->expn == BF_EXP_NAN); 441 | } 442 | 443 | static inline int bfdec_is_zero(const bfdec_t *a) 444 | { 445 | return (a->expn == BF_EXP_ZERO); 446 | } 447 | 448 | static inline void bfdec_memcpy(bfdec_t *r, const bfdec_t *a) 449 | { 450 | bf_memcpy((bf_t *)r, (const bf_t *)a); 451 | } 452 | 453 | int bfdec_set_ui(bfdec_t *r, uint64_t a); 454 | int bfdec_set_si(bfdec_t *r, int64_t a); 455 | 456 | static inline void bfdec_set_nan(bfdec_t *r) 457 | { 458 | bf_set_nan((bf_t *)r); 459 | } 460 | static inline void bfdec_set_zero(bfdec_t *r, int is_neg) 461 | { 462 | bf_set_zero((bf_t *)r, is_neg); 463 | } 464 | static inline void bfdec_set_inf(bfdec_t *r, int is_neg) 465 | { 466 | bf_set_inf((bf_t *)r, is_neg); 467 | } 468 | static inline int bfdec_set(bfdec_t *r, const bfdec_t *a) 469 | { 470 | return bf_set((bf_t *)r, (bf_t *)a); 471 | } 472 | static inline void bfdec_move(bfdec_t *r, bfdec_t *a) 473 | { 474 | bf_move((bf_t *)r, (bf_t *)a); 475 | } 476 | static inline int bfdec_cmpu(const bfdec_t *a, const bfdec_t *b) 477 | { 478 | return bf_cmpu((const bf_t *)a, (const bf_t *)b); 479 | } 480 | static inline int 
bfdec_cmp_full(const bfdec_t *a, const bfdec_t *b) 481 | { 482 | return bf_cmp_full((const bf_t *)a, (const bf_t *)b); 483 | } 484 | static inline int bfdec_cmp(const bfdec_t *a, const bfdec_t *b) 485 | { 486 | return bf_cmp((const bf_t *)a, (const bf_t *)b); 487 | } 488 | static inline int bfdec_cmp_eq(const bfdec_t *a, const bfdec_t *b) 489 | { 490 | return bfdec_cmp(a, b) == 0; 491 | } 492 | static inline int bfdec_cmp_le(const bfdec_t *a, const bfdec_t *b) 493 | { 494 | return bfdec_cmp(a, b) <= 0; 495 | } 496 | static inline int bfdec_cmp_lt(const bfdec_t *a, const bfdec_t *b) 497 | { 498 | return bfdec_cmp(a, b) < 0; 499 | } 500 | 501 | int bfdec_add(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, 502 | bf_flags_t flags); 503 | int bfdec_sub(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, 504 | bf_flags_t flags); 505 | int bfdec_add_si(bfdec_t *r, const bfdec_t *a, int64_t b1, limb_t prec, 506 | bf_flags_t flags); 507 | int bfdec_mul(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, 508 | bf_flags_t flags); 509 | int bfdec_mul_si(bfdec_t *r, const bfdec_t *a, int64_t b1, limb_t prec, 510 | bf_flags_t flags); 511 | int bfdec_div(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, 512 | bf_flags_t flags); 513 | int bfdec_divrem(bfdec_t *q, bfdec_t *r, const bfdec_t *a, const bfdec_t *b, 514 | limb_t prec, bf_flags_t flags, int rnd_mode); 515 | int bfdec_rem(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, 516 | bf_flags_t flags, int rnd_mode); 517 | int bfdec_rint(bfdec_t *r, int rnd_mode); 518 | int bfdec_sqrt(bfdec_t *r, const bfdec_t *a, limb_t prec, bf_flags_t flags); 519 | int bfdec_round(bfdec_t *r, limb_t prec, bf_flags_t flags); 520 | int bfdec_get_int32(int *pres, const bfdec_t *a); 521 | int bfdec_pow_ui(bfdec_t *r, const bfdec_t *a, limb_t b); 522 | 523 | char *bfdec_ftoa(size_t *plen, const bfdec_t *a, limb_t prec, bf_flags_t flags); 524 | int bfdec_atof(bfdec_t *r, const char *str, const 
char **pnext, 525 | limb_t prec, bf_flags_t flags); 526 | 527 | /* the following functions are exported for testing only. */ 528 | extern const limb_t mp_pow_dec[LIMB_DIGITS + 1]; 529 | void bfdec_print_str(const char *str, const bfdec_t *a); 530 | static inline int bfdec_resize(bfdec_t *r, limb_t len) 531 | { 532 | return bf_resize((bf_t *)r, len); 533 | } 534 | int bfdec_normalize_and_round(bfdec_t *r, limb_t prec1, bf_flags_t flags); 535 | 536 | #endif /* LIBBF_H */ 537 | -------------------------------------------------------------------------------- /pi_1e5.sha1sum: -------------------------------------------------------------------------------- 1 | ddec8e3cd091057af4de7dd147bff14860c802e8 pi_1e5.txt 2 | -------------------------------------------------------------------------------- /pi_1e6.sha1sum: -------------------------------------------------------------------------------- 1 | ec497f9b8b0aad5fe967d0916bff266972081f50 pi_1e6.txt 2 | -------------------------------------------------------------------------------- /pi_1e7.sha1sum: -------------------------------------------------------------------------------- 1 | 056fe739ad2e3b427691e4e62eef8936ce2a88e4 pi_1e7.txt 2 | -------------------------------------------------------------------------------- /pi_1e8.sha1sum: -------------------------------------------------------------------------------- 1 | 23456396be72fb9a390e5f707c7bff7a1c3697f8 pi_1e8.txt 2 | -------------------------------------------------------------------------------- /pi_1e9.sha1sum: -------------------------------------------------------------------------------- 1 | 8ef30374165e5e4a11552a0d896e9e961ea13c33 pi_1e9.txt 2 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | Tiny Big Float library 2 | ---------------------- 3 | 4 | Copyright (c) 2017-2020 Fabrice Bellard 5 | 6 | LibBF is a small library to handle 
arbitrary precision binary or 7 | decimal floating point numbers. Its compiled size is about 90 KB of 8 | x86 code and has no dependency on other libraries. It is not the 9 | fastest library nor the smallest but it tries to be simple while using 10 | asymptotically optimal algorithms. The basic arithmetic operations 11 | have a near linear running time. 12 | 13 | The TinyPI example computes billions of digits of Pi using the 14 | Chudnovsky formula. 15 | 16 | 1) Features 17 | ----------- 18 | 19 | - Arbitrary precision floating point numbers in base 2 using the IEEE 20 | 754 semantics (including subnormal numbers, infinities and 21 | NaN). 22 | - All operations are exactly rounded using the 5 IEEE 754 rounding 23 | modes (round to nearest with ties to even or away from zero, round 24 | to zero, -/+ infinity). The additional non-deterministic faithful 25 | rounding mode is supported when a lower or deterministic running 26 | time is necessary. 27 | - Stateless API (each function takes as input the rounding mode, 28 | mantissa and exponent precisions in bits and returns the IEEE status 29 | flags). 30 | - The basic arithmetic operations (addition, subtraction, 31 | multiplication, division, square root) have a near linear running 32 | time. 33 | - Multiplication using a SIMD optimized Number Theoretic Transform. 34 | - Exactly rounded floating point input and output in any base between 35 | 2 and 36 with near linear running time. Floating point output can 36 | select the smallest number of digits to get the required precision. 37 | - Transcendental functions are supported (exp, log, pow, sin, cos, tan, 38 | asin, acos, atan, atan2). 39 | - Operations on arbitrarily large integers are supported by using a 40 | special "infinite" precision. Integer division with remainder and 41 | logical operations (assuming two's complement binary representation) 42 | are implemented. 
43 | - Arbitrary precision floating point numbers in base 10 corresponding 44 | to the IEEE 754 2008 semantics with the limitation that the mantissa 45 | is always normalized. The basic arithmetic operations, output and 46 | input are supported with a quadratic running time. 47 | - Easy to embed: a few C files need to be copied, the memory allocator 48 | can be redefined, the memory allocation failures are tested. 49 | - MIT license. 50 | 51 | 2) Compilation 52 | -------------- 53 | 54 | Edit the top of the Makefile to select the build options. By default, 55 | the MPFR library is used to compile the test tools (bftest and 56 | bfbench) but it is not needed to build libbf. The included SoftFP code 57 | (softfp* files) is only used by the bftest test tool. 58 | 59 | TinyPI example: the "tinypi" executable uses the portable code. The 60 | "tinypi-avx2" executable uses the AVX2 implementation. An x86 CPU of 61 | at least the Intel Haswell generation is necessary for AVX2. 62 | 63 | 3) Design principles 64 | -------------------- 65 | 66 | - Base 2 and IEEE 754 semantics were chosen so that it is possible to 67 | get good performance and to compare the results with other libraries 68 | or hardware implementations. Moreover, base 2 arbitrary precision is 69 | easier to analyse and implement. 70 | 71 | - The support of subnormal numbers and of a configurable number of 72 | bits for the exponent allows the exact emulation of IEEE 754 73 | floating point hardware. 74 | 75 | - The stateless API ensures that there is no global state to save and 76 | restore between operations. The rounding mode, subnormal flag and 77 | number of exponent bits are ORed into a single "flags" parameter to 78 | limit the verbosity of the API. The number of exponent bits 'n' is 79 | specified as '(M-n)' where M is the maximum number of exponent bits 80 | so that '0' always indicates the maximum number of exponent bits. 81 | 82 | - All the IEEE 754 status flags are returned by each operation. 
The 83 | user can easily OR them together when necessary. 84 | 85 | - Unlike other libraries (such as MPFR [2]), the numbers have no 86 | attached precision. The general rule is that each operation is 87 | internally computed with infinite precision and then rounded with 88 | the precision and rounding mode specified for the operation. 89 | 90 | - In many computations it is necessary to use arbitrarily large 91 | integers. LibBF supports them without adding another number type by 92 | providing a special "infinite" precision. There is a small overhead 93 | of course because they are manipulated as floating point numbers but 94 | there is no cost to convert between floating point numbers and 95 | integers. 96 | 97 | - The faithful rounding mode (i.e. the result is rounded to - or 98 | +infinity non deterministically) is supported for all operations. It 99 | usually gives a faster and deterministic running time. The 100 | transcendental functions, inverse or inverse square root are 101 | internally implemented to give a faithful rounding. When a 102 | non-faithful rounding is requested by the user, the Ziv rounding 103 | algorithm is invoked. 104 | 105 | 4) Implementation notes 106 | ----------------------- 107 | 108 | - The code was tested on a 64 bit x86 CPU. It should be portable to 109 | other CPUs. The portable version handles numbers with up to 4*10^16 110 | digits. The AVX2 version handles numbers with up to 8*10^12 digits. 111 | 112 | - 32 bits: the code compiles on 32 bit architectures but it is not 113 | designed to be efficient nor scalable in this case. The size of the 114 | numbers is limited to about 10 million digits. 115 | 116 | - The Number Theoretic Transform is not the fastest algorithm for 117 | small to medium numbers (i.e. a few million digits), but it gets 118 | better when the size of the numbers grows. There are no round-off 119 | errors as there are with the Fast Fourier Transform, the memory usage is much 120 | smaller and it is potentially easier to parallelize. 
This code 121 | contains an original SIMD (AVX2 on x86) implementation using 64 bit 122 | floating point numbers. It relies on the fact that the fused 123 | multiply accumulate (FMA) operation gives access to the full 124 | precision of the product of two 64 bit floating point numbers. The 125 | portable code relies on the fact that the C compiler supports a 126 | double word integer type (i.e. 128 bit integers on 64 bit architectures). The 127 | modulo operations were replaced with multiplications which are 128 | usually faster. 129 | 130 | - Base conversion: the algorithm is not the fastest one but it is 131 | simple and still gives a near-linear running time. 132 | 133 | - This library reuses some ideas from TachusPI ( 134 | http://bellard.org/pi/pi2700e9/tpi.html ). It is about 4 times 135 | slower to compute Pi but is much smaller and simpler. 136 | 137 | 5) Known limitations 138 | -------------------- 139 | 140 | - In some operations (such as the transcendental ones), there is no 141 | rigorous proof of the rounding error. We expect to improve it by 142 | reusing ideas from the MPFR algorithms. Some unlikely 143 | overflow/underflow cases are also not handled in exp or pow. 144 | 145 | - The transcendental operations are not speed optimized and do not use 146 | an asymptotically optimal algorithm (the running time is in 147 | O(n^(1/2)*M(n)) where M(n) is the time to multiply two n bit 148 | numbers). A possible solution would be to implement a binary 149 | splitting algorithm for exp and sin/cos (see [1]) and to use a 150 | Newton based inversion to get log and atan. 151 | 152 | - Memory allocation errors are not always correctly reported for the 153 | transcendental operations. 154 | 155 | 6) References 156 | ------------- 157 | 158 | [1] Modern Computer Arithmetic, Richard Brent and Paul Zimmermann, 159 | Cambridge University Press, 2010 160 | (https://members.loria.fr/PZimmermann/mca/pub226.html). 
161 | 162 | [2] The GNU MPFR Library (http://www.mpfr.org/) 163 | -------------------------------------------------------------------------------- /softfp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * SoftFP Library 3 | * 4 | * Copyright (c) 2016 Fabrice Bellard 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | * THE SOFTWARE. 
23 | */ 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "cutils.h" 30 | #include "softfp.h" 31 | /* Count the leading zero bits of a 32 bit value. Zero is handled explicitly and yields 32, because GCC documents __builtin_clz(0) as undefined. */ 32 | static inline int clz_u32(uint32_t a) 33 | { 34 | int r; 35 | if (a == 0) { 36 | r = 32; 37 | } else { 38 | r = __builtin_clz(a); 39 | } 40 | return r; 41 | } 42 | /* Count the leading zero bits of a 64 bit value; returns 64 for zero. */ 43 | static inline int clz_u64(uint64_t a) 44 | { 45 | int r; 46 | if (a == 0) { 47 | r = 64; 48 | } else 49 | { 50 | r = __builtin_clzll(a); 51 | } 52 | return r; 53 | } 54 | 55 | #ifdef HAVE_INT128 /* Count the leading zero bits of a 128 bit value; returns 128 for zero. The value is split into two 64 bit halves and the low half is only examined when the high half is zero. */ 56 | static inline int clz_u128(uint128_t a) 57 | { 58 | int r; 59 | if (a == 0) { 60 | r = 128; 61 | } else 62 | { 63 | uint64_t ah, al; 64 | ah = a >> 64; 65 | al = a; 66 | if (ah != 0) 67 | r = __builtin_clzll(ah); 68 | else 69 | r = __builtin_clzll(al) + 64; 70 | } 71 | return r; 72 | } 73 | #endif 74 | /* Instantiate the template once per supported float width; the 128 bit instance needs a 128 bit integer type. */ 75 | #define F_SIZE 32 76 | #include "softfp_template.h" 77 | 78 | #define F_SIZE 64 79 | #include "softfp_template.h" 80 | 81 | #ifdef HAVE_INT128 82 | 83 | #define F_SIZE 128 84 | #include "softfp_template.h" 85 | 86 | #endif 87 | 88 | -------------------------------------------------------------------------------- /softfp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SoftFP Library 3 | * 4 | * Copyright (c) 2016 Fabrice Bellard 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | * THE SOFTWARE. 23 | */ 24 | #ifndef SOFTFP_H 25 | #define SOFTFP_H 26 | 27 | #include 28 | #include "cutils.h" 29 | 30 | typedef enum { 31 | RM_RNE, /* Round to Nearest, ties to Even */ 32 | RM_RTZ, /* Round towards Zero */ 33 | RM_RDN, /* Round Down */ 34 | RM_RUP, /* Round Up */ 35 | RM_RMM, /* Round to Nearest, ties to Max Magnitude */ 36 | } RoundingModeEnum; 37 | 38 | #define FFLAG_INVALID_OP (1 << 4) 39 | #define FFLAG_DIVIDE_ZERO (1 << 3) 40 | #define FFLAG_OVERFLOW (1 << 2) 41 | #define FFLAG_UNDERFLOW (1 << 1) 42 | #define FFLAG_INEXACT (1 << 0) 43 | 44 | #define FCLASS_NINF (1 << 0) 45 | #define FCLASS_NNORMAL (1 << 1) 46 | #define FCLASS_NSUBNORMAL (1 << 2) 47 | #define FCLASS_NZERO (1 << 3) 48 | #define FCLASS_PZERO (1 << 4) 49 | #define FCLASS_PSUBNORMAL (1 << 5) 50 | #define FCLASS_PNORMAL (1 << 6) 51 | #define FCLASS_PINF (1 << 7) 52 | #define FCLASS_SNAN (1 << 8) 53 | #define FCLASS_QNAN (1 << 9) 54 | 55 | typedef uint32_t sfloat32; 56 | typedef uint64_t sfloat64; 57 | #ifdef HAVE_INT128 58 | typedef uint128_t sfloat128; 59 | #endif 60 | 61 | /* 32 bit floats */ 62 | 63 | #define FSIGN_MASK32 (1 << 31) 64 | 65 | sfloat32 add_sf32(sfloat32 a, sfloat32 b, RoundingModeEnum rm, uint32_t *pfflags); 66 | sfloat32 sub_sf32(sfloat32 a, sfloat32 b, RoundingModeEnum rm, uint32_t *pfflags); 67 | sfloat32 mul_sf32(sfloat32 a, sfloat32 b, RoundingModeEnum rm, uint32_t *pfflags); 68 | sfloat32 div_sf32(sfloat32 a, sfloat32 b, RoundingModeEnum rm, uint32_t *pfflags); 69 | 
sfloat32 sqrt_sf32(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags); 70 | sfloat32 fma_sf32(sfloat32 a, sfloat32 b, sfloat32 c, RoundingModeEnum rm, uint32_t *pfflags); 71 | 72 | sfloat32 min_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags); 73 | sfloat32 max_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags); 74 | int eq_quiet_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags); 75 | int le_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags); 76 | int lt_sf32(sfloat32 a, sfloat32 b, uint32_t *pfflags); 77 | uint32_t fclass_sf32(sfloat32 a); 78 | 79 | sfloat64 cvt_sf32_sf64(sfloat32 a, uint32_t *pfflags); 80 | sfloat32 cvt_sf64_sf32(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 81 | int32_t cvt_sf32_i32(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags); 82 | uint32_t cvt_sf32_u32(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags); 83 | int64_t cvt_sf32_i64(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags); 84 | uint64_t cvt_sf32_u64(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags); 85 | #ifdef HAVE_INT128 86 | int128_t cvt_sf32_i128(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags); 87 | uint128_t cvt_sf32_u128(sfloat32 a, RoundingModeEnum rm, uint32_t *pfflags); 88 | #endif 89 | sfloat32 cvt_i32_sf32(int32_t a, RoundingModeEnum rm, uint32_t *pfflags); 90 | sfloat32 cvt_u32_sf32(uint32_t a, RoundingModeEnum rm, uint32_t *pfflags); 91 | sfloat32 cvt_i64_sf32(int64_t a, RoundingModeEnum rm, uint32_t *pfflags); 92 | sfloat32 cvt_u64_sf32(uint64_t a, RoundingModeEnum rm, uint32_t *pfflags); 93 | #ifdef HAVE_INT128 94 | sfloat32 cvt_i128_sf32(int128_t a, RoundingModeEnum rm, uint32_t *pfflags); 95 | sfloat32 cvt_u128_sf32(uint128_t a, RoundingModeEnum rm, uint32_t *pfflags); 96 | #endif 97 | 98 | /* 64 bit floats */ 99 | 100 | #define FSIGN_MASK64 ((uint64_t)1 << 63) 101 | 102 | sfloat64 add_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags); 103 | sfloat64 sub_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags); 104 | sfloat64 
mul_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags); 105 | sfloat64 div_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags); 106 | sfloat64 sqrt_sf64(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 107 | sfloat64 fma_sf64(sfloat64 a, sfloat64 b, sfloat64 c, RoundingModeEnum rm, uint32_t *pfflags); 108 | 109 | sfloat64 min_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags); 110 | sfloat64 max_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags); 111 | int eq_quiet_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags); 112 | int le_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags); 113 | int lt_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags); 114 | uint32_t fclass_sf64(sfloat64 a); 115 | 116 | sfloat64 cvt_sf32_sf64(sfloat32 a, uint32_t *pfflags); 117 | sfloat32 cvt_sf64_sf32(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 118 | int32_t cvt_sf64_i32(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 119 | uint32_t cvt_sf64_u32(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 120 | int64_t cvt_sf64_i64(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 121 | uint64_t cvt_sf64_u64(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 122 | #ifdef HAVE_INT128 123 | int128_t cvt_sf64_i128(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 124 | uint128_t cvt_sf64_u128(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 125 | #endif 126 | sfloat64 cvt_i32_sf64(int32_t a, RoundingModeEnum rm, uint32_t *pfflags); 127 | sfloat64 cvt_u32_sf64(uint32_t a, RoundingModeEnum rm, uint32_t *pfflags); 128 | sfloat64 cvt_i64_sf64(int64_t a, RoundingModeEnum rm, uint32_t *pfflags); 129 | sfloat64 cvt_u64_sf64(uint64_t a, RoundingModeEnum rm, uint32_t *pfflags); 130 | #ifdef HAVE_INT128 131 | sfloat64 cvt_i128_sf64(int128_t a, RoundingModeEnum rm, uint32_t *pfflags); 132 | sfloat64 cvt_u128_sf64(uint128_t a, RoundingModeEnum rm, uint32_t *pfflags); 133 | #endif 134 | 135 | /* 128 bit floats */ 136 | 137 | #ifdef HAVE_INT128 138 | 139 | #define 
FSIGN_MASK128 ((uint128_t)1 << 127) 140 | 141 | sfloat128 add_sf128(sfloat128 a, sfloat128 b, RoundingModeEnum rm, uint32_t *pfflags); 142 | sfloat128 sub_sf128(sfloat128 a, sfloat128 b, RoundingModeEnum rm, uint32_t *pfflags); 143 | sfloat128 mul_sf128(sfloat128 a, sfloat128 b, RoundingModeEnum rm, uint32_t *pfflags); 144 | sfloat128 div_sf128(sfloat128 a, sfloat128 b, RoundingModeEnum rm, uint32_t *pfflags); 145 | sfloat128 sqrt_sf128(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 146 | sfloat128 fma_sf128(sfloat128 a, sfloat128 b, sfloat128 c, RoundingModeEnum rm, uint32_t *pfflags); 147 | 148 | sfloat128 min_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags); 149 | sfloat128 max_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags); 150 | int eq_quiet_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags); 151 | int le_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags); 152 | int lt_sf128(sfloat128 a, sfloat128 b, uint32_t *pfflags); 153 | uint32_t fclass_sf128(sfloat128 a); 154 | 155 | sfloat128 cvt_sf32_sf128(sfloat32 a, uint32_t *pfflags); 156 | sfloat32 cvt_sf128_sf32(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 157 | sfloat128 cvt_sf64_sf128(sfloat64 a, uint32_t *pfflags); 158 | sfloat64 cvt_sf128_sf64(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 159 | 160 | int32_t cvt_sf128_i32(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 161 | uint32_t cvt_sf128_u32(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 162 | int64_t cvt_sf128_i64(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 163 | uint64_t cvt_sf128_u64(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 164 | int128_t cvt_sf128_i128(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 165 | uint128_t cvt_sf128_u128(sfloat128 a, RoundingModeEnum rm, uint32_t *pfflags); 166 | sfloat128 cvt_i32_sf128(int32_t a, RoundingModeEnum rm, uint32_t *pfflags); 167 | sfloat128 cvt_u32_sf128(uint32_t a, RoundingModeEnum rm, uint32_t *pfflags); 168 | sfloat128 
cvt_i64_sf128(int64_t a, RoundingModeEnum rm, uint32_t *pfflags); 169 | sfloat128 cvt_u64_sf128(uint64_t a, RoundingModeEnum rm, uint32_t *pfflags); 170 | sfloat128 cvt_i128_sf128(int128_t a, RoundingModeEnum rm, uint32_t *pfflags); 171 | sfloat128 cvt_u128_sf128(uint128_t a, RoundingModeEnum rm, uint32_t *pfflags); 172 | 173 | #endif 174 | 175 | #endif /* SOFTFP_H */ 176 | -------------------------------------------------------------------------------- /softfp_template.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SoftFP Library 3 | * 4 | * Copyright (c) 2016 Fabrice Bellard 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | * THE SOFTWARE. 
23 | */ /* Per-width configuration: F_UINT holds one float, F_ULONG (when defined) is a double-width integer, F_UHALF a half-width one; MANT_SIZE and EXP_SIZE are the widths of the mantissa and exponent fields. */ 24 | #if F_SIZE == 32 25 | #define F_UINT uint32_t 26 | #define F_ULONG uint64_t 27 | #define MANT_SIZE 23 28 | #define EXP_SIZE 8 29 | #elif F_SIZE == 64 30 | #define F_UHALF uint32_t 31 | #define F_UINT uint64_t 32 | #ifdef HAVE_INT128 33 | #define F_ULONG uint128_t 34 | #endif 35 | #define MANT_SIZE 52 36 | #define EXP_SIZE 11 37 | #elif F_SIZE == 128 38 | #define F_UHALF uint64_t 39 | #define F_UINT uint128_t 40 | #define MANT_SIZE 112 41 | #define EXP_SIZE 15 42 | #else 43 | #error unsupported F_SIZE 44 | #endif 45 | /* Field masks and internal working sizes derived from the configuration above. RND_SIZE is the number of extra low-order bits kept below the mantissa before rounding. */ 46 | #define EXP_MASK ((1 << EXP_SIZE) - 1) 47 | #define MANT_MASK (((F_UINT)1 << MANT_SIZE) - 1) 48 | #define SIGN_MASK ((F_UINT)1 << (F_SIZE - 1)) 49 | #define IMANT_SIZE (F_SIZE - 2) /* internal mantissa size */ 50 | #define RND_SIZE (IMANT_SIZE - MANT_SIZE) 51 | #define QNAN_MASK ((F_UINT)1 << (MANT_SIZE - 1)) 52 | /* The defines below map the generic names used in this template onto width-specific identifiers through glue() (from cutils.h, presumably token pasting -- confirm), e.g. add_sf becomes add_sf32 when F_SIZE is 32. */ 53 | /* quiet NaN */ 54 | #define F_QNAN glue(F_QNAN, F_SIZE) 55 | #define clz glue(clz_u, F_SIZE) 56 | #define pack_sf glue(pack_sf, F_SIZE) 57 | #define unpack_sf glue(unpack_sf, F_SIZE) 58 | #define rshift_rnd glue(rshift_rnd, F_SIZE) 59 | #define round_pack_sf glue(roundpack_sf, F_SIZE) 60 | #define normalize_sf glue(normalize_sf, F_SIZE) 61 | #define normalize2_sf glue(normalize2_sf, F_SIZE) 62 | #define issignan_sf glue(issignan_sf, F_SIZE) 63 | #define isnan_sf glue(isnan_sf, F_SIZE) 64 | #define add_sf glue(add_sf, F_SIZE) 65 | #define mul_sf glue(mul_sf, F_SIZE) 66 | #define fma_sf glue(fma_sf, F_SIZE) 67 | #define div_sf glue(div_sf, F_SIZE) 68 | #define sqrt_sf glue(sqrt_sf, F_SIZE) 69 | #define normalize_subnormal_sf glue(normalize_subnormal_sf, F_SIZE) 70 | #define divrem_u glue(divrem_u, F_SIZE) 71 | #define sqrtrem_u glue(sqrtrem_u, F_SIZE) 72 | #define mul_u glue(mul_u, F_SIZE) 73 | #define cvt_sf32_sf glue(cvt_sf32_sf, F_SIZE) 74 | #define cvt_sf64_sf glue(cvt_sf64_sf, F_SIZE) 75 | /* Quiet NaN returned by the operations: exponent all ones, only the top mantissa bit set, sign clear. */ 76 | static const F_UINT F_QNAN = (((F_UINT)EXP_MASK << MANT_SIZE) | ((F_UINT)1 << (MANT_SIZE - 1))); 77 | /* Assemble a float bit pattern from sign, exponent field and mantissa. a_mant is masked with MANT_MASK (so an implicit leading 1 above the field is dropped); a_sign and a_exp are not masked. */ 78 | static inline 
F_UINT pack_sf(uint32_t a_sign, uint32_t a_exp, F_UINT a_mant) 79 | { 80 | return ((F_UINT)a_sign << (F_SIZE - 1)) | 81 | ((F_UINT)a_exp << MANT_SIZE) | 82 | (a_mant & MANT_MASK); 83 | } 84 | /* Split 'a' into its fields: the sign goes to *pa_sign, the raw exponent field to *pa_exp, and the raw mantissa field is returned (no implicit leading 1 is added). */ 85 | static inline F_UINT unpack_sf(uint32_t *pa_sign, int32_t *pa_exp, 86 | F_UINT a) 87 | { 88 | *pa_sign = a >> (F_SIZE - 1); 89 | *pa_exp = (a >> MANT_SIZE) & EXP_MASK; 90 | return a & MANT_MASK; 91 | } 92 | /* Shift 'a' right by 'd' bits and OR into bit 0 whether any of the shifted-out bits was set, so that inexactness stays visible to the rounding step. d >= F_SIZE collapses to (a != 0). */ 93 | static F_UINT rshift_rnd(F_UINT a, int d) 94 | { 95 | F_UINT mask; 96 | if (d != 0) { 97 | if (d >= F_SIZE) { 98 | a = (a != 0); 99 | } else { 100 | mask = ((F_UINT)1 << d) - 1; 101 | a = (a >> d) | ((a & mask) != 0); 102 | } 103 | } 104 | return a; 105 | } 106 | 107 | /* a_mant is considered to have its MSB at F_SIZE - 2 bits. Rounds the low RND_SIZE bits away according to rm, ORs underflow / inexact / overflow into *pfflags and returns the packed float. */ 108 | static F_UINT round_pack_sf(uint32_t a_sign, int a_exp, F_UINT a_mant, 109 | RoundingModeEnum rm, uint32_t *pfflags) 110 | { 111 | int diff; 112 | uint32_t addend, rnd_bits; 113 | 114 | switch(rm) { 115 | case RM_RNE: 116 | case RM_RMM: 117 | addend = (1 << (RND_SIZE - 1)); 118 | break; 119 | case RM_RTZ: 120 | addend = 0; 121 | break; 122 | default: 123 | case RM_RDN: 124 | case RM_RUP: 125 | // printf("s=%d rm=%d m=%x\n", a_sign, rm, a_mant); 126 | if (a_sign ^ (rm & 1)) 127 | addend = (1 << RND_SIZE) - 1; 128 | else 129 | addend = 0; 130 | break; 131 | } 132 | 133 | /* potentially subnormal */ 134 | if (a_exp <= 0) { 135 | BOOL is_subnormal; 136 | /* Note: we set the underflow flag if the rounded result 137 | is subnormal and inexact */ 138 | is_subnormal = (a_exp < 0 || 139 | (a_mant + addend) < ((F_UINT)1 << (F_SIZE - 1))); 140 | diff = 1 - a_exp; 141 | a_mant = rshift_rnd(a_mant, diff); 142 | rnd_bits = a_mant & ((1 << RND_SIZE ) - 1); 143 | if (is_subnormal && rnd_bits != 0) { 144 | *pfflags |= FFLAG_UNDERFLOW; 145 | } 146 | a_exp = 1; 147 | } else { 148 | rnd_bits = a_mant & ((1 << RND_SIZE ) - 1); 149 | } 150 | if (rnd_bits != 0) 151 | *pfflags |= FFLAG_INEXACT; 152 | a_mant = (a_mant + addend) >> RND_SIZE; 153 | /* half 
way: select even result */ 154 | if (rm == RM_RNE && rnd_bits == (1 << (RND_SIZE - 1))) 155 | a_mant &= ~1; 156 | /* Note the rounding adds at least 1, so this is the maximum 157 | value */ 158 | a_exp += a_mant >> (MANT_SIZE + 1); 159 | if (a_mant <= MANT_MASK) { 160 | /* denormalized or zero */ 161 | a_exp = 0; 162 | } else if (a_exp >= EXP_MASK) { 163 | /* overflow */ 164 | if (addend == 0) { 165 | a_exp = EXP_MASK - 1; 166 | a_mant = MANT_MASK; 167 | } else { 168 | /* infinity */ 169 | a_exp = EXP_MASK; 170 | a_mant = 0; 171 | } 172 | *pfflags |= FFLAG_OVERFLOW | FFLAG_INEXACT; 173 | } 174 | return pack_sf(a_sign, a_exp, a_mant); 175 | } 176 | 177 | /* a_mant is considered to have at most F_SIZE - 1 bits. The mantissa is shifted left until its MSB is at bit F_SIZE - 2 (the exponent is reduced by the same amount) and then handed to round_pack_sf(). */ 178 | static F_UINT normalize_sf(uint32_t a_sign, int a_exp, F_UINT a_mant, 179 | RoundingModeEnum rm, uint32_t *pfflags) 180 | { 181 | int shift; 182 | shift = clz(a_mant) - (F_SIZE - 1 - IMANT_SIZE); 183 | assert(shift >= 0); 184 | a_exp -= shift; 185 | a_mant <<= shift; 186 | return round_pack_sf(a_sign, a_exp, a_mant, rm, pfflags); 187 | } 188 | 189 | /* same as normalize_sf() but with a double word mantissa. 
a_mant1 is 190 | considered to have at most F_SIZE - 1 bits */ 191 | static F_UINT normalize2_sf(uint32_t a_sign, int a_exp, F_UINT a_mant1, F_UINT a_mant0, 192 | RoundingModeEnum rm, uint32_t *pfflags) 193 | { 194 | int l, shift; 195 | if (a_mant1 == 0) { 196 | l = F_SIZE + clz(a_mant0); 197 | } else { 198 | l = clz(a_mant1); 199 | } 200 | shift = l - (F_SIZE - 1 - IMANT_SIZE); 201 | assert(shift >= 0); 202 | a_exp -= shift; 203 | if (shift == 0) { 204 | a_mant1 |= (a_mant0 != 0); 205 | } else if (shift < F_SIZE) { 206 | a_mant1 = (a_mant1 << shift) | (a_mant0 >> (F_SIZE - shift)); 207 | a_mant0 <<= shift; 208 | a_mant1 |= (a_mant0 != 0); 209 | } else { 210 | a_mant1 = a_mant0 << (shift - F_SIZE); 211 | } 212 | return round_pack_sf(a_sign, a_exp, a_mant1, rm, pfflags); 213 | } 214 | /* Return non-zero if 'a' is a signaling NaN: exponent field all ones, top mantissa bit (the quiet bit) clear and mantissa non-zero. */ 215 | BOOL issignan_sf(F_UINT a) 216 | { 217 | uint32_t a_exp1; 218 | F_UINT a_mant; 219 | a_exp1 = (a >> (MANT_SIZE - 1)) & ((1 << (EXP_SIZE + 1)) - 1); 220 | a_mant = a & MANT_MASK; 221 | return (a_exp1 == (2 * EXP_MASK) && a_mant != 0); 222 | } 223 | /* Return non-zero if 'a' is any NaN (exponent field all ones, mantissa non-zero). */ 224 | BOOL isnan_sf(F_UINT a) 225 | { 226 | uint32_t a_exp; 227 | F_UINT a_mant; 228 | a_exp = (a >> MANT_SIZE) & EXP_MASK; 229 | a_mant = a & MANT_MASK; 230 | return (a_exp == EXP_MASK && a_mant != 0); 231 | } 232 | 233 | /* Return a + b rounded according to rm; exception flags are ORed into *pfflags. A NaN operand yields F_QNAN (invalid is raised for a signaling NaN), as does adding infinities of opposite signs. The mantissas are worked on with 3 extra low-order bits. */ 234 | F_UINT add_sf(F_UINT a, F_UINT b, RoundingModeEnum rm, 235 | uint32_t *pfflags) 236 | { 237 | uint32_t a_sign, b_sign, a_exp, b_exp; 238 | F_UINT tmp, a_mant, b_mant; 239 | 240 | /* swap so that abs(a) >= abs(b) */ 241 | if ((a & ~SIGN_MASK) < (b & ~SIGN_MASK)) { 242 | tmp = a; 243 | a = b; 244 | b = tmp; 245 | } 246 | a_sign = a >> (F_SIZE - 1); 247 | b_sign = b >> (F_SIZE - 1); 248 | a_exp = (a >> MANT_SIZE) & EXP_MASK; 249 | b_exp = (b >> MANT_SIZE) & EXP_MASK; 250 | a_mant = (a & MANT_MASK) << 3; 251 | b_mant = (b & MANT_MASK) << 3; 252 | if (unlikely(a_exp == EXP_MASK)) { 253 | if (a_mant != 0) { 254 | /* NaN result */ 255 | if (!(a_mant & (QNAN_MASK << 3)) || issignan_sf(b)) 256 | *pfflags |= 
FFLAG_INVALID_OP; 257 | return F_QNAN; 258 | } else if (b_exp == EXP_MASK && a_sign != b_sign) { 259 | *pfflags |= FFLAG_INVALID_OP; 260 | return F_QNAN; 261 | } else { 262 | /* infinity */ 263 | return a; 264 | } 265 | } 266 | if (a_exp == 0) { 267 | a_exp = 1; 268 | } else { 269 | a_mant |= (F_UINT)1 << (MANT_SIZE + 3); 270 | } 271 | if (b_exp == 0) { 272 | b_exp = 1; 273 | } else { 274 | b_mant |= (F_UINT)1 << (MANT_SIZE + 3); 275 | } 276 | b_mant = rshift_rnd(b_mant, a_exp - b_exp); 277 | if (a_sign == b_sign) { 278 | /* same signs : add the absolute values */ 279 | a_mant += b_mant; 280 | } else { 281 | /* different signs : subtract the absolute values */ 282 | a_mant -= b_mant; 283 | if (a_mant == 0) { 284 | /* zero result : the sign needs a specific handling */ 285 | a_sign = (rm == RM_RDN); 286 | } 287 | } 288 | a_exp += (RND_SIZE - 3); 289 | return normalize_sf(a_sign, a_exp, a_mant, rm, pfflags); 290 | } 291 | /* Return a - b, computed as a + (-b) by flipping the sign bit of b. */ 292 | F_UINT glue(sub_sf, F_SIZE)(F_UINT a, F_UINT b, RoundingModeEnum rm, 293 | uint32_t *pfflags) 294 | { 295 | return add_sf(a, b ^ SIGN_MASK, rm, pfflags); 296 | } 297 | /* Shift a subnormal mantissa left until its MSB is at bit MANT_SIZE and store the matching exponent (1 - shift) in *pa_exp. The callers in this file test for a zero mantissa before calling. */ 298 | static inline F_UINT normalize_subnormal_sf(int32_t *pa_exp, F_UINT a_mant) 299 | { 300 | int shift; 301 | shift = MANT_SIZE - ((F_SIZE - 1 - clz(a_mant))); 302 | *pa_exp = 1 - shift; 303 | return a_mant << shift; 304 | } 305 | 306 | #ifdef F_ULONG 307 | /* Full-width multiply using the double-width type F_ULONG: returns the high word of a * b and stores the low word in *plow. */ 308 | static F_UINT mul_u(F_UINT *plow, F_UINT a, F_UINT b) 309 | { 310 | F_ULONG r; 311 | r = (F_ULONG)a * (F_ULONG)b; 312 | *plow = r; 313 | return r >> F_SIZE; 314 | } 315 | 316 | #else 317 | 318 | #define FH_SIZE (F_SIZE / 2) 319 | /* Same contract without a double-width type: the product is built from four half-word partial products, with carries propagated through 'c'. */ 320 | static F_UINT mul_u(F_UINT *plow, F_UINT a, F_UINT b) 321 | { 322 | F_UHALF a0, a1, b0, b1, r0, r1, r2, r3; 323 | F_UINT r00, r01, r10, r11, c; 324 | a0 = a; 325 | a1 = a >> FH_SIZE; 326 | b0 = b; 327 | b1 = b >> FH_SIZE; 328 | 329 | r00 = (F_UINT)a0 * (F_UINT)b0; 330 | r01 = (F_UINT)a0 * (F_UINT)b1; 331 | r10 = (F_UINT)a1 * (F_UINT)b0; 332 | r11 = (F_UINT)a1 * (F_UINT)b1; 333 | 334 | 
r0 = r00; 335 | c = (r00 >> FH_SIZE) + (F_UHALF)r01 + (F_UHALF)r10; 336 | r1 = c; 337 | c = (c >> FH_SIZE) + (r01 >> FH_SIZE) + (r10 >> FH_SIZE) + (F_UHALF)r11; 338 | r2 = c; 339 | r3 = (c >> FH_SIZE) + (r11 >> FH_SIZE); 340 | 341 | *plow = ((F_UINT)r1 << FH_SIZE) | r0; 342 | return ((F_UINT)r3 << FH_SIZE) | r2; 343 | } 344 | 345 | #undef FH_SIZE 346 | 347 | #endif 348 | /* Return a * b rounded according to rm; exception flags are ORed into *pfflags. A NaN operand yields F_QNAN (invalid is raised for a signaling NaN); infinity times zero raises invalid. The low word of the mantissa product is folded into bit 0 of the high word before rounding. */ 349 | F_UINT mul_sf(F_UINT a, F_UINT b, RoundingModeEnum rm, 350 | uint32_t *pfflags) 351 | { 352 | uint32_t a_sign, b_sign, r_sign; 353 | int32_t a_exp, b_exp, r_exp; 354 | F_UINT a_mant, b_mant, r_mant, r_mant_low; 355 | 356 | a_sign = a >> (F_SIZE - 1); 357 | b_sign = b >> (F_SIZE - 1); 358 | r_sign = a_sign ^ b_sign; 359 | a_exp = (a >> MANT_SIZE) & EXP_MASK; 360 | b_exp = (b >> MANT_SIZE) & EXP_MASK; 361 | a_mant = a & MANT_MASK; 362 | b_mant = b & MANT_MASK; 363 | if (a_exp == EXP_MASK || b_exp == EXP_MASK) { 364 | if (isnan_sf(a) || isnan_sf(b)) { 365 | if (issignan_sf(a) || issignan_sf(b)) { 366 | *pfflags |= FFLAG_INVALID_OP; 367 | } 368 | return F_QNAN; 369 | } else { 370 | /* infinity */ 371 | if ((a_exp == EXP_MASK && (b_exp == 0 && b_mant == 0)) || 372 | (b_exp == EXP_MASK && (a_exp == 0 && a_mant == 0))) { 373 | *pfflags |= FFLAG_INVALID_OP; 374 | return F_QNAN; 375 | } else { 376 | return pack_sf(r_sign, EXP_MASK, 0); 377 | } 378 | } 379 | } 380 | if (a_exp == 0) { 381 | if (a_mant == 0) 382 | return pack_sf(r_sign, 0, 0); /* zero */ 383 | a_mant = normalize_subnormal_sf(&a_exp, a_mant); 384 | } else { 385 | a_mant |= (F_UINT)1 << MANT_SIZE; 386 | } 387 | if (b_exp == 0) { 388 | if (b_mant == 0) 389 | return pack_sf(r_sign, 0, 0); /* zero */ 390 | b_mant = normalize_subnormal_sf(&b_exp, b_mant); 391 | } else { 392 | b_mant |= (F_UINT)1 << MANT_SIZE; 393 | } 394 | r_exp = a_exp + b_exp - (1 << (EXP_SIZE - 1)) + 2; 395 | 396 | r_mant = mul_u(&r_mant_low,a_mant << RND_SIZE, b_mant << (RND_SIZE + 1)); 397 | r_mant |= (r_mant_low != 0); 398 | return normalize_sf(r_sign, r_exp, r_mant, rm, 
pfflags); 399 | } 400 | 401 | /* fused multiply and add: a * b + c. The product is kept as a double word (r_mant1:r_mant0) and added to c before a single final rounding according to rm; exception flags are ORed into *pfflags. */ 402 | F_UINT fma_sf(F_UINT a, F_UINT b, F_UINT c, RoundingModeEnum rm, 403 | uint32_t *pfflags) 404 | { 405 | uint32_t a_sign, b_sign, c_sign, r_sign; 406 | int32_t a_exp, b_exp, c_exp, r_exp, shift; 407 | F_UINT a_mant, b_mant, c_mant, r_mant1, r_mant0, c_mant1, c_mant0, mask; 408 | 409 | a_sign = a >> (F_SIZE - 1); 410 | b_sign = b >> (F_SIZE - 1); 411 | c_sign = c >> (F_SIZE - 1); 412 | r_sign = a_sign ^ b_sign; 413 | a_exp = (a >> MANT_SIZE) & EXP_MASK; 414 | b_exp = (b >> MANT_SIZE) & EXP_MASK; 415 | c_exp = (c >> MANT_SIZE) & EXP_MASK; 416 | a_mant = a & MANT_MASK; 417 | b_mant = b & MANT_MASK; 418 | c_mant = c & MANT_MASK; 419 | if (a_exp == EXP_MASK || b_exp == EXP_MASK || c_exp == EXP_MASK) { 420 | if (isnan_sf(a) || isnan_sf(b) || isnan_sf(c)) { 421 | if (issignan_sf(a) || issignan_sf(b) || issignan_sf(c)) { 422 | *pfflags |= FFLAG_INVALID_OP; 423 | } 424 | return F_QNAN; 425 | } else { 426 | /* infinities */ 427 | if ((a_exp == EXP_MASK && (b_exp == 0 && b_mant == 0)) || 428 | (b_exp == EXP_MASK && (a_exp == 0 && a_mant == 0)) || 429 | ((a_exp == EXP_MASK || b_exp == EXP_MASK) && 430 | (c_exp == EXP_MASK && r_sign != c_sign))) { 431 | *pfflags |= FFLAG_INVALID_OP; 432 | return F_QNAN; 433 | } else if (c_exp == EXP_MASK) { 434 | return pack_sf(c_sign, EXP_MASK, 0); 435 | } else { 436 | return pack_sf(r_sign, EXP_MASK, 0); 437 | } 438 | } 439 | } 440 | if (a_exp == 0) { 441 | if (a_mant == 0) 442 | goto mul_zero; 443 | a_mant = normalize_subnormal_sf(&a_exp, a_mant); 444 | } else { 445 | a_mant |= (F_UINT)1 << MANT_SIZE; 446 | } 447 | if (b_exp == 0) { 448 | if (b_mant == 0) { 449 | mul_zero: 450 | if (c_exp == 0 && c_mant == 0) { 451 | if (c_sign != r_sign) 452 | r_sign = (rm == RM_RDN); 453 | return pack_sf(r_sign, 0, 0); 454 | } else { 455 | return c; 456 | } 457 | } 458 | b_mant = normalize_subnormal_sf(&b_exp, b_mant); 459 | } else { 460 | b_mant |= (F_UINT)1 << MANT_SIZE; 461 | } 462 | /* 
multiply */ 463 | r_exp = a_exp + b_exp - (1 << (EXP_SIZE - 1)) + 3; 464 | 465 | r_mant1 = mul_u(&r_mant0, a_mant << RND_SIZE, b_mant << RND_SIZE); 466 | /* normalize to F_SIZE - 3 */ 467 | if (r_mant1 < ((F_UINT)1 << (F_SIZE - 3))) { 468 | r_mant1 = (r_mant1 << 1) | (r_mant0 >> (F_SIZE - 1)); 469 | r_mant0 <<= 1; 470 | r_exp--; 471 | } 472 | 473 | /* add */ 474 | if (c_exp == 0) { 475 | if (c_mant == 0) { 476 | /* add zero */ 477 | r_mant1 |= (r_mant0 != 0); 478 | return normalize_sf(r_sign, r_exp, r_mant1, rm, pfflags); 479 | } 480 | c_mant = normalize_subnormal_sf(&c_exp, c_mant); 481 | } else { 482 | c_mant |= (F_UINT)1 << MANT_SIZE; 483 | } 484 | c_exp++; 485 | c_mant1 = c_mant << (RND_SIZE - 1); 486 | c_mant0 = 0; 487 | 488 | // printf("r_s=%d r_exp=%d r_mant=%08x %08x\n", r_sign, r_exp, (uint32_t)r_mant1, (uint32_t)r_mant0); 489 | // printf("c_s=%d c_exp=%d c_mant=%08x %08x\n", c_sign, c_exp, (uint32_t)c_mant1, (uint32_t)c_mant0); 490 | 491 | /* ensure that abs(r) >= abs(c) */ 492 | if (!(r_exp > c_exp || (r_exp == c_exp && r_mant1 >= c_mant1))) { 493 | F_UINT tmp; 494 | int32_t c_tmp; 495 | /* swap */ 496 | tmp = r_mant1; r_mant1 = c_mant1; c_mant1 = tmp; 497 | tmp = r_mant0; r_mant0 = c_mant0; c_mant0 = tmp; 498 | c_tmp = r_exp; r_exp = c_exp; c_exp = c_tmp; 499 | c_tmp = r_sign; r_sign = c_sign; c_sign = c_tmp; 500 | } 501 | /* right shift c_mant */ 502 | shift = r_exp - c_exp; 503 | if (shift >= 2 * F_SIZE) { 504 | c_mant0 = (c_mant0 | c_mant1) != 0; 505 | c_mant1 = 0; 506 | } else if (shift >= F_SIZE + 1) { 507 | c_mant0 = rshift_rnd(c_mant1, shift - F_SIZE); 508 | c_mant1 = 0; 509 | } else if (shift == F_SIZE) { 510 | c_mant0 = c_mant1 | (c_mant0 != 0); 511 | c_mant1 = 0; 512 | } else if (shift != 0) { 513 | mask = ((F_UINT)1 << shift) - 1; 514 | c_mant0 = (c_mant1 << (F_SIZE - shift)) | (c_mant0 >> shift) | ((c_mant0 & mask) != 0); 515 | c_mant1 = c_mant1 >> shift; 516 | } 517 | // printf(" r_mant=%08x %08x\n", (uint32_t)r_mant1, (uint32_t)r_mant0); 
518 | // printf(" c_mant=%08x %08x\n", (uint32_t)c_mant1, (uint32_t)c_mant0); 519 | /* add or subtract */ 520 | if (r_sign == c_sign) { 521 | r_mant0 += c_mant0; 522 | r_mant1 += c_mant1 + (r_mant0 < c_mant0); 523 | } else { 524 | F_UINT tmp; 525 | tmp = r_mant0; 526 | r_mant0 -= c_mant0; 527 | r_mant1 = r_mant1 - c_mant1 - (r_mant0 > tmp); 528 | if ((r_mant0 | r_mant1) == 0) { 529 | /* zero result : the sign needs a specific handling */ 530 | r_sign = (rm == RM_RDN); 531 | } 532 | } 533 | #if 0 534 | // printf(" r1_mant=%08x %08x\n", (uint32_t)r_mant1, (uint32_t)r_mant0); 535 | /* normalize */ 536 | if (r_mant1 == 0) { 537 | r_mant1 = r_mant0; 538 | r_exp -= F_SIZE; 539 | } else { 540 | shift = clz(r_mant1) - (F_SIZE - 1 - IMANT_SIZE); 541 | if (shift != 0) { 542 | r_mant1 = (r_mant1 << shift) | (r_mant0 >> (F_SIZE - shift)); 543 | r_mant0 <<= shift; 544 | r_exp -= shift; 545 | } 546 | r_mant1 |= (r_mant0 != 0); 547 | } 548 | return normalize_sf(r_sign, r_exp, r_mant1, rm, pfflags); 549 | #else 550 | return normalize2_sf(r_sign, r_exp, r_mant1, r_mant0, rm, pfflags); 551 | #endif 552 | } 553 | 554 | #ifdef F_ULONG 555 | /* Divide the double word ah:al by b using the double-width type F_ULONG: returns the quotient converted to F_UINT and stores the remainder in *pr. */ 556 | static F_UINT divrem_u(F_UINT *pr, F_UINT ah, F_UINT al, F_UINT b) 557 | { 558 | F_ULONG a; 559 | a = ((F_ULONG)ah << F_SIZE) | al; 560 | *pr = a % b; 561 | return a / b; 562 | } 563 | 564 | #else 565 | /* Same contract without a double-width type: shift-and-subtract long division producing one quotient bit per iteration. Requires a1 < b (asserted). */ 566 | /* XXX: optimize */ 567 | static F_UINT divrem_u(F_UINT *pr, F_UINT a1, F_UINT a0, F_UINT b) 568 | { 569 | int i, qb, ab; 570 | 571 | assert(a1 < b); 572 | for(i = 0; i < F_SIZE; i++) { 573 | ab = a1 >> (F_SIZE - 1); 574 | a1 = (a1 << 1) | (a0 >> (F_SIZE - 1)); 575 | if (ab || a1 >= b) { 576 | a1 -= b; 577 | qb = 1; 578 | } else { 579 | qb = 0; 580 | } 581 | a0 = (a0 << 1) | qb; 582 | } 583 | *pr = a1; 584 | return a0; 585 | } 586 | 587 | #endif 588 | /* Return a / b rounded according to rm; exception flags are ORed into *pfflags. 0/0 and inf/inf raise invalid and return F_QNAN; a finite non-zero value divided by zero raises divide-by-zero and returns a signed infinity. A non-zero remainder is folded into bit 0 of the quotient before rounding. */ 589 | F_UINT div_sf(F_UINT a, F_UINT b, RoundingModeEnum rm, 590 | uint32_t *pfflags) 591 | { 592 | uint32_t a_sign, b_sign, r_sign; 593 | int32_t a_exp, b_exp, r_exp; 594 | F_UINT a_mant, 
b_mant, r_mant, r; 595 | 596 | a_sign = a >> (F_SIZE - 1); 597 | b_sign = b >> (F_SIZE - 1); 598 | r_sign = a_sign ^ b_sign; 599 | a_exp = (a >> MANT_SIZE) & EXP_MASK; 600 | b_exp = (b >> MANT_SIZE) & EXP_MASK; 601 | a_mant = a & MANT_MASK; 602 | b_mant = b & MANT_MASK; 603 | if (a_exp == EXP_MASK) { 604 | if (a_mant != 0 || isnan_sf(b)) { 605 | if (issignan_sf(a) || issignan_sf(b)) { 606 | *pfflags |= FFLAG_INVALID_OP; 607 | } 608 | return F_QNAN; 609 | } else if (b_exp == EXP_MASK) { 610 | *pfflags |= FFLAG_INVALID_OP; 611 | return F_QNAN; 612 | } else { 613 | return pack_sf(r_sign, EXP_MASK, 0); 614 | } 615 | } else if (b_exp == EXP_MASK) { 616 | if (b_mant != 0) { 617 | if (issignan_sf(a) || issignan_sf(b)) { 618 | *pfflags |= FFLAG_INVALID_OP; 619 | } 620 | return F_QNAN; 621 | } else { 622 | return pack_sf(r_sign, 0, 0); 623 | } 624 | } 625 | 626 | if (b_exp == 0) { 627 | if (b_mant == 0) { 628 | /* zero */ 629 | if (a_exp == 0 && a_mant == 0) { 630 | *pfflags |= FFLAG_INVALID_OP; 631 | return F_QNAN; 632 | } else { 633 | *pfflags |= FFLAG_DIVIDE_ZERO; 634 | return pack_sf(r_sign, EXP_MASK, 0); 635 | } 636 | } 637 | b_mant = normalize_subnormal_sf(&b_exp, b_mant); 638 | } else { 639 | b_mant |= (F_UINT)1 << MANT_SIZE; 640 | } 641 | if (a_exp == 0) { 642 | if (a_mant == 0) 643 | return pack_sf(r_sign, 0, 0); /* zero */ 644 | a_mant = normalize_subnormal_sf(&a_exp, a_mant); 645 | } else { 646 | a_mant |= (F_UINT)1 << MANT_SIZE; 647 | } 648 | r_exp = a_exp - b_exp + (1 << (EXP_SIZE - 1)) - 1; 649 | r_mant = divrem_u(&r, a_mant, 0, b_mant << 2); 650 | if (r != 0) 651 | r_mant |= 1; 652 | return normalize_sf(r_sign, r_exp, r_mant, rm, pfflags); 653 | } 654 | 655 | #ifdef F_ULONG 656 | 657 | /* compute sqrt(a) with a = ah*2^F_SIZE+al and a < 2^(F_SIZE - 2) 658 | return true if not exact square. 
*/ 659 | static int sqrtrem_u(F_UINT *pr, F_UINT ah, F_UINT al) 660 | { 661 | F_ULONG a, u, s; 662 | int l, inexact; 663 | 664 | /* 2^l >= a */ 665 | if (ah != 0) { 666 | l = 2 * F_SIZE - clz(ah - 1); 667 | } else { 668 | if (al == 0) { 669 | *pr = 0; 670 | return 0; 671 | } 672 | l = F_SIZE - clz(al - 1); 673 | } 674 | a = ((F_ULONG)ah << F_SIZE) | al; 675 | u = (F_ULONG)1 << ((l + 1) / 2); 676 | for(;;) { 677 | s = u; 678 | u = ((a / s) + s) / 2; 679 | if (u >= s) 680 | break; 681 | } 682 | inexact = (a - s * s) != 0; 683 | *pr = s; 684 | return inexact; 685 | } 686 | 687 | #else 688 | 689 | static int sqrtrem_u(F_UINT *pr, F_UINT a1, F_UINT a0) 690 | { 691 | int l, inexact; 692 | F_UINT u, s, r, q, sq0, sq1; 693 | 694 | /* 2^l >= a */ 695 | if (a1 != 0) { 696 | l = 2 * F_SIZE - clz(a1 - 1); 697 | } else { 698 | if (a0 == 0) { 699 | *pr = 0; 700 | return 0; 701 | } 702 | l = F_SIZE - clz(a0 - 1); 703 | } 704 | u = (F_UINT)1 << ((l + 1) / 2); 705 | for(;;) { 706 | s = u; 707 | q = divrem_u(&r, a1, a0, s); 708 | u = (q + s) / 2; 709 | if (u >= s) 710 | break; 711 | } 712 | sq1 = mul_u(&sq0, s, s); 713 | inexact = (sq0 != a0 || sq1 != a1); 714 | *pr = s; 715 | return inexact; 716 | } 717 | 718 | #endif 719 | 720 | F_UINT sqrt_sf(F_UINT a, RoundingModeEnum rm, 721 | uint32_t *pfflags) 722 | { 723 | uint32_t a_sign; 724 | int32_t a_exp; 725 | F_UINT a_mant; 726 | 727 | a_sign = a >> (F_SIZE - 1); 728 | a_exp = (a >> MANT_SIZE) & EXP_MASK; 729 | a_mant = a & MANT_MASK; 730 | if (a_exp == EXP_MASK) { 731 | if (a_mant != 0) { 732 | if (issignan_sf(a)) { 733 | *pfflags |= FFLAG_INVALID_OP; 734 | } 735 | return F_QNAN; 736 | } else if (a_sign) { 737 | goto neg_error; 738 | } else { 739 | return a; /* +infinity */ 740 | } 741 | } 742 | if (a_sign) { 743 | if (a_exp == 0 && a_mant == 0) 744 | return a; /* -zero */ 745 | neg_error: 746 | *pfflags |= FFLAG_INVALID_OP; 747 | return F_QNAN; 748 | } 749 | if (a_exp == 0) { 750 | if (a_mant == 0) 751 | return pack_sf(0, 0, 0); /* 
zero */ 752 | a_mant = normalize_subnormal_sf(&a_exp, a_mant); 753 | } else { 754 | a_mant |= (F_UINT)1 << MANT_SIZE; 755 | } 756 | a_exp -= EXP_MASK / 2; 757 | /* simpler to handle an even exponent */ 758 | if (a_exp & 1) { 759 | a_exp--; 760 | a_mant <<= 1; 761 | } 762 | a_exp = (a_exp >> 1) + EXP_MASK / 2; 763 | a_mant <<= (F_SIZE - 4 - MANT_SIZE); 764 | if (sqrtrem_u(&a_mant, a_mant, 0)) 765 | a_mant |= 1; 766 | return normalize_sf(a_sign, a_exp, a_mant, rm, pfflags); 767 | } 768 | 769 | /* comparisons */ 770 | 771 | F_UINT glue(min_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags) 772 | { 773 | uint32_t a_sign, b_sign; 774 | 775 | if (isnan_sf(a) || isnan_sf(b)) { 776 | if (issignan_sf(a) || issignan_sf(b)) { 777 | *pfflags |= FFLAG_INVALID_OP; 778 | return F_QNAN; 779 | } else if (isnan_sf(a)) { 780 | if (isnan_sf(b)) 781 | return F_QNAN; 782 | else 783 | return b; 784 | } else { 785 | return a; 786 | } 787 | } 788 | a_sign = a >> (F_SIZE - 1); 789 | b_sign = b >> (F_SIZE - 1); 790 | 791 | if (a_sign != b_sign) { 792 | if (a_sign) 793 | return a; 794 | else 795 | return b; 796 | } else { 797 | if ((a < b) ^ a_sign) 798 | return a; 799 | else 800 | return b; 801 | } 802 | } 803 | 804 | F_UINT glue(max_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags) 805 | { 806 | uint32_t a_sign, b_sign; 807 | 808 | if (isnan_sf(a) || isnan_sf(b)) { 809 | if (issignan_sf(a) || issignan_sf(b)) { 810 | *pfflags |= FFLAG_INVALID_OP; 811 | return F_QNAN; 812 | } else if (isnan_sf(a)) { 813 | if (isnan_sf(b)) 814 | return F_QNAN; 815 | else 816 | return b; 817 | } else { 818 | return a; 819 | } 820 | } 821 | a_sign = a >> (F_SIZE - 1); 822 | b_sign = b >> (F_SIZE - 1); 823 | 824 | if (a_sign != b_sign) { 825 | if (a_sign) 826 | return b; 827 | else 828 | return a; 829 | } else { 830 | if ((a < b) ^ a_sign) 831 | return b; 832 | else 833 | return a; 834 | } 835 | } 836 | 837 | int glue(eq_quiet_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags) 838 | { 839 | if (isnan_sf(a) || 
isnan_sf(b)) { 840 | if (issignan_sf(a) || issignan_sf(b)) { 841 | *pfflags |= FFLAG_INVALID_OP; 842 | } 843 | return 0; 844 | } 845 | 846 | if ((F_UINT)((a | b) << 1) == 0) 847 | return 1; /* zero case */ 848 | return (a == b); 849 | } 850 | 851 | int glue(le_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags) 852 | { 853 | uint32_t a_sign, b_sign; 854 | 855 | if (isnan_sf(a) || isnan_sf(b)) { 856 | *pfflags |= FFLAG_INVALID_OP; 857 | return 0; 858 | } 859 | 860 | a_sign = a >> (F_SIZE - 1); 861 | b_sign = b >> (F_SIZE - 1); 862 | if (a_sign != b_sign) { 863 | return (a_sign || ((F_UINT)((a | b) << 1) == 0)); 864 | } else { 865 | if (a_sign) { 866 | return (a >= b); 867 | } else { 868 | return (a <= b); 869 | } 870 | } 871 | } 872 | 873 | int glue(lt_sf, F_SIZE)(F_UINT a, F_UINT b, uint32_t *pfflags) 874 | { 875 | uint32_t a_sign, b_sign; 876 | 877 | if (isnan_sf(a) || isnan_sf(b)) { 878 | *pfflags |= FFLAG_INVALID_OP; 879 | return 0; 880 | } 881 | 882 | a_sign = a >> (F_SIZE - 1); 883 | b_sign = b >> (F_SIZE - 1); 884 | if (a_sign != b_sign) { 885 | return (a_sign && ((F_UINT)((a | b) << 1) != 0)); 886 | } else { 887 | if (a_sign) { 888 | return (a > b); 889 | } else { 890 | return (a < b); 891 | } 892 | } 893 | } 894 | 895 | uint32_t glue(fclass_sf, F_SIZE)(F_UINT a) 896 | { 897 | uint32_t a_sign; 898 | int32_t a_exp; 899 | F_UINT a_mant; 900 | uint32_t ret; 901 | 902 | a_sign = a >> (F_SIZE - 1); 903 | a_exp = (a >> MANT_SIZE) & EXP_MASK; 904 | a_mant = a & MANT_MASK; 905 | if (a_exp == EXP_MASK) { 906 | if (a_mant != 0) { 907 | if (a_mant & QNAN_MASK) 908 | ret = FCLASS_QNAN; 909 | else 910 | ret = FCLASS_SNAN; 911 | } else { 912 | if (a_sign) 913 | ret = FCLASS_NINF; 914 | else 915 | ret = FCLASS_PINF; 916 | } 917 | } else if (a_exp == 0) { 918 | if (a_mant == 0) { 919 | if (a_sign) 920 | ret = FCLASS_NZERO; 921 | else 922 | ret = FCLASS_PZERO; 923 | } else { 924 | if (a_sign) 925 | ret = FCLASS_NSUBNORMAL; 926 | else 927 | ret = FCLASS_PSUBNORMAL; 928 | } 929 
| } else { 930 | if (a_sign) 931 | ret = FCLASS_NNORMAL; 932 | else 933 | ret = FCLASS_PNORMAL; 934 | } 935 | return ret; 936 | } 937 | 938 | /* conversions between floats */ 939 | 940 | #if F_SIZE >= 64 941 | 942 | F_UINT cvt_sf32_sf(uint32_t a, uint32_t *pfflags) 943 | { 944 | uint32_t a_sign; 945 | int32_t a_exp; 946 | F_UINT a_mant; 947 | 948 | a_mant = unpack_sf32(&a_sign, &a_exp, a); 949 | if (a_exp == 0xff) { 950 | if (a_mant != 0) { 951 | /* NaN */ 952 | if (issignan_sf32(a)) { 953 | *pfflags |= FFLAG_INVALID_OP; 954 | } 955 | return F_QNAN; 956 | } else { 957 | /* infinity */ 958 | return pack_sf(a_sign, EXP_MASK, 0); 959 | } 960 | } 961 | if (a_exp == 0) { 962 | if (a_mant == 0) 963 | return pack_sf(a_sign, 0, 0); /* zero */ 964 | a_mant = normalize_subnormal_sf32(&a_exp, a_mant); 965 | } 966 | /* convert the exponent value */ 967 | a_exp = a_exp - 0x7f + (EXP_MASK / 2); 968 | /* shift the mantissa */ 969 | a_mant <<= (MANT_SIZE - 23); 970 | /* We assume the target float is large enough to that no 971 | normalization is necessary */ 972 | return pack_sf(a_sign, a_exp, a_mant); 973 | } 974 | 975 | uint32_t glue(glue(cvt_sf, F_SIZE), _sf32)(F_UINT a, RoundingModeEnum rm, 976 | uint32_t *pfflags) 977 | { 978 | uint32_t a_sign; 979 | int32_t a_exp; 980 | F_UINT a_mant; 981 | 982 | a_mant = unpack_sf(&a_sign, &a_exp, a); 983 | if (a_exp == EXP_MASK) { 984 | if (a_mant != 0) { 985 | /* NaN */ 986 | if (issignan_sf(a)) { 987 | *pfflags |= FFLAG_INVALID_OP; 988 | } 989 | return F_QNAN32; 990 | } else { 991 | /* infinity */ 992 | return pack_sf32(a_sign, 0xff, 0); 993 | } 994 | } 995 | if (a_exp == 0) { 996 | if (a_mant == 0) 997 | return pack_sf32(a_sign, 0, 0); /* zero */ 998 | normalize_subnormal_sf(&a_exp, a_mant); 999 | } else { 1000 | a_mant |= (F_UINT)1 << MANT_SIZE; 1001 | } 1002 | /* convert the exponent value */ 1003 | a_exp = a_exp - (EXP_MASK / 2) + 0x7f; 1004 | /* shift the mantissa */ 1005 | a_mant = rshift_rnd(a_mant, MANT_SIZE - (32 - 2)); 1006 | 
return normalize_sf32(a_sign, a_exp, a_mant, rm, pfflags); 1007 | } 1008 | 1009 | #endif 1010 | 1011 | #if F_SIZE >= 128 1012 | 1013 | F_UINT cvt_sf64_sf(uint64_t a, uint32_t *pfflags) 1014 | { 1015 | uint32_t a_sign; 1016 | int32_t a_exp; 1017 | F_UINT a_mant; 1018 | 1019 | a_mant = unpack_sf64(&a_sign, &a_exp, a); 1020 | 1021 | if (a_exp == 0x7ff) { 1022 | if (a_mant != 0) { 1023 | /* NaN */ 1024 | if (issignan_sf64(a)) { 1025 | *pfflags |= FFLAG_INVALID_OP; 1026 | } 1027 | return F_QNAN; 1028 | } else { 1029 | /* infinity */ 1030 | return pack_sf(a_sign, EXP_MASK, 0); 1031 | } 1032 | } 1033 | if (a_exp == 0) { 1034 | if (a_mant == 0) 1035 | return pack_sf(a_sign, 0, 0); /* zero */ 1036 | a_mant = normalize_subnormal_sf64(&a_exp, a_mant); 1037 | } 1038 | /* convert the exponent value */ 1039 | a_exp = a_exp - 0x3ff + (EXP_MASK / 2); 1040 | /* shift the mantissa */ 1041 | a_mant <<= (MANT_SIZE - 52); 1042 | return pack_sf(a_sign, a_exp, a_mant); 1043 | } 1044 | 1045 | uint64_t glue(glue(cvt_sf, F_SIZE), _sf64)(F_UINT a, RoundingModeEnum rm, 1046 | uint32_t *pfflags) 1047 | { 1048 | uint32_t a_sign; 1049 | int32_t a_exp; 1050 | F_UINT a_mant; 1051 | 1052 | a_mant = unpack_sf(&a_sign, &a_exp, a); 1053 | if (a_exp == EXP_MASK) { 1054 | if (a_mant != 0) { 1055 | /* NaN */ 1056 | if (issignan_sf(a)) { 1057 | *pfflags |= FFLAG_INVALID_OP; 1058 | } 1059 | return F_QNAN64; 1060 | } else { 1061 | /* infinity */ 1062 | return pack_sf64(a_sign, 0x7ff, 0); 1063 | } 1064 | } 1065 | if (a_exp == 0) { 1066 | if (a_mant == 0) 1067 | return pack_sf64(a_sign, 0, 0); /* zero */ 1068 | normalize_subnormal_sf(&a_exp, a_mant); 1069 | } else { 1070 | a_mant |= (F_UINT)1 << MANT_SIZE; 1071 | } 1072 | /* convert the exponent value */ 1073 | a_exp = a_exp - (EXP_MASK / 2) + 0x3ff; 1074 | /* shift the mantissa */ 1075 | a_mant = rshift_rnd(a_mant, MANT_SIZE - (64 - 2)); 1076 | return normalize_sf64(a_sign, a_exp, a_mant, rm, pfflags); 1077 | } 1078 | 1079 | #endif 1080 | 1081 | #undef clz 
1082 | 1083 | #define ICVT_SIZE 32 1084 | #include "softfp_template_icvt.h" 1085 | 1086 | #define ICVT_SIZE 64 1087 | #include "softfp_template_icvt.h" 1088 | 1089 | #ifdef HAVE_INT128 1090 | #define ICVT_SIZE 128 1091 | #include "softfp_template_icvt.h" 1092 | #endif 1093 | 1094 | #undef F_SIZE 1095 | #undef F_UINT 1096 | #undef F_ULONG 1097 | #undef F_UHALF 1098 | #undef MANT_SIZE 1099 | #undef EXP_SIZE 1100 | #undef EXP_MASK 1101 | #undef MANT_MASK 1102 | #undef SIGN_MASK 1103 | #undef IMANT_SIZE 1104 | #undef RND_SIZE 1105 | #undef QNAN_MASK 1106 | #undef F_QNAN 1107 | 1108 | #undef pack_sf 1109 | #undef unpack_sf 1110 | #undef rshift_rnd 1111 | #undef round_pack_sf 1112 | #undef normalize_sf 1113 | #undef normalize2_sf 1114 | #undef issignan_sf 1115 | #undef isnan_sf 1116 | #undef add_sf 1117 | #undef mul_sf 1118 | #undef fma_sf 1119 | #undef div_sf 1120 | #undef sqrt_sf 1121 | #undef normalize_subnormal_sf 1122 | #undef divrem_u 1123 | #undef sqrtrem_u 1124 | #undef mul_u 1125 | #undef cvt_sf32_sf 1126 | #undef cvt_sf64_sf 1127 | -------------------------------------------------------------------------------- /softfp_template_icvt.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SoftFP Library 3 | * 4 | * Copyright (c) 2016 Fabrice Bellard 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | * THE SOFTWARE. 23 | */ 24 | #if ICVT_SIZE == 32 25 | #define ICVT_UINT uint32_t 26 | #define ICVT_INT int32_t 27 | #elif ICVT_SIZE == 64 28 | #define ICVT_UINT uint64_t 29 | #define ICVT_INT int64_t 30 | #elif ICVT_SIZE == 128 31 | #define ICVT_UINT uint128_t 32 | #define ICVT_INT int128_t 33 | #else 34 | #error unsupported icvt 35 | #endif 36 | 37 | /* conversions between float and integers */ 38 | static ICVT_INT glue(glue(glue(internal_cvt_sf, F_SIZE), _i), ICVT_SIZE)(F_UINT a, RoundingModeEnum rm, 39 | uint32_t *pfflags, BOOL is_unsigned) 40 | { 41 | uint32_t a_sign, addend, rnd_bits; 42 | int32_t a_exp; 43 | F_UINT a_mant; 44 | ICVT_UINT r, r_max; 45 | 46 | a_sign = a >> (F_SIZE - 1); 47 | a_exp = (a >> MANT_SIZE) & EXP_MASK; 48 | a_mant = a & MANT_MASK; 49 | if (a_exp == EXP_MASK && a_mant != 0) 50 | a_sign = 0; /* NaN is like +infinity */ 51 | if (a_exp == 0) { 52 | a_exp = 1; 53 | } else { 54 | a_mant |= (F_UINT)1 << MANT_SIZE; 55 | } 56 | a_mant <<= RND_SIZE; 57 | a_exp = a_exp - (EXP_MASK / 2) - MANT_SIZE; 58 | 59 | if (is_unsigned) 60 | r_max = (ICVT_UINT)a_sign - 1; 61 | else 62 | r_max = ((ICVT_UINT)1 << (ICVT_SIZE - 1)) - (ICVT_UINT)(a_sign ^ 1); 63 | if (a_exp >= 0) { 64 | if (a_exp <= (ICVT_SIZE - 1 - MANT_SIZE)) { 65 | r = (ICVT_UINT)(a_mant >> RND_SIZE) << a_exp; 66 | if (r > r_max) 67 | goto overflow; 68 | } else { 69 | overflow: 70 | *pfflags |= FFLAG_INVALID_OP; 71 | return r_max; 72 | } 73 | } else { 74 | a_mant = rshift_rnd(a_mant, -a_exp); 75 | 76 | 
switch(rm) { 77 | case RM_RNE: 78 | case RM_RMM: 79 | addend = (1 << (RND_SIZE - 1)); 80 | break; 81 | case RM_RTZ: 82 | addend = 0; 83 | break; 84 | default: 85 | case RM_RDN: 86 | case RM_RUP: 87 | if (a_sign ^ (rm & 1)) 88 | addend = (1 << RND_SIZE) - 1; 89 | else 90 | addend = 0; 91 | break; 92 | } 93 | 94 | rnd_bits = a_mant & ((1 << RND_SIZE ) - 1); 95 | a_mant = (a_mant + addend) >> RND_SIZE; 96 | /* half way: select even result */ 97 | if (rm == RM_RNE && rnd_bits == (1 << (RND_SIZE - 1))) 98 | a_mant &= ~1; 99 | if (a_mant > r_max) 100 | goto overflow; 101 | r = a_mant; 102 | if (rnd_bits != 0) 103 | *pfflags |= FFLAG_INEXACT; 104 | } 105 | if (a_sign) 106 | r = -r; 107 | return r; 108 | } 109 | 110 | ICVT_INT glue(glue(glue(cvt_sf, F_SIZE), _i), ICVT_SIZE)(F_UINT a, RoundingModeEnum rm, 111 | uint32_t *pfflags) 112 | { 113 | return glue(glue(glue(internal_cvt_sf, F_SIZE), _i), ICVT_SIZE)(a, rm, 114 | pfflags, FALSE); 115 | } 116 | 117 | ICVT_UINT glue(glue(glue(cvt_sf, F_SIZE), _u), ICVT_SIZE)(F_UINT a, RoundingModeEnum rm, 118 | uint32_t *pfflags) 119 | { 120 | return glue(glue(glue(internal_cvt_sf, F_SIZE), _i), ICVT_SIZE) (a, rm, 121 | pfflags, TRUE); 122 | } 123 | 124 | /* conversions between float and integers */ 125 | static F_UINT glue(glue(glue(internal_cvt_i, ICVT_SIZE), _sf), F_SIZE)(ICVT_INT a, 126 | RoundingModeEnum rm, 127 | uint32_t *pfflags, 128 | BOOL is_unsigned) 129 | { 130 | uint32_t a_sign; 131 | int32_t a_exp; 132 | F_UINT a_mant; 133 | ICVT_UINT r, mask; 134 | int l; 135 | 136 | if (!is_unsigned && a < 0) { 137 | a_sign = 1; 138 | r = -a; 139 | } else { 140 | a_sign = 0; 141 | r = a; 142 | } 143 | a_exp = (EXP_MASK / 2) + F_SIZE - 2; 144 | /* need to reduce range before generic float normalization */ 145 | l = ICVT_SIZE - glue(clz, ICVT_SIZE)(r) - (F_SIZE - 1); 146 | if (l > 0) { 147 | mask = r & (((ICVT_UINT)1 << l) - 1); 148 | r = (r >> l) | ((r & mask) != 0); 149 | a_exp += l; 150 | } 151 | a_mant = r; 152 | return 
normalize_sf(a_sign, a_exp, a_mant, rm, pfflags); 153 | } 154 | 155 | F_UINT glue(glue(glue(cvt_i, ICVT_SIZE), _sf), F_SIZE)(ICVT_INT a, 156 | RoundingModeEnum rm, 157 | uint32_t *pfflags) 158 | { 159 | return glue(glue(glue(internal_cvt_i, ICVT_SIZE), _sf), F_SIZE)(a, rm, pfflags, FALSE); 160 | } 161 | 162 | F_UINT glue(glue(glue(cvt_u, ICVT_SIZE), _sf), F_SIZE)(ICVT_UINT a, 163 | RoundingModeEnum rm, 164 | uint32_t *pfflags) 165 | { 166 | return glue(glue(glue(internal_cvt_i, ICVT_SIZE), _sf), F_SIZE)(a, rm, pfflags, TRUE); 167 | } 168 | 169 | #undef ICVT_SIZE 170 | #undef ICVT_INT 171 | #undef ICVT_UINT 172 | -------------------------------------------------------------------------------- /tinypi.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Tiny PI computation 3 | * 4 | * Copyright (c) 2017 Fabrice Bellard 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | * THE SOFTWARE. 23 | */ 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "libbf.h" 32 | 33 | #define CHUD_A 13591409 34 | #define CHUD_B 545140134 35 | #define CHUD_C 640320 36 | /* log2(C/12)*3 */ 37 | #define CHUD_BITS_PER_TERM 47.11041313821584202247 38 | 39 | /* number of bits per base 10 digit */ 40 | #define BITS_PER_DIGIT 3.32192809488736234786 41 | 42 | static bf_context_t bf_ctx; 43 | 44 | static void *my_bf_realloc(void *opaque, void *ptr, size_t size) 45 | { 46 | return realloc(ptr, size); 47 | } 48 | 49 | static void chud_bs(bf_t *P, bf_t *Q, bf_t *G, int64_t a, int64_t b, int need_g, 50 | limb_t prec) 51 | { 52 | int64_t c; 53 | 54 | if (a == (b - 1)) { 55 | bf_t T0, T1; 56 | 57 | bf_init(&bf_ctx, &T0); 58 | bf_init(&bf_ctx, &T1); 59 | bf_set_ui(G, 2 * b - 1); 60 | bf_mul_ui(G, G, 6 * b - 1, prec, BF_RNDN); 61 | bf_mul_ui(G, G, 6 * b - 5, prec, BF_RNDN); 62 | bf_set_ui(&T0, CHUD_B); 63 | bf_mul_ui(&T0, &T0, b, prec, BF_RNDN); 64 | bf_set_ui(&T1, CHUD_A); 65 | bf_add(&T0, &T0, &T1, prec, BF_RNDN); 66 | bf_mul(P, G, &T0, prec, BF_RNDN); 67 | P->sign = b & 1; 68 | 69 | bf_set_ui(Q, b); 70 | bf_mul_ui(Q, Q, b, prec, BF_RNDN); 71 | bf_mul_ui(Q, Q, b, prec, BF_RNDN); 72 | #if LIMB_BITS == 64 73 | bf_mul_ui(Q, Q, (uint64_t)CHUD_C * CHUD_C * CHUD_C / 24, prec, BF_RNDN); 74 | #else 75 | bf_mul_ui(Q, Q, CHUD_C, prec, BF_RNDN); 76 | bf_mul_ui(Q, Q, CHUD_C, prec, BF_RNDN); 77 | bf_mul_ui(Q, Q, CHUD_C / 24, prec, BF_RNDN); 78 | #endif 79 | bf_delete(&T0); 80 | bf_delete(&T1); 81 | } else { 82 | bf_t P2, Q2, G2; 83 | 84 | bf_init(&bf_ctx, &P2); 85 | bf_init(&bf_ctx, &Q2); 86 | bf_init(&bf_ctx, &G2); 87 | 88 | c = (a + b) / 2; 89 | chud_bs(P, Q, G, 
a, c, 1, prec); 90 | chud_bs(&P2, &Q2, &G2, c, b, need_g, prec); 91 | 92 | /* Q = Q1 * Q2 */ 93 | /* G = G1 * G2 */ 94 | /* P = P1 * Q2 + P2 * G1 */ 95 | bf_mul(&P2, &P2, G, prec, BF_RNDN); 96 | if (!need_g) 97 | bf_set_ui(G, 0); 98 | bf_mul(P, P, &Q2, prec, BF_RNDN); 99 | bf_add(P, P, &P2, prec, BF_RNDN); 100 | bf_delete(&P2); 101 | 102 | bf_mul(Q, Q, &Q2, prec, BF_RNDN); 103 | bf_delete(&Q2); 104 | if (need_g) 105 | bf_mul(G, G, &G2, prec, BF_RNDN); 106 | bf_delete(&G2); 107 | #if 0 108 | printf("%" PRId64 "-%" PRId64 " limbs: P=%" PRId64 " Q=%" PRId64 " G=%" PRId64 "\n", 109 | a, b, P->len, Q->len, G->len); 110 | #endif 111 | } 112 | } 113 | 114 | static int64_t time_start; 115 | int verbose; 116 | 117 | static int64_t get_clock_msec(void) 118 | { 119 | struct timeval tv; 120 | gettimeofday(&tv, NULL); 121 | return tv.tv_sec * 1000LL + (tv.tv_usec / 1000); 122 | } 123 | 124 | static void step_start(const char *str) 125 | { 126 | if (verbose) { 127 | printf("%-20s", str); 128 | fflush(stdout); 129 | time_start = get_clock_msec(); 130 | } 131 | } 132 | 133 | static void step_end(void) 134 | { 135 | int64_t ti; 136 | if (verbose) { 137 | ti = get_clock_msec() - time_start; 138 | printf("(%0.3f s)\n", ti / 1000.0); 139 | } 140 | } 141 | 142 | static void pi_chud(bf_t *Q, int64_t prec) 143 | { 144 | int64_t n, prec1; 145 | bf_t P, G; 146 | 147 | /* number of serie terms */ 148 | n = (int64_t)ceil(prec / CHUD_BITS_PER_TERM) + 10; 149 | prec1 = prec + 32; 150 | 151 | bf_init(&bf_ctx, &P); 152 | bf_init(&bf_ctx, &G); 153 | 154 | step_start("chud_bs"); 155 | chud_bs(&P, Q, &G, 0, n, 0, prec1); 156 | 157 | bf_mul_ui(&G, Q, CHUD_A, prec1, BF_RNDN); 158 | bf_add(&P, &G, &P, prec1, BF_RNDN); 159 | step_end(); 160 | 161 | step_start("div"); 162 | bf_div(Q, Q, &P, prec1, BF_RNDF); 163 | step_end(); 164 | 165 | step_start("sqrt"); 166 | bf_set_ui(&P, CHUD_C); 167 | bf_sqrt(&G, &P, prec1, BF_RNDF); 168 | bf_mul_ui(&G, &G, (uint64_t)CHUD_C / 12, prec1, BF_RNDF); 169 | step_end(); 
170 | 171 | step_start("final mul"); 172 | bf_mul(Q, Q, &G, prec, BF_RNDN); 173 | step_end(); 174 | 175 | bf_delete(&P); 176 | bf_delete(&G); 177 | } 178 | 179 | int main(int argc, char **argv) 180 | { 181 | int64_t n_digits, prec, n_bits, ti_tot; 182 | bf_t PI; 183 | const char *output_filename; 184 | FILE *f; 185 | int arg_idx, dec_output; 186 | char *digits; 187 | size_t digits_len; 188 | 189 | dec_output = 1; 190 | verbose = 0; 191 | arg_idx = 1; 192 | while (arg_idx < argc) { 193 | if (!strcmp(argv[arg_idx], "-b")) { 194 | dec_output = 0; 195 | arg_idx++; 196 | } else if (!strcmp(argv[arg_idx], "-v")) { 197 | verbose = 1; 198 | arg_idx++; 199 | } else { 200 | break; 201 | } 202 | } 203 | 204 | if (arg_idx >= argc) { 205 | printf("usage: tinypi [options] n_digits [output_file]\n" 206 | "\n" 207 | "Options:\n" 208 | "-b : output in binary (hexa) instead of base 10\n" 209 | "-v : dump computation steps\n"); 210 | exit(1); 211 | } 212 | 213 | n_digits = (int64_t)strtod(argv[arg_idx++], NULL); 214 | output_filename = NULL; 215 | if (arg_idx < argc) 216 | output_filename = argv[arg_idx++]; 217 | 218 | ti_tot = get_clock_msec(); 219 | n_digits = bf_max(n_digits, 50); 220 | n_bits = (limb_t)ceil(n_digits * BITS_PER_DIGIT); 221 | /* we add more bits to reduce the probability of bad rounding for 222 | the last digits */ 223 | prec = n_bits + 32; 224 | bf_context_init(&bf_ctx, my_bf_realloc, NULL); 225 | bf_init(&bf_ctx, &PI); 226 | 227 | pi_chud(&PI, prec); 228 | 229 | if (dec_output) { 230 | step_start("base conversion"); 231 | digits = bf_ftoa(&digits_len, &PI, 10, n_digits + 1, 232 | BF_FTOA_FORMAT_FIXED | BF_RNDZ); 233 | step_end(); 234 | } else { 235 | digits = bf_ftoa(&digits_len, &PI, 16, n_bits / 4, 236 | BF_FTOA_FORMAT_FIXED | BF_RNDZ); 237 | } 238 | ti_tot = get_clock_msec() - ti_tot; 239 | if (verbose) { 240 | printf("%-20s(%0.3f s)\n", "total", ti_tot / 1000.0); 241 | } 242 | 243 | if (output_filename) { 244 | f = fopen(output_filename, "wb"); 245 | if (!f) 
{ 246 | perror(output_filename); 247 | exit(1); 248 | } 249 | fwrite(digits, 1, digits_len, f); 250 | fclose(f); 251 | } 252 | free(digits); 253 | bf_delete(&PI); 254 | bf_context_end(&bf_ctx); 255 | return 0; 256 | } 257 | --------------------------------------------------------------------------------