├── README.md ├── install.txt ├── Makefile ├── readme.txt ├── fftsg_h.c └── pi_fftcs.c /README.md: -------------------------------------------------------------------------------- 1 | # pi_css5 2 | 3 | PI(= 3.1415926535897932384626...) Calculation Program. 4 | 5 | ## Documents 6 | 7 | - [readme.txt](readme.txt) 8 | - [install.txt](install.txt) 9 | 10 | ## 1M benchmark result on some SoC 11 | 12 | Benchmark command: 13 | 14 | ```shell 15 | ./pi_css5 $((1<<20)) 16 | ``` 17 | 18 | - Intel Celeron 1037U (IvyBridge) on Hyper-V 3.69s 19 | - Amlogic S905D (4xA53) on phicomm N1 18.51s 20 | - Intel Celeron N2840 (BayTrail) 29.53s 21 | - BCM2836 (4xA7) on raspberry PI 2B 33.14s 22 | -------------------------------------------------------------------------------- /install.txt: -------------------------------------------------------------------------------- 1 | PI(= 3.1415926535897932384626...) Calculation Program 2 | 3 | 4 | ver. LG1.1.2-MP1.5.2a Nov. 1999 5 | modified for easier usage, May 2006 6 | 7 | If you have a C compiler installed, simply edit the makefile and build with: 8 | 9 | make all 10 | 11 | 12 | Copyright 13 | source files: 14 | Original Copyright(C) 1999 Takuya OOURA 15 | Email: ooura@mmm.t.u-tokyo.ac.jp 16 | URL: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html 17 | You may use, copy, modify this code for any purpose and 18 | without fee. You may distribute this ORIGINAL package. 19 | 20 | 21 | Modifications Copyright(C) 2004, 2006 Dara Hazeghi 22 | Email: dhazeghi@yahoo.com 23 | URL: http://www.myownlittleworld.com/miscellaenous/computers/piprogram.html 24 | This package may be freely redistributed or modified 25 | in accordance with its original license. 26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for pi_fftcs5, Takuya Ooura's 2 | # Pi calculation for *nix or DOS 3 | # Warning: this file has tabs in it. 
They are required by GNU Make. 4 | # 5 | # This version may not produce optimal runs. You'll need to experiment 6 | # with the compiler options. 7 | 8 | #Set this to your remove command. 9 | RM= rm -f 10 | 11 | #Set this to your copy command. 12 | CP= cp 13 | 14 | #Set this to your directory creation command 15 | MKDIR= mkdir -p 16 | 17 | #Set this to your strip command. 18 | STRIP= strip 19 | 20 | #Set this to the proper math libraries to link against 21 | MATH_LIBS= -lm 22 | 23 | #Set this to the proper install directory 24 | PREFIX= /usr/local 25 | 26 | CC = gcc 27 | 28 | #Set this to best optimization flags for your system. Defaults are for gcc. 29 | CFLAGS = -Wall -pedantic -O -fomit-frame-pointer -funroll-loops 30 | 31 | #Set this to the cpu to target 32 | CFLAGS += -malign-double 33 | 34 | #If you would like to minimize program output uncomment this line 35 | #CFLAGS += -DQUIET_OUT 36 | 37 | #Set this to important linker flags 38 | LDFLAGS = -static 39 | 40 | # Leave items below this point unchanged! 41 | 42 | all: pi_css5 43 | 44 | 45 | pi_css5 : pi_fftcs.o fftsg_h.o 46 | $(CC) $(CFLAGS) pi_fftcs.o fftsg_h.o ${MATH_LIBS} ${LDFLAGS} -o pi_css5 47 | 48 | pi_fftcs.o : pi_fftcs.c 49 | $(CC) $(CFLAGS) -c pi_fftcs.c -o pi_fftcs.o 50 | 51 | fftsg_h.o : fftsg_h.c 52 | $(CC) $(CFLAGS) -c fftsg_h.c -o fftsg_h.o 53 | 54 | 55 | install: all 56 | ${MKDIR} ${PREFIX}/bin 57 | ${CP} pi_css5 ${PREFIX}/bin 58 | 59 | install-strip: strip install 60 | 61 | strip: all 62 | ${STRIP} pi_css5 63 | 64 | clean: 65 | ${RM} pi_fftcs.o fftsg_h.o pi_css5 66 | 67 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | PI(= 3.1415926535897932384626...) Calculation Program 2 | 3 | 4 | ver. LG1.1.2-MP1.5.2a Nov. 
1999 5 | 6 | 7 | Files: 8 | fftsg_h.c : FFT Package - split-radix - use no work areas 9 | pi_fftcs.c : PI Calculation Program - memory save version 10 | -- use rdft() in "fft*g_h.c" 11 | readme.txt : this file 12 | Makefile : - for gcc; modify for your own compiler. 13 | 14 | To Compile: 15 | Check macros in "pi_fftcs.c" and modify if necessary. 16 | DBL_ERROR_MARGIN is a very important parameter. 17 | If DBL_ERROR_MARGIN is very small then efficiency will be bad. 18 | If DBL_ERROR_MARGIN >= 0.5 then it may calculate a wrong result. 19 | 20 | Example Compilation: 21 | GNU: 22 | gcc -O -funroll-loops -fomit-frame-pointer pi_fftcs.c fftsg_h.c -lm -o pi_css5 23 | SUN: 24 | cc -fast pi_fftcs.c fftsg_h.c -lm -o pi_css5 25 | HP: 26 | aCC -fast pi_fftcs.c fftsg_h.c -lm -o pi_css5 27 | Microsoft: 28 | cl -O2 pi_fftcs.c fftsg_h.c -o pi_css5 29 | 30 | Relationship between Number of Digits and FFT Length: 31 | ndigit = nfft*log_10(R), R >= 10000 or 1000 32 | R is a radix of multiple-precision format. 33 | R depends on DBL_ERROR_MARGIN and 34 | FFT+machine+compiler's tolerance. 35 | 36 | Number of Floating Point Operations: 37 | pi_fftcs.c + fftsg_h.c: 38 | 42*nfft*(log_2(nfft))^2 [Operations] 39 | 40 | Memory Use: 41 | pi_fftcs.c: 42 | nfft*(6*sizeof(short int)+3*sizeof(double)) [Bytes] 43 | 44 | AGM Algorithm: 45 | ---- a formula based on the AGM (Arithmetic-Geometric Mean) ---- 46 | c = sqrt(0.125); 47 | a = 1 + 3 * c; 48 | b = sqrt(a); 49 | e = b - 0.625; 50 | b = 2 * b; 51 | c = e - c; 52 | a = a + e; 53 | npow = 4; 54 | do { 55 | npow = 2 * npow; 56 | e = (a + b) / 2; 57 | b = sqrt(a * b); 58 | e = e - b; 59 | b = 2 * b; 60 | c = c - e; 61 | a = e + b; 62 | } while (e > SQRT_SQRT_EPSILON); 63 | e = e * e / 4; 64 | a = a + b; 65 | pi = (a * a - e - e / 2) / (a * c - e) / npow; 66 | ---- modification ---- 67 | This is a modified version of the Gauss-Legendre formula 68 | (by T.Ooura). It is faster than the original version. 69 | 70 | Reference: 71 | 1. 
E.Salamin, 72 | Computation of PI Using Arithmetic-Geometric Mean, 73 | Mathematics of Computation, Vol.30 1976. 74 | 2. R.P.Brent, 75 | Fast Multiple-Precision Evaluation of Elementary Functions, 76 | J. ACM 23 1976. 77 | 3. D.Takahasi, Y.Kanada, 78 | Calculation of PI to 51.5 Billion Decimal Digits on 79 | Distributed Memory Parallel Processors, 80 | Transactions of Information Processing Society of Japan, 81 | Vol.39 No.7 1998. 82 | 4. T.Ooura, 83 | Improvement of the PI Calculation Algorithm and 84 | Implementation of Fast Multiple-Precision Computation, 85 | Information Processing Society of Japan SIG Notes, 86 | 98-HPC-74, 1998. 87 | 88 | 89 | Copyright 90 | source files: 91 | Copyright(C) 1999 Takuya OOURA 92 | Email: ooura@mmm.t.u-tokyo.ac.jp 93 | URL: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html 94 | You may use, copy, modify this code for any purpose and 95 | without fee. You may distribute this ORIGINAL package. 96 | 97 | -------------------------------------------------------------------------------- /fftsg_h.c: -------------------------------------------------------------------------------- 1 | /* 2 | Fast Fourier/Cosine/Sine Transform 3 | dimension :one 4 | data length :power of 2 5 | decimation :frequency 6 | radix :split-radix 7 | data :inplace 8 | table :not use 9 | functions 10 | cdft: Complex Discrete Fourier Transform 11 | rdft: Real Discrete Fourier Transform 12 | ddct: Discrete Cosine Transform 13 | ddst: Discrete Sine Transform 14 | dfct: Cosine Transform of RDFT (Real Symmetric DFT) 15 | dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) 16 | function prototypes 17 | void cdft(int, int, double *); 18 | void rdft(int, int, double *); 19 | void ddct(int, int, double *); 20 | void ddst(int, int, double *); 21 | void dfct(int, double *); 22 | void dfst(int, double *); 23 | 24 | 25 | -------- Complex DFT (Discrete Fourier Transform) -------- 26 | [definition] 27 | <case1> 28 | X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n 29 | <case2> 30 | X[k] = sum_j=0^n-1 
x[j]*exp(-2*pi*i*j*k/n), 0<=k 34 | cdft(2*n, 1, a); 35 | 36 | cdft(2*n, -1, a); 37 | [parameters] 38 | 2*n :data length (int) 39 | n >= 1, n = power of 2 40 | a[0...2*n-1] :input/output data (double *) 41 | input data 42 | a[2*j] = Re(x[j]), 43 | a[2*j+1] = Im(x[j]), 0<=j RDFT 61 | R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 62 | I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) 64 | a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + 65 | sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + 66 | sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k 69 | rdft(n, 1, a); 70 | 71 | rdft(n, -1, a); 72 | [parameters] 73 | n :data length (int) 74 | n >= 2, n = power of 2 75 | a[0...n-1] :input/output data (double *) 76 | 77 | output data 78 | a[2*k] = R[k], 0<=k 82 | input data 83 | a[2*j] = R[j], 0<=j IDCT (excluding scale) 100 | C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT 102 | C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k 105 | ddct(n, 1, a); 106 | 107 | ddct(n, -1, a); 108 | [parameters] 109 | n :data length (int) 110 | n >= 2, n = power of 2 111 | a[0...n-1] :input/output data (double *) 112 | output data 113 | a[k] = C[k], 0<=k IDST (excluding scale) 129 | S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST 131 | S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 134 | ddst(n, 1, a); 135 | 136 | ddst(n, -1, a); 137 | [parameters] 138 | n :data length (int) 139 | n >= 2, n = power of 2 140 | a[0...n-1] :input/output data (double *) 141 | 142 | input data 143 | a[j] = A[j], 0 148 | output data 149 | a[k] = S[k], 0= 2, n = power of 2 171 | a[0...n] :input/output data (double *) 172 | output data 173 | a[k] = C[k], 0<=k<=n 174 | [remark] 175 | Inverse of 176 | a[0] *= 0.5; 177 | a[n] *= 0.5; 178 | dfct(n, a); 179 | is 180 | a[0] *= 0.5; 181 | a[n] *= 0.5; 182 | dfct(n, a); 183 | for (j = 0; j <= n; j++) { 184 | a[j] *= 2.0 / n; 185 | } 186 | . 
187 | 188 | 189 | -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- 190 | [definition] 191 | S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 197 | a[0...n-1] :input/output data (double *) 198 | output data 199 | a[k] = S[k], 0= 0) 220 | { 221 | cftfsub (n, a); 222 | } 223 | else 224 | { 225 | cftbsub (n, a); 226 | } 227 | } 228 | 229 | 230 | void 231 | rdft (int n, int isgn, double *a) 232 | { 233 | void cftfsub (int n, double *a); 234 | void cftbsub (int n, double *a); 235 | void rftfsub (int n, double *a); 236 | void rftbsub (int n, double *a); 237 | double xi; 238 | 239 | if (isgn >= 0) 240 | { 241 | if (n > 4) 242 | { 243 | cftfsub (n, a); 244 | rftfsub (n, a); 245 | } 246 | else if (n == 4) 247 | { 248 | cftfsub (n, a); 249 | } 250 | xi = a[0] - a[1]; 251 | a[0] += a[1]; 252 | a[1] = xi; 253 | } 254 | else 255 | { 256 | a[1] = 0.5 * (a[0] - a[1]); 257 | a[0] -= a[1]; 258 | if (n > 4) 259 | { 260 | rftbsub (n, a); 261 | cftbsub (n, a); 262 | } 263 | else if (n == 4) 264 | { 265 | cftbsub (n, a); 266 | } 267 | } 268 | } 269 | 270 | 271 | void 272 | ddct (int n, int isgn, double *a) 273 | { 274 | void cftfsub (int n, double *a); 275 | void cftbsub (int n, double *a); 276 | void rftfsub (int n, double *a); 277 | void rftbsub (int n, double *a); 278 | void dctsub (int n, double *a); 279 | void dctsub4 (int n, double *a); 280 | int j; 281 | double xr; 282 | 283 | if (isgn < 0) 284 | { 285 | xr = a[n - 1]; 286 | for (j = n - 2; j >= 2; j -= 2) 287 | { 288 | a[j + 1] = a[j] - a[j - 1]; 289 | a[j] += a[j - 1]; 290 | } 291 | a[1] = a[0] - xr; 292 | a[0] += xr; 293 | if (n > 4) 294 | { 295 | rftbsub (n, a); 296 | cftbsub (n, a); 297 | } 298 | else if (n == 4) 299 | { 300 | cftbsub (n, a); 301 | } 302 | } 303 | if (n > 4) 304 | { 305 | dctsub (n, a); 306 | } 307 | else 308 | { 309 | dctsub4 (n, a); 310 | } 311 | if (isgn >= 0) 312 | { 313 | if (n > 4) 314 | { 315 | cftfsub (n, a); 316 | rftfsub (n, a); 317 | } 318 | else if (n == 4) 319 | { 
320 | cftfsub (n, a); 321 | } 322 | xr = a[0] - a[1]; 323 | a[0] += a[1]; 324 | for (j = 2; j < n; j += 2) 325 | { 326 | a[j - 1] = a[j] - a[j + 1]; 327 | a[j] += a[j + 1]; 328 | } 329 | a[n - 1] = xr; 330 | } 331 | } 332 | 333 | 334 | void 335 | ddst (int n, int isgn, double *a) 336 | { 337 | void cftfsub (int n, double *a); 338 | void cftbsub (int n, double *a); 339 | void rftfsub (int n, double *a); 340 | void rftbsub (int n, double *a); 341 | void dstsub (int n, double *a); 342 | void dstsub4 (int n, double *a); 343 | int j; 344 | double xr; 345 | 346 | if (isgn < 0) 347 | { 348 | xr = a[n - 1]; 349 | for (j = n - 2; j >= 2; j -= 2) 350 | { 351 | a[j + 1] = -a[j] - a[j - 1]; 352 | a[j] -= a[j - 1]; 353 | } 354 | a[1] = a[0] + xr; 355 | a[0] -= xr; 356 | if (n > 4) 357 | { 358 | rftbsub (n, a); 359 | cftbsub (n, a); 360 | } 361 | else if (n == 4) 362 | { 363 | cftbsub (n, a); 364 | } 365 | } 366 | if (n > 4) 367 | { 368 | dstsub (n, a); 369 | } 370 | else 371 | { 372 | dstsub4 (n, a); 373 | } 374 | if (isgn >= 0) 375 | { 376 | if (n > 4) 377 | { 378 | cftfsub (n, a); 379 | rftfsub (n, a); 380 | } 381 | else if (n == 4) 382 | { 383 | cftfsub (n, a); 384 | } 385 | xr = a[0] - a[1]; 386 | a[0] += a[1]; 387 | for (j = 2; j < n; j += 2) 388 | { 389 | a[j - 1] = -a[j] - a[j + 1]; 390 | a[j] -= a[j + 1]; 391 | } 392 | a[n - 1] = -xr; 393 | } 394 | } 395 | 396 | 397 | void 398 | dfct (int n, double *a) 399 | { 400 | void ddct (int n, int isgn, double *a); 401 | void bitrv1 (int n, double *a); 402 | int j, k, m, mh; 403 | double xr, xi, yr, yi, an; 404 | 405 | m = n >> 1; 406 | for (j = 0; j < m; j++) 407 | { 408 | k = n - j; 409 | xr = a[j] + a[k]; 410 | a[j] -= a[k]; 411 | a[k] = xr; 412 | } 413 | an = a[n]; 414 | while (m >= 2) 415 | { 416 | ddct (m, 1, a); 417 | bitrv1 (m, a); 418 | mh = m >> 1; 419 | xi = a[m]; 420 | a[m] = a[0]; 421 | a[0] = an - xi; 422 | an += xi; 423 | for (j = 1; j < mh; j++) 424 | { 425 | k = m - j; 426 | xr = a[m + k]; 427 | xi = a[m + j]; 
428 | yr = a[j]; 429 | yi = a[k]; 430 | a[m + j] = yr; 431 | a[m + k] = yi; 432 | a[j] = xr - xi; 433 | a[k] = xr + xi; 434 | } 435 | xr = a[mh]; 436 | a[mh] = a[m + mh]; 437 | a[m + mh] = xr; 438 | m = mh; 439 | } 440 | xi = a[1]; 441 | a[1] = a[0]; 442 | a[0] = an + xi; 443 | a[n] = an - xi; 444 | bitrv1 (n, a); 445 | } 446 | 447 | 448 | void 449 | dfst (int n, double *a) 450 | { 451 | void ddst (int n, int isgn, double *a); 452 | void bitrv1 (int n, double *a); 453 | int j, k, m, mh; 454 | double xr, xi, yr, yi; 455 | 456 | m = n >> 1; 457 | for (j = 1; j < m; j++) 458 | { 459 | k = n - j; 460 | xr = a[j] - a[k]; 461 | a[j] += a[k]; 462 | a[k] = xr; 463 | } 464 | a[0] = a[m]; 465 | while (m >= 2) 466 | { 467 | ddst (m, 1, a); 468 | bitrv1 (m, a); 469 | mh = m >> 1; 470 | for (j = 1; j < mh; j++) 471 | { 472 | k = m - j; 473 | xr = a[m + k]; 474 | xi = a[m + j]; 475 | yr = a[j]; 476 | yi = a[k]; 477 | a[m + j] = yr; 478 | a[m + k] = yi; 479 | a[j] = xr + xi; 480 | a[k] = xr - xi; 481 | } 482 | a[m] = a[0]; 483 | a[0] = a[m + mh]; 484 | a[m + mh] = a[mh]; 485 | m = mh; 486 | } 487 | a[1] = a[0]; 488 | a[0] = 0; 489 | bitrv1 (n, a); 490 | } 491 | 492 | 493 | /* -------- child routines -------- */ 494 | 495 | 496 | #include 497 | #ifndef M_PI_2 498 | #define M_PI_2 1.570796326794896619231321691639751442098584699687 499 | #endif 500 | #ifndef WR5000 /* cos(M_PI_2*0.5000) */ 501 | #define WR5000 0.707106781186547524400844362104849039284835937688 502 | #endif 503 | #ifndef WR2500 /* cos(M_PI_2*0.2500) */ 504 | #define WR2500 0.923879532511286756128183189396788286822416625863 505 | #endif 506 | #ifndef WI2500 /* sin(M_PI_2*0.2500) */ 507 | #define WI2500 0.382683432365089771728459984030398866761344562485 508 | #endif 509 | #ifndef WR1250 /* cos(M_PI_2*0.1250) */ 510 | #define WR1250 0.980785280403230449126182236134239036973933730893 511 | #endif 512 | #ifndef WI1250 /* sin(M_PI_2*0.1250) */ 513 | #define WI1250 0.195090322016128267848284868477022240927691617751 514 | 
#endif 515 | #ifndef WR3750 /* cos(M_PI_2*0.3750) */ 516 | #define WR3750 0.831469612302545237078788377617905756738560811987 517 | #endif 518 | #ifndef WI3750 /* sin(M_PI_2*0.3750) */ 519 | #define WI3750 0.555570233019602224742830813948532874374937190754 520 | #endif 521 | 522 | 523 | #ifndef CDFT_RECURSIVE_N /* length of the recursive FFT mode */ 524 | #define CDFT_RECURSIVE_N 512 /* <= (L1 cache size) / 16 */ 525 | #endif 526 | 527 | #ifndef CDFT_LOOP_DIV /* control of the CDFT's speed & tolerance */ 528 | #define CDFT_LOOP_DIV 32 529 | #endif 530 | 531 | #ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */ 532 | #define RDFT_LOOP_DIV 64 533 | #endif 534 | 535 | #ifndef DCST_LOOP_DIV /* control of the DCT,DST's speed & tolerance */ 536 | #define DCST_LOOP_DIV 64 537 | #endif 538 | 539 | 540 | void 541 | cftfsub (int n, double *a) 542 | { 543 | void bitrv2 (int n, double *a); 544 | void bitrv216 (double *a); 545 | void bitrv208 (double *a); 546 | void cftmdl1 (int n, double *a); 547 | void cftrec1 (int n, double *a); 548 | void cftrec2 (int n, double *a); 549 | void cftexp1 (int n, double *a); 550 | void cftfx41 (int n, double *a); 551 | void cftf161 (double *a); 552 | void cftf081 (double *a); 553 | void cftf040 (double *a); 554 | void cftx020 (double *a); 555 | int m; 556 | 557 | if (n > 32) 558 | { 559 | m = n >> 2; 560 | cftmdl1 (n, a); 561 | if (n > CDFT_RECURSIVE_N) 562 | { 563 | cftrec1 (m, a); 564 | cftrec2 (m, &a[m]); 565 | cftrec1 (m, &a[2 * m]); 566 | cftrec1 (m, &a[3 * m]); 567 | } 568 | else if (m > 32) 569 | { 570 | cftexp1 (n, a); 571 | } 572 | else 573 | { 574 | cftfx41 (n, a); 575 | } 576 | bitrv2 (n, a); 577 | } 578 | else if (n > 8) 579 | { 580 | if (n == 32) 581 | { 582 | cftf161 (a); 583 | bitrv216 (a); 584 | } 585 | else 586 | { 587 | cftf081 (a); 588 | bitrv208 (a); 589 | } 590 | } 591 | else if (n == 8) 592 | { 593 | cftf040 (a); 594 | } 595 | else if (n == 4) 596 | { 597 | cftx020 (a); 598 | } 599 | } 600 | 601 | 602 | void 
603 | cftbsub (int n, double *a) 604 | { 605 | void bitrv2conj (int n, double *a); 606 | void bitrv216neg (double *a); 607 | void bitrv208neg (double *a); 608 | void cftb1st (int n, double *a); 609 | void cftrec1 (int n, double *a); 610 | void cftrec2 (int n, double *a); 611 | void cftexp1 (int n, double *a); 612 | void cftfx41 (int n, double *a); 613 | void cftf161 (double *a); 614 | void cftf081 (double *a); 615 | void cftb040 (double *a); 616 | void cftx020 (double *a); 617 | int m; 618 | 619 | if (n > 32) 620 | { 621 | m = n >> 2; 622 | cftb1st (n, a); 623 | if (n > CDFT_RECURSIVE_N) 624 | { 625 | cftrec1 (m, a); 626 | cftrec2 (m, &a[m]); 627 | cftrec1 (m, &a[2 * m]); 628 | cftrec1 (m, &a[3 * m]); 629 | } 630 | else if (m > 32) 631 | { 632 | cftexp1 (n, a); 633 | } 634 | else 635 | { 636 | cftfx41 (n, a); 637 | } 638 | bitrv2conj (n, a); 639 | } 640 | else if (n > 8) 641 | { 642 | if (n == 32) 643 | { 644 | cftf161 (a); 645 | bitrv216neg (a); 646 | } 647 | else 648 | { 649 | cftf081 (a); 650 | bitrv208neg (a); 651 | } 652 | } 653 | else if (n == 8) 654 | { 655 | cftb040 (a); 656 | } 657 | else if (n == 4) 658 | { 659 | cftx020 (a); 660 | } 661 | } 662 | 663 | 664 | void 665 | bitrv2 (int n, double *a) 666 | { 667 | int j0, k0, j1, k1, l, m, i, j, k; 668 | double xr, xi, yr, yi; 669 | 670 | l = n >> 2; 671 | m = 2; 672 | while (m < l) 673 | { 674 | l >>= 1; 675 | m <<= 1; 676 | } 677 | if (m == l) 678 | { 679 | j0 = 0; 680 | for (k0 = 0; k0 < m; k0 += 2) 681 | { 682 | k = k0; 683 | for (j = j0; j < j0 + k0; j += 2) 684 | { 685 | xr = a[j]; 686 | xi = a[j + 1]; 687 | yr = a[k]; 688 | yi = a[k + 1]; 689 | a[j] = yr; 690 | a[j + 1] = yi; 691 | a[k] = xr; 692 | a[k + 1] = xi; 693 | j1 = j + m; 694 | k1 = k + 2 * m; 695 | xr = a[j1]; 696 | xi = a[j1 + 1]; 697 | yr = a[k1]; 698 | yi = a[k1 + 1]; 699 | a[j1] = yr; 700 | a[j1 + 1] = yi; 701 | a[k1] = xr; 702 | a[k1 + 1] = xi; 703 | j1 += m; 704 | k1 -= m; 705 | xr = a[j1]; 706 | xi = a[j1 + 1]; 707 | yr = a[k1]; 708 | 
yi = a[k1 + 1]; 709 | a[j1] = yr; 710 | a[j1 + 1] = yi; 711 | a[k1] = xr; 712 | a[k1 + 1] = xi; 713 | j1 += m; 714 | k1 += 2 * m; 715 | xr = a[j1]; 716 | xi = a[j1 + 1]; 717 | yr = a[k1]; 718 | yi = a[k1 + 1]; 719 | a[j1] = yr; 720 | a[j1 + 1] = yi; 721 | a[k1] = xr; 722 | a[k1 + 1] = xi; 723 | for (i = n >> 1; i > (k ^= i); i >>= 1); 724 | } 725 | j1 = j0 + k0 + m; 726 | k1 = j1 + m; 727 | xr = a[j1]; 728 | xi = a[j1 + 1]; 729 | yr = a[k1]; 730 | yi = a[k1 + 1]; 731 | a[j1] = yr; 732 | a[j1 + 1] = yi; 733 | a[k1] = xr; 734 | a[k1 + 1] = xi; 735 | for (i = n >> 1; i > (j0 ^= i); i >>= 1); 736 | } 737 | } 738 | else 739 | { 740 | j0 = 0; 741 | for (k0 = 2; k0 < m; k0 += 2) 742 | { 743 | for (i = n >> 1; i > (j0 ^= i); i >>= 1); 744 | k = k0; 745 | for (j = j0; j < j0 + k0; j += 2) 746 | { 747 | xr = a[j]; 748 | xi = a[j + 1]; 749 | yr = a[k]; 750 | yi = a[k + 1]; 751 | a[j] = yr; 752 | a[j + 1] = yi; 753 | a[k] = xr; 754 | a[k + 1] = xi; 755 | j1 = j + m; 756 | k1 = k + m; 757 | xr = a[j1]; 758 | xi = a[j1 + 1]; 759 | yr = a[k1]; 760 | yi = a[k1 + 1]; 761 | a[j1] = yr; 762 | a[j1 + 1] = yi; 763 | a[k1] = xr; 764 | a[k1 + 1] = xi; 765 | for (i = n >> 1; i > (k ^= i); i >>= 1); 766 | } 767 | } 768 | } 769 | } 770 | 771 | 772 | void 773 | bitrv2conj (int n, double *a) 774 | { 775 | int j0, k0, j1, k1, l, m, i, j, k; 776 | double xr, xi, yr, yi; 777 | 778 | l = n >> 2; 779 | m = 2; 780 | while (m < l) 781 | { 782 | l >>= 1; 783 | m <<= 1; 784 | } 785 | if (m == l) 786 | { 787 | j0 = 0; 788 | for (k0 = 0; k0 < m; k0 += 2) 789 | { 790 | k = k0; 791 | for (j = j0; j < j0 + k0; j += 2) 792 | { 793 | xr = a[j]; 794 | xi = -a[j + 1]; 795 | yr = a[k]; 796 | yi = -a[k + 1]; 797 | a[j] = yr; 798 | a[j + 1] = yi; 799 | a[k] = xr; 800 | a[k + 1] = xi; 801 | j1 = j + m; 802 | k1 = k + 2 * m; 803 | xr = a[j1]; 804 | xi = -a[j1 + 1]; 805 | yr = a[k1]; 806 | yi = -a[k1 + 1]; 807 | a[j1] = yr; 808 | a[j1 + 1] = yi; 809 | a[k1] = xr; 810 | a[k1 + 1] = xi; 811 | j1 += m; 812 | k1 -= m; 
813 | xr = a[j1]; 814 | xi = -a[j1 + 1]; 815 | yr = a[k1]; 816 | yi = -a[k1 + 1]; 817 | a[j1] = yr; 818 | a[j1 + 1] = yi; 819 | a[k1] = xr; 820 | a[k1 + 1] = xi; 821 | j1 += m; 822 | k1 += 2 * m; 823 | xr = a[j1]; 824 | xi = -a[j1 + 1]; 825 | yr = a[k1]; 826 | yi = -a[k1 + 1]; 827 | a[j1] = yr; 828 | a[j1 + 1] = yi; 829 | a[k1] = xr; 830 | a[k1 + 1] = xi; 831 | for (i = n >> 1; i > (k ^= i); i >>= 1); 832 | } 833 | k1 = j0 + k0; 834 | a[k1 + 1] = -a[k1 + 1]; 835 | j1 = k1 + m; 836 | k1 = j1 + m; 837 | xr = a[j1]; 838 | xi = -a[j1 + 1]; 839 | yr = a[k1]; 840 | yi = -a[k1 + 1]; 841 | a[j1] = yr; 842 | a[j1 + 1] = yi; 843 | a[k1] = xr; 844 | a[k1 + 1] = xi; 845 | k1 += m; 846 | a[k1 + 1] = -a[k1 + 1]; 847 | for (i = n >> 1; i > (j0 ^= i); i >>= 1); 848 | } 849 | } 850 | else 851 | { 852 | a[1] = -a[1]; 853 | a[m + 1] = -a[m + 1]; 854 | j0 = 0; 855 | for (k0 = 2; k0 < m; k0 += 2) 856 | { 857 | for (i = n >> 1; i > (j0 ^= i); i >>= 1); 858 | k = k0; 859 | for (j = j0; j < j0 + k0; j += 2) 860 | { 861 | xr = a[j]; 862 | xi = -a[j + 1]; 863 | yr = a[k]; 864 | yi = -a[k + 1]; 865 | a[j] = yr; 866 | a[j + 1] = yi; 867 | a[k] = xr; 868 | a[k + 1] = xi; 869 | j1 = j + m; 870 | k1 = k + m; 871 | xr = a[j1]; 872 | xi = -a[j1 + 1]; 873 | yr = a[k1]; 874 | yi = -a[k1 + 1]; 875 | a[j1] = yr; 876 | a[j1 + 1] = yi; 877 | a[k1] = xr; 878 | a[k1 + 1] = xi; 879 | for (i = n >> 1; i > (k ^= i); i >>= 1); 880 | } 881 | k1 = j0 + k0; 882 | a[k1 + 1] = -a[k1 + 1]; 883 | a[k1 + m + 1] = -a[k1 + m + 1]; 884 | } 885 | } 886 | } 887 | 888 | 889 | void 890 | bitrv216 (double *a) 891 | { 892 | double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 893 | x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, 894 | x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; 895 | 896 | x1r = a[2]; 897 | x1i = a[3]; 898 | x2r = a[4]; 899 | x2i = a[5]; 900 | x3r = a[6]; 901 | x3i = a[7]; 902 | x4r = a[8]; 903 | x4i = a[9]; 904 | x5r = a[10]; 905 | x5i = a[11]; 906 | x7r = a[14]; 907 | x7i = a[15]; 908 | x8r = a[16]; 909 | x8i = 
a[17]; 910 | x10r = a[20]; 911 | x10i = a[21]; 912 | x11r = a[22]; 913 | x11i = a[23]; 914 | x12r = a[24]; 915 | x12i = a[25]; 916 | x13r = a[26]; 917 | x13i = a[27]; 918 | x14r = a[28]; 919 | x14i = a[29]; 920 | a[2] = x8r; 921 | a[3] = x8i; 922 | a[4] = x4r; 923 | a[5] = x4i; 924 | a[6] = x12r; 925 | a[7] = x12i; 926 | a[8] = x2r; 927 | a[9] = x2i; 928 | a[10] = x10r; 929 | a[11] = x10i; 930 | a[14] = x14r; 931 | a[15] = x14i; 932 | a[16] = x1r; 933 | a[17] = x1i; 934 | a[20] = x5r; 935 | a[21] = x5i; 936 | a[22] = x13r; 937 | a[23] = x13i; 938 | a[24] = x3r; 939 | a[25] = x3i; 940 | a[26] = x11r; 941 | a[27] = x11i; 942 | a[28] = x7r; 943 | a[29] = x7i; 944 | } 945 | 946 | 947 | void 948 | bitrv216neg (double *a) 949 | { 950 | double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 951 | x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, 952 | x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, 953 | x13r, x13i, x14r, x14i, x15r, x15i; 954 | 955 | x1r = a[2]; 956 | x1i = a[3]; 957 | x2r = a[4]; 958 | x2i = a[5]; 959 | x3r = a[6]; 960 | x3i = a[7]; 961 | x4r = a[8]; 962 | x4i = a[9]; 963 | x5r = a[10]; 964 | x5i = a[11]; 965 | x6r = a[12]; 966 | x6i = a[13]; 967 | x7r = a[14]; 968 | x7i = a[15]; 969 | x8r = a[16]; 970 | x8i = a[17]; 971 | x9r = a[18]; 972 | x9i = a[19]; 973 | x10r = a[20]; 974 | x10i = a[21]; 975 | x11r = a[22]; 976 | x11i = a[23]; 977 | x12r = a[24]; 978 | x12i = a[25]; 979 | x13r = a[26]; 980 | x13i = a[27]; 981 | x14r = a[28]; 982 | x14i = a[29]; 983 | x15r = a[30]; 984 | x15i = a[31]; 985 | a[2] = x15r; 986 | a[3] = x15i; 987 | a[4] = x7r; 988 | a[5] = x7i; 989 | a[6] = x11r; 990 | a[7] = x11i; 991 | a[8] = x3r; 992 | a[9] = x3i; 993 | a[10] = x13r; 994 | a[11] = x13i; 995 | a[12] = x5r; 996 | a[13] = x5i; 997 | a[14] = x9r; 998 | a[15] = x9i; 999 | a[16] = x1r; 1000 | a[17] = x1i; 1001 | a[18] = x14r; 1002 | a[19] = x14i; 1003 | a[20] = x6r; 1004 | a[21] = x6i; 1005 | a[22] = x10r; 1006 | a[23] = x10i; 1007 | a[24] = x2r; 1008 | a[25] = x2i; 1009 | a[26] = x12r; 1010 
| a[27] = x12i; 1011 | a[28] = x4r; 1012 | a[29] = x4i; 1013 | a[30] = x8r; 1014 | a[31] = x8i; 1015 | } 1016 | 1017 | 1018 | void 1019 | bitrv208 (double *a) 1020 | { 1021 | double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; 1022 | 1023 | x1r = a[2]; 1024 | x1i = a[3]; 1025 | x3r = a[6]; 1026 | x3i = a[7]; 1027 | x4r = a[8]; 1028 | x4i = a[9]; 1029 | x6r = a[12]; 1030 | x6i = a[13]; 1031 | a[2] = x4r; 1032 | a[3] = x4i; 1033 | a[6] = x6r; 1034 | a[7] = x6i; 1035 | a[8] = x1r; 1036 | a[9] = x1i; 1037 | a[12] = x3r; 1038 | a[13] = x3i; 1039 | } 1040 | 1041 | 1042 | void 1043 | bitrv208neg (double *a) 1044 | { 1045 | double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i; 1046 | 1047 | x1r = a[2]; 1048 | x1i = a[3]; 1049 | x2r = a[4]; 1050 | x2i = a[5]; 1051 | x3r = a[6]; 1052 | x3i = a[7]; 1053 | x4r = a[8]; 1054 | x4i = a[9]; 1055 | x5r = a[10]; 1056 | x5i = a[11]; 1057 | x6r = a[12]; 1058 | x6i = a[13]; 1059 | x7r = a[14]; 1060 | x7i = a[15]; 1061 | a[2] = x7r; 1062 | a[3] = x7i; 1063 | a[4] = x3r; 1064 | a[5] = x3i; 1065 | a[6] = x5r; 1066 | a[7] = x5i; 1067 | a[8] = x1r; 1068 | a[9] = x1i; 1069 | a[10] = x6r; 1070 | a[11] = x6i; 1071 | a[12] = x2r; 1072 | a[13] = x2i; 1073 | a[14] = x4r; 1074 | a[15] = x4i; 1075 | } 1076 | 1077 | 1078 | void 1079 | bitrv1 (int n, double *a) 1080 | { 1081 | int j0, k0, j1, k1, l, m, i, j, k; 1082 | double x; 1083 | 1084 | l = n >> 2; 1085 | m = 1; 1086 | while (m < l) 1087 | { 1088 | l >>= 1; 1089 | m <<= 1; 1090 | } 1091 | if (m == l) 1092 | { 1093 | j0 = 0; 1094 | for (k0 = 0; k0 < m; k0++) 1095 | { 1096 | k = k0; 1097 | for (j = j0; j < j0 + k0; j++) 1098 | { 1099 | x = a[j]; 1100 | a[j] = a[k]; 1101 | a[k] = x; 1102 | j1 = j + m; 1103 | k1 = k + 2 * m; 1104 | x = a[j1]; 1105 | a[j1] = a[k1]; 1106 | a[k1] = x; 1107 | j1 += m; 1108 | k1 -= m; 1109 | x = a[j1]; 1110 | a[j1] = a[k1]; 1111 | a[k1] = x; 1112 | j1 += m; 1113 | k1 += 2 * m; 1114 | x = a[j1]; 1115 | a[j1] = a[k1]; 1116 | a[k1] = x; 1117 | for (i = n 
>> 1; i > (k ^= i); i >>= 1); 1118 | } 1119 | j1 = j0 + k0 + m; 1120 | k1 = j1 + m; 1121 | x = a[j1]; 1122 | a[j1] = a[k1]; 1123 | a[k1] = x; 1124 | for (i = n >> 1; i > (j0 ^= i); i >>= 1); 1125 | } 1126 | } 1127 | else 1128 | { 1129 | j0 = 0; 1130 | for (k0 = 1; k0 < m; k0++) 1131 | { 1132 | for (i = n >> 1; i > (j0 ^= i); i >>= 1); 1133 | k = k0; 1134 | for (j = j0; j < j0 + k0; j++) 1135 | { 1136 | x = a[j]; 1137 | a[j] = a[k]; 1138 | a[k] = x; 1139 | j1 = j + m; 1140 | k1 = k + m; 1141 | x = a[j1]; 1142 | a[j1] = a[k1]; 1143 | a[k1] = x; 1144 | for (i = n >> 1; i > (k ^= i); i >>= 1); 1145 | } 1146 | } 1147 | } 1148 | } 1149 | 1150 | 1151 | void 1152 | cftb1st (int n, double *a) 1153 | { 1154 | int i, i0, j, j0, j1, j2, j3, m, mh; 1155 | double ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, 1156 | wd1r, wd1i, wd3r, wd3i, ss1, ss3; 1157 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1158 | 1159 | mh = n >> 3; 1160 | m = 2 * mh; 1161 | j1 = m; 1162 | j2 = j1 + m; 1163 | j3 = j2 + m; 1164 | x0r = a[0] + a[j2]; 1165 | x0i = -a[1] - a[j2 + 1]; 1166 | x1r = a[0] - a[j2]; 1167 | x1i = -a[1] + a[j2 + 1]; 1168 | x2r = a[j1] + a[j3]; 1169 | x2i = a[j1 + 1] + a[j3 + 1]; 1170 | x3r = a[j1] - a[j3]; 1171 | x3i = a[j1 + 1] - a[j3 + 1]; 1172 | a[0] = x0r + x2r; 1173 | a[1] = x0i - x2i; 1174 | a[j1] = x0r - x2r; 1175 | a[j1 + 1] = x0i + x2i; 1176 | a[j2] = x1r + x3i; 1177 | a[j2 + 1] = x1i + x3r; 1178 | a[j3] = x1r - x3i; 1179 | a[j3 + 1] = x1i - x3r; 1180 | wd1r = 1; 1181 | wd1i = 0; 1182 | wd3r = 1; 1183 | wd3i = 0; 1184 | ew = M_PI_2 / m; 1185 | w1r = cos (2 * ew); 1186 | w1i = sin (2 * ew); 1187 | wk1r = w1r; 1188 | wk1i = w1i; 1189 | ss1 = 2 * w1i; 1190 | wk3i = 2 * ss1 * wk1r; 1191 | wk3r = wk1r - wk3i * wk1i; 1192 | wk3i = wk1i - wk3i * wk1r; 1193 | ss3 = 2 * wk3i; 1194 | i = 0; 1195 | for (;;) 1196 | { 1197 | i0 = i + 4 * CDFT_LOOP_DIV; 1198 | if (i0 > mh - 4) 1199 | { 1200 | i0 = mh - 4; 1201 | } 1202 | for (j = i + 2; j < i0; j += 4) 1203 | { 1204 | wd1r -= ss1 * wk1i; 
1205 | wd1i += ss1 * wk1r; 1206 | wd3r -= ss3 * wk3i; 1207 | wd3i += ss3 * wk3r; 1208 | j1 = j + m; 1209 | j2 = j1 + m; 1210 | j3 = j2 + m; 1211 | x0r = a[j] + a[j2]; 1212 | x0i = -a[j + 1] - a[j2 + 1]; 1213 | x1r = a[j] - a[j2]; 1214 | x1i = -a[j + 1] + a[j2 + 1]; 1215 | x2r = a[j1] + a[j3]; 1216 | x2i = a[j1 + 1] + a[j3 + 1]; 1217 | x3r = a[j1] - a[j3]; 1218 | x3i = a[j1 + 1] - a[j3 + 1]; 1219 | a[j] = x0r + x2r; 1220 | a[j + 1] = x0i - x2i; 1221 | a[j1] = x0r - x2r; 1222 | a[j1 + 1] = x0i + x2i; 1223 | x0r = x1r + x3i; 1224 | x0i = x1i + x3r; 1225 | a[j2] = wk1r * x0r - wk1i * x0i; 1226 | a[j2 + 1] = wk1r * x0i + wk1i * x0r; 1227 | x0r = x1r - x3i; 1228 | x0i = x1i - x3r; 1229 | a[j3] = wk3r * x0r + wk3i * x0i; 1230 | a[j3 + 1] = wk3r * x0i - wk3i * x0r; 1231 | x0r = a[j + 2] + a[j2 + 2]; 1232 | x0i = -a[j + 3] - a[j2 + 3]; 1233 | x1r = a[j + 2] - a[j2 + 2]; 1234 | x1i = -a[j + 3] + a[j2 + 3]; 1235 | x2r = a[j1 + 2] + a[j3 + 2]; 1236 | x2i = a[j1 + 3] + a[j3 + 3]; 1237 | x3r = a[j1 + 2] - a[j3 + 2]; 1238 | x3i = a[j1 + 3] - a[j3 + 3]; 1239 | a[j + 2] = x0r + x2r; 1240 | a[j + 3] = x0i - x2i; 1241 | a[j1 + 2] = x0r - x2r; 1242 | a[j1 + 3] = x0i + x2i; 1243 | x0r = x1r + x3i; 1244 | x0i = x1i + x3r; 1245 | a[j2 + 2] = wd1r * x0r - wd1i * x0i; 1246 | a[j2 + 3] = wd1r * x0i + wd1i * x0r; 1247 | x0r = x1r - x3i; 1248 | x0i = x1i - x3r; 1249 | a[j3 + 2] = wd3r * x0r + wd3i * x0i; 1250 | a[j3 + 3] = wd3r * x0i - wd3i * x0r; 1251 | j0 = m - j; 1252 | j1 = j0 + m; 1253 | j2 = j1 + m; 1254 | j3 = j2 + m; 1255 | x0r = a[j0] + a[j2]; 1256 | x0i = -a[j0 + 1] - a[j2 + 1]; 1257 | x1r = a[j0] - a[j2]; 1258 | x1i = -a[j0 + 1] + a[j2 + 1]; 1259 | x2r = a[j1] + a[j3]; 1260 | x2i = a[j1 + 1] + a[j3 + 1]; 1261 | x3r = a[j1] - a[j3]; 1262 | x3i = a[j1 + 1] - a[j3 + 1]; 1263 | a[j0] = x0r + x2r; 1264 | a[j0 + 1] = x0i - x2i; 1265 | a[j1] = x0r - x2r; 1266 | a[j1 + 1] = x0i + x2i; 1267 | x0r = x1r + x3i; 1268 | x0i = x1i + x3r; 1269 | a[j2] = wk1i * x0r - wk1r * x0i; 1270 | a[j2 + 1] = 
wk1i * x0i + wk1r * x0r; 1271 | x0r = x1r - x3i; 1272 | x0i = x1i - x3r; 1273 | a[j3] = wk3i * x0r + wk3r * x0i; 1274 | a[j3 + 1] = wk3i * x0i - wk3r * x0r; 1275 | x0r = a[j0 - 2] + a[j2 - 2]; 1276 | x0i = -a[j0 - 1] - a[j2 - 1]; 1277 | x1r = a[j0 - 2] - a[j2 - 2]; 1278 | x1i = -a[j0 - 1] + a[j2 - 1]; 1279 | x2r = a[j1 - 2] + a[j3 - 2]; 1280 | x2i = a[j1 - 1] + a[j3 - 1]; 1281 | x3r = a[j1 - 2] - a[j3 - 2]; 1282 | x3i = a[j1 - 1] - a[j3 - 1]; 1283 | a[j0 - 2] = x0r + x2r; 1284 | a[j0 - 1] = x0i - x2i; 1285 | a[j1 - 2] = x0r - x2r; 1286 | a[j1 - 1] = x0i + x2i; 1287 | x0r = x1r + x3i; 1288 | x0i = x1i + x3r; 1289 | a[j2 - 2] = wd1i * x0r - wd1r * x0i; 1290 | a[j2 - 1] = wd1i * x0i + wd1r * x0r; 1291 | x0r = x1r - x3i; 1292 | x0i = x1i - x3r; 1293 | a[j3 - 2] = wd3i * x0r + wd3r * x0i; 1294 | a[j3 - 1] = wd3i * x0i - wd3r * x0r; 1295 | wk1r -= ss1 * wd1i; 1296 | wk1i += ss1 * wd1r; 1297 | wk3r -= ss3 * wd3i; 1298 | wk3i += ss3 * wd3r; 1299 | } 1300 | if (i0 == mh - 4) 1301 | { 1302 | break; 1303 | } 1304 | wd1r = cos (ew * i0); 1305 | wd1i = sin (ew * i0); 1306 | wd3i = 4 * wd1i * wd1r; 1307 | wd3r = wd1r - wd3i * wd1i; 1308 | wd3i = wd1i - wd3i * wd1r; 1309 | wk1r = w1r * wd1r - w1i * wd1i; 1310 | wk1i = w1r * wd1i + w1i * wd1r; 1311 | wk3i = 4 * wk1i * wk1r; 1312 | wk3r = wk1r - wk3i * wk1i; 1313 | wk3i = wk1i - wk3i * wk1r; 1314 | i = i0; 1315 | } 1316 | wd1r -= ss1 * wk1i; 1317 | j0 = mh; 1318 | j1 = j0 + m; 1319 | j2 = j1 + m; 1320 | j3 = j2 + m; 1321 | x0r = a[j0 - 2] + a[j2 - 2]; 1322 | x0i = -a[j0 - 1] - a[j2 - 1]; 1323 | x1r = a[j0 - 2] - a[j2 - 2]; 1324 | x1i = -a[j0 - 1] + a[j2 - 1]; 1325 | x2r = a[j1 - 2] + a[j3 - 2]; 1326 | x2i = a[j1 - 1] + a[j3 - 1]; 1327 | x3r = a[j1 - 2] - a[j3 - 2]; 1328 | x3i = a[j1 - 1] - a[j3 - 1]; 1329 | a[j0 - 2] = x0r + x2r; 1330 | a[j0 - 1] = x0i - x2i; 1331 | a[j1 - 2] = x0r - x2r; 1332 | a[j1 - 1] = x0i + x2i; 1333 | x0r = x1r + x3i; 1334 | x0i = x1i + x3r; 1335 | a[j2 - 2] = wk1r * x0r - wk1i * x0i; 1336 | a[j2 - 1] = wk1r 
* x0i + wk1i * x0r; 1337 | x0r = x1r - x3i; 1338 | x0i = x1i - x3r; 1339 | a[j3 - 2] = wk3r * x0r + wk3i * x0i; 1340 | a[j3 - 1] = wk3r * x0i - wk3i * x0r; 1341 | x0r = a[j0] + a[j2]; 1342 | x0i = -a[j0 + 1] - a[j2 + 1]; 1343 | x1r = a[j0] - a[j2]; 1344 | x1i = -a[j0 + 1] + a[j2 + 1]; 1345 | x2r = a[j1] + a[j3]; 1346 | x2i = a[j1 + 1] + a[j3 + 1]; 1347 | x3r = a[j1] - a[j3]; 1348 | x3i = a[j1 + 1] - a[j3 + 1]; 1349 | a[j0] = x0r + x2r; 1350 | a[j0 + 1] = x0i - x2i; 1351 | a[j1] = x0r - x2r; 1352 | a[j1 + 1] = x0i + x2i; 1353 | x0r = x1r + x3i; 1354 | x0i = x1i + x3r; 1355 | a[j2] = wd1r * (x0r - x0i); 1356 | a[j2 + 1] = wd1r * (x0i + x0r); 1357 | x0r = x1r - x3i; 1358 | x0i = x1i - x3r; 1359 | a[j3] = -wd1r * (x0r + x0i); 1360 | a[j3 + 1] = -wd1r * (x0i - x0r); 1361 | x0r = a[j0 + 2] + a[j2 + 2]; 1362 | x0i = -a[j0 + 3] - a[j2 + 3]; 1363 | x1r = a[j0 + 2] - a[j2 + 2]; 1364 | x1i = -a[j0 + 3] + a[j2 + 3]; 1365 | x2r = a[j1 + 2] + a[j3 + 2]; 1366 | x2i = a[j1 + 3] + a[j3 + 3]; 1367 | x3r = a[j1 + 2] - a[j3 + 2]; 1368 | x3i = a[j1 + 3] - a[j3 + 3]; 1369 | a[j0 + 2] = x0r + x2r; 1370 | a[j0 + 3] = x0i - x2i; 1371 | a[j1 + 2] = x0r - x2r; 1372 | a[j1 + 3] = x0i + x2i; 1373 | x0r = x1r + x3i; 1374 | x0i = x1i + x3r; 1375 | a[j2 + 2] = wk1i * x0r - wk1r * x0i; 1376 | a[j2 + 3] = wk1i * x0i + wk1r * x0r; 1377 | x0r = x1r - x3i; 1378 | x0i = x1i - x3r; 1379 | a[j3 + 2] = wk3i * x0r + wk3r * x0i; 1380 | a[j3 + 3] = wk3i * x0i - wk3r * x0r; 1381 | } 1382 | 1383 | /* cftrec1: recursive driver for one block of n doubles starting at a. It first runs the cftmdl1 pass over the whole block. If n > CDFT_RECURSIVE_N it then recurses into the four quarter-size sub-blocks (m = n >> 2 doubles each, at offsets 0, m, 2 * m, 3 * m) using cftrec1, cftrec2, cftrec1, cftrec1 in that order; otherwise it hands the block to the loop-based cftexp1. CDFT_RECURSIVE_N is defined outside this chunk. Works in place; returns nothing. */ 1384 | void 1385 | cftrec1 (int n, double *a) 1386 | { 1387 | void cftrec1 (int n, double *a); 1388 | void cftrec2 (int n, double *a); 1389 | void cftmdl1 (int n, double *a); 1390 | void cftexp1 (int n, double *a); 1391 | int m; 1392 | 1393 | m = n >> 2; /* length of each quarter sub-block */ 1394 | cftmdl1 (n, a); 1395 | if (n > CDFT_RECURSIVE_N) 1396 | { /* block still above the threshold: split into four quarters */ 1397 | cftrec1 (m, a); 1398 | cftrec2 (m, &a[m]); 1399 | cftrec1 (m, &a[2 * m]); 1400 | cftrec1 (m, &a[3 * m]); 1401 | } 1402 | else 1403 | { /* at or below the threshold: finish without further recursion */ 1404 | cftexp1 (n, a); 1405 | } 1406 | } 1407 | 1408 | 1409 
| /* cftrec2: recursive driver paired with cftrec1 for one block of n doubles starting at a. It first runs the cftmdl2 pass over the whole block. If n > CDFT_RECURSIVE_N it recurses into the four quarter-size sub-blocks (m = n >> 2 doubles each) using cftrec1, cftrec2, cftrec1, cftrec2 in that order; otherwise it hands the block to the loop-based cftexp2. CDFT_RECURSIVE_N is defined outside this chunk. Works in place; returns nothing. */ void 1410 | cftrec2 (int n, double *a) 1411 | { 1412 | void cftrec1 (int n, double *a); 1413 | void cftrec2 (int n, double *a); 1414 | void cftmdl2 (int n, double *a); 1415 | void cftexp2 (int n, double *a); 1416 | int m; 1417 | 1418 | m = n >> 2; /* length of each quarter sub-block */ 1419 | cftmdl2 (n, a); 1420 | if (n > CDFT_RECURSIVE_N) 1421 | { /* block still above the threshold: split into four quarters */ 1422 | cftrec1 (m, a); 1423 | cftrec2 (m, &a[m]); 1424 | cftrec1 (m, &a[2 * m]); 1425 | cftrec2 (m, &a[3 * m]); 1426 | } 1427 | else 1428 | { /* at or below the threshold: finish without further recursion */ 1429 | cftexp2 (n, a); 1430 | } 1431 | } 1432 | 1433 | /* cftexp1: loop-based completion of a block of n doubles starting at a. The sub-block length l starts at n >> 2 and is divided by 4 after each pass. While l > 128, every pass walks the block with the k / j loops and applies cftmdl1 at offset j, cftmdl2 at offset k + j and cftmdl1 at offset 2 * k + j, then cftmdl1 on the last sub-block at n - l. The final pass (l <= 128) uses the same walk but follows every cftmdl1 with cftfx41 and every cftmdl2 with cftfx42 on the same sub-block. Works in place; returns nothing. */ 1434 | void 1435 | cftexp1 (int n, double *a) 1436 | { 1437 | void cftmdl1 (int n, double *a); 1438 | void cftmdl2 (int n, double *a); 1439 | void cftfx41 (int n, double *a); 1440 | void cftfx42 (int n, double *a); 1441 | int j, k, l; 1442 | 1443 | l = n >> 2; 1444 | while (l > 128) 1445 | { /* intermediate passes: cftmdl calls only */ 1446 | for (k = l; k < n; k <<= 2) 1447 | { 1448 | for (j = k - l; j < n; j += 4 * k) 1449 | { 1450 | cftmdl1 (l, &a[j]); 1451 | cftmdl2 (l, &a[k + j]); 1452 | cftmdl1 (l, &a[2 * k + j]); 1453 | } 1454 | } 1455 | cftmdl1 (l, &a[n - l]); 1456 | l >>= 2; 1457 | } 1458 | /* final pass: each cftmdl call is followed by the matching fixed-size kernel dispatcher */ for (k = l; k < n; k <<= 2) 1459 | { 1460 | for (j = k - l; j < n; j += 4 * k) 1461 | { 1462 | cftmdl1 (l, &a[j]); 1463 | cftfx41 (l, &a[j]); 1464 | cftmdl2 (l, &a[k + j]); 1465 | cftfx42 (l, &a[k + j]); 1466 | cftmdl1 (l, &a[2 * k + j]); 1467 | cftfx41 (l, &a[2 * k + j]); 1468 | } 1469 | } 1470 | cftmdl1 (l, &a[n - l]); 1471 | cftfx41 (l, &a[n - l]); 1472 | } 1473 | 1474 | /* cftexp2: counterpart of cftexp1 that treats the block of n doubles as two halves of m = n >> 1 doubles and issues every call twice, once at offset j and once at offset m + j. Sub-blocks starting at j = k - l (step 2 * k) get cftmdl1; those starting at j = 2 * k - l (step 4 * k) get cftmdl2. The sub-block length l starts at n >> 2 and is divided by 4 per pass; the final pass (l <= 128) adds cftfx41 after cftmdl1 and cftfx42 after cftmdl2. Works in place; returns nothing. */ 1475 | void 1476 | cftexp2 (int n, double *a) 1477 | { 1478 | void cftmdl1 (int n, double *a); 1479 | void cftmdl2 (int n, double *a); 1480 | void cftfx41 (int n, double *a); 1481 | void cftfx42 (int n, double *a); 1482 | int j, k, l, m; 1483 | 1484 | m = n >> 1; 1485 | l = n >> 2; 1486 | while (l > 128) 1487 | { /* intermediate passes: cftmdl calls only */ 1488 | for (k = l; k < m; k <<= 2) 1489 | { 1490 | for (j = k - l; j < m; j += 2 * k) 1491 | { 1492 | cftmdl1 (l, &a[j]); 1493 | cftmdl1 (l, &a[m + j]); 1494 | } 1495 | for (j = 2 * k - l; j < m; j += 4 * k) 1496 | { 1497 | cftmdl2 
(l, &a[j]); 1498 | cftmdl2 (l, &a[m + j]); 1499 | } 1500 | } 1501 | l >>= 2; 1502 | } 1503 | /* final pass (l <= 128 here): each cftmdl call is followed by the matching fixed-size kernel dispatcher */ for (k = l; k < m; k <<= 2) 1504 | { 1505 | for (j = k - l; j < m; j += 2 * k) 1506 | { 1507 | cftmdl1 (l, &a[j]); 1508 | cftfx41 (l, &a[j]); 1509 | cftmdl1 (l, &a[m + j]); 1510 | cftfx41 (l, &a[m + j]); 1511 | } 1512 | for (j = 2 * k - l; j < m; j += 4 * k) 1513 | { 1514 | cftmdl2 (l, &a[j]); 1515 | cftfx42 (l, &a[j]); 1516 | cftmdl2 (l, &a[m + j]); 1517 | cftfx42 (l, &a[m + j]); 1518 | } 1519 | } 1520 | } 1521 | 1522 | /* cftmdl1: one in-place radix-4 butterfly pass over a block of n doubles, addressed as four sub-blocks at offsets 0, m, 2 * m and 3 * m with m = 2 * (n >> 3). Values are handled as (a[x], a[x + 1]) pairs; the r / i suffixes suggest real and imaginary parts (NOTE(review): confirm the interleaved complex layout with the callers). Twiddle factors are not read from a table: they are advanced by a recurrence with steps ss1 and ss3 and recomputed from cos / sin after every chunk of 4 * CDFT_LOOP_DIV indices. Each loop iteration processes indices j and j + 2 together with the mirrored indices m - j and m - j - 2; the entries around mh are handled after the loop. M_PI_2 and CDFT_LOOP_DIV come from outside this chunk. Returns nothing. */ 1523 | void 1524 | cftmdl1 (int n, double *a) 1525 | { 1526 | int i, i0, j, j0, j1, j2, j3, m, mh; 1527 | double ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, 1528 | wd1r, wd1i, wd3r, wd3i, ss1, ss3; 1529 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1530 | 1531 | mh = n >> 3; 1532 | m = 2 * mh; 1533 | j1 = m; 1534 | j2 = j1 + m; 1535 | j3 = j2 + m; 1536 | /* butterfly at index 0: sums and differences only, no twiddle multiplications */ x0r = a[0] + a[j2]; 1537 | x0i = a[1] + a[j2 + 1]; 1538 | x1r = a[0] - a[j2]; 1539 | x1i = a[1] - a[j2 + 1]; 1540 | x2r = a[j1] + a[j3]; 1541 | x2i = a[j1 + 1] + a[j3 + 1]; 1542 | x3r = a[j1] - a[j3]; 1543 | x3i = a[j1 + 1] - a[j3 + 1]; 1544 | a[0] = x0r + x2r; 1545 | a[1] = x0i + x2i; 1546 | a[j1] = x0r - x2r; 1547 | a[j1 + 1] = x0i - x2i; 1548 | a[j2] = x1r - x3i; 1549 | a[j2 + 1] = x1i + x3r; 1550 | a[j3] = x1r + x3i; 1551 | a[j3 + 1] = x1i - x3r; 1552 | /* (wd1, wd3) start at angle 0; (wk1, wk3) start one step ahead at angle t = 2 * ew */ wd1r = 1; 1553 | wd1i = 0; 1554 | wd3r = 1; 1555 | wd3i = 0; 1556 | ew = M_PI_2 / m; 1557 | w1r = cos (2 * ew); 1558 | w1i = sin (2 * ew); 1559 | wk1r = w1r; 1560 | wk1i = w1i; 1561 | ss1 = 2 * w1i; 1562 | /* triple-angle identities: with (wk1r, wk1i) = (cos t, sin t) this yields wk3r = cos(3t) and wk3i = -sin(3t) */ wk3i = 2 * ss1 * wk1r; 1563 | wk3r = wk1r - wk3i * wk1i; 1564 | wk3i = wk1i - wk3i * wk1r; 1565 | ss3 = 2 * wk3i; 1566 | i = 0; 1567 | for (;;) 1568 | { 1569 | /* process indices in chunks of at most 4 * CDFT_LOOP_DIV, never past mh - 4 */ i0 = i + 4 * CDFT_LOOP_DIV; 1570 | if (i0 > mh - 4) 1571 | { 1572 | i0 = mh - 4; 1573 | } 1574 | for (j = i + 2; j < i0; j += 4) 1575 | { 1576 | /* advance the wd twiddles by one recurrence step using the current wk values */ wd1r -= ss1 * wk1i; 1577 | wd1i += ss1 * wk1r; 1578 | wd3r -= ss3 * wk3i; 1579 | wd3i += ss3 * wk3r; 1580 | j1 = j + m; 1581 | j2 = j1 + m; 1582 | j3 = j2 
+ m; 1583 | x0r = a[j] + a[j2]; 1584 | x0i = a[j + 1] + a[j2 + 1]; 1585 | x1r = a[j] - a[j2]; 1586 | x1i = a[j + 1] - a[j2 + 1]; 1587 | x2r = a[j1] + a[j3]; 1588 | x2i = a[j1 + 1] + a[j3 + 1]; 1589 | x3r = a[j1] - a[j3]; 1590 | x3i = a[j1 + 1] - a[j3 + 1]; 1591 | a[j] = x0r + x2r; 1592 | a[j + 1] = x0i + x2i; 1593 | a[j1] = x0r - x2r; 1594 | a[j1 + 1] = x0i - x2i; 1595 | x0r = x1r - x3i; 1596 | x0i = x1i + x3r; 1597 | a[j2] = wk1r * x0r - wk1i * x0i; 1598 | a[j2 + 1] = wk1r * x0i + wk1i * x0r; 1599 | x0r = x1r + x3i; 1600 | x0i = x1i - x3r; 1601 | a[j3] = wk3r * x0r + wk3i * x0i; 1602 | a[j3 + 1] = wk3r * x0i - wk3i * x0r; 1603 | x0r = a[j + 2] + a[j2 + 2]; 1604 | x0i = a[j + 3] + a[j2 + 3]; 1605 | x1r = a[j + 2] - a[j2 + 2]; 1606 | x1i = a[j + 3] - a[j2 + 3]; 1607 | x2r = a[j1 + 2] + a[j3 + 2]; 1608 | x2i = a[j1 + 3] + a[j3 + 3]; 1609 | x3r = a[j1 + 2] - a[j3 + 2]; 1610 | x3i = a[j1 + 3] - a[j3 + 3]; 1611 | a[j + 2] = x0r + x2r; 1612 | a[j + 3] = x0i + x2i; 1613 | a[j1 + 2] = x0r - x2r; 1614 | a[j1 + 3] = x0i - x2i; 1615 | x0r = x1r - x3i; 1616 | x0i = x1i + x3r; 1617 | a[j2 + 2] = wd1r * x0r - wd1i * x0i; 1618 | a[j2 + 3] = wd1r * x0i + wd1i * x0r; 1619 | x0r = x1r + x3i; 1620 | x0i = x1i - x3r; 1621 | a[j3 + 2] = wd3r * x0r + wd3i * x0i; 1622 | a[j3 + 3] = wd3r * x0i - wd3i * x0r; 1623 | j0 = m - j; 1624 | j1 = j0 + m; 1625 | j2 = j1 + m; 1626 | j3 = j2 + m; 1627 | x0r = a[j0] + a[j2]; 1628 | x0i = a[j0 + 1] + a[j2 + 1]; 1629 | x1r = a[j0] - a[j2]; 1630 | x1i = a[j0 + 1] - a[j2 + 1]; 1631 | x2r = a[j1] + a[j3]; 1632 | x2i = a[j1 + 1] + a[j3 + 1]; 1633 | x3r = a[j1] - a[j3]; 1634 | x3i = a[j1 + 1] - a[j3 + 1]; 1635 | a[j0] = x0r + x2r; 1636 | a[j0 + 1] = x0i + x2i; 1637 | a[j1] = x0r - x2r; 1638 | a[j1 + 1] = x0i - x2i; 1639 | x0r = x1r - x3i; 1640 | x0i = x1i + x3r; 1641 | a[j2] = wk1i * x0r - wk1r * x0i; 1642 | a[j2 + 1] = wk1i * x0i + wk1r * x0r; 1643 | x0r = x1r + x3i; 1644 | x0i = x1i - x3r; 1645 | a[j3] = wk3i * x0r + wk3r * x0i; 1646 | a[j3 + 1] = wk3i * 
x0i - wk3r * x0r; 1647 | x0r = a[j0 - 2] + a[j2 - 2]; 1648 | x0i = a[j0 - 1] + a[j2 - 1]; 1649 | x1r = a[j0 - 2] - a[j2 - 2]; 1650 | x1i = a[j0 - 1] - a[j2 - 1]; 1651 | x2r = a[j1 - 2] + a[j3 - 2]; 1652 | x2i = a[j1 - 1] + a[j3 - 1]; 1653 | x3r = a[j1 - 2] - a[j3 - 2]; 1654 | x3i = a[j1 - 1] - a[j3 - 1]; 1655 | a[j0 - 2] = x0r + x2r; 1656 | a[j0 - 1] = x0i + x2i; 1657 | a[j1 - 2] = x0r - x2r; 1658 | a[j1 - 1] = x0i - x2i; 1659 | x0r = x1r - x3i; 1660 | x0i = x1i + x3r; 1661 | a[j2 - 2] = wd1i * x0r - wd1r * x0i; 1662 | a[j2 - 1] = wd1i * x0i + wd1r * x0r; 1663 | x0r = x1r + x3i; 1664 | x0i = x1i - x3r; 1665 | a[j3 - 2] = wd3i * x0r + wd3r * x0i; 1666 | a[j3 - 1] = wd3i * x0i - wd3r * x0r; 1667 | wk1r -= ss1 * wd1i; 1668 | wk1i += ss1 * wd1r; 1669 | wk3r -= ss3 * wd3i; 1670 | wk3i += ss3 * wd3r; 1671 | } 1672 | if (i0 == mh - 4) 1673 | { 1674 | break; 1675 | } 1676 | wd1r = cos (ew * i0); 1677 | wd1i = sin (ew * i0); 1678 | wd3i = 4 * wd1i * wd1r; 1679 | wd3r = wd1r - wd3i * wd1i; 1680 | wd3i = wd1i - wd3i * wd1r; 1681 | wk1r = w1r * wd1r - w1i * wd1i; 1682 | wk1i = w1r * wd1i + w1i * wd1r; 1683 | wk3i = 4 * wk1i * wk1r; 1684 | wk3r = wk1r - wk3i * wk1i; 1685 | wk3i = wk1i - wk3i * wk1r; 1686 | i = i0; 1687 | } 1688 | wd1r -= ss1 * wk1i; 1689 | j0 = mh; 1690 | j1 = j0 + m; 1691 | j2 = j1 + m; 1692 | j3 = j2 + m; 1693 | x0r = a[j0 - 2] + a[j2 - 2]; 1694 | x0i = a[j0 - 1] + a[j2 - 1]; 1695 | x1r = a[j0 - 2] - a[j2 - 2]; 1696 | x1i = a[j0 - 1] - a[j2 - 1]; 1697 | x2r = a[j1 - 2] + a[j3 - 2]; 1698 | x2i = a[j1 - 1] + a[j3 - 1]; 1699 | x3r = a[j1 - 2] - a[j3 - 2]; 1700 | x3i = a[j1 - 1] - a[j3 - 1]; 1701 | a[j0 - 2] = x0r + x2r; 1702 | a[j0 - 1] = x0i + x2i; 1703 | a[j1 - 2] = x0r - x2r; 1704 | a[j1 - 1] = x0i - x2i; 1705 | x0r = x1r - x3i; 1706 | x0i = x1i + x3r; 1707 | a[j2 - 2] = wk1r * x0r - wk1i * x0i; 1708 | a[j2 - 1] = wk1r * x0i + wk1i * x0r; 1709 | x0r = x1r + x3i; 1710 | x0i = x1i - x3r; 1711 | a[j3 - 2] = wk3r * x0r + wk3i * x0i; 1712 | a[j3 - 1] = wk3r * x0i 
- wk3i * x0r; 1713 | x0r = a[j0] + a[j2]; 1714 | x0i = a[j0 + 1] + a[j2 + 1]; 1715 | x1r = a[j0] - a[j2]; 1716 | x1i = a[j0 + 1] - a[j2 + 1]; 1717 | x2r = a[j1] + a[j3]; 1718 | x2i = a[j1 + 1] + a[j3 + 1]; 1719 | x3r = a[j1] - a[j3]; 1720 | x3i = a[j1 + 1] - a[j3 + 1]; 1721 | a[j0] = x0r + x2r; 1722 | a[j0 + 1] = x0i + x2i; 1723 | a[j1] = x0r - x2r; 1724 | a[j1 + 1] = x0i - x2i; 1725 | x0r = x1r - x3i; 1726 | x0i = x1i + x3r; 1727 | a[j2] = wd1r * (x0r - x0i); 1728 | a[j2 + 1] = wd1r * (x0i + x0r); 1729 | x0r = x1r + x3i; 1730 | x0i = x1i - x3r; 1731 | a[j3] = -wd1r * (x0r + x0i); 1732 | a[j3 + 1] = -wd1r * (x0i - x0r); 1733 | x0r = a[j0 + 2] + a[j2 + 2]; 1734 | x0i = a[j0 + 3] + a[j2 + 3]; 1735 | x1r = a[j0 + 2] - a[j2 + 2]; 1736 | x1i = a[j0 + 3] - a[j2 + 3]; 1737 | x2r = a[j1 + 2] + a[j3 + 2]; 1738 | x2i = a[j1 + 3] + a[j3 + 3]; 1739 | x3r = a[j1 + 2] - a[j3 + 2]; 1740 | x3i = a[j1 + 3] - a[j3 + 3]; 1741 | a[j0 + 2] = x0r + x2r; 1742 | a[j0 + 3] = x0i + x2i; 1743 | a[j1 + 2] = x0r - x2r; 1744 | a[j1 + 3] = x0i - x2i; 1745 | x0r = x1r - x3i; 1746 | x0i = x1i + x3r; 1747 | a[j2 + 2] = wk1i * x0r - wk1r * x0i; 1748 | a[j2 + 3] = wk1i * x0i + wk1r * x0r; 1749 | x0r = x1r + x3i; 1750 | x0i = x1i - x3r; 1751 | a[j3 + 2] = wk3i * x0r + wk3r * x0i; 1752 | a[j3 + 3] = wk3i * x0i - wk3r * x0r; 1753 | } 1754 | 1755 | /* cftmdl2: second flavour of the in-place pass over a block of n doubles, using the same four sub-block offsets as cftmdl1 (0, m, 2 * m, 3 * m with m = 2 * (n >> 3)). It differs in how inputs are combined: each value is paired with the swapped component of the value two sub-blocks away (for example a[0] - a[j2 + 1] and a[1] + a[j2]). It also keeps four twiddle families (wk, wd, wl, we), all advanced by recurrences with steps ss1 / ss3 and recomputed from cos / sin after every chunk of 4 * CDFT_LOOP_DIV indices; the wd and we families are derived from wk and wl by scaling with wn4r = WR5000. WR5000, M_PI_2 and CDFT_LOOP_DIV are defined outside this chunk (WR5000 is presumably cos(pi / 4) - TODO confirm). Returns nothing. */ 1756 | void 1757 | cftmdl2 (int n, double *a) 1758 | { 1759 | int i, i0, j, j0, j1, j2, j3, m, mh; 1760 | double ew, w1r, w1i, wn4r, wk1r, wk1i, wk3r, wk3i, 1761 | wl1r, wl1i, wl3r, wl3i, wd1r, wd1i, wd3r, wd3i, 1762 | we1r, we1i, we3r, we3i, ss1, ss3; 1763 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; 1764 | 1765 | mh = n >> 3; 1766 | m = 2 * mh; 1767 | wn4r = WR5000; 1768 | j1 = m; 1769 | j2 = j1 + m; 1770 | j3 = j2 + m; 1771 | /* butterfly at index 0: only the constant wn4r is needed as a multiplier */ x0r = a[0] - a[j2 + 1]; 1772 | x0i = a[1] + a[j2]; 1773 | x1r = a[0] + a[j2 + 1]; 1774 | x1i = a[1] - a[j2]; 1775 | x2r = a[j1] - a[j3 + 1]; 1776 | x2i = a[j1 + 1] + a[j3]; 1777 | x3r = a[j1] + a[j3 + 1]; 1778 | 
x3i = a[j1 + 1] - a[j3]; 1779 | y0r = wn4r * (x2r - x2i); 1780 | y0i = wn4r * (x2i + x2r); 1781 | a[0] = x0r + y0r; 1782 | a[1] = x0i + y0i; 1783 | a[j1] = x0r - y0r; 1784 | a[j1 + 1] = x0i - y0i; 1785 | y0r = wn4r * (x3r - x3i); 1786 | y0i = wn4r * (x3i + x3r); 1787 | a[j2] = x1r - y0i; 1788 | a[j2 + 1] = x1i + y0r; 1789 | a[j3] = x1r + y0i; 1790 | a[j3 + 1] = x1i - y0r; 1791 | wl1r = 1; 1792 | wl1i = 0; 1793 | wl3r = 1; 1794 | wl3i = 0; 1795 | we1r = wn4r; 1796 | we1i = wn4r; 1797 | we3r = -wn4r; 1798 | we3i = -wn4r; 1799 | ew = M_PI_2 / (2 * m); 1800 | w1r = cos (2 * ew); 1801 | w1i = sin (2 * ew); 1802 | wk1r = w1r; 1803 | wk1i = w1i; 1804 | wd1r = wn4r * (w1r - w1i); 1805 | wd1i = wn4r * (w1i + w1r); 1806 | ss1 = 2 * w1i; 1807 | wk3i = 2 * ss1 * wk1r; 1808 | wk3r = wk1r - wk3i * wk1i; 1809 | wk3i = wk1i - wk3i * wk1r; 1810 | ss3 = 2 * wk3i; 1811 | wd3r = -wn4r * (wk3r - wk3i); 1812 | wd3i = -wn4r * (wk3i + wk3r); 1813 | i = 0; 1814 | for (;;) 1815 | { 1816 | i0 = i + 4 * CDFT_LOOP_DIV; 1817 | if (i0 > mh - 4) 1818 | { 1819 | i0 = mh - 4; 1820 | } 1821 | for (j = i + 2; j < i0; j += 4) 1822 | { 1823 | wl1r -= ss1 * wk1i; 1824 | wl1i += ss1 * wk1r; 1825 | wl3r -= ss3 * wk3i; 1826 | wl3i += ss3 * wk3r; 1827 | we1r -= ss1 * wd1i; 1828 | we1i += ss1 * wd1r; 1829 | we3r -= ss3 * wd3i; 1830 | we3i += ss3 * wd3r; 1831 | j1 = j + m; 1832 | j2 = j1 + m; 1833 | j3 = j2 + m; 1834 | x0r = a[j] - a[j2 + 1]; 1835 | x0i = a[j + 1] + a[j2]; 1836 | x1r = a[j] + a[j2 + 1]; 1837 | x1i = a[j + 1] - a[j2]; 1838 | x2r = a[j1] - a[j3 + 1]; 1839 | x2i = a[j1 + 1] + a[j3]; 1840 | x3r = a[j1] + a[j3 + 1]; 1841 | x3i = a[j1 + 1] - a[j3]; 1842 | y0r = wk1r * x0r - wk1i * x0i; 1843 | y0i = wk1r * x0i + wk1i * x0r; 1844 | y2r = wd1r * x2r - wd1i * x2i; 1845 | y2i = wd1r * x2i + wd1i * x2r; 1846 | a[j] = y0r + y2r; 1847 | a[j + 1] = y0i + y2i; 1848 | a[j1] = y0r - y2r; 1849 | a[j1 + 1] = y0i - y2i; 1850 | y0r = wk3r * x1r + wk3i * x1i; 1851 | y0i = wk3r * x1i - wk3i * x1r; 1852 | y2r = wd3r 
* x3r + wd3i * x3i; 1853 | y2i = wd3r * x3i - wd3i * x3r; 1854 | a[j2] = y0r + y2r; 1855 | a[j2 + 1] = y0i + y2i; 1856 | a[j3] = y0r - y2r; 1857 | a[j3 + 1] = y0i - y2i; 1858 | x0r = a[j + 2] - a[j2 + 3]; 1859 | x0i = a[j + 3] + a[j2 + 2]; 1860 | x1r = a[j + 2] + a[j2 + 3]; 1861 | x1i = a[j + 3] - a[j2 + 2]; 1862 | x2r = a[j1 + 2] - a[j3 + 3]; 1863 | x2i = a[j1 + 3] + a[j3 + 2]; 1864 | x3r = a[j1 + 2] + a[j3 + 3]; 1865 | x3i = a[j1 + 3] - a[j3 + 2]; 1866 | y0r = wl1r * x0r - wl1i * x0i; 1867 | y0i = wl1r * x0i + wl1i * x0r; 1868 | y2r = we1r * x2r - we1i * x2i; 1869 | y2i = we1r * x2i + we1i * x2r; 1870 | a[j + 2] = y0r + y2r; 1871 | a[j + 3] = y0i + y2i; 1872 | a[j1 + 2] = y0r - y2r; 1873 | a[j1 + 3] = y0i - y2i; 1874 | y0r = wl3r * x1r + wl3i * x1i; 1875 | y0i = wl3r * x1i - wl3i * x1r; 1876 | y2r = we3r * x3r + we3i * x3i; 1877 | y2i = we3r * x3i - we3i * x3r; 1878 | a[j2 + 2] = y0r + y2r; 1879 | a[j2 + 3] = y0i + y2i; 1880 | a[j3 + 2] = y0r - y2r; 1881 | a[j3 + 3] = y0i - y2i; 1882 | j0 = m - j; 1883 | j1 = j0 + m; 1884 | j2 = j1 + m; 1885 | j3 = j2 + m; 1886 | x0r = a[j0] - a[j2 + 1]; 1887 | x0i = a[j0 + 1] + a[j2]; 1888 | x1r = a[j0] + a[j2 + 1]; 1889 | x1i = a[j0 + 1] - a[j2]; 1890 | x2r = a[j1] - a[j3 + 1]; 1891 | x2i = a[j1 + 1] + a[j3]; 1892 | x3r = a[j1] + a[j3 + 1]; 1893 | x3i = a[j1 + 1] - a[j3]; 1894 | y0r = wd1i * x0r - wd1r * x0i; 1895 | y0i = wd1i * x0i + wd1r * x0r; 1896 | y2r = wk1i * x2r - wk1r * x2i; 1897 | y2i = wk1i * x2i + wk1r * x2r; 1898 | a[j0] = y0r + y2r; 1899 | a[j0 + 1] = y0i + y2i; 1900 | a[j1] = y0r - y2r; 1901 | a[j1 + 1] = y0i - y2i; 1902 | y0r = wd3i * x1r + wd3r * x1i; 1903 | y0i = wd3i * x1i - wd3r * x1r; 1904 | y2r = wk3i * x3r + wk3r * x3i; 1905 | y2i = wk3i * x3i - wk3r * x3r; 1906 | a[j2] = y0r + y2r; 1907 | a[j2 + 1] = y0i + y2i; 1908 | a[j3] = y0r - y2r; 1909 | a[j3 + 1] = y0i - y2i; 1910 | x0r = a[j0 - 2] - a[j2 - 1]; 1911 | x0i = a[j0 - 1] + a[j2 - 2]; 1912 | x1r = a[j0 - 2] + a[j2 - 1]; 1913 | x1i = a[j0 - 1] - a[j2 - 
2]; 1914 | x2r = a[j1 - 2] - a[j3 - 1]; 1915 | x2i = a[j1 - 1] + a[j3 - 2]; 1916 | x3r = a[j1 - 2] + a[j3 - 1]; 1917 | x3i = a[j1 - 1] - a[j3 - 2]; 1918 | y0r = we1i * x0r - we1r * x0i; 1919 | y0i = we1i * x0i + we1r * x0r; 1920 | y2r = wl1i * x2r - wl1r * x2i; 1921 | y2i = wl1i * x2i + wl1r * x2r; 1922 | a[j0 - 2] = y0r + y2r; 1923 | a[j0 - 1] = y0i + y2i; 1924 | a[j1 - 2] = y0r - y2r; 1925 | a[j1 - 1] = y0i - y2i; 1926 | y0r = we3i * x1r + we3r * x1i; 1927 | y0i = we3i * x1i - we3r * x1r; 1928 | y2r = wl3i * x3r + wl3r * x3i; 1929 | y2i = wl3i * x3i - wl3r * x3r; 1930 | a[j2 - 2] = y0r + y2r; 1931 | a[j2 - 1] = y0i + y2i; 1932 | a[j3 - 2] = y0r - y2r; 1933 | a[j3 - 1] = y0i - y2i; 1934 | wk1r -= ss1 * wl1i; 1935 | wk1i += ss1 * wl1r; 1936 | wk3r -= ss3 * wl3i; 1937 | wk3i += ss3 * wl3r; 1938 | wd1r -= ss1 * we1i; 1939 | wd1i += ss1 * we1r; 1940 | wd3r -= ss3 * we3i; 1941 | wd3i += ss3 * we3r; 1942 | } 1943 | if (i0 == mh - 4) 1944 | { 1945 | break; 1946 | } 1947 | wl1r = cos (ew * i0); 1948 | wl1i = sin (ew * i0); 1949 | wl3i = 4 * wl1i * wl1r; 1950 | wl3r = wl1r - wl3i * wl1i; 1951 | wl3i = wl1i - wl3i * wl1r; 1952 | we1r = wn4r * (wl1r - wl1i); 1953 | we1i = wn4r * (wl1i + wl1r); 1954 | we3r = -wn4r * (wl3r - wl3i); 1955 | we3i = -wn4r * (wl3i + wl3r); 1956 | wk1r = w1r * wl1r - w1i * wl1i; 1957 | wk1i = w1r * wl1i + w1i * wl1r; 1958 | wk3i = 4 * wk1i * wk1r; 1959 | wk3r = wk1r - wk3i * wk1i; 1960 | wk3i = wk1i - wk3i * wk1r; 1961 | wd1r = wn4r * (wk1r - wk1i); 1962 | wd1i = wn4r * (wk1i + wk1r); 1963 | wd3r = -wn4r * (wk3r - wk3i); 1964 | wd3i = -wn4r * (wk3i + wk3r); 1965 | i = i0; 1966 | } 1967 | wl1r -= ss1 * wk1i; 1968 | wl1i += ss1 * wk1r; 1969 | j0 = mh; 1970 | j1 = j0 + m; 1971 | j2 = j1 + m; 1972 | j3 = j2 + m; 1973 | x0r = a[j0 - 2] - a[j2 - 1]; 1974 | x0i = a[j0 - 1] + a[j2 - 2]; 1975 | x1r = a[j0 - 2] + a[j2 - 1]; 1976 | x1i = a[j0 - 1] - a[j2 - 2]; 1977 | x2r = a[j1 - 2] - a[j3 - 1]; 1978 | x2i = a[j1 - 1] + a[j3 - 2]; 1979 | x3r = a[j1 - 2] + a[j3 
- 1]; 1980 | x3i = a[j1 - 1] - a[j3 - 2]; 1981 | y0r = wk1r * x0r - wk1i * x0i; 1982 | y0i = wk1r * x0i + wk1i * x0r; 1983 | y2r = wd1r * x2r - wd1i * x2i; 1984 | y2i = wd1r * x2i + wd1i * x2r; 1985 | a[j0 - 2] = y0r + y2r; 1986 | a[j0 - 1] = y0i + y2i; 1987 | a[j1 - 2] = y0r - y2r; 1988 | a[j1 - 1] = y0i - y2i; 1989 | y0r = wk3r * x1r + wk3i * x1i; 1990 | y0i = wk3r * x1i - wk3i * x1r; 1991 | y2r = wd3r * x3r + wd3i * x3i; 1992 | y2i = wd3r * x3i - wd3i * x3r; 1993 | a[j2 - 2] = y0r + y2r; 1994 | a[j2 - 1] = y0i + y2i; 1995 | a[j3 - 2] = y0r - y2r; 1996 | a[j3 - 1] = y0i - y2i; 1997 | x0r = a[j0] - a[j2 + 1]; 1998 | x0i = a[j0 + 1] + a[j2]; 1999 | x1r = a[j0] + a[j2 + 1]; 2000 | x1i = a[j0 + 1] - a[j2]; 2001 | x2r = a[j1] - a[j3 + 1]; 2002 | x2i = a[j1 + 1] + a[j3]; 2003 | x3r = a[j1] + a[j3 + 1]; 2004 | x3i = a[j1 + 1] - a[j3]; 2005 | y0r = wl1r * x0r - wl1i * x0i; 2006 | y0i = wl1r * x0i + wl1i * x0r; 2007 | y2r = wl1i * x2r - wl1r * x2i; 2008 | y2i = wl1i * x2i + wl1r * x2r; 2009 | a[j0] = y0r + y2r; 2010 | a[j0 + 1] = y0i + y2i; 2011 | a[j1] = y0r - y2r; 2012 | a[j1 + 1] = y0i - y2i; 2013 | y0r = wl1i * x1r - wl1r * x1i; 2014 | y0i = wl1i * x1i + wl1r * x1r; 2015 | y2r = wl1r * x3r - wl1i * x3i; 2016 | y2i = wl1r * x3i + wl1i * x3r; 2017 | a[j2] = y0r - y2r; 2018 | a[j2 + 1] = y0i - y2i; 2019 | a[j3] = y0r + y2r; 2020 | a[j3 + 1] = y0i + y2i; 2021 | x0r = a[j0 + 2] - a[j2 + 3]; 2022 | x0i = a[j0 + 3] + a[j2 + 2]; 2023 | x1r = a[j0 + 2] + a[j2 + 3]; 2024 | x1i = a[j0 + 3] - a[j2 + 2]; 2025 | x2r = a[j1 + 2] - a[j3 + 3]; 2026 | x2i = a[j1 + 3] + a[j3 + 2]; 2027 | x3r = a[j1 + 2] + a[j3 + 3]; 2028 | x3i = a[j1 + 3] - a[j3 + 2]; 2029 | y0r = wd1i * x0r - wd1r * x0i; 2030 | y0i = wd1i * x0i + wd1r * x0r; 2031 | y2r = wk1i * x2r - wk1r * x2i; 2032 | y2i = wk1i * x2i + wk1r * x2r; 2033 | a[j0 + 2] = y0r + y2r; 2034 | a[j0 + 3] = y0i + y2i; 2035 | a[j1 + 2] = y0r - y2r; 2036 | a[j1 + 3] = y0i - y2i; 2037 | y0r = wd3i * x1r + wd3r * x1i; 2038 | y0i = wd3i * x1i - wd3r 
* x1r; 2039 | y2r = wk3i * x3r + wk3r * x3i; 2040 | y2i = wk3i * x3i - wk3r * x3r; 2041 | a[j2 + 2] = y0r + y2r; 2042 | a[j2 + 3] = y0i + y2i; 2043 | a[j3 + 2] = y0r - y2r; 2044 | a[j3 + 3] = y0i - y2i; 2045 | } 2046 | 2047 | /* cftfx41: applies the fixed-size kernels to four consecutive quarters of a. For n == 128 the quarters are 32 doubles each and get cftf161, cftf162, cftf161, cftf161; for any other n the quarters are 16 doubles each and get cftf081, cftf082, cftf081, cftf081. NOTE(review): the else branch presumably expects n == 64; n is not otherwise checked. Works in place; returns nothing. */ 2048 | void 2049 | cftfx41 (int n, double *a) 2050 | { 2051 | void cftf161 (double *a); 2052 | void cftf162 (double *a); 2053 | void cftf081 (double *a); 2054 | void cftf082 (double *a); 2055 | 2056 | if (n == 128) 2057 | { 2058 | cftf161 (a); 2059 | cftf162 (&a[32]); 2060 | cftf161 (&a[64]); 2061 | cftf161 (&a[96]); 2062 | } 2063 | else 2064 | { 2065 | cftf081 (a); 2066 | cftf082 (&a[16]); 2067 | cftf081 (&a[32]); 2068 | cftf081 (&a[48]); 2069 | } 2070 | } 2071 | 2072 | /* cftfx42: same dispatch as cftfx41 except that the fourth quarter uses the second kernel variant: cftf161, cftf162, cftf161, cftf162 for n == 128, and cftf081, cftf082, cftf081, cftf082 otherwise. NOTE(review): the else branch presumably expects n == 64; n is not otherwise checked. Works in place; returns nothing. */ 2073 | void 2074 | cftfx42 (int n, double *a) 2075 | { 2076 | void cftf161 (double *a); 2077 | void cftf162 (double *a); 2078 | void cftf081 (double *a); 2079 | void cftf082 (double *a); 2080 | 2081 | if (n == 128) 2082 | { 2083 | cftf161 (a); 2084 | cftf162 (&a[32]); 2085 | cftf161 (&a[64]); 2086 | cftf162 (&a[96]); 2087 | } 2088 | else 2089 | { 2090 | cftf081 (a); 2091 | cftf082 (&a[16]); 2092 | cftf081 (&a[32]); 2093 | cftf082 (&a[48]); 2094 | } 2095 | } 2096 | 2097 | /* cftf161: fully unrolled in-place kernel on a[0..31] (presumably 16 complex values stored as re / im pairs, judging by the r / i names). It uses the constants WR5000, WR2500 and WI2500, which are defined outside this chunk. All 32 inputs are first combined into the temporaries y0..y15; a[] is overwritten only afterwards, so no input is clobbered before it is read. Returns nothing. */ 2098 | void 2099 | cftf161 (double *a) 2100 | { 2101 | double wn4r, wk1r, wk1i, 2102 | x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 2103 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 2104 | y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, 2105 | y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, 2106 | y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; 2107 | 2108 | wn4r = WR5000; 2109 | wk1r = WR2500; 2110 | wk1i = WI2500; 2111 | /* first stage: 4-input butterflies over elements spaced 8 doubles apart */ x0r = a[0] + a[16]; 2112 | x0i = a[1] + a[17]; 2113 | x1r = a[0] - a[16]; 2114 | x1i = a[1] - a[17]; 2115 | x2r = a[8] + a[24]; 2116 | x2i = a[9] + a[25]; 2117 | x3r = a[8] - a[24]; 2118 | x3i = a[9] - a[25]; 2119 | y0r = x0r + x2r; 2120 | y0i = x0i + x2i; 2121 | y4r = x0r - x2r; 2122 | y4i = x0i - x2i; 2123 | y8r = x1r - x3i; 2124 | y8i = x1i + x3r; 2125 | y12r = x1r + x3i; 2126 | y12i = x1i - x3r; 2127 | x0r 
= a[2] + a[18]; 2128 | x0i = a[3] + a[19]; 2129 | x1r = a[2] - a[18]; 2130 | x1i = a[3] - a[19]; 2131 | x2r = a[10] + a[26]; 2132 | x2i = a[11] + a[27]; 2133 | x3r = a[10] - a[26]; 2134 | x3i = a[11] - a[27]; 2135 | y1r = x0r + x2r; 2136 | y1i = x0i + x2i; 2137 | y5r = x0r - x2r; 2138 | y5i = x0i - x2i; 2139 | x0r = x1r - x3i; 2140 | x0i = x1i + x3r; 2141 | y9r = wk1r * x0r - wk1i * x0i; 2142 | y9i = wk1r * x0i + wk1i * x0r; 2143 | x0r = x1r + x3i; 2144 | x0i = x1i - x3r; 2145 | y13r = wk1i * x0r - wk1r * x0i; 2146 | y13i = wk1i * x0i + wk1r * x0r; 2147 | x0r = a[4] + a[20]; 2148 | x0i = a[5] + a[21]; 2149 | x1r = a[4] - a[20]; 2150 | x1i = a[5] - a[21]; 2151 | x2r = a[12] + a[28]; 2152 | x2i = a[13] + a[29]; 2153 | x3r = a[12] - a[28]; 2154 | x3i = a[13] - a[29]; 2155 | y2r = x0r + x2r; 2156 | y2i = x0i + x2i; 2157 | y6r = x0r - x2r; 2158 | y6i = x0i - x2i; 2159 | x0r = x1r - x3i; 2160 | x0i = x1i + x3r; 2161 | y10r = wn4r * (x0r - x0i); 2162 | y10i = wn4r * (x0i + x0r); 2163 | x0r = x1r + x3i; 2164 | x0i = x1i - x3r; 2165 | y14r = wn4r * (x0r + x0i); 2166 | y14i = wn4r * (x0i - x0r); 2167 | x0r = a[6] + a[22]; 2168 | x0i = a[7] + a[23]; 2169 | x1r = a[6] - a[22]; 2170 | x1i = a[7] - a[23]; 2171 | x2r = a[14] + a[30]; 2172 | x2i = a[15] + a[31]; 2173 | x3r = a[14] - a[30]; 2174 | x3i = a[15] - a[31]; 2175 | y3r = x0r + x2r; 2176 | y3i = x0i + x2i; 2177 | y7r = x0r - x2r; 2178 | y7i = x0i - x2i; 2179 | x0r = x1r - x3i; 2180 | x0i = x1i + x3r; 2181 | y11r = wk1i * x0r - wk1r * x0i; 2182 | y11i = wk1i * x0i + wk1r * x0r; 2183 | x0r = x1r + x3i; 2184 | x0i = x1i - x3r; 2185 | y15r = wk1r * x0r - wk1i * x0i; 2186 | y15i = wk1r * x0i + wk1i * x0r; 2187 | x0r = y12r - y14r; 2188 | x0i = y12i - y14i; 2189 | x1r = y12r + y14r; 2190 | x1i = y12i + y14i; 2191 | x2r = y13r - y15r; 2192 | x2i = y13i - y15i; 2193 | x3r = y13r + y15r; 2194 | x3i = y13i + y15i; 2195 | a[24] = x0r + x2r; 2196 | a[25] = x0i + x2i; 2197 | a[26] = x0r - x2r; 2198 | a[27] = x0i - x2i; 2199 | a[28] = 
x1r - x3i; 2200 | a[29] = x1i + x3r; 2201 | a[30] = x1r + x3i; 2202 | a[31] = x1i - x3r; 2203 | x0r = y8r + y10r; 2204 | x0i = y8i + y10i; 2205 | x1r = y8r - y10r; 2206 | x1i = y8i - y10i; 2207 | x2r = y9r + y11r; 2208 | x2i = y9i + y11i; 2209 | x3r = y9r - y11r; 2210 | x3i = y9i - y11i; 2211 | a[16] = x0r + x2r; 2212 | a[17] = x0i + x2i; 2213 | a[18] = x0r - x2r; 2214 | a[19] = x0i - x2i; 2215 | a[20] = x1r - x3i; 2216 | a[21] = x1i + x3r; 2217 | a[22] = x1r + x3i; 2218 | a[23] = x1i - x3r; 2219 | x0r = y5r - y7i; 2220 | x0i = y5i + y7r; 2221 | x2r = wn4r * (x0r - x0i); 2222 | x2i = wn4r * (x0i + x0r); 2223 | x0r = y5r + y7i; 2224 | x0i = y5i - y7r; 2225 | x3r = wn4r * (x0r - x0i); 2226 | x3i = wn4r * (x0i + x0r); 2227 | x0r = y4r - y6i; 2228 | x0i = y4i + y6r; 2229 | x1r = y4r + y6i; 2230 | x1i = y4i - y6r; 2231 | a[8] = x0r + x2r; 2232 | a[9] = x0i + x2i; 2233 | a[10] = x0r - x2r; 2234 | a[11] = x0i - x2i; 2235 | a[12] = x1r - x3i; 2236 | a[13] = x1i + x3r; 2237 | a[14] = x1r + x3i; 2238 | a[15] = x1i - x3r; 2239 | x0r = y0r + y2r; 2240 | x0i = y0i + y2i; 2241 | x1r = y0r - y2r; 2242 | x1i = y0i - y2i; 2243 | x2r = y1r + y3r; 2244 | x2i = y1i + y3i; 2245 | x3r = y1r - y3r; 2246 | x3i = y1i - y3i; 2247 | a[0] = x0r + x2r; 2248 | a[1] = x0i + x2i; 2249 | a[2] = x0r - x2r; 2250 | a[3] = x0i - x2i; 2251 | a[4] = x1r - x3i; 2252 | a[5] = x1i + x3r; 2253 | a[6] = x1r + x3i; 2254 | a[7] = x1i - x3r; 2255 | } 2256 | 2257 | /* cftf162: fully unrolled in-place kernel on a[0..31], the companion of cftf161 that cftfx41 / cftfx42 apply to selected quarters. Unlike cftf161 it combines each value with the swapped component of its partner 16 doubles away (a[0] - a[17], a[1] + a[16], ...) and uses three twiddle pairs (WR1250 / WI1250, WR2500 / WI2500, WR3750 / WI3750) in addition to WR5000; all of these constants are defined outside this chunk. Every input is folded into the temporaries y0..y15 before any element of a[] is written. Returns nothing. */ 2258 | void 2259 | cftf162 (double *a) 2260 | { 2261 | double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, 2262 | x0r, x0i, x1r, x1i, x2r, x2i, 2263 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 2264 | y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, 2265 | y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, 2266 | y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; 2267 | 2268 | wn4r = WR5000; 2269 | wk1r = WR1250; 2270 | wk1i = WI1250; 2271 | wk2r = WR2500; 2272 | wk2i = WI2500; 2273 | wk3r = WR3750; 2274 | wk3i = WI3750; 2275 | x1r = a[0] - a[17]; 2276 | x1i = a[1] + a[16]; 
2277 | x0r = a[8] - a[25]; 2278 | x0i = a[9] + a[24]; 2279 | x2r = wn4r * (x0r - x0i); 2280 | x2i = wn4r * (x0i + x0r); 2281 | y0r = x1r + x2r; 2282 | y0i = x1i + x2i; 2283 | y4r = x1r - x2r; 2284 | y4i = x1i - x2i; 2285 | x1r = a[0] + a[17]; 2286 | x1i = a[1] - a[16]; 2287 | x0r = a[8] + a[25]; 2288 | x0i = a[9] - a[24]; 2289 | x2r = wn4r * (x0r - x0i); 2290 | x2i = wn4r * (x0i + x0r); 2291 | y8r = x1r - x2i; 2292 | y8i = x1i + x2r; 2293 | y12r = x1r + x2i; 2294 | y12i = x1i - x2r; 2295 | x0r = a[2] - a[19]; 2296 | x0i = a[3] + a[18]; 2297 | x1r = wk1r * x0r - wk1i * x0i; 2298 | x1i = wk1r * x0i + wk1i * x0r; 2299 | x0r = a[10] - a[27]; 2300 | x0i = a[11] + a[26]; 2301 | x2r = wk3i * x0r - wk3r * x0i; 2302 | x2i = wk3i * x0i + wk3r * x0r; 2303 | y1r = x1r + x2r; 2304 | y1i = x1i + x2i; 2305 | y5r = x1r - x2r; 2306 | y5i = x1i - x2i; 2307 | x0r = a[2] + a[19]; 2308 | x0i = a[3] - a[18]; 2309 | x1r = wk3r * x0r - wk3i * x0i; 2310 | x1i = wk3r * x0i + wk3i * x0r; 2311 | x0r = a[10] + a[27]; 2312 | x0i = a[11] - a[26]; 2313 | x2r = wk1r * x0r + wk1i * x0i; 2314 | x2i = wk1r * x0i - wk1i * x0r; 2315 | y9r = x1r - x2r; 2316 | y9i = x1i - x2i; 2317 | y13r = x1r + x2r; 2318 | y13i = x1i + x2i; 2319 | x0r = a[4] - a[21]; 2320 | x0i = a[5] + a[20]; 2321 | x1r = wk2r * x0r - wk2i * x0i; 2322 | x1i = wk2r * x0i + wk2i * x0r; 2323 | x0r = a[12] - a[29]; 2324 | x0i = a[13] + a[28]; 2325 | x2r = wk2i * x0r - wk2r * x0i; 2326 | x2i = wk2i * x0i + wk2r * x0r; 2327 | y2r = x1r + x2r; 2328 | y2i = x1i + x2i; 2329 | y6r = x1r - x2r; 2330 | y6i = x1i - x2i; 2331 | x0r = a[4] + a[21]; 2332 | x0i = a[5] - a[20]; 2333 | x1r = wk2i * x0r - wk2r * x0i; 2334 | x1i = wk2i * x0i + wk2r * x0r; 2335 | x0r = a[12] + a[29]; 2336 | x0i = a[13] - a[28]; 2337 | x2r = wk2r * x0r - wk2i * x0i; 2338 | x2i = wk2r * x0i + wk2i * x0r; 2339 | y10r = x1r - x2r; 2340 | y10i = x1i - x2i; 2341 | y14r = x1r + x2r; 2342 | y14i = x1i + x2i; 2343 | x0r = a[6] - a[23]; 2344 | x0i = a[7] + a[22]; 2345 | x1r = wk3r * 
x0r - wk3i * x0i; 2346 | x1i = wk3r * x0i + wk3i * x0r; 2347 | x0r = a[14] - a[31]; 2348 | x0i = a[15] + a[30]; 2349 | x2r = wk1i * x0r - wk1r * x0i; 2350 | x2i = wk1i * x0i + wk1r * x0r; 2351 | y3r = x1r + x2r; 2352 | y3i = x1i + x2i; 2353 | y7r = x1r - x2r; 2354 | y7i = x1i - x2i; 2355 | x0r = a[6] + a[23]; 2356 | x0i = a[7] - a[22]; 2357 | x1r = wk1i * x0r + wk1r * x0i; 2358 | x1i = wk1i * x0i - wk1r * x0r; 2359 | x0r = a[14] + a[31]; 2360 | x0i = a[15] - a[30]; 2361 | x2r = wk3i * x0r - wk3r * x0i; 2362 | x2i = wk3i * x0i + wk3r * x0r; 2363 | y11r = x1r + x2r; 2364 | y11i = x1i + x2i; 2365 | y15r = x1r - x2r; 2366 | y15i = x1i - x2i; 2367 | x1r = y0r + y2r; 2368 | x1i = y0i + y2i; 2369 | x2r = y1r + y3r; 2370 | x2i = y1i + y3i; 2371 | a[0] = x1r + x2r; 2372 | a[1] = x1i + x2i; 2373 | a[2] = x1r - x2r; 2374 | a[3] = x1i - x2i; 2375 | x1r = y0r - y2r; 2376 | x1i = y0i - y2i; 2377 | x2r = y1r - y3r; 2378 | x2i = y1i - y3i; 2379 | a[4] = x1r - x2i; 2380 | a[5] = x1i + x2r; 2381 | a[6] = x1r + x2i; 2382 | a[7] = x1i - x2r; 2383 | x1r = y4r - y6i; 2384 | x1i = y4i + y6r; 2385 | x0r = y5r - y7i; 2386 | x0i = y5i + y7r; 2387 | x2r = wn4r * (x0r - x0i); 2388 | x2i = wn4r * (x0i + x0r); 2389 | a[8] = x1r + x2r; 2390 | a[9] = x1i + x2i; 2391 | a[10] = x1r - x2r; 2392 | a[11] = x1i - x2i; 2393 | x1r = y4r + y6i; 2394 | x1i = y4i - y6r; 2395 | x0r = y5r + y7i; 2396 | x0i = y5i - y7r; 2397 | x2r = wn4r * (x0r - x0i); 2398 | x2i = wn4r * (x0i + x0r); 2399 | a[12] = x1r - x2i; 2400 | a[13] = x1i + x2r; 2401 | a[14] = x1r + x2i; 2402 | a[15] = x1i - x2r; 2403 | x1r = y8r + y10r; 2404 | x1i = y8i + y10i; 2405 | x2r = y9r - y11r; 2406 | x2i = y9i - y11i; 2407 | a[16] = x1r + x2r; 2408 | a[17] = x1i + x2i; 2409 | a[18] = x1r - x2r; 2410 | a[19] = x1i - x2i; 2411 | x1r = y8r - y10r; 2412 | x1i = y8i - y10i; 2413 | x2r = y9r + y11r; 2414 | x2i = y9i + y11i; 2415 | a[20] = x1r - x2i; 2416 | a[21] = x1i + x2r; 2417 | a[22] = x1r + x2i; 2418 | a[23] = x1i - x2r; 2419 | x1r = y12r - 
y14i; 2420 | x1i = y12i + y14r; 2421 | x0r = y13r + y15i; 2422 | x0i = y13i - y15r; 2423 | x2r = wn4r * (x0r - x0i); 2424 | x2i = wn4r * (x0i + x0r); 2425 | a[24] = x1r + x2r; 2426 | a[25] = x1i + x2i; 2427 | a[26] = x1r - x2r; 2428 | a[27] = x1i - x2i; 2429 | x1r = y12r + y14i; 2430 | x1i = y12i - y14r; 2431 | x0r = y13r - y15i; 2432 | x0i = y13i + y15r; 2433 | x2r = wn4r * (x0r - x0i); 2434 | x2i = wn4r * (x0i + x0r); 2435 | a[28] = x1r - x2i; 2436 | a[29] = x1i + x2r; 2437 | a[30] = x1r + x2i; 2438 | a[31] = x1i - x2r; 2439 | } 2440 | 2441 | /* cftf081: fully unrolled in-place kernel on a[0..15] (presumably 8 complex values stored as re / im pairs, judging by the r / i names). The only constant it uses is wn4r = WR5000, defined outside this chunk. All 16 inputs are combined into the temporaries y0..y7 before any element of a[] is written. Returns nothing. */ 2442 | void 2443 | cftf081 (double *a) 2444 | { 2445 | double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 2446 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 2447 | y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; 2448 | 2449 | wn4r = WR5000; 2450 | /* 4-input butterfly over a[0], a[4], a[8], a[12] (and their odd-index partners) */ x0r = a[0] + a[8]; 2451 | x0i = a[1] + a[9]; 2452 | x1r = a[0] - a[8]; 2453 | x1i = a[1] - a[9]; 2454 | x2r = a[4] + a[12]; 2455 | x2i = a[5] + a[13]; 2456 | x3r = a[4] - a[12]; 2457 | x3i = a[5] - a[13]; 2458 | y0r = x0r + x2r; 2459 | y0i = x0i + x2i; 2460 | y2r = x0r - x2r; 2461 | y2i = x0i - x2i; 2462 | y1r = x1r - x3i; 2463 | y1i = x1i + x3r; 2464 | y3r = x1r + x3i; 2465 | y3i = x1i - x3r; 2466 | /* 4-input butterfly over a[2], a[6], a[10], a[14] (and their odd-index partners) */ x0r = a[2] + a[10]; 2467 | x0i = a[3] + a[11]; 2468 | x1r = a[2] - a[10]; 2469 | x1i = a[3] - a[11]; 2470 | x2r = a[6] + a[14]; 2471 | x2i = a[7] + a[15]; 2472 | x3r = a[6] - a[14]; 2473 | x3i = a[7] - a[15]; 2474 | y4r = x0r + x2r; 2475 | y4i = x0i + x2i; 2476 | y6r = x0r - x2r; 2477 | y6i = x0i - x2i; 2478 | x0r = x1r - x3i; 2479 | x0i = x1i + x3r; 2480 | x2r = x1r + x3i; 2481 | x2i = x1i - x3r; 2482 | y5r = wn4r * (x0r - x0i); 2483 | y5i = wn4r * (x0r + x0i); 2484 | y7r = wn4r * (x2r - x2i); 2485 | y7i = wn4r * (x2r + x2i); 2486 | /* write-back: combine the two half results */ a[8] = y1r + y5r; 2487 | a[9] = y1i + y5i; 2488 | a[10] = y1r - y5r; 2489 | a[11] = y1i - y5i; 2490 | a[12] = y3r - y7i; 2491 | a[13] = y3i + y7r; 2492 | a[14] = y3r + y7i; 2493 | a[15] = y3i - y7r; 2494 | a[0] = y0r + y4r; 2495 | a[1] = y0i + y4i; 2496 | a[2] = y0r 
- y4r; 2497 | a[3] = y0i - y4i; 2498 | a[4] = y2r - y6i; 2499 | a[5] = y2i + y6r; 2500 | a[6] = y2r + y6i; 2501 | a[7] = y2i - y6r; 2502 | } 2503 | 2504 | /* cftf082: fully unrolled in-place kernel on a[0..15], the companion of cftf081 that cftfx41 / cftfx42 apply to selected quarters. It combines each value with the swapped component of its partner 8 doubles away (a[0] - a[9], a[1] + a[8], ...) and uses WR5000 plus the pair WR2500 / WI2500, all defined outside this chunk. All 16 inputs are folded into the temporaries y0..y7 before any element of a[] is written. Returns nothing. */ 2505 | void 2506 | cftf082 (double *a) 2507 | { 2508 | double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, 2509 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 2510 | y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; 2511 | 2512 | wn4r = WR5000; 2513 | wk1r = WR2500; 2514 | wk1i = WI2500; 2515 | /* input stage: cross-combine and apply the constant twiddles */ y0r = a[0] - a[9]; 2516 | y0i = a[1] + a[8]; 2517 | y1r = a[0] + a[9]; 2518 | y1i = a[1] - a[8]; 2519 | x0r = a[4] - a[13]; 2520 | x0i = a[5] + a[12]; 2521 | y2r = wn4r * (x0r - x0i); 2522 | y2i = wn4r * (x0i + x0r); 2523 | x0r = a[4] + a[13]; 2524 | x0i = a[5] - a[12]; 2525 | y3r = wn4r * (x0r - x0i); 2526 | y3i = wn4r * (x0i + x0r); 2527 | x0r = a[2] - a[11]; 2528 | x0i = a[3] + a[10]; 2529 | y4r = wk1r * x0r - wk1i * x0i; 2530 | y4i = wk1r * x0i + wk1i * x0r; 2531 | x0r = a[2] + a[11]; 2532 | x0i = a[3] - a[10]; 2533 | y5r = wk1i * x0r - wk1r * x0i; 2534 | y5i = wk1i * x0i + wk1r * x0r; 2535 | x0r = a[6] - a[15]; 2536 | x0i = a[7] + a[14]; 2537 | y6r = wk1i * x0r - wk1r * x0i; 2538 | y6i = wk1i * x0i + wk1r * x0r; 2539 | x0r = a[6] + a[15]; 2540 | x0i = a[7] - a[14]; 2541 | y7r = wk1r * x0r - wk1i * x0i; 2542 | y7i = wk1r * x0i + wk1i * x0r; 2543 | /* output stage: sums and differences of the y temporaries, written back in place */ x0r = y0r + y2r; 2544 | x0i = y0i + y2i; 2545 | x1r = y4r + y6r; 2546 | x1i = y4i + y6i; 2547 | a[0] = x0r + x1r; 2548 | a[1] = x0i + x1i; 2549 | a[2] = x0r - x1r; 2550 | a[3] = x0i - x1i; 2551 | x0r = y0r - y2r; 2552 | x0i = y0i - y2i; 2553 | x1r = y4r - y6r; 2554 | x1i = y4i - y6i; 2555 | a[4] = x0r - x1i; 2556 | a[5] = x0i + x1r; 2557 | a[6] = x0r + x1i; 2558 | a[7] = x0i - x1r; 2559 | x0r = y1r - y3i; 2560 | x0i = y1i + y3r; 2561 | x1r = y5r - y7r; 2562 | x1i = y5i - y7i; 2563 | a[8] = x0r + x1r; 2564 | a[9] = x0i + x1i; 2565 | a[10] = x0r - x1r; 2566 | a[11] = x0i - x1i; 2567 | x0r = y1r + y3i; 2568 | x0i = y1i - y3r; 2569 | x1r = y5r + y7r; 2570 | x1i = y5i + y7i; 2571 | a[12] = x0r - x1i; 
2572 | a[13] = x0i + x1r; 2573 | a[14] = x0r + x1i; 2574 | a[15] = x0i - x1r; 2575 | } 2576 | 2577 | 2578 | void 2579 | cftf040 (double *a) 2580 | { 2581 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 2582 | 2583 | x0r = a[0] + a[4]; 2584 | x0i = a[1] + a[5]; 2585 | x1r = a[0] - a[4]; 2586 | x1i = a[1] - a[5]; 2587 | x2r = a[2] + a[6]; 2588 | x2i = a[3] + a[7]; 2589 | x3r = a[2] - a[6]; 2590 | x3i = a[3] - a[7]; 2591 | a[0] = x0r + x2r; 2592 | a[1] = x0i + x2i; 2593 | a[4] = x0r - x2r; 2594 | a[5] = x0i - x2i; 2595 | a[2] = x1r - x3i; 2596 | a[3] = x1i + x3r; 2597 | a[6] = x1r + x3i; 2598 | a[7] = x1i - x3r; 2599 | } 2600 | 2601 | 2602 | void 2603 | cftb040 (double *a) 2604 | { 2605 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 2606 | 2607 | x0r = a[0] + a[4]; 2608 | x0i = a[1] + a[5]; 2609 | x1r = a[0] - a[4]; 2610 | x1i = a[1] - a[5]; 2611 | x2r = a[2] + a[6]; 2612 | x2i = a[3] + a[7]; 2613 | x3r = a[2] - a[6]; 2614 | x3i = a[3] - a[7]; 2615 | a[0] = x0r + x2r; 2616 | a[1] = x0i + x2i; 2617 | a[4] = x0r - x2r; 2618 | a[5] = x0i - x2i; 2619 | a[2] = x1r + x3i; 2620 | a[3] = x1i - x3r; 2621 | a[6] = x1r - x3i; 2622 | a[7] = x1i + x3r; 2623 | } 2624 | 2625 | 2626 | void 2627 | cftx020 (double *a) 2628 | { 2629 | double x0r, x0i; 2630 | 2631 | x0r = a[0] - a[2]; 2632 | x0i = a[1] - a[3]; 2633 | a[0] += a[2]; 2634 | a[1] += a[3]; 2635 | a[2] = x0r; 2636 | a[3] = x0i; 2637 | } 2638 | 2639 | 2640 | void 2641 | rftfsub (int n, double *a) 2642 | { 2643 | int i, i0, j, k; 2644 | double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; 2645 | 2646 | ec = 2 * M_PI_2 / n; 2647 | wkr = 0; 2648 | wki = 0; 2649 | wdi = cos (ec); 2650 | wdr = sin (ec); 2651 | wdi *= wdr; 2652 | wdr *= wdr; 2653 | w1r = 1 - 2 * wdr; 2654 | w1i = 2 * wdi; 2655 | ss = 2 * w1i; 2656 | i = n >> 1; 2657 | for (;;) 2658 | { 2659 | i0 = i - 4 * RDFT_LOOP_DIV; 2660 | if (i0 < 4) 2661 | { 2662 | i0 = 4; 2663 | } 2664 | for (j = i - 4; j >= i0; j -= 4) 2665 | { 2666 | k = n - j; 2667 | xr = a[j + 
2] - a[k - 2]; 2668 | xi = a[j + 3] + a[k - 1]; 2669 | yr = wdr * xr - wdi * xi; 2670 | yi = wdr * xi + wdi * xr; 2671 | a[j + 2] -= yr; 2672 | a[j + 3] -= yi; 2673 | a[k - 2] += yr; 2674 | a[k - 1] -= yi; 2675 | wkr += ss * wdi; 2676 | wki += ss * (0.5 - wdr); 2677 | xr = a[j] - a[k]; 2678 | xi = a[j + 1] + a[k + 1]; 2679 | yr = wkr * xr - wki * xi; 2680 | yi = wkr * xi + wki * xr; 2681 | a[j] -= yr; 2682 | a[j + 1] -= yi; 2683 | a[k] += yr; 2684 | a[k + 1] -= yi; 2685 | wdr += ss * wki; 2686 | wdi += ss * (0.5 - wkr); 2687 | } 2688 | if (i0 == 4) 2689 | { 2690 | break; 2691 | } 2692 | wkr = 0.5 * sin (ec * i0); 2693 | wki = 0.5 * cos (ec * i0); 2694 | wdr = 0.5 - (wkr * w1r - wki * w1i); 2695 | wdi = wkr * w1i + wki * w1r; 2696 | wkr = 0.5 - wkr; 2697 | i = i0; 2698 | } 2699 | xr = a[2] - a[n - 2]; 2700 | xi = a[3] + a[n - 1]; 2701 | yr = wdr * xr - wdi * xi; 2702 | yi = wdr * xi + wdi * xr; 2703 | a[2] -= yr; 2704 | a[3] -= yi; 2705 | a[n - 2] += yr; 2706 | a[n - 1] -= yi; 2707 | } 2708 | 2709 | 2710 | void 2711 | rftbsub (int n, double *a) 2712 | { 2713 | int i, i0, j, k; 2714 | double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; 2715 | 2716 | ec = 2 * M_PI_2 / n; 2717 | wkr = 0; 2718 | wki = 0; 2719 | wdi = cos (ec); 2720 | wdr = sin (ec); 2721 | wdi *= wdr; 2722 | wdr *= wdr; 2723 | w1r = 1 - 2 * wdr; 2724 | w1i = 2 * wdi; 2725 | ss = 2 * w1i; 2726 | i = n >> 1; 2727 | for (;;) 2728 | { 2729 | i0 = i - 4 * RDFT_LOOP_DIV; 2730 | if (i0 < 4) 2731 | { 2732 | i0 = 4; 2733 | } 2734 | for (j = i - 4; j >= i0; j -= 4) 2735 | { 2736 | k = n - j; 2737 | xr = a[j + 2] - a[k - 2]; 2738 | xi = a[j + 3] + a[k - 1]; 2739 | yr = wdr * xr + wdi * xi; 2740 | yi = wdr * xi - wdi * xr; 2741 | a[j + 2] -= yr; 2742 | a[j + 3] -= yi; 2743 | a[k - 2] += yr; 2744 | a[k - 1] -= yi; 2745 | wkr += ss * wdi; 2746 | wki += ss * (0.5 - wdr); 2747 | xr = a[j] - a[k]; 2748 | xi = a[j + 1] + a[k + 1]; 2749 | yr = wkr * xr + wki * xi; 2750 | yi = wkr * xi - wki * xr; 2751 | a[j] -= 
yr; 2752 | a[j + 1] -= yi; 2753 | a[k] += yr; 2754 | a[k + 1] -= yi; 2755 | wdr += ss * wki; 2756 | wdi += ss * (0.5 - wkr); 2757 | } 2758 | if (i0 == 4) 2759 | { 2760 | break; 2761 | } 2762 | wkr = 0.5 * sin (ec * i0); 2763 | wki = 0.5 * cos (ec * i0); 2764 | wdr = 0.5 - (wkr * w1r - wki * w1i); 2765 | wdi = wkr * w1i + wki * w1r; 2766 | wkr = 0.5 - wkr; 2767 | i = i0; 2768 | } 2769 | xr = a[2] - a[n - 2]; 2770 | xi = a[3] + a[n - 1]; 2771 | yr = wdr * xr + wdi * xi; 2772 | yi = wdr * xi - wdi * xr; 2773 | a[2] -= yr; 2774 | a[3] -= yi; 2775 | a[n - 2] += yr; 2776 | a[n - 1] -= yi; 2777 | } 2778 | 2779 | 2780 | void 2781 | dctsub (int n, double *a) 2782 | { 2783 | int i, i0, j, k, m; 2784 | double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; 2785 | 2786 | ec = M_PI_2 / n; 2787 | wkr = 0.5; 2788 | wki = 0.5; 2789 | w1r = cos (ec); 2790 | w1i = sin (ec); 2791 | wdr = 0.5 * (w1r - w1i); 2792 | wdi = 0.5 * (w1r + w1i); 2793 | ss = 2 * w1i; 2794 | m = n >> 1; 2795 | i = 0; 2796 | for (;;) 2797 | { 2798 | i0 = i + 2 * DCST_LOOP_DIV; 2799 | if (i0 > m - 2) 2800 | { 2801 | i0 = m - 2; 2802 | } 2803 | for (j = i + 2; j <= i0; j += 2) 2804 | { 2805 | k = n - j; 2806 | xr = wdi * a[j - 1] - wdr * a[k + 1]; 2807 | xi = wdr * a[j - 1] + wdi * a[k + 1]; 2808 | wkr -= ss * wdi; 2809 | wki += ss * wdr; 2810 | yr = wki * a[j] - wkr * a[k]; 2811 | yi = wkr * a[j] + wki * a[k]; 2812 | wdr -= ss * wki; 2813 | wdi += ss * wkr; 2814 | a[k + 1] = xr; 2815 | a[k] = yr; 2816 | a[j - 1] = xi; 2817 | a[j] = yi; 2818 | } 2819 | if (i0 == m - 2) 2820 | { 2821 | break; 2822 | } 2823 | wdr = cos (ec * i0); 2824 | wdi = sin (ec * i0); 2825 | wkr = 0.5 * (wdr - wdi); 2826 | wki = 0.5 * (wdr + wdi); 2827 | wdr = wkr * w1r - wki * w1i; 2828 | wdi = wkr * w1i + wki * w1r; 2829 | i = i0; 2830 | } 2831 | xr = wdi * a[m - 1] - wdr * a[m + 1]; 2832 | a[m - 1] = wdr * a[m - 1] + wdi * a[m + 1]; 2833 | a[m + 1] = xr; 2834 | a[m] *= wki + ss * wdr; 2835 | } 2836 | 2837 | 2838 | void 2839 | dstsub 
(int n, double *a) 2840 | { 2841 | int i, i0, j, k, m; 2842 | double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; 2843 | 2844 | ec = M_PI_2 / n; 2845 | wkr = 0.5; 2846 | wki = 0.5; 2847 | w1r = cos (ec); 2848 | w1i = sin (ec); 2849 | wdr = 0.5 * (w1r - w1i); 2850 | wdi = 0.5 * (w1r + w1i); 2851 | ss = 2 * w1i; 2852 | m = n >> 1; 2853 | i = 0; 2854 | for (;;) 2855 | { 2856 | i0 = i + 2 * DCST_LOOP_DIV; 2857 | if (i0 > m - 2) 2858 | { 2859 | i0 = m - 2; 2860 | } 2861 | for (j = i + 2; j <= i0; j += 2) 2862 | { 2863 | k = n - j; 2864 | xr = wdi * a[k + 1] - wdr * a[j - 1]; 2865 | xi = wdr * a[k + 1] + wdi * a[j - 1]; 2866 | wkr -= ss * wdi; 2867 | wki += ss * wdr; 2868 | yr = wki * a[k] - wkr * a[j]; 2869 | yi = wkr * a[k] + wki * a[j]; 2870 | wdr -= ss * wki; 2871 | wdi += ss * wkr; 2872 | a[j - 1] = xr; 2873 | a[j] = yr; 2874 | a[k + 1] = xi; 2875 | a[k] = yi; 2876 | } 2877 | if (i0 == m - 2) 2878 | { 2879 | break; 2880 | } 2881 | wdr = cos (ec * i0); 2882 | wdi = sin (ec * i0); 2883 | wkr = 0.5 * (wdr - wdi); 2884 | wki = 0.5 * (wdr + wdi); 2885 | wdr = wkr * w1r - wki * w1i; 2886 | wdi = wkr * w1i + wki * w1r; 2887 | i = i0; 2888 | } 2889 | xr = wdi * a[m + 1] - wdr * a[m - 1]; 2890 | a[m + 1] = wdr * a[m + 1] + wdi * a[m - 1]; 2891 | a[m - 1] = xr; 2892 | a[m] *= wki + ss * wdr; 2893 | } 2894 | 2895 | 2896 | void 2897 | dctsub4 (int n, double *a) 2898 | { 2899 | int m; 2900 | double wki, wdr, wdi, xr; 2901 | 2902 | wki = WR5000; 2903 | m = n >> 1; 2904 | if (m == 2) 2905 | { 2906 | wdr = wki * WI2500; 2907 | wdi = wki * WR2500; 2908 | xr = wdi * a[1] - wdr * a[3]; 2909 | a[1] = wdr * a[1] + wdi * a[3]; 2910 | a[3] = xr; 2911 | } 2912 | a[m] *= wki; 2913 | } 2914 | 2915 | 2916 | void 2917 | dstsub4 (int n, double *a) 2918 | { 2919 | int m; 2920 | double wki, wdr, wdi, xr; 2921 | 2922 | wki = WR5000; 2923 | m = n >> 1; 2924 | if (m == 2) 2925 | { 2926 | wdr = wki * WI2500; 2927 | wdi = wki * WR2500; 2928 | xr = wdi * a[3] - wdr * a[1]; 2929 | a[3] = wdr * 
a[3] + wdi * a[1]; 2930 | a[1] = xr; 2931 | } 2932 | a[m] *= wki; 2933 | } 2934 | -------------------------------------------------------------------------------- /pi_fftcs.c: -------------------------------------------------------------------------------- 1 | /* 2 | ---- calculation of PI(= 3.14159...) using FFT and AGM ---- 3 | by T.Ooura, Nov. 1999. 4 | 5 | Example compilation: 6 | GNU : gcc -O -funroll-loops -fomit-frame-pointer pi_fftcs.c fftsg_h.c -lm -o pi_css5 7 | SUN : cc -fast pi_fftcs.c fftsg_h.c -lm -o pi_css5 8 | HP : aCC -fast pi_fftcs.c fftsg_h.c -lm -o pi_css5 9 | Microsoft: cl -O2 pi_fftcs.c fftsg_h.c -o pi_css5 10 | ... 11 | etc. 12 | */ 13 | 14 | #define PI_FFTC_VER "ver. LG1.1.2-MP1.5.2a.memsave" 15 | 16 | /* Please check the following macros before compiling */ 17 | #ifndef DBL_ERROR_MARGIN 18 | #define DBL_ERROR_MARGIN 0.4 /* must be < 0.5 */ 19 | #endif 20 | 21 | #define DGTINT short int /* sizeof(DGTINT) == 2 */ 22 | #define DGTINT_MAX SHRT_MAX 23 | 24 | #define DGT_PACK 10 25 | #define DGT_PACK_LINE 5 26 | #define DGT_LINE_BLOCK 20 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | void mp_load_0 (int n, int radix, int out[]); 37 | void mp_load_1 (int n, int radix, int out[]); 38 | void mp_round (int n, int radix, int m, int inout[]); 39 | int mp_cmp (int n, int radix, int in1[], int in2[]); 40 | void mp_add (int n, int radix, int in1[], int in2[], int out[]); 41 | void mp_sub (int n, int radix, int in1[], int in2[], int out[]); 42 | void mp_imul (int n, int radix, int in1[], int in2, int out[]); 43 | int mp_idiv (int n, int radix, int in1[], int in2, int out[]); 44 | void mp_idiv_2 (int n, int radix, int in[], int out[]); 45 | double mp_mul_radix_test (int n, int radix, int nfft, double tmpfft[]); 46 | void mp_mul (int n, int radix, int in1[], int in2[], int out[], 47 | int tmp[], int nfft, double tmp1fft[], double tmp2fft[], 48 | double tmp3fft[]); 49 | void mp_squ (int n, int 
radix, int in[], int out[], int tmp[], 50 | int nfft, double tmp1fft[], double tmp2fft[]); 51 | void mp_mulhf (int n, int radix, int in1[], int in2[], int out[], 52 | int tmp[], int nfft, double in1fft[], double tmpfft[]); 53 | void mp_mulhf_use_in1fft (int n, int radix, double in1fft[], int in2[], 54 | int out[], int tmp[], int nfft, double tmpfft[]); 55 | void mp_squhf_use_infft (int n, int radix, double infft[], int in[], 56 | int out[], int tmp[], int nfft, double tmpfft[]); 57 | void mp_mulh (int n, int radix, int in1[], int in2[], int out[], 58 | int nfft, double in1fft[], double outfft[]); 59 | void mp_squh (int n, int radix, int in[], int out[], 60 | int nfft, double outfft[]); 61 | int mp_inv (int n, int radix, int in[], int out[], 62 | int tmp1[], int tmp2[], int nfft, 63 | double tmp1fft[], double tmp2fft[]); 64 | int mp_sqrt (int n, int radix, int in[], int out[], 65 | int tmp1[], int tmp2[], int nfft, 66 | double tmp1fft[], double tmp2fft[]); 67 | int mp_invisqrt (int n, int radix, int in, int out[], 68 | int tmp1[], int tmp2[], int nfft, 69 | double tmp1fft[], double tmp2fft[]); 70 | void mp_sprintf (int n, int log10_radix, int in[], char out[]); 71 | void mp_sscanf (int n, int log10_radix, char in[], int out[]); 72 | 73 | 74 | int 75 | main (int argc, char *argv[]) 76 | { 77 | int nfft, log2_nfft, radix, log10_radix, n, j = 0, k = 0, l = 0, npow, nprc; 78 | double err; 79 | int *a, *b, *c, *e, *i1, *i2; 80 | double *d1, *d2, *d3; 81 | char *dgt, filename[100]; 82 | clock_t start_time; 83 | double elap_time, loop_time; 84 | FILE *f_out; 85 | #ifndef QUIET_OUT 86 | fprintf (stdout, "Calculation of PI using FFT and AGM, %s\n", PI_FFTC_VER); 87 | #endif 88 | 89 | /* if not run with the proper parameters from the command line */ 90 | if (argc != 2) 91 | { 92 | printf ("\nUsage: %s digits\n", argv[0]); 93 | printf ("\nNumber of digits of pi to calculate?\n"); 94 | scanf ("%d", &nfft); 95 | } 96 | else 97 | nfft = atoi (argv[1]); 98 | 99 | #ifndef QUIET_OUT 
100 | fprintf (stdout, "initializing...\n"); 101 | #endif 102 | nfft /= 4; 103 | start_time = clock (); 104 | for (log2_nfft = 1; (1 << log2_nfft) < nfft; log2_nfft++); 105 | nfft = 1 << log2_nfft; 106 | n = nfft + 2; 107 | a = (int *) malloc (2 * sizeof (int) + n * sizeof (DGTINT)); 108 | b = (int *) malloc (2 * sizeof (int) + n * sizeof (DGTINT)); 109 | c = (int *) malloc (2 * sizeof (int) + n * sizeof (DGTINT)); 110 | e = (int *) malloc (2 * sizeof (int) + n * sizeof (DGTINT)); 111 | i1 = (int *) malloc (2 * sizeof (int) + n * sizeof (DGTINT)); 112 | i2 = (int *) malloc (2 * sizeof (int) + n * sizeof (DGTINT)); 113 | d1 = (double *) malloc ((nfft + 2) * sizeof (double)); 114 | d2 = (double *) malloc ((nfft + 2) * sizeof (double)); 115 | d3 = (double *) malloc ((nfft + 2) * sizeof (double)); 116 | if (d3 == NULL) 117 | { 118 | printf ("Allocation Failure!\n"); 119 | exit (1); 120 | } 121 | /* ---- radix test ---- */ 122 | log10_radix = 1; 123 | radix = 10; 124 | err = mp_mul_radix_test (n, radix, nfft, d1); 125 | err += DBL_EPSILON * (n * radix * radix / 4); 126 | while (100 * err < DBL_ERROR_MARGIN && radix <= DGTINT_MAX / 20) 127 | { 128 | err *= 100; 129 | log10_radix++; 130 | radix *= 10; 131 | } 132 | #ifndef QUIET_OUT 133 | fprintf (stdout, "nfft= %d\nradix= %d\nerror_margin= %g\n", nfft, radix, 134 | err); 135 | fprintf (stdout, "calculating %d digits of PI...\n", log10_radix * (n - 2)); 136 | #endif 137 | /* 138 | * ---- a formula based on the AGM (Arithmetic-Geometric Mean) ---- 139 | * c = sqrt(0.125); 140 | * a = 1 + 3 * c; 141 | * b = sqrt(a); 142 | * e = b - 0.625; 143 | * b = 2 * b; 144 | * c = e - c; 145 | * a = a + e; 146 | * npow = 4; 147 | * do { 148 | * npow = 2 * npow; 149 | * e = (a + b) / 2; 150 | * b = sqrt(a * b); 151 | * e = e - b; 152 | * b = 2 * b; 153 | * c = c - e; 154 | * a = e + b; 155 | * } while (e > SQRT_SQRT_EPSILON); 156 | * e = e * e / 4; 157 | * a = a + b; 158 | * pi = (a * a - e - e / 2) / (a * c - e) / npow; 159 | * ---- 
modification ---- 160 | * This is a modified version of Gauss-Legendre formula 161 | * (by T.Ooura). It is faster than original version. 162 | * ---- reference ---- 163 | * 1. E.Salamin, 164 | * Computation of PI Using Arithmetic-Geometric Mean, 165 | * Mathematics of Computation, Vol.30 1976. 166 | * 2. R.P.Brent, 167 | * Fast Multiple-Precision Evaluation of Elementary Functions, 168 | * J. ACM 23 1976. 169 | * 3. D.Takahasi, Y.Kanada, 170 | * Calculation of PI to 51.5 Billion Decimal Digits on 171 | * Distributed Memoriy Parallel Processors, 172 | * Transactions of Information Processing Society of Japan, 173 | * Vol.39 No.7 1998. 174 | * 4. T.Ooura, 175 | * Improvement of the PI Calculation Algorithm and 176 | * Implementation of Fast Multiple-Precision Computation, 177 | * Information Processing Society of Japan SIG Notes, 178 | * 98-HPC-74, 1998. 179 | */ 180 | /* ---- c = 1 / sqrt(8) ---- */ 181 | mp_invisqrt (n, radix, 8, c, i1, i2, nfft, d1, d2); 182 | /* ---- a = 1 + 3 * c ---- */ 183 | mp_imul (n, radix, c, 3, e); 184 | mp_sscanf (n, log10_radix, "1", a); 185 | mp_add (n, radix, a, e, a); 186 | /* ---- b = sqrt(a) ---- */ 187 | mp_sqrt (n, radix, a, b, i1, i2, nfft, d1, d2); 188 | /* ---- e = b - 0.625 ---- */ 189 | mp_sscanf (n, log10_radix, "0.625", e); 190 | mp_sub (n, radix, b, e, e); 191 | /* ---- b = 2 * b ---- */ 192 | mp_add (n, radix, b, b, b); 193 | /* ---- c = e - c ---- */ 194 | mp_sub (n, radix, e, c, c); 195 | /* ---- a = a + e ---- */ 196 | mp_add (n, radix, a, e, a); 197 | #ifndef QUIET_OUT 198 | fprintf (stdout, "AGM iteration\n"); 199 | #endif 200 | npow = 4; 201 | elap_time = ((double) clock () - (double) start_time) / CLOCKS_PER_SEC; 202 | 203 | do 204 | { 205 | clock_t start_loop_time = clock (); 206 | npow *= 2; 207 | /* ---- e = (a + b) / 2 ---- */ 208 | mp_add (n, radix, a, b, e); 209 | mp_idiv_2 (n, radix, e, e); 210 | /* ---- b = sqrt(a * b) ---- */ 211 | mp_mul (n, radix, a, b, a, i1, nfft, d1, d2, d3); 212 | mp_sqrt (n, radix, 
a, b, i1, i2, nfft, d1, d2); 213 | /* ---- e = e - b ---- */ 214 | mp_sub (n, radix, e, b, e); 215 | /* ---- b = 2 * b ---- */ 216 | mp_add (n, radix, b, b, b); 217 | /* ---- c = c - e ---- */ 218 | mp_sub (n, radix, c, e, c); 219 | /* ---- a = e + b ---- */ 220 | mp_add (n, radix, e, b, a); 221 | /* ---- convergence check ---- */ 222 | nprc = -e[1]; 223 | if (e[0] == 0) 224 | { 225 | nprc = n; 226 | } 227 | loop_time = 228 | ((double) clock () - (double) start_loop_time) / CLOCKS_PER_SEC; 229 | elap_time += loop_time; 230 | #ifndef QUIET_OUT 231 | fprintf (stdout, "precision= %d: %0.2f sec\n", 4 * nprc * log10_radix, 232 | loop_time); 233 | #endif 234 | } 235 | while (4 * nprc <= n); 236 | start_time = clock (); 237 | /* ---- e = e * e / 4 (half precision) ---- */ 238 | mp_idiv_2 (n, radix, e, e); 239 | mp_squh (n, radix, e, e, nfft, d1); 240 | /* ---- a = a + b ---- */ 241 | mp_add (n, radix, a, b, a); 242 | /* ---- a = (a * a - e - e / 2) / (a * c - e) / npow ---- */ 243 | mp_mulhf (n, radix, a, c, c, i1, nfft, d1, d2); 244 | mp_sub (n, radix, c, e, c); 245 | mp_inv (n, radix, c, b, i1, i2, nfft, d2, d3); 246 | mp_squhf_use_infft (n, radix, d1, a, a, i1, nfft, d2); 247 | mp_sub (n, radix, a, e, a); 248 | mp_idiv_2 (n, radix, e, e); 249 | mp_sub (n, radix, a, e, a); 250 | mp_mul (n, radix, a, b, a, i1, nfft, d1, d2, d3); 251 | mp_idiv (n, radix, a, npow, a); 252 | /* ---- output ---- */ 253 | dgt = (char *) d1; 254 | mp_sprintf (n - 1, log10_radix, a, dgt); 255 | elap_time += ((double) clock () - (double) start_time) / CLOCKS_PER_SEC; 256 | 257 | sprintf (filename, "pi%i.txt", log10_radix * (n - 2)); 258 | 259 | f_out = fopen (filename, "w"); 260 | #ifndef QUIET_OUT 261 | fprintf (stdout, "writing %s...\n", filename); 262 | #endif 263 | do 264 | { 265 | if (!isdigit (*dgt)) 266 | { 267 | if (isalpha (*dgt) != 0) 268 | { 269 | fputc ('\n', f_out); 270 | fputc ('\n', f_out); 271 | } 272 | fputc (*dgt, f_out); 273 | fputc ('\n', f_out); 274 | fputc ('\n', f_out); 
275 | j = 0; 276 | k = 0; 277 | l = 0; 278 | continue; 279 | } 280 | fputc (*dgt, f_out); 281 | if (++j >= DGT_PACK) 282 | { 283 | j = 0; 284 | if (++k >= DGT_PACK_LINE) 285 | { 286 | k = 0; 287 | fputc ('\n', f_out); 288 | if (++l >= DGT_LINE_BLOCK) 289 | { 290 | l = 0; 291 | fputc ('\n', f_out); 292 | } 293 | } 294 | else 295 | { 296 | fputc (' ', f_out); 297 | } 298 | } 299 | } 300 | while (*dgt++ && *dgt!='e'); 301 | fputc ('\n', f_out); 302 | fprintf (f_out, "%s\n", dgt); 303 | fclose (f_out); 304 | free (d3); 305 | free (d2); 306 | free (d1); 307 | free (i2); 308 | free (i1); 309 | free (e); 310 | free (c); 311 | free (b); 312 | free (a); 313 | /* ---- difftime ---- */ 314 | fprintf (stdout, "%0.2f sec. (real time)\n", elap_time); 315 | 316 | /* don't quite before allowing the user to see all output */ 317 | if(argc!=2) 318 | { 319 | fgets(filename,99,stdin); 320 | fprintf(stdout,"Hit RETURN to exit.\n"); 321 | fgets(filename,99,stdin); 322 | } 323 | 324 | return (0); 325 | } 326 | 327 | 328 | /* -------- multiple precision routines -------- */ 329 | 330 | 331 | #include 332 | #include 333 | #include 334 | 335 | /* ---- floating point format ---- 336 | data := data[0] * pow(radix, data[1]) * 337 | (data[2] + data[3]/radix + data[4]/radix/radix + ...), 338 | data[0] : sign (1;data>0, -1;data<0, 0;data==0) 339 | data[1] : exponent (0;data==0) 340 | data[2...n+1] : digits 341 | ---- function prototypes ---- 342 | void mp_load_0(int n, int radix, int out[]); 343 | void mp_load_1(int n, int radix, int out[]); 344 | void mp_round(int n, int radix, int m, int inout[]); 345 | int mp_cmp(int n, int radix, int in1[], int in2[]); 346 | void mp_add(int n, int radix, int in1[], int in2[], int out[]); 347 | void mp_sub(int n, int radix, int in1[], int in2[], int out[]); 348 | void mp_imul(int n, int radix, int in1[], int in2, int out[]); 349 | int mp_idiv(int n, int radix, int in1[], int in2, int out[]); 350 | void mp_idiv_2(int n, int radix, int in[], int out[]); 351 | 
double mp_mul_radix_test(int n, int radix, int nfft, 352 | double tmpfft[]); 353 | void mp_mul(int n, int radix, int in1[], int in2[], int out[], 354 | int tmp[], int nfft, double tmp1fft[], double tmp2fft[], 355 | double tmp3fft[]); 356 | void mp_squ(int n, int radix, int in[], int out[], int tmp[], 357 | int nfft, double tmp1fft[], double tmp2fft[]); 358 | void mp_mulhf(int n, int radix, int in1[], int in2[], int out[], 359 | int tmp[], int nfft, double in1fft[], double tmpfft[]); 360 | void mp_mulhf_use_in1fft(int n, int radix, double in1fft[], int in2[], 361 | int out[], int tmp[], int nfft, double tmpfft[]); 362 | void mp_squhf_use_infft(int n, int radix, double infft[], int in[], 363 | int out[], int tmp[], int nfft, double tmpfft[]); 364 | void mp_mulh(int n, int radix, int in1[], int in2[], int out[], 365 | int nfft, double in1fft[], double outfft[]); 366 | void mp_squh(int n, int radix, int in[], int out[], 367 | int nfft, double outfft[]); 368 | int mp_inv(int n, int radix, int in[], int out[], 369 | int tmp1[], int tmp2[], int nfft, 370 | double tmp1fft[], double tmp2fft[]); 371 | int mp_sqrt(int n, int radix, int in[], int out[], 372 | int tmp1[], int tmp2[], int nfft, 373 | double tmp1fft[], double tmp2fft[]); 374 | int mp_invisqrt(int n, int radix, int in, int out[], 375 | int tmp1[], int tmp2[], int nfft, 376 | double tmp1fft[], double tmp2fft[]); 377 | void mp_sprintf(int n, int log10_radix, int in[], char out[]); 378 | void mp_sscanf(int n, int log10_radix, char in[], int out[]); 379 | ---- 380 | */ 381 | 382 | 383 | /* -------- mp_load routines -------- */ 384 | 385 | 386 | void 387 | mp_load_0 (int n, int radix, int out[]) 388 | { 389 | int j; 390 | DGTINT *outr; 391 | 392 | outr = ((DGTINT *) & out[2]) - 2; 393 | out[0] = 0; 394 | out[1] = 0; 395 | for (j = 2; j <= n + 1; j++) 396 | { 397 | outr[j] = 0; 398 | } 399 | } 400 | 401 | 402 | void 403 | mp_load_1 (int n, int radix, int out[]) 404 | { 405 | int j; 406 | DGTINT *outr; 407 | 408 | outr = 
((DGTINT *) & out[2]) - 2; 409 | out[0] = 1; 410 | out[1] = 0; 411 | outr[2] = 1; 412 | for (j = 3; j <= n + 1; j++) 413 | { 414 | outr[j] = 0; 415 | } 416 | } 417 | 418 | 419 | void 420 | mp_round (int n, int radix, int m, int inout[]) 421 | { 422 | int j, x; 423 | DGTINT *inoutr; 424 | 425 | inoutr = ((DGTINT *) & inout[2]) - 2; 426 | if (m < n) 427 | { 428 | for (j = n + 1; j > m + 2; j--) 429 | { 430 | inoutr[j] = 0; 431 | } 432 | x = 2 * inoutr[m + 2]; 433 | inoutr[m + 2] = 0; 434 | if (x >= radix) 435 | { 436 | for (j = m + 1; j >= 2; j--) 437 | { 438 | x = inoutr[j] + 1; 439 | if (x < radix) 440 | { 441 | inoutr[j] = (DGTINT) x; 442 | break; 443 | } 444 | inoutr[j] = 0; 445 | } 446 | if (x >= radix) 447 | { 448 | inoutr[2] = 1; 449 | inout[1]++; 450 | } 451 | } 452 | } 453 | } 454 | 455 | 456 | /* -------- mp_add routines -------- */ 457 | 458 | 459 | int 460 | mp_cmp (int n, int radix, int in1[], int in2[]) 461 | { 462 | int mp_unsgn_cmp (int n, int in1[], int in2[]); 463 | 464 | if (in1[0] > in2[0]) 465 | { 466 | return 1; 467 | } 468 | else if (in1[0] < in2[0]) 469 | { 470 | return -1; 471 | } 472 | return in1[0] * mp_unsgn_cmp (n, &in1[1], &in2[1]); 473 | } 474 | 475 | 476 | void 477 | mp_add (int n, int radix, int in1[], int in2[], int out[]) 478 | { 479 | int mp_unsgn_cmp (int n, int in1[], int in2[]); 480 | int mp_unexp_add (int n, int radix, int expdif, 481 | DGTINT in1[], DGTINT in2[], DGTINT out[]); 482 | int mp_unexp_sub (int n, int radix, int expdif, 483 | DGTINT in1[], DGTINT in2[], DGTINT out[]); 484 | int outsgn, outexp, expdif; 485 | 486 | expdif = in1[1] - in2[1]; 487 | outexp = in1[1]; 488 | if (expdif < 0) 489 | { 490 | outexp = in2[1]; 491 | } 492 | outsgn = in1[0] * in2[0]; 493 | if (outsgn >= 0) 494 | { 495 | if (outsgn > 0) 496 | { 497 | outsgn = in1[0]; 498 | } 499 | else 500 | { 501 | outsgn = in1[0] + in2[0]; 502 | outexp = in1[1] + in2[1]; 503 | expdif = 0; 504 | } 505 | if (expdif >= 0) 506 | { 507 | outexp += mp_unexp_add (n, 
radix, expdif, 508 | (DGTINT *) & in1[2], (DGTINT *) & in2[2], 509 | (DGTINT *) & out[2]); 510 | } 511 | else 512 | { 513 | outexp += mp_unexp_add (n, radix, -expdif, 514 | (DGTINT *) & in2[2], (DGTINT *) & in1[2], 515 | (DGTINT *) & out[2]); 516 | } 517 | } 518 | else 519 | { 520 | outsgn = mp_unsgn_cmp (n, &in1[1], &in2[1]); 521 | if (outsgn >= 0) 522 | { 523 | expdif = mp_unexp_sub (n, radix, expdif, 524 | (DGTINT *) & in1[2], (DGTINT *) & in2[2], 525 | (DGTINT *) & out[2]); 526 | } 527 | else 528 | { 529 | expdif = mp_unexp_sub (n, radix, -expdif, 530 | (DGTINT *) & in2[2], (DGTINT *) & in1[2], 531 | (DGTINT *) & out[2]); 532 | } 533 | outexp -= expdif; 534 | outsgn *= in1[0]; 535 | if (expdif == n) 536 | { 537 | outsgn = 0; 538 | } 539 | } 540 | if (outsgn == 0) 541 | { 542 | outexp = 0; 543 | } 544 | out[0] = outsgn; 545 | out[1] = outexp; 546 | } 547 | 548 | 549 | void 550 | mp_sub (int n, int radix, int in1[], int in2[], int out[]) 551 | { 552 | int mp_unsgn_cmp (int n, int in1[], int in2[]); 553 | int mp_unexp_add (int n, int radix, int expdif, 554 | DGTINT in1[], DGTINT in2[], DGTINT out[]); 555 | int mp_unexp_sub (int n, int radix, int expdif, 556 | DGTINT in1[], DGTINT in2[], DGTINT out[]); 557 | int outsgn, outexp, expdif; 558 | 559 | expdif = in1[1] - in2[1]; 560 | outexp = in1[1]; 561 | if (expdif < 0) 562 | { 563 | outexp = in2[1]; 564 | } 565 | outsgn = in1[0] * in2[0]; 566 | if (outsgn <= 0) 567 | { 568 | if (outsgn < 0) 569 | { 570 | outsgn = in1[0]; 571 | } 572 | else 573 | { 574 | outsgn = in1[0] - in2[0]; 575 | outexp = in1[1] + in2[1]; 576 | expdif = 0; 577 | } 578 | if (expdif >= 0) 579 | { 580 | outexp += mp_unexp_add (n, radix, expdif, 581 | (DGTINT *) & in1[2], (DGTINT *) & in2[2], 582 | (DGTINT *) & out[2]); 583 | } 584 | else 585 | { 586 | outexp += mp_unexp_add (n, radix, -expdif, 587 | (DGTINT *) & in2[2], (DGTINT *) & in1[2], 588 | (DGTINT *) & out[2]); 589 | } 590 | } 591 | else 592 | { 593 | outsgn = mp_unsgn_cmp (n, &in1[1], 
&in2[1]); 594 | if (outsgn >= 0) 595 | { 596 | expdif = mp_unexp_sub (n, radix, expdif, 597 | (DGTINT *) & in1[2], (DGTINT *) & in2[2], 598 | (DGTINT *) & out[2]); 599 | } 600 | else 601 | { 602 | expdif = mp_unexp_sub (n, radix, -expdif, 603 | (DGTINT *) & in2[2], (DGTINT *) & in1[2], 604 | (DGTINT *) & out[2]); 605 | } 606 | outexp -= expdif; 607 | outsgn *= in1[0]; 608 | if (expdif == n) 609 | { 610 | outsgn = 0; 611 | } 612 | } 613 | if (outsgn == 0) 614 | { 615 | outexp = 0; 616 | } 617 | out[0] = outsgn; 618 | out[1] = outexp; 619 | } 620 | 621 | 622 | /* -------- mp_add child routines -------- */ 623 | 624 | 625 | int 626 | mp_unsgn_cmp (int n, int in1[], int in2[]) 627 | { 628 | int j, cmp; 629 | DGTINT *in1r, *in2r; 630 | 631 | in1r = ((DGTINT *) & in1[1]) - 1; 632 | in2r = ((DGTINT *) & in2[1]) - 1; 633 | cmp = in1[0] - in2[0]; 634 | for (j = 1; j <= n && cmp == 0; j++) 635 | { 636 | cmp = in1r[j] - in2r[j]; 637 | } 638 | if (cmp > 0) 639 | { 640 | cmp = 1; 641 | } 642 | else if (cmp < 0) 643 | { 644 | cmp = -1; 645 | } 646 | return cmp; 647 | } 648 | 649 | 650 | int 651 | mp_unexp_add (int n, int radix, int expdif, 652 | DGTINT in1[], DGTINT in2[], DGTINT out[]) 653 | { 654 | int j, x, carry; 655 | 656 | carry = 0; 657 | if (expdif == 0 && in1[0] + in2[0] >= radix) 658 | { 659 | x = in1[n - 1] + in2[n - 1]; 660 | carry = x >= radix ? -1 : 0; 661 | for (j = n - 1; j > 0; j--) 662 | { 663 | x = in1[j - 1] + in2[j - 1] - carry; 664 | carry = x >= radix ? -1 : 0; 665 | out[j] = (DGTINT) (x - (radix & carry)); 666 | } 667 | out[0] = (DGTINT) - carry; 668 | } 669 | else 670 | { 671 | if (expdif > n) 672 | { 673 | expdif = n; 674 | } 675 | for (j = n - 1; j >= expdif; j--) 676 | { 677 | x = in1[j] + in2[j - expdif] - carry; 678 | carry = x >= radix ? -1 : 0; 679 | out[j] = (DGTINT) (x - (radix & carry)); 680 | } 681 | for (j = expdif - 1; j >= 0; j--) 682 | { 683 | x = in1[j] - carry; 684 | carry = x >= radix ? 
-1 : 0; 685 | out[j] = (DGTINT) (x - (radix & carry)); 686 | } 687 | if (carry != 0) 688 | { 689 | for (j = n - 1; j > 0; j--) 690 | { 691 | out[j] = out[j - 1]; 692 | } 693 | out[0] = (DGTINT) - carry; 694 | } 695 | } 696 | return -carry; 697 | } 698 | 699 | 700 | int 701 | mp_unexp_sub (int n, int radix, int expdif, 702 | DGTINT in1[], DGTINT in2[], DGTINT out[]) 703 | { 704 | int j, x, borrow, ncancel; 705 | 706 | if (expdif > n) 707 | { 708 | expdif = n; 709 | } 710 | borrow = 0; 711 | for (j = n - 1; j >= expdif; j--) 712 | { 713 | x = in1[j] - in2[j - expdif] + borrow; 714 | borrow = x < 0 ? -1 : 0; 715 | out[j] = (DGTINT) (x + (radix & borrow)); 716 | } 717 | for (j = expdif - 1; j >= 0; j--) 718 | { 719 | x = in1[j] + borrow; 720 | borrow = x < 0 ? -1 : 0; 721 | out[j] = (DGTINT) (x + (radix & borrow)); 722 | } 723 | ncancel = 0; 724 | for (j = 0; j < n && out[j] == 0; j++) 725 | { 726 | ncancel = j + 1; 727 | } 728 | if (ncancel > 0 && ncancel < n) 729 | { 730 | for (j = 0; j < n - ncancel; j++) 731 | { 732 | out[j] = out[j + ncancel]; 733 | } 734 | for (j = n - ncancel; j < n; j++) 735 | { 736 | out[j] = 0; 737 | } 738 | } 739 | return ncancel; 740 | } 741 | 742 | 743 | /* -------- mp_imul routines -------- */ 744 | 745 | 746 | void 747 | mp_imul (int n, int radix, int in1[], int in2, int out[]) 748 | { 749 | void mp_unsgn_imul (int n, double dradix, int in1[], double din2, 750 | int out[]); 751 | 752 | if (in2 > 0) 753 | { 754 | out[0] = in1[0]; 755 | } 756 | else if (in2 < 0) 757 | { 758 | out[0] = -in1[0]; 759 | in2 = -in2; 760 | } 761 | else 762 | { 763 | out[0] = 0; 764 | } 765 | mp_unsgn_imul (n, radix, &in1[1], in2, &out[1]); 766 | if (out[0] == 0) 767 | { 768 | out[1] = 0; 769 | } 770 | } 771 | 772 | 773 | int 774 | mp_idiv (int n, int radix, int in1[], int in2, int out[]) 775 | { 776 | void mp_load_0 (int n, int radix, int out[]); 777 | void mp_unsgn_idiv (int n, double dradix, int in1[], double din2, 778 | int out[]); 779 | 780 | if (in2 == 0) 
781 | { 782 | return -1; 783 | } 784 | if (in2 > 0) 785 | { 786 | out[0] = in1[0]; 787 | } 788 | else 789 | { 790 | out[0] = -in1[0]; 791 | in2 = -in2; 792 | } 793 | if (in1[0] == 0) 794 | { 795 | mp_load_0 (n, radix, out); 796 | return 0; 797 | } 798 | mp_unsgn_idiv (n, radix, &in1[1], in2, &out[1]); 799 | return 0; 800 | } 801 | 802 | 803 | void 804 | mp_idiv_2 (int n, int radix, int in[], int out[]) 805 | { 806 | int j, ix, carry, shift; 807 | DGTINT *inr, *outr; 808 | 809 | inr = ((DGTINT *) & in[2]) - 2; 810 | outr = ((DGTINT *) & out[2]) - 2; 811 | out[0] = in[0]; 812 | shift = 0; 813 | if (inr[2] == 1) 814 | { 815 | shift = 1; 816 | } 817 | out[1] = in[1] - shift; 818 | carry = -shift; 819 | for (j = 2; j <= n + 1 - shift; j++) 820 | { 821 | ix = inr[j + shift] + (radix & carry); 822 | carry = -(ix & 1); 823 | outr[j] = (DGTINT) (ix >> 1); 824 | } 825 | if (shift > 0) 826 | { 827 | outr[n + 1] = (DGTINT) ((radix & carry) >> 1); 828 | } 829 | } 830 | 831 | 832 | /* -------- mp_imul child routines -------- */ 833 | 834 | 835 | void 836 | mp_unsgn_imul (int n, double dradix, int in1[], double din2, int out[]) 837 | { 838 | int j, carry, shift; 839 | double x, d1_radix; 840 | DGTINT *in1r, *outr; 841 | 842 | in1r = ((DGTINT *) & in1[1]) - 1; 843 | outr = ((DGTINT *) & out[1]) - 1; 844 | d1_radix = 1.0 / dradix; 845 | carry = 0; 846 | for (j = n; j >= 1; j--) 847 | { 848 | x = din2 * in1r[j] + carry + 0.5; 849 | carry = (int) (d1_radix * x); 850 | outr[j] = (DGTINT) (x - dradix * carry); 851 | } 852 | shift = 0; 853 | x = carry + 0.5; 854 | while (x > 1) 855 | { 856 | x *= d1_radix; 857 | shift++; 858 | } 859 | out[0] = in1[0] + shift; 860 | if (shift > 0) 861 | { 862 | while (shift > n) 863 | { 864 | carry = (int) (d1_radix * carry + 0.5); 865 | shift--; 866 | } 867 | for (j = n; j >= shift + 1; j--) 868 | { 869 | outr[j] = outr[j - shift]; 870 | } 871 | for (j = shift; j >= 1; j--) 872 | { 873 | x = carry + 0.5; 874 | carry = (int) (d1_radix * x); 875 | outr[j] 
= (DGTINT) (x - dradix * carry); 876 | } 877 | } 878 | } 879 | 880 | 881 | void 882 | mp_unsgn_idiv (int n, double dradix, int in1[], double din2, int out[]) 883 | { 884 | int j, ix, carry, shift; 885 | double x, d1_in2; 886 | DGTINT *in1r, *outr; 887 | 888 | in1r = ((DGTINT *) & in1[1]) - 1; 889 | outr = ((DGTINT *) & out[1]) - 1; 890 | d1_in2 = 1.0 / din2; 891 | shift = 0; 892 | x = 0; 893 | do 894 | { 895 | shift++; 896 | x *= dradix; 897 | if (shift <= n) 898 | { 899 | x += in1r[shift]; 900 | } 901 | } 902 | while (x < din2 - 0.5); 903 | x += 0.5; 904 | ix = (int) (d1_in2 * x); 905 | carry = (int) (x - din2 * ix); 906 | outr[1] = (DGTINT) ix; 907 | shift--; 908 | out[0] = in1[0] - shift; 909 | if (shift >= n) 910 | { 911 | shift = n - 1; 912 | } 913 | for (j = 2; j <= n - shift; j++) 914 | { 915 | x = in1r[j + shift] + dradix * carry + 0.5; 916 | ix = (int) (d1_in2 * x); 917 | carry = (int) (x - din2 * ix); 918 | outr[j] = (DGTINT) ix; 919 | } 920 | for (j = n - shift + 1; j <= n; j++) 921 | { 922 | x = dradix * carry + 0.5; 923 | ix = (int) (d1_in2 * x); 924 | carry = (int) (x - din2 * ix); 925 | outr[j] = (DGTINT) ix; 926 | } 927 | } 928 | 929 | 930 | /* -------- mp_mul routines -------- */ 931 | 932 | 933 | double 934 | mp_mul_radix_test (int n, int radix, int nfft, double tmpfft[]) 935 | { 936 | void mp_mul_csqu (int nfft, double d1[]); 937 | double mp_mul_d2i_test (int radix, int nfft, double din[]); 938 | int j, ndata, radix_2; 939 | 940 | ndata = (nfft >> 1) + 1; 941 | if (ndata > n) 942 | { 943 | ndata = n; 944 | } 945 | tmpfft[nfft + 1] = radix - 1; 946 | for (j = nfft; j > ndata; j--) 947 | { 948 | tmpfft[j] = 0; 949 | } 950 | radix_2 = (radix + 1) / 2; 951 | for (j = ndata; j > 2; j--) 952 | { 953 | tmpfft[j] = radix_2; 954 | } 955 | tmpfft[2] = radix; 956 | tmpfft[1] = radix - 1; 957 | tmpfft[0] = 0; 958 | mp_mul_csqu (nfft, tmpfft); 959 | return 2 * mp_mul_d2i_test (radix, nfft, tmpfft); 960 | } 961 | 962 | 963 | void 964 | mp_mul (int n, int 
radix, int in1[], int in2[], int out[], 965 | int tmp[], int nfft, double tmp1fft[], double tmp2fft[], 966 | double tmp3fft[]) 967 | { 968 | void mp_add (int n, int radix, int in1[], int in2[], int out[]); 969 | void mp_mul_i2d (int n, int radix, int nfft, int shift, 970 | int in[], double dout[]); 971 | void mp_mul_cmul_nt_out (int nfft, double d1[], double d2[]); 972 | void mp_mul_cmul_nt_d2 (int nfft, double d1[], double d2[]); 973 | void mp_mul_cmul_nt_d1_add (int nfft, double d1[], double d2[], 974 | double d3[]); 975 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 976 | int n_h, shift; 977 | DGTINT *in1r, *in2r; 978 | 979 | in1r = ((DGTINT *) & in1[2]) - 2; 980 | in2r = ((DGTINT *) & in2[2]) - 2; 981 | shift = (nfft >> 1) + 1; 982 | while (n > shift) 983 | { 984 | if (in1r[shift + 2] + in2r[shift + 2] != 0) 985 | { 986 | break; 987 | } 988 | shift++; 989 | } 990 | n_h = n / 2 + 1; 991 | if (n_h < n - shift) 992 | { 993 | n_h = n - shift; 994 | } 995 | /* ---- tmp3fft = (upper) in1 * (lower) in2 ---- */ 996 | mp_mul_i2d (n, radix, nfft, 0, in1, tmp1fft); 997 | mp_mul_i2d (n, radix, nfft, shift, in2, tmp3fft); 998 | mp_mul_cmul_nt_out (nfft, tmp1fft, tmp3fft); 999 | /* ---- tmp = (upper) in1 * (upper) in2 ---- */ 1000 | mp_mul_i2d (n, radix, nfft, 0, in2, tmp2fft); 1001 | mp_mul_cmul_nt_d2 (nfft, tmp2fft, tmp1fft); 1002 | mp_mul_d2i (n, radix, nfft, tmp1fft, tmp); 1003 | /* ---- tmp3fft += (upper) in2 * (lower) in1 ---- */ 1004 | mp_mul_i2d (n, radix, nfft, shift, in1, tmp1fft); 1005 | mp_mul_cmul_nt_d1_add (nfft, tmp2fft, tmp1fft, tmp3fft); 1006 | /* ---- out = tmp + tmp3fft ---- */ 1007 | mp_mul_d2i (n_h, radix, nfft, tmp3fft, out); 1008 | mp_add (n, radix, out, tmp, out); 1009 | } 1010 | 1011 | 1012 | void 1013 | mp_squ (int n, int radix, int in[], int out[], int tmp[], 1014 | int nfft, double tmp1fft[], double tmp2fft[]) 1015 | { 1016 | void mp_add (int n, int radix, int in1[], int in2[], int out[]); 1017 | void mp_mul_i2d (int n, 
int radix, int nfft, int shift, 1018 | int in[], double dout[]); 1019 | void mp_mul_cmul (int nfft, double d1[], double d2[]); 1020 | void mp_mul_csqu_nt_d1 (int nfft, double d1[]); 1021 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1022 | int n_h, shift; 1023 | DGTINT *inr; 1024 | 1025 | inr = ((DGTINT *) & in[2]) - 2; 1026 | shift = (nfft >> 1) + 1; 1027 | while (n > shift) 1028 | { 1029 | if (inr[shift + 2] != 0) 1030 | { 1031 | break; 1032 | } 1033 | shift++; 1034 | } 1035 | n_h = n / 2 + 1; 1036 | if (n_h < n - shift) 1037 | { 1038 | n_h = n - shift; 1039 | } 1040 | /* ---- tmp = 2 * (upper) in * (lower) in ---- */ 1041 | mp_mul_i2d (n, radix, nfft, 0, in, tmp1fft); 1042 | mp_mul_i2d (n, radix, nfft, shift, in, tmp2fft); 1043 | mp_mul_cmul (nfft, tmp1fft, tmp2fft); 1044 | mp_mul_d2i (n_h, radix, nfft, tmp2fft, tmp); 1045 | mp_add (n_h, radix, tmp, tmp, tmp); 1046 | /* ---- out = tmp + ((upper) in)^2 ---- */ 1047 | mp_mul_csqu_nt_d1 (nfft, tmp1fft); 1048 | mp_mul_d2i (n, radix, nfft, tmp1fft, out); 1049 | mp_add (n, radix, out, tmp, out); 1050 | } 1051 | 1052 | 1053 | void 1054 | mp_mulhf (int n, int radix, int in1[], int in2[], int out[], 1055 | int tmp[], int nfft, double in1fft[], double tmpfft[]) 1056 | { 1057 | void mp_add (int n, int radix, int in1[], int in2[], int out[]); 1058 | void mp_mul_i2d (int n, int radix, int nfft, int shift, 1059 | int in[], double dout[]); 1060 | void mp_mul_cmul (int nfft, double d1[], double d2[]); 1061 | void mp_mul_cmul_nt_d1 (int nfft, double d1[], double d2[]); 1062 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1063 | int n_h, shift; 1064 | DGTINT *in2r; 1065 | 1066 | in2r = ((DGTINT *) & in2[2]) - 2; 1067 | shift = (nfft >> 1) + 1; 1068 | while (n > shift) 1069 | { 1070 | if (in2r[shift + 2] != 0) 1071 | { 1072 | break; 1073 | } 1074 | shift++; 1075 | } 1076 | n_h = n / 2 + 1; 1077 | if (n_h < n - shift) 1078 | { 1079 | n_h = n - shift; 1080 | } 1081 | /* ---- tmp = 
(upper) in1 * (upper) in2 ---- */ 1082 | mp_mul_i2d (n, radix, nfft, 0, in1, in1fft); 1083 | mp_mul_i2d (n, radix, nfft, 0, in2, tmpfft); 1084 | mp_mul_cmul (nfft, in1fft, tmpfft); 1085 | mp_mul_d2i (n, radix, nfft, tmpfft, tmp); 1086 | /* ---- out = tmp + (upper) in1 * (lower) in2 ---- */ 1087 | mp_mul_i2d (n, radix, nfft, shift, in2, tmpfft); 1088 | mp_mul_cmul_nt_d1 (nfft, in1fft, tmpfft); 1089 | mp_mul_d2i (n_h, radix, nfft, tmpfft, out); 1090 | mp_add (n, radix, out, tmp, out); 1091 | } 1092 | 1093 | 1094 | void 1095 | mp_mulhf_use_in1fft (int n, int radix, double in1fft[], int in2[], 1096 | int out[], int tmp[], int nfft, double tmpfft[]) 1097 | { 1098 | void mp_add (int n, int radix, int in1[], int in2[], int out[]); 1099 | void mp_mul_i2d (int n, int radix, int nfft, int shift, 1100 | int in[], double dout[]); 1101 | void mp_mul_cmul_nt_d1 (int nfft, double d1[], double d2[]); 1102 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1103 | int n_h, shift; 1104 | DGTINT *in2r; 1105 | 1106 | in2r = ((DGTINT *) & in2[2]) - 2; 1107 | shift = (nfft >> 1) + 1; 1108 | while (n > shift) 1109 | { 1110 | if (in2r[shift + 2] != 0) 1111 | { 1112 | break; 1113 | } 1114 | shift++; 1115 | } 1116 | n_h = n / 2 + 1; 1117 | if (n_h < n - shift) 1118 | { 1119 | n_h = n - shift; 1120 | } 1121 | /* ---- tmp = (upper) in1fft * (upper) in2 ---- */ 1122 | mp_mul_i2d (n, radix, nfft, 0, in2, tmpfft); 1123 | mp_mul_cmul_nt_d1 (nfft, in1fft, tmpfft); 1124 | mp_mul_d2i (n, radix, nfft, tmpfft, tmp); 1125 | /* ---- out = tmp + (upper) in1 * (lower) in2 ---- */ 1126 | mp_mul_i2d (n, radix, nfft, shift, in2, tmpfft); 1127 | mp_mul_cmul_nt_d1 (nfft, in1fft, tmpfft); 1128 | mp_mul_d2i (n_h, radix, nfft, tmpfft, out); 1129 | mp_add (n, radix, out, tmp, out); 1130 | } 1131 | 1132 | 1133 | void 1134 | mp_squhf_use_infft (int n, int radix, double infft[], int in[], 1135 | int out[], int tmp[], int nfft, double tmpfft[]) 1136 | { 1137 | void mp_add (int n, int radix, int 
in1[], int in2[], int out[]); 1138 | void mp_mul_i2d (int n, int radix, int nfft, int shift, 1139 | int in[], double dout[]); 1140 | void mp_mul_cmul_nt_d1 (int nfft, double d1[], double d2[]); 1141 | void mp_mul_csqu_nt_d1 (int nfft, double d1[]); 1142 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1143 | int n_h, shift; 1144 | DGTINT *inr; 1145 | 1146 | inr = ((DGTINT *) & in[2]) - 2; 1147 | shift = (nfft >> 1) + 1; 1148 | while (n > shift) 1149 | { 1150 | if (inr[shift + 2] != 0) 1151 | { 1152 | break; 1153 | } 1154 | shift++; 1155 | } 1156 | n_h = n / 2 + 1; 1157 | if (n_h < n - shift) 1158 | { 1159 | n_h = n - shift; 1160 | } 1161 | /* ---- tmp = (upper) infft * (lower) in ---- */ 1162 | mp_mul_i2d (n, radix, nfft, shift, in, tmpfft); 1163 | mp_mul_cmul_nt_d1 (nfft, infft, tmpfft); 1164 | mp_mul_d2i (n_h, radix, nfft, tmpfft, tmp); 1165 | /* ---- out = tmp + ((upper) infft)^2 ---- */ 1166 | mp_mul_csqu_nt_d1 (nfft, infft); 1167 | mp_mul_d2i (n, radix, nfft, infft, out); 1168 | mp_add (n, radix, out, tmp, out); 1169 | } 1170 | 1171 | 1172 | void 1173 | mp_mulh (int n, int radix, int in1[], int in2[], int out[], 1174 | int nfft, double in1fft[], double outfft[]) 1175 | { 1176 | void mp_mul_i2d (int n, int radix, int nfft, int shift, 1177 | int in[], double dout[]); 1178 | void mp_mul_cmul (int nfft, double d1[], double d2[]); 1179 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1180 | 1181 | mp_mul_i2d (n, radix, nfft, 0, in1, in1fft); 1182 | mp_mul_i2d (n, radix, nfft, 0, in2, outfft); 1183 | mp_mul_cmul (nfft, in1fft, outfft); 1184 | mp_mul_d2i (n, radix, nfft, outfft, out); 1185 | } 1186 | 1187 | 1188 | void 1189 | mp_mulh_use_in1fft (int n, int radix, double in1fft[], 1190 | int shift, int in2[], int out[], int nfft, 1191 | double outfft[]) 1192 | { 1193 | void mp_mul_i2d (int n, int radix, int nfft, int shift, 1194 | int in[], double dout[]); 1195 | void mp_mul_cmul_nt_d1 (int nfft, double d1[], double 
d2[]); 1196 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1197 | DGTINT *in2r; 1198 | 1199 | in2r = ((DGTINT *) & in2[2]) - 2; 1200 | while (n > shift) 1201 | { 1202 | if (in2r[shift + 2] != 0) 1203 | { 1204 | break; 1205 | } 1206 | shift++; 1207 | } 1208 | mp_mul_i2d (n, radix, nfft, shift, in2, outfft); 1209 | mp_mul_cmul_nt_d1 (nfft, in1fft, outfft); 1210 | mp_mul_d2i (n, radix, nfft, outfft, out); 1211 | } 1212 | 1213 | 1214 | void 1215 | mp_squh (int n, int radix, int in[], int out[], int nfft, double outfft[]) 1216 | { 1217 | void mp_mul_i2d (int n, int radix, int nfft, int shift, 1218 | int in[], double dout[]); 1219 | void mp_mul_csqu (int nfft, double d1[]); 1220 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1221 | 1222 | mp_mul_i2d (n, radix, nfft, 0, in, outfft); 1223 | mp_mul_csqu (nfft, outfft); 1224 | mp_mul_d2i (n, radix, nfft, outfft, out); 1225 | } 1226 | 1227 | 1228 | void 1229 | mp_squh_save_infft (int n, int radix, int in[], int out[], 1230 | int nfft, double infft[], double outfft[]) 1231 | { 1232 | void mp_mul_i2d (int n, int radix, int nfft, int shift, 1233 | int in[], double dout[]); 1234 | void mp_mul_csqu_save_d1 (int nfft, double d1[], double d2[]); 1235 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1236 | 1237 | mp_mul_i2d (n, radix, nfft, 0, in, infft); 1238 | mp_mul_csqu_save_d1 (nfft, infft, outfft); 1239 | mp_mul_d2i (n, radix, nfft, outfft, out); 1240 | } 1241 | 1242 | 1243 | void 1244 | mp_squh_use_in1fft (int n, int radix, double inoutfft[], int out[], int nfft) 1245 | { 1246 | void mp_mul_csqu_nt_d1 (int nfft, double d1[]); 1247 | void mp_mul_d2i (int n, int radix, int nfft, double din[], int out[]); 1248 | 1249 | mp_mul_csqu_nt_d1 (nfft, inoutfft); 1250 | mp_mul_d2i (n, radix, nfft, inoutfft, out); 1251 | } 1252 | 1253 | 1254 | /* -------- mp_mul child routines -------- */ 1255 | 1256 | 1257 | void 1258 | mp_mul_i2d (int n, int radix, int nfft, int 
shift, int in[], double dout[]) 1259 | { 1260 | int j, x, carry, ndata, radix_2, topdgt; 1261 | DGTINT *inr; 1262 | 1263 | inr = ((DGTINT *) & in[2]) - 2; 1264 | ndata = 0; 1265 | topdgt = 0; 1266 | if (n > shift) 1267 | { 1268 | topdgt = inr[shift + 2]; 1269 | ndata = (nfft >> 1) + 1; 1270 | if (ndata > n - shift) 1271 | { 1272 | ndata = n - shift; 1273 | } 1274 | } 1275 | dout[nfft + 1] = in[0] * topdgt; 1276 | for (j = nfft; j > ndata; j--) 1277 | { 1278 | dout[j] = 0; 1279 | } 1280 | /* ---- abs(dout[j]) <= radix/2 (to keep FFT precision) ---- */ 1281 | if (ndata > 1) 1282 | { 1283 | radix_2 = radix / 2; 1284 | carry = 0; 1285 | for (j = ndata + 1; j > 3; j--) 1286 | { 1287 | x = inr[j + shift] - carry; 1288 | carry = x >= radix_2 ? -1 : 0; 1289 | dout[j - 1] = x - (radix & carry); 1290 | } 1291 | dout[2] = inr[shift + 3] - carry; 1292 | } 1293 | dout[1] = topdgt; 1294 | dout[0] = in[1] - shift; 1295 | } 1296 | 1297 | 1298 | void 1299 | mp_mul_cmul (int nfft, double d1[], double d2[]) 1300 | { 1301 | void cdft (int n, int isgn, double *a); 1302 | void mp_mul_rcmul (int n, double *a, double *b); 1303 | double xr, xi; 1304 | 1305 | cdft (nfft, 1, &d1[1]); 1306 | cdft (nfft, 1, &d2[1]); 1307 | d2[0] += d1[0]; 1308 | xr = d1[1] * d2[1] + d1[2] * d2[2]; 1309 | xi = d1[1] * d2[2] + d1[2] * d2[1]; 1310 | d2[1] = xr; 1311 | d2[2] = xi; 1312 | if (nfft > 2) 1313 | { 1314 | mp_mul_rcmul (nfft, &d1[1], &d2[1]); 1315 | } 1316 | d2[nfft + 1] *= d1[nfft + 1]; 1317 | cdft (nfft, -1, &d2[1]); 1318 | } 1319 | 1320 | 1321 | void 1322 | mp_mul_cmul_nt_d1 (int nfft, double d1[], double d2[]) 1323 | { 1324 | void cdft (int n, int isgn, double *a); 1325 | void mp_mul_rcmul_nt_in1 (int n, double *a, double *b); 1326 | double xr, xi; 1327 | 1328 | cdft (nfft, 1, &d2[1]); 1329 | d2[0] += d1[0]; 1330 | xr = d1[1] * d2[1] + d1[2] * d2[2]; 1331 | xi = d1[1] * d2[2] + d1[2] * d2[1]; 1332 | d2[1] = xr; 1333 | d2[2] = xi; 1334 | if (nfft > 2) 1335 | { 1336 | mp_mul_rcmul_nt_in1 (nfft, 
/*
 * d2 = d1 * d2 where only d1 is forward-transformed here; no forward
 * transform is applied to d2.  d2 is transformed back at the end.
 */
void
mp_mul_cmul_nt_d2 (int nfft, double d1[], double d2[])
{
  void cdft (int n, int isgn, double *a);
  void mp_mul_rcmul_nt_in2 (int n, double *a, double *b);
  double a0, a1, b0, b1;

  cdft (nfft, 1, &d1[1]);
  d2[0] += d1[0];
  a0 = d1[1];
  a1 = d1[2];
  b0 = d2[1];
  b1 = d2[2];
  d2[1] = a0 * b0 + a1 * b1;
  d2[2] = a0 * b1 + a1 * b0;
  if (nfft > 2)
    mp_mul_rcmul_nt_in2 (nfft, &d1[1], &d2[1]);
  d2[nfft + 1] *= d1[nfft + 1];
  cdft (nfft, -1, &d2[1]);
}


/*
 * d2 = d1 * d2 with both inputs forward-transformed here, but the
 * product is left in d2 without the final inverse transform.
 */
void
mp_mul_cmul_nt_out (int nfft, double d1[], double d2[])
{
  void cdft (int n, int isgn, double *a);
  void mp_mul_rcmul_nt_out (int n, double *a, double *b);
  double a0, a1, b0, b1;

  cdft (nfft, 1, &d1[1]);
  cdft (nfft, 1, &d2[1]);
  d2[0] += d1[0];
  a0 = d1[1];
  a1 = d1[2];
  b0 = d2[1];
  b1 = d2[2];
  d2[1] = a0 * b0 + a1 * b1;
  d2[2] = a0 * b1 + a1 * b0;
  if (nfft > 2)
    mp_mul_rcmul_nt_out (nfft, &d1[1], &d2[1]);
  d2[nfft + 1] *= d1[nfft + 1];
}


/*
 * d3 += d1 * d2, then inverse-transform d3.  Only d2 is
 * forward-transformed here; d1 is read as is.  d3[0] is not touched.
 */
void
mp_mul_cmul_nt_d1_add (int nfft, double d1[], double d2[], double d3[])
{
  void cdft (int n, int isgn, double *a);
  void mp_mul_rcmul_nt_in1_add (int n, double *a, double *b, double *badd);
  double a0, a1, b0, b1;

  cdft (nfft, 1, &d2[1]);
  a0 = d1[1];
  a1 = d1[2];
  b0 = d2[1];
  b1 = d2[2];
  d3[1] += a0 * b0 + a1 * b1;
  d3[2] += a0 * b1 + a1 * b0;
  if (nfft > 2)
    mp_mul_rcmul_nt_in1_add (nfft, &d1[1], &d2[1], &d3[1]);
  d3[nfft + 1] += d1[nfft + 1] * d2[nfft + 1];
  cdft (nfft, -1, &d3[1]);
}
/*
 * d1 = d1^2 via FFT, fully in place: forward transform, square,
 * inverse transform.  d1[0] is doubled and d1[nfft+1] is squared.
 */
void
mp_mul_csqu (int nfft, double d1[])
{
  void cdft (int n, int isgn, double *a);
  void mp_mul_rcsqu (int n, double *a);
  double v0, v1, top;

  cdft (nfft, 1, &d1[1]);
  d1[0] *= 2;
  /* first packed pair is squared separately from the rest */
  v0 = d1[1];
  v1 = d1[2];
  d1[1] = v0 * v0 + v1 * v1;
  d1[2] = 2 * v0 * v1;
  if (nfft > 2)
    mp_mul_rcsqu (nfft, &d1[1]);
  top = d1[nfft + 1];
  d1[nfft + 1] = top * top;
  cdft (nfft, -1, &d1[1]);
}


/*
 * d2 = d1^2 via FFT.  d1 is forward-transformed in place and kept in
 * that form for later reuse; the square is built in d2 and transformed
 * back.
 */
void
mp_mul_csqu_save_d1 (int nfft, double d1[], double d2[])
{
  void cdft (int n, int isgn, double *a);
  void mp_mul_rcsqu_save (int n, double *a, double *b);
  double v0, v1, top;

  cdft (nfft, 1, &d1[1]);
  d2[0] = 2 * d1[0];
  v0 = d1[1];
  v1 = d1[2];
  d2[1] = v0 * v0 + v1 * v1;
  d2[2] = 2 * v0 * v1;
  if (nfft > 2)
    mp_mul_rcsqu_save (nfft, &d1[1], &d2[1]);
  top = d1[nfft + 1];
  d2[nfft + 1] = top * top;
  cdft (nfft, -1, &d2[1]);
}


/*
 * Square d1 in place without running a forward transform first, then
 * inverse-transform the result.
 */
void
mp_mul_csqu_nt_d1 (int nfft, double d1[])
{
  void cdft (int n, int isgn, double *a);
  void mp_mul_rcsqu_nt_in (int n, double *a);
  double v0, v1, top;

  d1[0] *= 2;
  v0 = d1[1];
  v1 = d1[2];
  d1[1] = v0 * v0 + v1 * v1;
  d1[2] = 2 * v0 * v1;
  if (nfft > 2)
    mp_mul_rcsqu_nt_in (nfft, &d1[1]);
  top = d1[nfft + 1];
  d1[nfft + 1] = top * top;
  cdft (nfft, -1, &d1[1]);
}
/*
 * Convert an inverse-transformed product held in din[] back into a
 * multi-digit number.
 *
 *   n      number of digits to write
 *   radix  digit radix
 *   nfft   FFT length the data was produced with
 *   din    din[0] carries the exponent, din[nfft+1] the signed top
 *          value; din[1] and din[nfft+1] are overwritten here
 *   out    out[0] = sign (1, -1 or 0), out[1] = exponent,
 *          digits from out[2] on (accessed through outr)
 *
 * Each din[j] is scaled by 2/nfft and rounded; two levels of carry
 * (carry, carry2) are propagated from the least significant digit
 * upward.
 */
void
mp_mul_d2i (int n, int radix, int nfft, double din[], int out[])
{
  int j, carry, carry1, carry2, shift, ndata;
  double x, scale, d1_radix, d1_radix2, pow_radix, topdgt;
  DGTINT *outr;

  outr = ((DGTINT *) & out[2]) - 2;
  scale = 2.0 / nfft;
  d1_radix = 1.0 / radix;
  d1_radix2 = d1_radix * d1_radix;
  topdgt = din[nfft + 1];
  x = topdgt < 0 ? -topdgt : topdgt;
  /* shift = 1 when the magnitude of the top value already reaches radix */
  shift = x + 0.5 >= radix ? 1 : 0;
  /* ---- correction of cyclic convolution of din[1] ---- */
  x *= nfft * 0.5;
  din[nfft + 1] = din[1] - x;
  din[1] = x;
  /* ---- output of digits ---- */
  ndata = n;
  if (n > nfft + 1 + shift)
    {
      /* more digits requested than the data provides: zero the excess */
      ndata = nfft + 1 + shift;
      for (j = n + 1; j > ndata + 1; j--)
        {
          outr[j] = 0;
        }
    }
  /* fold the entries below the last output digit into an initial carry;
     stop once their weight drops below DBL_EPSILON */
  x = 0;
  pow_radix = 1;
  for (j = ndata + 1 - shift; j <= nfft + 1; j++)
    {
      x += pow_radix * din[j];
      pow_radix *= d1_radix;
      if (pow_radix < DBL_EPSILON)
        {
          break;
        }
    }
  x = d1_radix2 * (scale * x + 0.5);
  /* (int) x - 1 keeps x - carry2 positive so later truncations act on
     non-negative values */
  carry2 = ((int) x) - 1;
  carry = (int) (radix * (x - carry2) + 0.5);
  for (j = ndata; j > 1; j--)
    {
      /* split the rounded value into digit, carry1 (radix^1 part) and
         carry2 (radix^2 part) */
      x = d1_radix2 * (scale * din[j - shift] + carry + 0.5);
      carry = carry2;
      carry2 = ((int) x) - 1;
      x = radix * (x - carry2);
      carry1 = (int) x;
      outr[j + 1] = (DGTINT) (radix * (x - carry1));
      carry += carry1;
    }
  /* leading digit: remaining carries plus, when not shifted, din[1] */
  x = carry + ((double) radix) * carry2 + 0.5;
  if (shift == 0)
    {
      x += scale * din[1];
    }
  carry = (int) (d1_radix * x);
  outr[2] = (DGTINT) (x - ((double) radix) * carry);
  if (carry > 0)
    {
      /* overflow of the leading digit: move all digits down one place
         and bump the exponent */
      for (j = n + 1; j > 2; j--)
        {
          outr[j] = outr[j - 1];
        }
      outr[2] = (DGTINT) carry;
      shift++;
    }
  /* ---- output of exp, sgn ---- */
  x = din[0] + shift + 0.5;
  /* subtracting 1 before the second truncation makes the pair of casts
     round toward minus infinity even when x is negative */
  shift = ((int) x) - 1;
  out[1] = shift + ((int) (x - shift));
  out[0] = topdgt > 0.5 ? 1 : -1;
  if (outr[2] == 0)
    {
      /* zero leading digit: result is stored as zero */
      out[0] = 0;
      out[1] = 0;
    }
}
/*
 * Run the same scaling and carry split as the digit output routine on
 * din[], but instead of storing digits return the largest distance of
 * any value from the centre of its rounding interval (0 = exact,
 * approaching 0.5 = about to round the wrong way).
 *
 * din[nfft+1] is overwritten.
 */
double
mp_mul_d2i_test (int radix, int nfft, double din[])
{
  double scale = 2.0 / nfft;
  double inv_radix = 1.0 / radix;
  double inv_radix2 = inv_radix * inv_radix;
  double worst = 0;
  double v;
  int carry_lo = 0;
  int carry_hi = 0;
  int carry_mid, idx;

  /* ---- correction of cyclic convolution of din[1] ---- */
  v = din[nfft + 1] * nfft * 0.5;
  v = v < 0 ? -v : v;
  din[nfft + 1] = din[1] - v;

  /* ---- check of digits ---- */
  for (idx = nfft + 1; idx > 1; idx--)
    {
      v = inv_radix2 * (scale * din[idx] + carry_lo + 0.5);
      carry_lo = carry_hi;
      carry_hi = ((int) v) - 1;
      v = radix * (v - carry_hi);
      carry_mid = (int) v;
      v = radix * (v - carry_mid);
      carry_lo += carry_mid;
      /* signed offset from the centre of the digit's rounding interval */
      v = v - 0.5 - ((int) v);
      if (v > worst)
        {
          worst = v;
        }
      else if (-v > worst)
        {
          worst = -v;
        }
    }
  return worst;
}


/* -------- mp_mul child^2 routines (mix RFFT routines) -------- */


#ifndef M_PI_2
#define M_PI_2 1.570796326794896619231321691639751442098584699687
#endif


#ifndef RDFT_LOOP_DIV		/* control of the RDFT's speed & tolerance */
#define RDFT_LOOP_DIV 64
#endif
/*
 * Pointwise product b = a * b on transformed data, for every index
 * pair (j, k = n - j) and for the middle element i = n / 2.
 *
 * For each pair both a[] and b[] are first converted from CFFT form to
 * RFFT form (a[] is stored back in that form), the two are multiplied,
 * and the product is converted back to CFFT form into b[].
 *
 * The rotation factors (wkr, wki, wdr, wdi) are advanced by a
 * recurrence and recomputed from sin/cos after every
 * 2 * RDFT_LOOP_DIV pairs.
 */
void
mp_mul_rcmul (int n, double *a, double *b)
{
  int i, i0, j, k;
  double ec, w1r, w1i, wkr, wki, wdr, wdi, ss;
  double xr, xi, yr, yi, ajr, aji, akr, aki, bjr, bji, bkr, bki;

  ec = 2 * M_PI_2 / n;
  wkr = 0;
  wki = 0;
  /* wdi = sin(ec) * cos(ec), wdr = sin(ec)^2 */
  wdi = cos (ec);
  wdr = sin (ec);
  wdi *= wdr;
  wdr *= wdr;
  /* w1r = cos(2 * ec), w1i = sin(2 * ec) by the double-angle identities */
  w1r = 1 - 2 * wdr;
  w1i = 2 * wdi;
  ss = 2 * w1i;
  /* middle element: plain complex product */
  i = n >> 1;
  xr = a[i];
  xi = a[i + 1];
  yr = b[i];
  yi = b[i + 1];
  b[i] = xr * yr - xi * yi;
  b[i + 1] = xr * yi + xi * yr;
  for (;;)
    {
      /* process one chunk of pairs, from i - 2 down to i0 */
      i0 = i - 4 * RDFT_LOOP_DIV;
      if (i0 < 2)
        {
          i0 = 2;
        }
      for (j = i - 2; j >= i0; j -= 2)
        {
          k = n - j;
          /* advance the rotation-factor recurrence by one step */
          xr = wkr + ss * wdi;
          xi = wki + ss * (0.5 - wdr);
          wkr = wdr;
          wki = wdi;
          wdr = xr;
          wdi = xi;
          /* ---- transform CFFT data a[] into RFFT data ---- */
          xr = a[j] - a[k];
          xi = a[j + 1] + a[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          ajr = a[j] - yr;
          aji = a[j + 1] - yi;
          akr = a[k] + yr;
          aki = a[k + 1] - yi;
          a[j] = ajr;
          a[j + 1] = aji;
          a[k] = akr;
          a[k + 1] = aki;
          /* ---- transform CFFT data b[] into RFFT data ---- */
          xr = b[j] - b[k];
          xi = b[j + 1] + b[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          xr = b[j] - yr;
          xi = b[j + 1] - yi;
          yr = b[k] + yr;
          yi = b[k + 1] - yi;
          /* ---- cmul ---- */
          bjr = ajr * xr - aji * xi;
          bji = ajr * xi + aji * xr;
          bkr = akr * yr - aki * yi;
          bki = akr * yi + aki * yr;
          /* ---- transform RFFT data bxx into CFFT data ---- */
          xr = bjr - bkr;
          xi = bji + bki;
          yr = wkr * xr + wki * xi;
          yi = wkr * xi - wki * xr;
          b[j] = bjr - yr;
          b[j + 1] = bji - yi;
          b[k] = bkr + yr;
          b[k + 1] = bki - yi;
        }
      if (i0 == 2)
        {
          break;
        }
      /* recompute the rotation factors directly for the next chunk */
      wkr = 0.5 * sin (ec * i0);
      wki = 0.5 * cos (ec * i0);
      wdr = 0.5 - (wkr * w1r - wki * w1i);
      wdi = wkr * w1i + wki * w1r;
      wkr = 0.5 - wkr;
      i = i0;
    }
}
/*
 * Pointwise product b = a * b on transformed data, for every index
 * pair (j, k = n - j) and for the middle element i = n / 2.
 *
 * Only b[] is converted from CFFT form to RFFT form here; a[] is read
 * unchanged and multiplied as is (presumably it is already in RFFT
 * form, e.g. left that way by an earlier call -- confirm at callers).
 * The product is converted back to CFFT form into b[].
 *
 * The rotation factors are advanced by a recurrence and recomputed
 * from sin/cos after every 2 * RDFT_LOOP_DIV pairs.
 */
void
mp_mul_rcmul_nt_in1 (int n, double *a, double *b)
{
  int i, i0, j, k;
  double ec, w1r, w1i, wkr, wki, wdr, wdi, ss;
  double xr, xi, yr, yi, bjr, bji, bkr, bki;

  ec = 2 * M_PI_2 / n;
  wkr = 0;
  wki = 0;
  /* wdi = sin(ec) * cos(ec), wdr = sin(ec)^2 */
  wdi = cos (ec);
  wdr = sin (ec);
  wdi *= wdr;
  wdr *= wdr;
  /* w1r = cos(2 * ec), w1i = sin(2 * ec) by the double-angle identities */
  w1r = 1 - 2 * wdr;
  w1i = 2 * wdi;
  ss = 2 * w1i;
  /* middle element: plain complex product */
  i = n >> 1;
  xr = a[i];
  xi = a[i + 1];
  yr = b[i];
  yi = b[i + 1];
  b[i] = xr * yr - xi * yi;
  b[i + 1] = xr * yi + xi * yr;
  for (;;)
    {
      /* process one chunk of pairs, from i - 2 down to i0 */
      i0 = i - 4 * RDFT_LOOP_DIV;
      if (i0 < 2)
        {
          i0 = 2;
        }
      for (j = i - 2; j >= i0; j -= 2)
        {
          k = n - j;
          /* advance the rotation-factor recurrence by one step */
          xr = wkr + ss * wdi;
          xi = wki + ss * (0.5 - wdr);
          wkr = wdr;
          wki = wdi;
          wdr = xr;
          wdi = xi;
          /* ---- transform CFFT data b[] into RFFT data ---- */
          xr = b[j] - b[k];
          xi = b[j + 1] + b[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          xr = b[j] - yr;
          xi = b[j + 1] - yi;
          yr = b[k] + yr;
          yi = b[k + 1] - yi;
          /* ---- cmul ---- */
          bjr = a[j] * xr - a[j + 1] * xi;
          bji = a[j] * xi + a[j + 1] * xr;
          bkr = a[k] * yr - a[k + 1] * yi;
          bki = a[k] * yi + a[k + 1] * yr;
          /* ---- transform RFFT data bxx into CFFT data ---- */
          xr = bjr - bkr;
          xi = bji + bki;
          yr = wkr * xr + wki * xi;
          yi = wkr * xi - wki * xr;
          b[j] = bjr - yr;
          b[j + 1] = bji - yi;
          b[k] = bkr + yr;
          b[k + 1] = bki - yi;
        }
      if (i0 == 2)
        {
          break;
        }
      /* recompute the rotation factors directly for the next chunk */
      wkr = 0.5 * sin (ec * i0);
      wki = 0.5 * cos (ec * i0);
      wdr = 0.5 - (wkr * w1r - wki * w1i);
      wdi = wkr * w1i + wki * w1r;
      wkr = 0.5 - wkr;
      i = i0;
    }
}
/*
 * Pointwise product b = a * b on transformed data, for every index
 * pair (j, k = n - j) and for the middle element i = n / 2.
 *
 * Only a[] is converted from CFFT form to RFFT form here (and stored
 * back in that form); b[] is multiplied as is (presumably it is
 * already in RFFT form -- confirm at callers).  The product is
 * converted back to CFFT form into b[].
 *
 * The rotation factors are advanced by a recurrence and recomputed
 * from sin/cos after every 2 * RDFT_LOOP_DIV pairs.
 */
void
mp_mul_rcmul_nt_in2 (int n, double *a, double *b)
{
  int i, i0, j, k;
  double ec, w1r, w1i, wkr, wki, wdr, wdi, ss;
  double xr, xi, yr, yi, bjr, bji, bkr, bki;

  ec = 2 * M_PI_2 / n;
  wkr = 0;
  wki = 0;
  /* wdi = sin(ec) * cos(ec), wdr = sin(ec)^2 */
  wdi = cos (ec);
  wdr = sin (ec);
  wdi *= wdr;
  wdr *= wdr;
  /* w1r = cos(2 * ec), w1i = sin(2 * ec) by the double-angle identities */
  w1r = 1 - 2 * wdr;
  w1i = 2 * wdi;
  ss = 2 * w1i;
  /* middle element: plain complex product */
  i = n >> 1;
  xr = a[i];
  xi = a[i + 1];
  yr = b[i];
  yi = b[i + 1];
  b[i] = xr * yr - xi * yi;
  b[i + 1] = xr * yi + xi * yr;
  for (;;)
    {
      /* process one chunk of pairs, from i - 2 down to i0 */
      i0 = i - 4 * RDFT_LOOP_DIV;
      if (i0 < 2)
        {
          i0 = 2;
        }
      for (j = i - 2; j >= i0; j -= 2)
        {
          k = n - j;
          /* advance the rotation-factor recurrence by one step */
          xr = wkr + ss * wdi;
          xi = wki + ss * (0.5 - wdr);
          wkr = wdr;
          wki = wdi;
          wdr = xr;
          wdi = xi;
          /* ---- transform CFFT data a[] into RFFT data ---- */
          xr = a[j] - a[k];
          xi = a[j + 1] + a[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          xr = a[j] - yr;
          xi = a[j + 1] - yi;
          yr = a[k] + yr;
          yi = a[k + 1] - yi;
          a[j] = xr;
          a[j + 1] = xi;
          a[k] = yr;
          a[k + 1] = yi;
          /* ---- cmul ---- */
          bjr = b[j] * xr - b[j + 1] * xi;
          bji = b[j] * xi + b[j + 1] * xr;
          bkr = b[k] * yr - b[k + 1] * yi;
          bki = b[k] * yi + b[k + 1] * yr;
          /* ---- transform RFFT data bxx into CFFT data ---- */
          xr = bjr - bkr;
          xi = bji + bki;
          yr = wkr * xr + wki * xi;
          yi = wkr * xi - wki * xr;
          b[j] = bjr - yr;
          b[j + 1] = bji - yi;
          b[k] = bkr + yr;
          b[k + 1] = bki - yi;
        }
      if (i0 == 2)
        {
          break;
        }
      /* recompute the rotation factors directly for the next chunk */
      wkr = 0.5 * sin (ec * i0);
      wki = 0.5 * cos (ec * i0);
      wdr = 0.5 - (wkr * w1r - wki * w1i);
      wdi = wkr * w1i + wki * w1r;
      wkr = 0.5 - wkr;
      i = i0;
    }
}
/*
 * Pointwise product b = a * b on transformed data, for every index
 * pair (j, k = n - j) and for the middle element i = n / 2.
 *
 * Both a[] and b[] are converted from CFFT form to RFFT form (a[] is
 * stored back in that form).  The product is written to b[] directly,
 * without converting it back to CFFT form.
 *
 * The rotation factors are advanced by a recurrence and recomputed
 * from sin/cos after every 2 * RDFT_LOOP_DIV pairs.
 */
void
mp_mul_rcmul_nt_out (int n, double *a, double *b)
{
  int i, i0, j, k;
  double ec, w1r, w1i, wkr, wki, wdr, wdi, ss;
  double xr, xi, yr, yi, ajr, aji, akr, aki;

  ec = 2 * M_PI_2 / n;
  wkr = 0;
  wki = 0;
  /* wdi = sin(ec) * cos(ec), wdr = sin(ec)^2 */
  wdi = cos (ec);
  wdr = sin (ec);
  wdi *= wdr;
  wdr *= wdr;
  /* w1r = cos(2 * ec), w1i = sin(2 * ec) by the double-angle identities */
  w1r = 1 - 2 * wdr;
  w1i = 2 * wdi;
  ss = 2 * w1i;
  /* middle element: plain complex product */
  i = n >> 1;
  xr = a[i];
  xi = a[i + 1];
  yr = b[i];
  yi = b[i + 1];
  b[i] = xr * yr - xi * yi;
  b[i + 1] = xr * yi + xi * yr;
  for (;;)
    {
      /* process one chunk of pairs, from i - 2 down to i0 */
      i0 = i - 4 * RDFT_LOOP_DIV;
      if (i0 < 2)
        {
          i0 = 2;
        }
      for (j = i - 2; j >= i0; j -= 2)
        {
          k = n - j;
          /* advance the rotation-factor recurrence by one step */
          xr = wkr + ss * wdi;
          xi = wki + ss * (0.5 - wdr);
          wkr = wdr;
          wki = wdi;
          wdr = xr;
          wdi = xi;
          /* ---- transform CFFT data a[] into RFFT data ---- */
          xr = a[j] - a[k];
          xi = a[j + 1] + a[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          ajr = a[j] - yr;
          aji = a[j + 1] - yi;
          akr = a[k] + yr;
          aki = a[k + 1] - yi;
          a[j] = ajr;
          a[j + 1] = aji;
          a[k] = akr;
          a[k + 1] = aki;
          /* ---- transform CFFT data b[] into RFFT data ---- */
          xr = b[j] - b[k];
          xi = b[j + 1] + b[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          xr = b[j] - yr;
          xi = b[j + 1] - yi;
          yr = b[k] + yr;
          yi = b[k + 1] - yi;
          /* ---- cmul ---- */
          b[j] = ajr * xr - aji * xi;
          b[j + 1] = ajr * xi + aji * xr;
          b[k] = akr * yr - aki * yi;
          b[k + 1] = akr * yi + aki * yr;
        }
      if (i0 == 2)
        {
          break;
        }
      /* recompute the rotation factors directly for the next chunk */
      wkr = 0.5 * sin (ec * i0);
      wki = 0.5 * cos (ec * i0);
      wdr = 0.5 - (wkr * w1r - wki * w1i);
      wdi = wkr * w1i + wki * w1r;
      wkr = 0.5 - wkr;
      i = i0;
    }
}
/*
 * Multiply-accumulate on transformed data: badd = badd + a * b, for
 * every index pair (j, k = n - j) and for the middle element
 * i = n / 2.
 *
 * b[] is converted from CFFT form to RFFT form on the fly (b[] itself
 * is not written); a[] is read as is.  The product is added to the
 * existing badd[] values and the sum is then converted back to CFFT
 * form into badd[].  Since the addition happens before that
 * conversion, badd[] is presumably expected to hold RFFT-form data on
 * entry -- confirm at callers.
 *
 * The rotation factors are advanced by a recurrence and recomputed
 * from sin/cos after every 2 * RDFT_LOOP_DIV pairs.
 */
void
mp_mul_rcmul_nt_in1_add (int n, double *a, double *b, double *badd)
{
  int i, i0, j, k;
  double ec, w1r, w1i, wkr, wki, wdr, wdi, ss;
  double xr, xi, yr, yi, bjr, bji, bkr, bki;

  ec = 2 * M_PI_2 / n;
  wkr = 0;
  wki = 0;
  /* wdi = sin(ec) * cos(ec), wdr = sin(ec)^2 */
  wdi = cos (ec);
  wdr = sin (ec);
  wdi *= wdr;
  wdr *= wdr;
  /* w1r = cos(2 * ec), w1i = sin(2 * ec) by the double-angle identities */
  w1r = 1 - 2 * wdr;
  w1i = 2 * wdi;
  ss = 2 * w1i;
  /* middle element: plain complex product, accumulated */
  i = n >> 1;
  xr = a[i];
  xi = a[i + 1];
  yr = b[i];
  yi = b[i + 1];
  badd[i] += xr * yr - xi * yi;
  badd[i + 1] += xr * yi + xi * yr;
  for (;;)
    {
      /* process one chunk of pairs, from i - 2 down to i0 */
      i0 = i - 4 * RDFT_LOOP_DIV;
      if (i0 < 2)
        {
          i0 = 2;
        }
      for (j = i - 2; j >= i0; j -= 2)
        {
          k = n - j;
          /* advance the rotation-factor recurrence by one step */
          xr = wkr + ss * wdi;
          xi = wki + ss * (0.5 - wdr);
          wkr = wdr;
          wki = wdi;
          wdr = xr;
          wdi = xi;
          /* ---- transform CFFT data b[] into RFFT data ---- */
          xr = b[j] - b[k];
          xi = b[j + 1] + b[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          xr = b[j] - yr;
          xi = b[j + 1] - yi;
          yr = b[k] + yr;
          yi = b[k + 1] - yi;
          /* ---- cmul + add ---- */
          bjr = badd[j] + (a[j] * xr - a[j + 1] * xi);
          bji = badd[j + 1] + (a[j] * xi + a[j + 1] * xr);
          bkr = badd[k] + (a[k] * yr - a[k + 1] * yi);
          bki = badd[k + 1] + (a[k] * yi + a[k + 1] * yr);
          /* ---- transform RFFT data bxx into CFFT data ---- */
          xr = bjr - bkr;
          xi = bji + bki;
          yr = wkr * xr + wki * xi;
          yi = wkr * xi - wki * xr;
          badd[j] = bjr - yr;
          badd[j + 1] = bji - yi;
          badd[k] = bkr + yr;
          badd[k + 1] = bki - yi;
        }
      if (i0 == 2)
        {
          break;
        }
      /* recompute the rotation factors directly for the next chunk */
      wkr = 0.5 * sin (ec * i0);
      wki = 0.5 * cos (ec * i0);
      wdr = 0.5 - (wkr * w1r - wki * w1i);
      wdi = wkr * w1i + wki * w1r;
      wkr = 0.5 - wkr;
      i = i0;
    }
}
/*
 * Pointwise square a = a * a on transformed data, for every index
 * pair (j, k = n - j) and for the middle element i = n / 2.
 *
 * For each pair a[] is converted from CFFT form to RFFT form, squared,
 * and the square is converted back to CFFT form into a[].
 *
 * The rotation factors are advanced by a recurrence and recomputed
 * from sin/cos after every 2 * RDFT_LOOP_DIV pairs.
 */
void
mp_mul_rcsqu (int n, double *a)
{
  int i, i0, j, k;
  double ec, w1r, w1i, wkr, wki, wdr, wdi, ss;
  double xr, xi, yr, yi, ajr, aji, akr, aki;

  ec = 2 * M_PI_2 / n;
  wkr = 0;
  wki = 0;
  /* wdi = sin(ec) * cos(ec), wdr = sin(ec)^2 */
  wdi = cos (ec);
  wdr = sin (ec);
  wdi *= wdr;
  wdr *= wdr;
  /* w1r = cos(2 * ec), w1i = sin(2 * ec) by the double-angle identities */
  w1r = 1 - 2 * wdr;
  w1i = 2 * wdi;
  ss = 2 * w1i;
  /* middle element: plain complex square */
  i = n >> 1;
  xr = a[i];
  xi = a[i + 1];
  a[i] = xr * xr - xi * xi;
  a[i + 1] = 2 * xr * xi;
  for (;;)
    {
      /* process one chunk of pairs, from i - 2 down to i0 */
      i0 = i - 4 * RDFT_LOOP_DIV;
      if (i0 < 2)
        {
          i0 = 2;
        }
      for (j = i - 2; j >= i0; j -= 2)
        {
          k = n - j;
          /* advance the rotation-factor recurrence by one step */
          xr = wkr + ss * wdi;
          xi = wki + ss * (0.5 - wdr);
          wkr = wdr;
          wki = wdi;
          wdr = xr;
          wdi = xi;
          /* ---- transform CFFT data a[] into RFFT data ---- */
          xr = a[j] - a[k];
          xi = a[j + 1] + a[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          xr = a[j] - yr;
          xi = a[j + 1] - yi;
          yr = a[k] + yr;
          yi = a[k + 1] - yi;
          /* ---- csqu ---- */
          ajr = xr * xr - xi * xi;
          aji = 2 * xr * xi;
          akr = yr * yr - yi * yi;
          aki = 2 * yr * yi;
          /* ---- transform RFFT data axx into CFFT data ---- */
          xr = ajr - akr;
          xi = aji + aki;
          yr = wkr * xr + wki * xi;
          yi = wkr * xi - wki * xr;
          a[j] = ajr - yr;
          a[j + 1] = aji - yi;
          a[k] = akr + yr;
          a[k + 1] = aki - yi;
        }
      if (i0 == 2)
        {
          break;
        }
      /* recompute the rotation factors directly for the next chunk */
      wkr = 0.5 * sin (ec * i0);
      wki = 0.5 * cos (ec * i0);
      wdr = 0.5 - (wkr * w1r - wki * w1i);
      wdi = wkr * w1i + wki * w1r;
      wkr = 0.5 - wkr;
      i = i0;
    }
}
/*
 * Pointwise square b = a * a on transformed data, for every index
 * pair (j, k = n - j) and for the middle element i = n / 2.
 *
 * For each pair a[] is converted from CFFT form to RFFT form and
 * stored back in that form (so it can be reused later); the square is
 * converted back to CFFT form and written to b[].
 *
 * The rotation factors are advanced by a recurrence and recomputed
 * from sin/cos after every 2 * RDFT_LOOP_DIV pairs.
 */
void
mp_mul_rcsqu_save (int n, double *a, double *b)
{
  int i, i0, j, k;
  double ec, w1r, w1i, wkr, wki, wdr, wdi, ss;
  double xr, xi, yr, yi, ajr, aji, akr, aki;

  ec = 2 * M_PI_2 / n;
  wkr = 0;
  wki = 0;
  /* wdi = sin(ec) * cos(ec), wdr = sin(ec)^2 */
  wdi = cos (ec);
  wdr = sin (ec);
  wdi *= wdr;
  wdr *= wdr;
  /* w1r = cos(2 * ec), w1i = sin(2 * ec) by the double-angle identities */
  w1r = 1 - 2 * wdr;
  w1i = 2 * wdi;
  ss = 2 * w1i;
  /* middle element: plain complex square, a[i] itself is left as is */
  i = n >> 1;
  xr = a[i];
  xi = a[i + 1];
  b[i] = xr * xr - xi * xi;
  b[i + 1] = 2 * xr * xi;
  for (;;)
    {
      /* process one chunk of pairs, from i - 2 down to i0 */
      i0 = i - 4 * RDFT_LOOP_DIV;
      if (i0 < 2)
        {
          i0 = 2;
        }
      for (j = i - 2; j >= i0; j -= 2)
        {
          k = n - j;
          /* advance the rotation-factor recurrence by one step */
          xr = wkr + ss * wdi;
          xi = wki + ss * (0.5 - wdr);
          wkr = wdr;
          wki = wdi;
          wdr = xr;
          wdi = xi;
          /* ---- transform CFFT data a[] into RFFT data ---- */
          xr = a[j] - a[k];
          xi = a[j + 1] + a[k + 1];
          yr = wkr * xr - wki * xi;
          yi = wkr * xi + wki * xr;
          xr = a[j] - yr;
          xi = a[j + 1] - yi;
          yr = a[k] + yr;
          yi = a[k + 1] - yi;
          a[j] = xr;
          a[j + 1] = xi;
          a[k] = yr;
          a[k + 1] = yi;
          /* ---- csqu ---- */
          ajr = xr * xr - xi * xi;
          aji = 2 * xr * xi;
          akr = yr * yr - yi * yi;
          aki = 2 * yr * yi;
          /* ---- transform RFFT data axx into CFFT data ---- */
          xr = ajr - akr;
          xi = aji + aki;
          yr = wkr * xr + wki * xi;
          yi = wkr * xi - wki * xr;
          b[j] = ajr - yr;
          b[j + 1] = aji - yi;
          b[k] = akr + yr;
          b[k + 1] = aki - yi;
        }
      if (i0 == 2)
        {
          break;
        }
      /* recompute the rotation factors directly for the next chunk */
      wkr = 0.5 * sin (ec * i0);
      wki = 0.5 * cos (ec * i0);
      wdr = 0.5 - (wkr * w1r - wki * w1i);
      wdi = wkr * w1i + wki * w1r;
      wkr = 0.5 - wkr;
      i = i0;
    }
}
/*
 * Pointwise square a = a * a on transformed data, for every index
 * pair (j, k = n - j) and for the middle element i = n / 2.
 *
 * a[] is squared as is, with no CFFT-to-RFFT conversion on input
 * (presumably it is already in RFFT form -- confirm at callers); the
 * square is converted back to CFFT form into a[].
 *
 * The rotation factors are advanced by a recurrence and recomputed
 * from sin/cos after every 2 * RDFT_LOOP_DIV pairs.
 */
void
mp_mul_rcsqu_nt_in (int n, double *a)
{
  int i, i0, j, k;
  double ec, w1r, w1i, wkr, wki, wdr, wdi, ss;
  double xr, xi, yr, yi, ajr, aji, akr, aki;

  ec = 2 * M_PI_2 / n;
  wkr = 0;
  wki = 0;
  /* wdi = sin(ec) * cos(ec), wdr = sin(ec)^2 */
  wdi = cos (ec);
  wdr = sin (ec);
  wdi *= wdr;
  wdr *= wdr;
  /* w1r = cos(2 * ec), w1i = sin(2 * ec) by the double-angle identities */
  w1r = 1 - 2 * wdr;
  w1i = 2 * wdi;
  ss = 2 * w1i;
  /* middle element: plain complex square */
  i = n >> 1;
  xr = a[i];
  xi = a[i + 1];
  a[i] = xr * xr - xi * xi;
  a[i + 1] = 2 * xr * xi;
  for (;;)
    {
      /* process one chunk of pairs, from i - 2 down to i0 */
      i0 = i - 4 * RDFT_LOOP_DIV;
      if (i0 < 2)
        {
          i0 = 2;
        }
      for (j = i - 2; j >= i0; j -= 2)
        {
          k = n - j;
          /* advance the rotation-factor recurrence by one step */
          xr = wkr + ss * wdi;
          xi = wki + ss * (0.5 - wdr);
          wkr = wdr;
          wki = wdi;
          wdr = xr;
          wdi = xi;
          /* ---- csqu ---- */
          xr = a[j];
          xi = a[j + 1];
          yr = a[k];
          yi = a[k + 1];
          ajr = xr * xr - xi * xi;
          aji = 2 * xr * xi;
          akr = yr * yr - yi * yi;
          aki = 2 * yr * yi;
          /* ---- transform RFFT data axx into CFFT data ---- */
          xr = ajr - akr;
          xi = aji + aki;
          yr = wkr * xr + wki * xi;
          yi = wkr * xi - wki * xr;
          a[j] = ajr - yr;
          a[j + 1] = aji - yi;
          a[k] = akr + yr;
          a[k + 1] = aki - yi;
        }
      if (i0 == 2)
        {
          break;
        }
      /* recompute the rotation factors directly for the next chunk */
      wkr = 0.5 * sin (ec * i0);
      wki = 0.5 * cos (ec * i0);
      wdr = 0.5 - (wkr * w1r - wki * w1i);
      wdi = wkr * w1i + wki * w1r;
      wkr = 0.5 - wkr;
      i = i0;
    }
}
| { 2276 | n_nwt = nfft_nwt + 2; 2277 | if (n_nwt > n) 2278 | { 2279 | n_nwt = n; 2280 | } 2281 | prc = mp_inv_newton (n_nwt, radix, in, out, 2282 | tmp1, tmp2, nfft_nwt, tmp1fft, tmp2fft); 2283 | #ifdef DEBUG 2284 | printf ("n=%d, nfft=%d, prc=%d\n", n_nwt, nfft_nwt, prc); 2285 | #endif 2286 | if (thr * nfft_nwt >= nfft) 2287 | { 2288 | thr = 0; 2289 | if (2 * prc <= n_nwt - 2) 2290 | { 2291 | nfft_nwt >>= 1; 2292 | } 2293 | } 2294 | else 2295 | { 2296 | if (3 * prc < n_nwt - 2) 2297 | { 2298 | nfft_nwt >>= 1; 2299 | } 2300 | } 2301 | nfft_nwt <<= 1; 2302 | } 2303 | while (nfft_nwt <= nfft); 2304 | return 0; 2305 | } 2306 | 2307 | 2308 | int 2309 | mp_sqrt (int n, int radix, int in[], int out[], 2310 | int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[]) 2311 | { 2312 | void mp_load_0 (int n, int radix, int out[]); 2313 | int mp_get_nfft_init (int radix, int nfft_max); 2314 | void mp_sqrt_init (int n, int radix, int in[], int out[], int out_rev[]); 2315 | int mp_sqrt_newton (int n, int radix, int in[], int inout[], 2316 | int inout_rev[], int tmp[], int nfft, double tmp1fft[], 2317 | double tmp2fft[], int *n_tmp1fft); 2318 | int n_nwt, nfft_nwt, thr, prc, n_tmp1fft; 2319 | 2320 | if (in[0] < 0) 2321 | { 2322 | return -1; 2323 | } 2324 | else if (in[0] == 0) 2325 | { 2326 | mp_load_0 (n, radix, out); 2327 | return 0; 2328 | } 2329 | nfft_nwt = mp_get_nfft_init (radix, nfft); 2330 | n_nwt = nfft_nwt + 2; 2331 | if (n_nwt > n) 2332 | { 2333 | n_nwt = n; 2334 | } 2335 | mp_sqrt_init (n_nwt, radix, in, out, tmp1); 2336 | n_tmp1fft = 0; 2337 | thr = 8; 2338 | do 2339 | { 2340 | n_nwt = nfft_nwt + 2; 2341 | if (n_nwt > n) 2342 | { 2343 | n_nwt = n; 2344 | } 2345 | prc = mp_sqrt_newton (n_nwt, radix, in, out, 2346 | tmp1, tmp2, nfft_nwt, tmp1fft, tmp2fft, 2347 | &n_tmp1fft); 2348 | #ifdef DEBUG 2349 | printf ("n=%d, nfft=%d, prc=%d\n", n_nwt, nfft_nwt, prc); 2350 | #endif 2351 | if (thr * nfft_nwt >= nfft) 2352 | { 2353 | thr = 0; 2354 | if (2 * prc <= 
n_nwt - 2) 2355 | { 2356 | nfft_nwt >>= 1; 2357 | } 2358 | } 2359 | else 2360 | { 2361 | if (3 * prc < n_nwt - 2) 2362 | { 2363 | nfft_nwt >>= 1; 2364 | } 2365 | } 2366 | nfft_nwt <<= 1; 2367 | } 2368 | while (nfft_nwt <= nfft); 2369 | return 0; 2370 | } 2371 | 2372 | 2373 | int 2374 | mp_invisqrt (int n, int radix, int in, int out[], 2375 | int tmp1[], int tmp2[], int nfft, 2376 | double tmp1fft[], double tmp2fft[]) 2377 | { 2378 | int mp_get_nfft_init (int radix, int nfft_max); 2379 | void mp_invisqrt_init (int n, int radix, int in, int out[]); 2380 | int mp_invisqrt_newton (int n, int radix, int in, int inout[], 2381 | int tmp1[], int tmp2[], int nfft, double tmp1fft[], 2382 | double tmp2fft[]); 2383 | int n_nwt, nfft_nwt, thr, prc; 2384 | 2385 | if (in <= 0) 2386 | { 2387 | return -1; 2388 | } 2389 | nfft_nwt = mp_get_nfft_init (radix, nfft); 2390 | n_nwt = nfft_nwt + 2; 2391 | if (n_nwt > n) 2392 | { 2393 | n_nwt = n; 2394 | } 2395 | mp_invisqrt_init (n_nwt, radix, in, out); 2396 | thr = 8; 2397 | do 2398 | { 2399 | n_nwt = nfft_nwt + 2; 2400 | if (n_nwt > n) 2401 | { 2402 | n_nwt = n; 2403 | } 2404 | prc = mp_invisqrt_newton (n_nwt, radix, in, out, 2405 | tmp1, tmp2, nfft_nwt, tmp1fft, tmp2fft); 2406 | #ifdef DEBUG 2407 | printf ("n=%d, nfft=%d, prc=%d\n", n_nwt, nfft_nwt, prc); 2408 | #endif 2409 | if (thr * nfft_nwt >= nfft) 2410 | { 2411 | thr = 0; 2412 | if (2 * prc <= n_nwt - 2) 2413 | { 2414 | nfft_nwt >>= 1; 2415 | } 2416 | } 2417 | else 2418 | { 2419 | if (3 * prc < n_nwt - 2) 2420 | { 2421 | nfft_nwt >>= 1; 2422 | } 2423 | } 2424 | nfft_nwt <<= 1; 2425 | } 2426 | while (nfft_nwt <= nfft); 2427 | return 0; 2428 | } 2429 | 2430 | 2431 | /* -------- mp_inv child routines -------- */ 2432 | 2433 | 2434 | int 2435 | mp_get_nfft_init (int radix, int nfft_max) 2436 | { 2437 | int nfft_init; 2438 | double r; 2439 | 2440 | r = radix; 2441 | nfft_init = 1; 2442 | do 2443 | { 2444 | r *= r; 2445 | nfft_init <<= 1; 2446 | } 2447 | while (DBL_EPSILON * r < 1 && 
nfft_init < nfft_max); 2448 | return nfft_init; 2449 | } 2450 | 2451 | 2452 | void 2453 | mp_inv_init (int n, int radix, int in[], int out[]) 2454 | { 2455 | void mp_unexp_d2mp (int n, int radix, double din, DGTINT out[]); 2456 | double mp_unexp_mp2d (int n, int radix, DGTINT in[]); 2457 | int outexp; 2458 | double din; 2459 | 2460 | out[0] = in[0]; 2461 | outexp = -in[1]; 2462 | din = 1.0 / mp_unexp_mp2d (n, radix, (DGTINT *) & in[2]); 2463 | while (din < 1) 2464 | { 2465 | din *= radix; 2466 | outexp--; 2467 | } 2468 | out[1] = outexp; 2469 | mp_unexp_d2mp (n, radix, din, (DGTINT *) & out[2]); 2470 | } 2471 | 2472 | 2473 | void 2474 | mp_sqrt_init (int n, int radix, int in[], int out[], int out_rev[]) 2475 | { 2476 | void mp_unexp_d2mp (int n, int radix, double din, DGTINT out[]); 2477 | double mp_unexp_mp2d (int n, int radix, DGTINT in[]); 2478 | int outexp; 2479 | double din; 2480 | 2481 | out[0] = 1; 2482 | out_rev[0] = 1; 2483 | outexp = in[1]; 2484 | din = mp_unexp_mp2d (n, radix, (DGTINT *) & in[2]); 2485 | if (outexp % 2 != 0) 2486 | { 2487 | din *= radix; 2488 | outexp--; 2489 | } 2490 | outexp /= 2; 2491 | din = sqrt (din); 2492 | if (din < 1) 2493 | { 2494 | din *= radix; 2495 | outexp--; 2496 | } 2497 | out[1] = outexp; 2498 | mp_unexp_d2mp (n, radix, din, (DGTINT *) & out[2]); 2499 | outexp = -outexp; 2500 | din = 1.0 / din; 2501 | while (din < 1) 2502 | { 2503 | din *= radix; 2504 | outexp--; 2505 | } 2506 | out_rev[1] = outexp; 2507 | mp_unexp_d2mp (n, radix, din, (DGTINT *) & out_rev[2]); 2508 | } 2509 | 2510 | 2511 | void 2512 | mp_invisqrt_init (int n, int radix, int in, int out[]) 2513 | { 2514 | void mp_unexp_d2mp (int n, int radix, double din, DGTINT out[]); 2515 | int outexp; 2516 | double dout; 2517 | 2518 | out[0] = 1; 2519 | outexp = 0; 2520 | dout = sqrt (1.0 / in); 2521 | while (dout < 1) 2522 | { 2523 | dout *= radix; 2524 | outexp--; 2525 | } 2526 | out[1] = outexp; 2527 | mp_unexp_d2mp (n, radix, dout, (DGTINT *) & out[2]); 2528 | } 
2529 | 2530 | /* mp_unexp_d2mp: writes n radix digits of din into out, most significant first; no exponent is handled. Each step takes the integer part of din as the digit and multiplies the remaining fraction by radix. If the integer part reaches radix, the digit is clamped to radix - 1 and din is reset to radix, which makes every following digit clamp to radix - 1 as well. */ 2531 | void 2532 | mp_unexp_d2mp (int n, int radix, double din, DGTINT out[]) 2533 | { 2534 | int j, x; 2535 | 2536 | for (j = 0; j < n; j++) 2537 | { 2538 | x = (int) din; 2539 | if (x >= radix) 2540 | { 2541 | x = radix - 1; 2542 | din = radix; 2543 | } 2544 | din = radix * (din - x); 2545 | out[j] = (DGTINT) x; 2546 | } 2547 | } 2548 | 2549 | /* mp_unexp_mp2d: the reverse conversion. Returns in[0] + in[1] / radix + in[2] / radix^2 + ... over n digits, evaluated by a Horner loop that runs from the last digit to the first. */ 2550 | double 2551 | mp_unexp_mp2d (int n, int radix, DGTINT in[]) 2552 | { 2553 | int j; 2554 | double d1_radix, dout; 2555 | 2556 | d1_radix = 1.0 / radix; 2557 | dout = 0; 2558 | for (j = n - 1; j >= 0; j--) 2559 | { 2560 | dout = d1_radix * dout + in[j]; 2561 | } 2562 | return dout; 2563 | } 2564 | 2565 | /* mp_inv_newton: one Newton step for the reciprocal. Following the step comments in the body it forms the residual tmp2 = 1 - inout * in and then updates inout += tmp2 * inout. Returns prc, the negated exponent word of the residual, or nfft + 1 when the residual has sign 0. tmp1 and tmp2 are integer scratch, tmp1fft and tmp2fft FFT scratch. NOTE(review): tmp1fft is passed to mp_mulh as in1fft and then to both mp_mulh_use_in1fft calls, so it presumably carries the transform of inout between them; keep the call order. */ 2566 | int 2567 | mp_inv_newton (int n, int radix, int in[], int inout[], 2568 | int tmp1[], int tmp2[], int nfft, double tmp1fft[], 2569 | double tmp2fft[]) 2570 | { 2571 | void mp_load_1 (int n, int radix, int out[]); 2572 | void mp_round (int n, int radix, int m, int inout[]); 2573 | void mp_add (int n, int radix, int in1[], int in2[], int out[]); 2574 | void mp_sub (int n, int radix, int in1[], int in2[], int out[]); 2575 | void mp_mulh (int n, int radix, int in1[], int in2[], int out[], 2576 | int nfft, double in1fft[], double outfft[]); 2577 | void mp_mulh_use_in1fft (int n, int radix, double in1fft[], 2578 | int shift, int in2[], int out[], int nfft, 2579 | double outfft[]); 2580 | int n_h, shift, prc; 2581 | 2582 | /* n_h = max (n / 2 + 1, n - shift) is the shorter length used for the steps marked half precision */ shift = (nfft >> 1) + 1; 2583 | n_h = n / 2 + 1; 2584 | if (n_h < n - shift) 2585 | { 2586 | n_h = n - shift; 2587 | } 2588 | /* ---- tmp1 = inout * (upper) in (half to normal precision) ---- */ 2589 | mp_round (n, radix, shift, inout); 2590 | mp_mulh (n, radix, inout, in, tmp1, nfft, tmp1fft, tmp2fft); 2591 | /* ---- tmp2 = 1 - tmp1 ---- */ 2592 | mp_load_1 (n, radix, tmp2); 2593 | mp_sub (n, radix, tmp2, tmp1, tmp2); 2594 | /* ---- tmp2 -= inout * (lower) in (half precision) ---- */ 2595 | mp_mulh_use_in1fft (n, radix, tmp1fft, shift, in, tmp1, nfft, tmp2fft); 2596 | mp_sub (n_h,
radix, tmp2, tmp1, tmp2); 2597 | /* ---- get precision ---- */ 2598 | prc = -tmp2[1]; 2599 | if (tmp2[0] == 0) 2600 | { 2601 | prc = nfft + 1; 2602 | } 2603 | /* ---- tmp2 *= inout (half precision) ---- */ 2604 | mp_mulh_use_in1fft (n_h, radix, tmp1fft, 0, tmp2, tmp2, nfft, tmp2fft); 2605 | /* ---- inout += tmp2 ---- */ 2606 | mp_add (n, radix, inout, tmp2, inout); 2607 | return prc; 2608 | } 2609 | 2610 | /* mp_sqrt_newton: one coupled Newton step for the square root. inout is the root estimate and inout_rev the estimate of its reciprocal; per the step comments, inout_rev is refined first and then inout += (in - inout^2) * inout_rev / 2. Returns prc, derived from the exponent gap between in and the residual in - inout^2, or nfft + 1 when the residual has sign 0. *n_tmp1fft is compared with nfft_h on entry and set to nfft on exit, so it matches only when the previous call ran at half the current nfft. NOTE(review): in that case tmp1fft is reused instead of recomputed, so it presumably still holds the transform of inout_rev from the previous call. */ 2611 | int 2612 | mp_sqrt_newton (int n, int radix, int in[], int inout[], 2613 | int inout_rev[], int tmp[], int nfft, double tmp1fft[], 2614 | double tmp2fft[], int *n_tmp1fft) 2615 | { 2616 | void mp_round (int n, int radix, int m, int inout[]); 2617 | void mp_add (int n, int radix, int in1[], int in2[], int out[]); 2618 | void mp_sub (int n, int radix, int in1[], int in2[], int out[]); 2619 | void mp_idiv_2 (int n, int radix, int in[], int out[]); 2620 | void mp_mulh (int n, int radix, int in1[], int in2[], int out[], 2621 | int nfft, double in1fft[], double outfft[]); 2622 | void mp_squh (int n, int radix, int in[], int out[], 2623 | int nfft, double outfft[]); 2624 | void mp_squh_use_in1fft (int n, int radix, double inoutfft[], int out[], 2625 | int nfft); 2626 | int n_h, nfft_h, shift, prc; 2627 | 2628 | /* shift is taken from the unclamped half size; nfft_h itself is then forced to at least 2 */ nfft_h = nfft >> 1; 2629 | shift = nfft_h + 1; 2630 | if (nfft_h < 2) 2631 | { 2632 | nfft_h = 2; 2633 | } 2634 | n_h = n / 2 + 1; 2635 | if (n_h < n - shift) 2636 | { 2637 | n_h = n - shift; 2638 | } 2639 | /* ---- tmp = inout_rev^2 (1/4 to half precision) ---- */ 2640 | mp_round (n_h, radix, (nfft_h >> 1) + 1, inout_rev); 2641 | if (*n_tmp1fft != nfft_h) 2642 | { 2643 | mp_squh (n_h, radix, inout_rev, tmp, nfft_h, tmp1fft); 2644 | } 2645 | else 2646 | { 2647 | mp_squh_use_in1fft (n_h, radix, tmp1fft, tmp, nfft_h); 2648 | } 2649 | /* ---- tmp = inout_rev - inout * tmp (half precision) ---- */ 2650 | mp_round (n, radix, shift, inout); 2651 | mp_mulh (n_h, radix, inout, tmp, tmp, nfft, tmp1fft, tmp2fft); 2652 | mp_sub (n_h, radix, inout_rev, tmp, tmp); 2653
| /* ---- inout_rev += tmp ---- */ 2654 | mp_add (n_h, radix, inout_rev, tmp, inout_rev); 2655 | /* ---- tmp = in - inout^2 (half to normal precision) ---- */ 2656 | mp_squh_use_in1fft (n, radix, tmp1fft, tmp, nfft); 2657 | mp_sub (n, radix, in, tmp, tmp); 2658 | /* ---- get precision ---- */ 2659 | prc = in[1] - tmp[1]; 2660 | /* one more digit is credited when the leading digit of in exceeds the leading digit of the residual */ if (((DGTINT *) & in[2])[0] > ((DGTINT *) & tmp[2])[0]) 2661 | { 2662 | prc++; 2663 | } 2664 | if (tmp[0] == 0) 2665 | { 2666 | prc = nfft + 1; 2667 | } 2668 | /* ---- tmp = tmp * inout_rev / 2 (half precision) ---- */ 2669 | mp_round (n_h, radix, shift, inout_rev); 2670 | mp_mulh (n_h, radix, inout_rev, tmp, tmp, nfft, tmp1fft, tmp2fft); 2671 | /* record the FFT size of this call for the comparison made at the top of the next call */ *n_tmp1fft = nfft; 2672 | mp_idiv_2 (n_h, radix, tmp, tmp); 2673 | /* ---- inout += tmp ---- */ 2674 | mp_add (n, radix, inout, tmp, inout); 2675 | return prc; 2676 | } 2677 | 2678 | /* mp_invisqrt_newton: one Newton step for 1/sqrt(in) with a plain int in. Following the step comments it forms the residual tmp2 = 1 - in * inout^2 and then updates inout += tmp2 * inout / 2. Returns prc, the negated exponent word of the residual, or nfft + 1 when the residual has sign 0. NOTE(review): tmp1fft is handed to mp_squh_save_infft as infft and later to mp_mulh_use_in1fft, so it presumably carries the transform of inout between the two; keep the call order. */ 2679 | int 2680 | mp_invisqrt_newton (int n, int radix, int in, int inout[], 2681 | int tmp1[], int tmp2[], int nfft, double tmp1fft[], 2682 | double tmp2fft[]) 2683 | { 2684 | void mp_load_1 (int n, int radix, int out[]); 2685 | void mp_round (int n, int radix, int m, int inout[]); 2686 | void mp_add (int n, int radix, int in1[], int in2[], int out[]); 2687 | void mp_sub (int n, int radix, int in1[], int in2[], int out[]); 2688 | void mp_imul (int n, int radix, int in1[], int in2, int out[]); 2689 | void mp_idiv_2 (int n, int radix, int in[], int out[]); 2690 | void mp_squh_save_infft (int n, int radix, int in[], int out[], 2691 | int nfft, double infft[], double outfft[]); 2692 | void mp_mulh_use_in1fft (int n, int radix, double in1fft[], 2693 | int shift, int in2[], int out[], int nfft, 2694 | double outfft[]); 2695 | int n_h, shift, prc; 2696 | 2697 | shift = (nfft >> 1) + 1; 2698 | n_h = n / 2 + 1; 2699 | if (n_h < n - shift) 2700 | { 2701 | n_h = n - shift; 2702 | } 2703 | /* ---- tmp1 = in * inout^2 (half to normal precision) ---- */ 2704 | mp_round (n, radix, shift, inout); 2705 | mp_squh_save_infft (n, radix,
inout, tmp1, nfft, tmp1fft, tmp2fft); 2706 | mp_imul (n, radix, tmp1, in, tmp1); 2707 | /* ---- tmp2 = 1 - tmp1 ---- */ 2708 | mp_load_1 (n, radix, tmp2); 2709 | mp_sub (n, radix, tmp2, tmp1, tmp2); 2710 | /* ---- get precision ---- */ 2711 | prc = -tmp2[1]; 2712 | if (tmp2[0] == 0) 2713 | { 2714 | prc = nfft + 1; 2715 | } 2716 | /* ---- tmp2 *= inout / 2 (half precision) ---- */ 2717 | mp_mulh_use_in1fft (n_h, radix, tmp1fft, 0, tmp2, tmp2, nfft, tmp2fft); 2718 | mp_idiv_2 (n_h, radix, tmp2, tmp2); 2719 | /* ---- inout += tmp2 ---- */ 2720 | mp_add (n, radix, inout, tmp2, inout); 2721 | return prc; 2722 | } 2723 | 2724 | 2725 | /* -------- mp_io routines -------- */ 2726 | 2727 | /* mp_sprintf: formats n digits of in as decimal text of the form d.ddd...e<exp> in out, preceded by a minus sign when in[0] < 0; the closing sprintf call NUL-terminates the string. log10_radix is the number of decimal characters produced per stored digit. Nothing bounds the writes: for n >= 1 the text before the exponent is at most 3 + n * log10_radix characters, so the caller must supply room for that plus the exponent and the terminator. */ 2728 | void 2729 | mp_sprintf (int n, int log10_radix, int in[], char out[]) 2730 | { 2731 | int j, k, x, y, outexp, shift; 2732 | DGTINT *inr; 2733 | 2734 | /* inr is biased by -2 so that inr[2] is the first digit and inr[n + 1] the last */ inr = ((DGTINT *) & in[2]) - 2; 2735 | if (in[0] < 0) 2736 | { 2737 | *out++ = '-'; 2738 | } 2739 | /* spread the first digit over out[1 .. log10_radix]; shift ends as the position of its most significant nonzero decimal, or log10_radix if the digit is 0 */ x = inr[2]; 2740 | shift = log10_radix; 2741 | for (k = log10_radix; k > 0; k--) 2742 | { 2743 | y = x % 10; 2744 | x /= 10; 2745 | out[k] = '0' + y; 2746 | if (y != 0) 2747 | { 2748 | shift = k; 2749 | } 2750 | } 2751 | /* move that leading decimal to out[0], place the point, and slide the rest of the first digit down behind it */ out[0] = out[shift]; 2752 | out[1] = '.'; 2753 | for (k = 1; k <= log10_radix - shift; k++) 2754 | { 2755 | out[k + 1] = out[k + shift]; 2756 | } 2757 | outexp = log10_radix - shift; 2758 | out += outexp + 2; 2759 | /* every remaining digit becomes exactly log10_radix decimal characters, zero padded */ for (j = 3; j <= n + 1; j++) 2760 | { 2761 | x = inr[j]; 2762 | for (k = log10_radix - 1; k >= 0; k--) 2763 | { 2764 | y = x % 10; 2765 | x /= 10; 2766 | out[k] = '0' + y; 2767 | } 2768 | out += log10_radix; 2769 | } 2770 | *out++ = 'e'; 2771 | /* decimal exponent = decimals that followed the leading one in the first digit + log10_radix * stored exponent */ outexp += log10_radix * in[1]; 2772 | sprintf (out, "%d", outexp); 2773 | } 2774 | 2775 | /* mp_sscanf: parses decimal text from in into out with n digits. It skips leading spaces, an optional sign and leading zeros, tolerates spaces between characters, accepts an optional decimal point, and reads an exponent introduced by e, E, d or D. out[0] receives +1 or -1, out[1] the exponent in units of log10_radix decimals, and the digits are stored from the address of out[2]. When the first stored digit is 0 both out[0] and out[1] are set to 0. */ 2776 | void 2777 | mp_sscanf (int n, int log10_radix, char in[], int out[]) 2778 | { 2779 | char *s; 2780 | int j, x, outexp, outexp_mod; 2781 | DGTINT *outr; 2782 | 2783 | /* outr is biased by -2 so that outr[2] is the first digit and outr[n + 1] the last */ outr = ((DGTINT *) & out[2]) - 2; 2784 | while (*in == ' ') 2785 | { 2786 | in++; 2787 | } 2788 | out[0] =
1; 2789 | if (*in == '-') 2790 | { 2791 | out[0] = -1; 2792 | in++; 2793 | } 2794 | else if (*in == '+') 2795 | { 2796 | in++; 2797 | } 2798 | while (*in == ' ' || *in == '0') 2799 | { 2800 | in++; 2801 | } 2802 | /* look ahead for an exponent marker and read the integer after it; the exponent stays 0 when none is found or it cannot be read */ outexp = 0; 2803 | for (s = in; *s != '\0'; s++) 2804 | { 2805 | if (*s == 'e' || *s == 'E' || *s == 'd' || *s == 'D') 2806 | { 2807 | if (sscanf (++s, "%d", &outexp) != 1) 2808 | { 2809 | outexp = 0; 2810 | } 2811 | break; 2812 | } 2813 | } 2814 | /* text starts at the point: the point itself and each zero after it lower the exponent by one, and in ends on the first significant character */ if (*in == '.') 2815 | { 2816 | do 2817 | { 2818 | outexp--; 2819 | while (*++in == ' '); 2820 | } 2821 | while (*in == '0' && *in != '\0'); 2822 | } 2823 | else if (*in != '\0') 2824 | { 2825 | /* otherwise each integer-part decimal after the first raises the exponent by one */ s = in; 2826 | while (*++s == ' '); 2827 | while (*s >= '0' && *s <= '9' && *s != '\0') 2828 | { 2829 | outexp++; 2830 | while (*++s == ' '); 2831 | } 2832 | } 2833 | /* floor-divide the decimal exponent: x is the stored exponent and outexp_mod, in 0 .. log10_radix - 1, is one less than the number of decimals that go into the first digit */ x = outexp / log10_radix; 2834 | outexp_mod = outexp - log10_radix * x; 2835 | if (outexp_mod < 0) 2836 | { 2837 | x--; 2838 | outexp_mod += log10_radix; 2839 | } 2840 | out[1] = x; 2841 | x = 0; 2842 | j = 2; 2843 | /* accumulate decimals into x, skipping points and spaces and stopping at any other character; a digit is stored each time outexp_mod runs out, until n digits are filled */ for (s = in; *s != '\0'; s++) 2844 | { 2845 | if (*s == '.' || *s == ' ') 2846 | { 2847 | continue; 2848 | } 2849 | if (*s < '0' || *s > '9') 2850 | { 2851 | break; 2852 | } 2853 | x = 10 * x + (*s - '0'); 2854 | if (--outexp_mod < 0) 2855 | { 2856 | if (j > n + 1) 2857 | { 2858 | break; 2859 | } 2860 | outr[j++] = (DGTINT) x; 2861 | x = 0; 2862 | outexp_mod = log10_radix - 1; 2863 | } 2864 | } 2865 | /* pad a partly filled digit with trailing decimal zeros */ while (outexp_mod-- >= 0) 2866 | { 2867 | x *= 10; 2868 | } 2869 | /* store the pending digit, then zero-fill whatever remains */ while (j <= n + 1) 2870 | { 2871 | outr[j++] = (DGTINT) x; 2872 | x = 0; 2873 | } 2874 | if (outr[2] == 0) 2875 | { 2876 | out[0] = 0; 2877 | out[1] = 0; 2878 | } 2879 | } 2880 | --------------------------------------------------------------------------------