├── .github
    └── workflows
    │   ├── files
    │       └── ql.lisp
    │   └── test.yml
├── .gitignore
├── LICENSE.txt
├── Makefile
├── README.md
├── REFERENCES.md
├── hypergeometrica-manager.asd
├── hypergeometrica-worker.asd
├── hypergeometrica.asd
├── logs
    ├── 12aug20-888.txt
    ├── 13aug20-999.txt
    ├── 16may21-888.txt
    ├── 16may21-999.txt
    ├── 24apr21-999-ram.txt
    ├── 8sept19-999-disk.txt
    └── 8sept19-999-ram.txt
├── src-manager
    ├── main.lisp
    └── package.lisp
├── src-worker
    ├── main.lisp
    └── package.lisp
├── src
    ├── bbp.lisp
    ├── binary-splitting.lisp
    ├── config.lisp
    ├── digit.lisp
    ├── disk-vec.lisp
    ├── disk.lisp
    ├── divrem.lisp
    ├── fft-multiply.lisp
    ├── fixed-width-arithmetic.lisp
    ├── logging.lisp
    ├── math-utilities.lisp
    ├── mmap.lisp
    ├── modular-arithmetic.lisp
    ├── moduli.lisp
    ├── mpd-reciprocal.lisp
    ├── mpd-sqrt.lisp
    ├── mpd.lisp
    ├── mpz-protocol.lisp
    ├── mpz-ram.lisp
    ├── mpz-string.lisp
    ├── multiply.lisp
    ├── ntt-multiply.lisp
    ├── number-theoretic-transform.lisp
    ├── package.lisp
    ├── pi.lisp
    ├── ram-vec.lisp
    ├── sbcl-intrinsics-ppc64el.lisp
    ├── sbcl-intrinsics-x86-64.lisp
    ├── sbcl-intrinsics.lisp
    ├── solinas.lisp
    ├── strandh-elster-reversal.lisp
    ├── timing-utilities.lisp
    ├── vec.lisp
    └── xxx.lisp
└── tests
    ├── arithmetic.lisp
    ├── debug
        └── debug-routines.lisp
    ├── divrem.lisp
    ├── moduli.lisp
    ├── mpz.lisp
    ├── multiplication.lisp
    ├── ntt.lisp
    ├── package.lisp
    ├── pi.lisp
    ├── primes.lisp
    ├── suite.lisp
    ├── sundries.lisp
    ├── vec.lisp
    └── write-number.lisp


/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Linux
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build:
 7 | 
 8 |     runs-on: ubuntu-latest
 9 | 
10 |     env:
11 |        LISP: sbcl-bin
12 | 
13 |     steps:
14 |     - uses: actions/checkout@v1
15 |     - uses: 40ants/setup-lisp@v2
16 |     - name: update and install system dependencies
17 |       run: |
18 |         sudo apt-get -qq update | true
19 |         sudo apt-get -qq install libmpfr6 libmpfr-dev
20 |     - uses: 40ants/run-tests@v2
21 |       with:
22 |         asdf-system: hypergeometrica
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.fasl
2 | .DS_Store
3 | hypergeometrica-worker
4 | hypergeometrica-manager
5 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2019, Robert Smith
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: all
 2 | all: hypergeometrica-manager hypergeometrica-worker
 3 | 
 4 | # FIXME: this seems to depend on quicklisp being loaded, probably so
 5 | # ASDF can know about the paths.
 6 | hypergeometrica-manager:
 7 | 	sbcl --non-interactive --eval '(asdf:make "hypergeometrica-manager")'
 8 | 	mv src-manager/hypergeometrica-manager .
 9 | 
10 | hypergeometrica-worker:
11 | 	sbcl --non-interactive --eval '(asdf:make "hypergeometrica-worker")'
12 | 	mv src-worker/hypergeometrica-worker .
13 | 
14 | .PHONY: clean
15 | clean:
16 | 	rm -f hypergeometrica-manager hypergeometrica-worker
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Hypergeometrica
  2 | 
  3 | Hypergeometrica aims to democratize the calculation of pi to trillions of digits. As of March 2020, the software used to break world-record computations has remained closed source. This has been a 20+ year trend, and includes famous software such as y-cruncher, TachusPI, PiFast, and SuperPi. 
  4 | 
  5 | Please watch this [introductory video](https://www.youtube.com/watch?v=XanjZw5hPvE).
  6 | 
  7 | ```
  8 | CL-USER> (asdf:load-system :hypergeometrica)
  9 | CL-USER> (in-package :hypergeometrica)
 10 | #<PACKAGE "HYPERGEOMETRICA">
 11 | HYPERGEOMETRICA> (compute-pi/ramanujan 100)
 12 | 31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679
 13 | HYPERGEOMETRICA> (compute-pi/chudnovsky 100)
 14 | 31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679
 15 | HYPERGEOMETRICA> (compute-e 100)
 16 | 27182818284590452353602874713526624977572470936999595749669676277240766303535475945713821785251664274
 17 | ```
 18 | 
 19 | **Unfortunately**, Hypergeometrica cannot yet calculate pi in a completely *competent* way. What you see above does actually compute pi, but is taking a few very inefficient shortcuts. In order to be efficient, Hypergeometrica needs some additional key routines to eliminate these inefficient shortcuts.
 20 | 
 21 | 
 22 | ## What is the most efficient way to calculate pi with Hypergeometrica?
 23 | 
 24 | The call `(MPD-PI b)` for $b$ bits of precision is the fastest way to compute pi with Hypergeometrica. Here is a call to calculate at least $b=100$ bits of pi.
 25 | 
 26 | ```
 27 | CL-USER> (in-package :hypergeometrica)
 28 | HYPERGEOMETRICA> (mpd-pi 100)
 29 | 
 30 | terms = 4
 31 | [0 Δ0] split
 32 | [0 Δ0] sqrt
 33 | [4 Δ4] recip
 34 | [8 Δ4] final
 35 | #<MPD {10160AD883}>
 36 | ```
 37 | 
 38 | This output can be interpreted as:
 39 | 
 40 | - `terms = 4` means 4 terms of the Chudnovsky series were calculated.
 41 | - `[x Δy] thing` means `x` milliseconds have elapsed since the start of the computation, and `thing` took `y` milliseconds since the last step
 42 | - `#<MPD {...}>` is the object returned. Currently there is no base conversion routine to actually show this. One can use `mpd-mpfr` to convert it into an MPFR object for viewing or checking (as is done in `hypergeometrica-tests::test-pi`).
 43 | 
 44 | 
 45 | ## What is it?
 46 | 
 47 | Hypergeometrica is a Lisp library for performing extremely fast, extremely high-precision arithmetic. At the heart of it all are routines for doing fast multiplication. Hypergeometrica aims to support:
 48 | 
 49 | - Fast in-core multiplication using a variety of algorithms, from schoolbook to floating-point FFTs.
 50 | 
 51 | - Fast in-core multiplication for extremely huge numbers using exact convolutions via number-theoretic transforms. This is enabled by extremely fast 64-bit modular arithmetic.
 52 | 
 53 | - Fast out-of-core multiplication using derivatives of the original Cooley-Tukey algorithm.
 54 | 
 55 | - Implementation of dyadic rationals for arbitrary precision float-like numbers.
 56 | 
 57 | - Elementary automatic parallelization when reasonable.
 58 | 
 59 | On top of multiplication, one can build checkpointed algorithms for computing a variety of classical constants, like pi.
 60 | 
 61 | 
 62 | ## How is it implemented?
 63 | 
 64 | It's a Lisp library that takes advantage of assembly code via SBCL's VOP facilities.
 65 | 
 66 | It would probably be easier to get higher performance quicker in C or C++, but there's a lot of non-hot-loop code (such as calculating suitable primes) that are better served without the baggage of a low-level language.
 67 | 
 68 | 
 69 | ## What works and what doesn't?
 70 | 
 71 | There's a test suite, I recommend looking at that to see what (should be) working. In any case, a short list of features:
 72 | 
 73 | - Basic bigint (`MPZ`) routines.
 74 | 
 75 | - Basic dyadic rational (`MPD`) routines.
 76 | 
 77 | - Code to compute "suitable primes" for number-theoretic transforms.
 78 | 
 79 | - An in-core number-theoretic transform employing tricks for fast modular arithmetic.
 80 | 
 81 | - Binary-splitting for the computation of arbitrary hypergeometric series.
 82 | 
 83 | - Out-of-core/disk-based number representation and automatic upgrading, with specialized algorithms.
 84 | 
 85 | - In-core computation of pi, with basic asymptotically fast algorithms for division, square root, or inversion.
 86 | 
 87 | 
 88 | An implementation of disk-backed bigints exists, but it's not vetted and I'm not sure it's a good architecture.
 89 | 
 90 | There's also a broken implementation of out-of-core multiplication called the "matrix Fourier algorithm" following Arndt. Some corner case isn't working, and I'm not even sure this is the best way to do out-of-core multiplication.
 91 | 
 92 | 
 93 | ## Can I contribute?
 94 | 
 95 | Please, yes. Even if it's just telling me to document something. File an issue!
 96 | 
 97 | 
 98 | ## I know a lot about {I/O, disks, computer arithmetic, assembly, SBCL, ...} but I'm not really interested in rolling up my sleeves for this project.
 99 | 
100 | Please contact me so I have somebody to ask questions to!
101 | 
102 | 
103 | ## Where can I learn more about arbitrary precision arithmetic?
104 | 
105 | I'm keeping a [list of references](REFERENCES.md).
106 | 


--------------------------------------------------------------------------------
/REFERENCES.md:
--------------------------------------------------------------------------------
  1 | # References
  2 | 
  3 | ## "The Computation of Classical Constants" by the Chudnovsky Bros
  4 | 
  5 | This is a _classic_ paper introducing Chudnovsky's algorithm for
  6 | computing pi and its efficient computation. I consider it landmark.
  7 | 
  8 | - [Link](https://pdfs.semanticscholar.org/51ac/805b2cd8cb287c47aa04442e414e24c0881b.pdf)
  9 | 
 10 | Some more helpful materials:
 11 | 
 12 | - [Tito Piezas articles](https://sites.google.com/site/tpiezas/0027)
 13 | 
 14 | - [Ramanujan's Series for 1/pi](https://faculty.math.illinois.edu/~berndt/articles/monthly567-587.pdf)
 15 | 
 16 | - [A Detailed Proof of Chudnovsky Formula](https://arxiv.org/abs/1809.00533): Excellent first-principles presentation of the formula.
 17 | 
 18 | 
 19 | ## "Matters Computational" (aka fxtbook)
 20 | 
 21 | Joerg Arndt's tour de force on computer arithmetic algorithms. He's
 22 | been in the business for a while.
 23 | 
 24 | - [Link](https://www.jjj.de/fxt/)
 25 | 
 26 | ## Binary Splitting
 27 | 
 28 | There are a few references on binary splitting. The canonical one for
 29 | me is the Haible paper.
 30 | 
 31 | - [Haible & Papanikolaou](https://www.ginac.de/CLN/binsplit.pdf)
 32 | 
 33 | - [Gourdon & Sebah](http://numbers.computation.free.fr/Constants/Algorithms/splitting.ps)
 34 | 
 35 | 
 36 | ## Number-Theoretic Transforms
 37 | 
 38 | - [Link](https://pdfs.semanticscholar.org/c48a/2408d3ff16836935275bab16947fefc00f1a.pdf): "Faster arithmetic for number-theoretic transforms" by David Harvey. These slides are generally useful for the implementation of NTTs. Includes Shoup's modular arithmetic trick.
 39 | 
 40 | - [Link](https://www.nayuki.io/page/number-theoretic-transform-integer-dft): A simple NTT tutorial going through the basics.
 41 | 
 42 | - [Link](http://www.apfloat.org/ntt.html): Mikko Tommila's notes on the NTT.
 43 | 
 44 | - [Speeding up the number theoretic transform](https://eprint.iacr.org/2016/504.pdf) by Longa and Naehrig
 45 | 
 46 | - [The FFT - an algorithm the whole family can use](https://www.cs.dartmouth.edu/~rockmore/cse-fft.pdf): Just a fun paper title. Not useful for this project.
 47 | 
 48 | ## Primes and Modular Arithmetic
 49 | 
 50 | - [Generalized Mersenne Primes](http://cacr.uwaterloo.ca/techreports/1999/corr99-39.pdf): Solinas gives a method for reducing modular reduction of "nice" moduli to adds, subtracts, and shifts. Finnicky to get right though. Understanding this paper requires you to know what a [LFSR](https://pdfs.semanticscholar.org/a47e/2c91605fd3f0753a736d26f3bf3d8e1ef548.pdf) is.
 51 | 
 52 | - [Montgomery Multiplication](https://cp-algorithms.com/algebra/montgomery_multiplication.html)
 53 | 
 54 | 
 55 | ## Arbitrary-precision arithmetic software
 56 | 
 57 | Useful for rooting your nose around.
 58 | 
 59 | - [Link](https://en.wikipedia.org/wiki/List_of_arbitrary-precision_arithmetic_software): A general list.
 60 | 
 61 | - [Link](https://sourceforge.net/projects/bigz/): The `bigz` library, very easy to read, but not very sophisticated.
 62 | 
 63 | ### Fabrice Bellard's libbf
 64 | 
 65 | This is an impressive and small library, in typical Bellard
 66 | style. It's quite difficult to read though.
 67 | 
 68 | - [Link](https://bellard.org/libbf/)
 69 | 
 70 | - [Technical notes](https://bellard.org/libbf/readme.txt)
 71 | 
 72 | - [GitHub Mirror](https://github.com/rurban/libbf)
 73 | 
 74 | 
 75 | ## Assembly and SBCL
 76 | 
 77 | Paul Khuong has a wonderful series on writing low-level code (read: assembly) in SBCL.
 78 | 
 79 | - [SBCL: The Ultimate Assembly Code Breadboard](https://www.pvk.ca/Blog/2014/03/15/sbcl-the-ultimate-assembly-code-breadboard/)
 80 | 
 81 | - [How to Define New Intrinsics](https://www.pvk.ca/Blog/2014/08/16/how-to-define-new-intrinsics-in-sbcl/)
 82 | 
 83 | - [SSE Intrinsics](https://www.pvk.ca/Blog/2013/06/05/fresh-in-sbcl-1-dot-1-8-sse-intrinsics/)
 84 | 
 85 | Quick x864-64 assembly references:
 86 | 
 87 | - [Link](https://www.felixcloutier.com/x86/index.html)
 88 | 
 89 | - [godbolt](https://godbolt.org/) Useful for checking C code and getting assembler inspiration.
 90 | 
 91 | 
 92 | ## Closed Source Mathematical Software :(
 93 | 
 94 | The authors of these programs have done very impressive things. So
 95 | impressive that they feel their computational secrets must not be
 96 | shared. Too bad; the world is better when math software is open.
 97 | 
 98 | - [y-cruncher](http://www.numberworld.org/y-cruncher/): Purportedly the best pi calculating program on the planet. Broke many a record.
 99 | 
100 | - [PiFast](http://numbers.computation.free.fr/Constants/PiProgram/pifast.html): Was the fastest program for a long time. 
101 | 
102 | - [SuperPi](http://www.superpi.net/): Port of a program that broke a record in the 90s. Became popular with benchmarkers.
103 | 
104 | ## Out-of-Core FFTs
105 | 
106 | - [Fast Fourier Transforms---For Fun and Profit](http://www.cis.rit.edu/class/simg716/FFT_Fun_Profit.pdf): The OG paper about it.
107 | 
108 | - [Determining an Out-of-Core FFT Decomposition Strategy](https://pdfs.semanticscholar.org/30e3/07cc26b038b654122426133d6d545d2cc7e7.pdf) by Thomas Cormen: Has info about Swarztrauber's method and out-of-core FFTs.
109 | 
110 | - [FFTs in External or Hierarchical Memory](https://www.davidhbailey.com/dhbpapers/fftq.pdf) by David Bailey.
111 | 
112 | - See also Arndt.
113 | 
114 | ## Miscellaneous links
115 | 
116 | - [Numbers, Constants, and Computation](http://numbers.computation.free.fr/Constants/constants.html): A classic website (all the way back to 1999!) by Gourdon and Sebah. I remember reading this when I was just starting to get into math & programming.
117 | 
118 | - [Jason Papadopoulos's pages](https://web.archive.org/web/20160307010247/http://www.boo.net/~jasonp/): Pi programs, FFTs, etc. Another early inspiration for my study.
119 | 
120 | - [Modern Computer Arithmetic](https://members.loria.fr/PZimmermann/mca/mca-cup-0.5.9.pdf) by Brent and Zimmerman
121 | 


--------------------------------------------------------------------------------
/hypergeometrica-manager.asd:
--------------------------------------------------------------------------------
 1 | ;;;; hypergeometrica-manager.asd
 2 | ;;;;
 3 | ;;;; Copyright (c) 2024 Robert Smith
 4 | 
 5 | (asdf:defsystem #:hypergeometrica-manager
 6 |   :description "Manager process for running large Hypergeometrica calculations."
 7 |   :author "Robert Smith <robert@stylewarning.com>"
 8 |   :license "BSD 3-clause (See LICENSE.txt)"
 9 |   :depends-on (#:clingon #:uiop #:bordeaux-threads #:sb-bsd-sockets)
10 | ;  :in-order-to ((asdf:test-op (asdf:test-op #:hypergeometrica-manager/tests)))
11 |   :around-compile (lambda (compile)
12 |                     (let (#+sbcl (sb-ext:*derive-function-types* t))
13 |                       (funcall compile)))
14 |   :pathname "src-manager/"
15 |   :serial t
16 |   :components ((:file "package")
17 |                (:file "main"))
18 |   :build-operation "program-op"
19 |   :build-pathname "hypergeometrica-manager"
20 |   :entry-point "hypergeometrica-manager:main")
21 | 
22 | (asdf:defsystem #:hypergeometrica-manager/tests
23 |   :description "Tests for HYPERGEOMETRICA-MANAGER."
24 |   :author "Robert Smith <robert@stylewarning.com>"
25 |   :license "BSD 3-clause (See LICENSE.txt)"
26 |   :defsystem-depends-on (#:uiop)
27 |   :depends-on (#:hypergeometrica-manager
28 |                #:fiasco)
29 |   :perform (asdf:test-op (o s)
30 |                          #+ignore
31 |                          (uiop:symbol-call '#:hypergeometrica-tests
32 |                                            '#:test-hypergeometrica))
33 |   :serial t
34 |   :components ())
35 | 


--------------------------------------------------------------------------------
/hypergeometrica-worker.asd:
--------------------------------------------------------------------------------
 1 | ;;;; hypergeometrica-worker.asd
 2 | ;;;;
 3 | ;;;; Copyright (c) 2024 Robert Smith
 4 | 
 5 | (asdf:defsystem #:hypergeometrica-worker
 6 |   :description "Worker process for running large Hypergeometrica calculations."
 7 |   :author "Robert Smith <robert@stylewarning.com>"
 8 |   :license "BSD 3-clause (See LICENSE.txt)"
 9 |   :depends-on (#:clingon #:uiop #:bordeaux-threads #:sb-bsd-sockets)
10 | ;  :in-order-to ((asdf:test-op (asdf:test-op #:hypergeometrica-worker/tests)))
11 |   :around-compile (lambda (compile)
12 |                     (let (#+sbcl (sb-ext:*derive-function-types* t))
13 |                       (funcall compile)))
14 |   :pathname "src-worker/"
15 |   :serial t
16 |   :components ((:file "package")
17 |                (:file "main"))
18 |   :build-operation "program-op"
19 |   :build-pathname "hypergeometrica-worker"
20 |   :entry-point "hypergeometrica-worker:main")
21 | 
22 | (asdf:defsystem #:hypergeometrica-worker/tests
23 |   :description "Tests for HYPERGEOMETRICA-WORKER."
24 |   :author "Robert Smith <robert@stylewarning.com>"
25 |   :license "BSD 3-clause (See LICENSE.txt)"
26 |   :defsystem-depends-on (#:uiop)
27 |   :depends-on (#:hypergeometrica-worker
28 |                #:fiasco)
29 |   :perform (asdf:test-op (o s)
30 |                          #+ignore
31 |                          (uiop:symbol-call '#:hypergeometrica-tests
32 |                                            '#:test-hypergeometrica))
33 |   :serial t
34 |   :components ())
35 | 


--------------------------------------------------------------------------------
/hypergeometrica.asd:
--------------------------------------------------------------------------------
 1 | ;;;; hypergeometrica.asd
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019-2024 Robert Smith
 4 | 
 5 | (asdf:defsystem #:hypergeometrica
 6 |   :description "Calculate lots of digits of things."
 7 |   :author "Robert Smith <robert@stylewarning.com>"
 8 |   :license "BSD 3-clause (See LICENSE.txt)"
 9 |   :depends-on (#:alexandria
10 |                #:global-vars
11 |                #:lparallel
12 |                #:napa-fft3
13 |                #:trivial-garbage
14 |                #:uiop
15 | 
16 |                #:cffi
17 |                #:mmap
18 | 
19 |                (:feature :sbcl #:sb-mpfr)
20 |                (:feature :sbcl #:sb-posix))
21 |   :in-order-to ((asdf:test-op (asdf:test-op #:hypergeometrica/tests)))
22 |   :around-compile (lambda (compile)
23 |                     (let (#+sbcl (sb-ext:*derive-function-types* t))
24 |                       (funcall compile)))
25 |   :pathname "src/"
26 |   :serial t
27 |   :components ((:file "package")
28 |                (:file "config")
29 |                (:file "logging")
30 |                (:file "mmap")
31 |                (:file "timing-utilities")
32 |                (:file "math-utilities")
33 |                (:file "sbcl-intrinsics"         :if-feature :sbcl)
34 |                (:file "sbcl-intrinsics-x86-64"  :if-feature (:and :sbcl :x86-64))
35 |                (:file "sbcl-intrinsics-ppc64el" :if-feature (:and :sbcl :ppc64 :little-endian))
36 |                (:file "digit")
37 |                (:file "modular-arithmetic")
38 |                (:file "vec")
39 |                (:file "ram-vec")
40 |                (:file "disk-vec")
41 |                (:file "mpz-protocol")
42 |                (:file "mpz-ram")
43 |                (:file "moduli")
44 |                (:file "strandh-elster-reversal")
45 |                (:file "number-theoretic-transform")
46 |                (:file "ntt-multiply")
47 |                ;; (:file "fft-multiply")
48 |                ;; (:file "disk")
49 |                (:file "multiply")
50 |                (:file "mpd")
51 |                (:file "mpd-reciprocal")
52 |                (:file "mpd-sqrt")
53 |                (:file "divrem")
54 |                (:file "mpz-string")
55 |                (:file "binary-splitting")
56 |                (:file "pi")))
57 | 
58 | (asdf:defsystem #:hypergeometrica/tests
59 |   :description "Tests and debug routines for HYPERGEOMETRICA."
60 |   :author "Robert Smith <robert@stylewarning.com>"
61 |   :license "BSD 3-clause (See LICENSE.txt)"
62 |   :defsystem-depends-on (#:uiop)
63 |   :depends-on (#:hypergeometrica
64 |                #:fiasco
65 | 
66 |                (:feature :sbcl #:sb-mpfr))
67 |   :perform (asdf:test-op (o s)
68 |                          (uiop:symbol-call '#:hypergeometrica-tests
69 |                                            '#:test-hypergeometrica))
70 |   :pathname "tests/"
71 |   :serial t
72 |   :components ((:file "package")
73 |                (:module "debug"
74 |                 :serial t
75 |                 :components ((:file "debug-routines")))
76 |                (:file "suite")
77 |                (:file "arithmetic")
78 |                (:file "moduli")
79 |                (:file "vec")
80 |                (:file "mpz")
81 |                (:file "multiplication")
82 |                (:file "ntt")
83 |                (:file "primes")
84 |                (:file "sundries")
85 |                (:file "divrem")
86 |                (:file "write-number")
87 |                (:file "pi")))
88 | 


--------------------------------------------------------------------------------
/logs/24apr21-999-ram.txt:
--------------------------------------------------------------------------------
  1 | ;;; disable SAFE, PARANOID
  2 | ;;; enable THREADING, INTRINSICS
  3 | 
  4 | HYPERGEOMETRICA> (time (x^x^x/ram 9))
  5 | 
  6 | -------------------------
  7 | Current exponent: 2 / 387420489 (  0.00%)
  8 | -------------------------
  9 | Current exponent: 5 / 387420489 (  0.00%)
 10 | -------------------------
 11 | Current exponent: 11 / 387420489 (  0.00%)
 12 | -------------------------
 13 | Current exponent: 23 / 387420489 (  0.00%)
 14 | -------------------------
 15 | Current exponent: 46 / 387420489 (  0.00%)
 16 | -------------------------
 17 | Current exponent: 92 / 387420489 (  0.00%)
 18 | -------------------------
 19 | Current exponent: 184 / 387420489 (  0.00%)
 20 | -------------------------
 21 | Current exponent: 369 / 387420489 (  0.00%)
 22 | -------------------------
 23 | Current exponent: 738 / 387420489 (  0.00%)
 24 | -------------------------
 25 | Current exponent: 1477 / 387420489 (  0.00%)
 26 | -------------------------
 27 | Current exponent: 2955 / 387420489 (  0.00%)
 28 | -------------------------
 29 | Current exponent: 5911 / 387420489 (  0.00%)
 30 | Allocating... 0 ms
 31 | Size: 147 (approx 2832 decimals, 0 MiB)
 32 | Transform length: 512
 33 | Convolution bits: 137
 34 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 35 | Forward... 0 ms
 36 | Pointwise multiply...  8 ms
 37 | Reverse... 0 ms
 38 | CRT... 0 ms
 39 | -------------------------
 40 | Current exponent: 11823 / 387420489 (  0.00%)
 41 | Allocating... 0 ms
 42 | Size: 293 (approx 5645 decimals, 0 MiB)
 43 | Transform length: 1024
 44 | Convolution bits: 138
 45 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 46 | Forward... 0 ms
 47 | Pointwise multiply...  4 ms
 48 | Reverse... 0 ms
 49 | CRT... 0 ms
 50 | -------------------------
 51 | Current exponent: 23646 / 387420489 (  0.01%)
 52 | Allocating... 4 ms
 53 | Size: 586 (approx 11290 decimals, 0 MiB)
 54 | Transform length: 2048
 55 | Convolution bits: 139
 56 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 57 | Forward... 0 ms
 58 | Pointwise multiply...  4 ms
 59 | Reverse... 0 ms
 60 | CRT... 0 ms
 61 | -------------------------
 62 | Current exponent: 47292 / 387420489 (  0.01%)
 63 | Allocating... 0 ms
 64 | Size: 1172 (approx 22580 decimals, 0 MiB)
 65 | Transform length: 4096
 66 | Convolution bits: 140
 67 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 68 | Forward... 0 ms
 69 | Pointwise multiply...  0 ms
 70 | Reverse... 4 ms
 71 | CRT... 0 ms
 72 | -------------------------
 73 | Current exponent: 94585 / 387420489 (  0.02%)
 74 | Allocating... 0 ms
 75 | Size: 2343 (approx 45140 decimals, 0 MiB)
 76 | Transform length: 8192
 77 | Convolution bits: 141
 78 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 79 | Forward... 4 ms
 80 | Pointwise multiply...  4 ms
 81 | Reverse... 4 ms
 82 | CRT... 4 ms
 83 | -------------------------
 84 | Current exponent: 189170 / 387420489 (  0.05%)
 85 | Allocating... 0 ms
 86 | Size: 4685 (approx 90261 decimals, 0 MiB)
 87 | Transform length: 16384
 88 | Convolution bits: 142
 89 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 90 | Forward... 4 ms
 91 | Pointwise multiply...  4 ms
 92 | Reverse... 4 ms
 93 | CRT... 4 ms
 94 | -------------------------
 95 | Current exponent: 378340 / 387420489 (  0.10%)
 96 | Allocating... 4 ms
 97 | Size: 9370 (approx 180522 decimals, 1 MiB)
 98 | Transform length: 32768
 99 | Convolution bits: 143
100 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
101 | Forward... 8 ms
102 | Pointwise multiply...  0 ms
103 | Reverse... 12 ms
104 | CRT... 12 ms
105 | -------------------------
106 | Current exponent: 756680 / 387420489 (  0.20%)
107 | Allocating... 0 ms
108 | Size: 18740 (approx 361043 decimals, 2 MiB)
109 | Transform length: 65536
110 | Convolution bits: 144
111 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
112 | Forward... 16 ms
113 | Pointwise multiply...  4 ms
114 | Reverse... 24 ms
115 | CRT... 24 ms
116 | -------------------------
117 | Current exponent: 1513361 / 387420489 (  0.39%)
118 | Allocating... 0 ms
119 | Size: 37479 (approx 722067 decimals, 4 MiB)
120 | Transform length: 131072
121 | Convolution bits: 145
122 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
123 | Forward... 36 ms
124 | Pointwise multiply...  4 ms
125 | Reverse... 48 ms
126 | CRT... 44 ms
127 | -------------------------
128 | Current exponent: 3026722 / 387420489 (  0.78%)
129 | Allocating... 0 ms
130 | Size: 74957 (approx 1444116 decimals, 8 MiB)
131 | Transform length: 262144
132 | Convolution bits: 146
133 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
134 | Forward... 76 ms
135 | Pointwise multiply...  8 ms
136 | Reverse... 104 ms
137 | CRT... 96 ms
138 | -------------------------
139 | Current exponent: 6053445 / 387420489 (  1.56%)
140 | Allocating... 4 ms
141 | Size: 149914 (approx 2888231 decimals, 16 MiB)
142 | Transform length: 524288
143 | Convolution bits: 147
144 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
145 | Forward... 188 ms
146 | Pointwise multiply...  12 ms
147 | Reverse... 248 ms
148 | CRT... 184 ms
149 | -------------------------
150 | Current exponent: 12106890 / 387420489 (  3.12%)
151 | Allocating... 8 ms
152 | Size: 299828 (approx 5776462 decimals, 32 MiB)
153 | Transform length: 1048576
154 | Convolution bits: 148
155 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
156 | Forward... 528 ms
157 | Pointwise multiply...  16 ms
158 | Reverse... 640 ms
159 | CRT... 384 ms
160 | -------------------------
161 | Current exponent: 24213780 / 387420489 (  6.25%)
162 | Allocating... 16 ms
163 | Size: 599656 (approx 11552924 decimals, 64 MiB)
164 | Transform length: 2097152
165 | Convolution bits: 149
166 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
167 | Forward... 1224 ms
168 | Pointwise multiply...  36 ms
169 | Reverse... 1384 ms
170 | CRT... 756 ms
171 | -------------------------
172 | Current exponent: 48427561 / 387420489 ( 12.50%)
173 | Allocating... 36 ms
174 | Size: 1199311 (approx 23105830 decimals, 128 MiB)
175 | Transform length: 4194304
176 | Convolution bits: 150
177 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
178 | Forward... 2724 ms
179 | Pointwise multiply...  72 ms
180 | Reverse... 3012 ms
181 | CRT... 1540 ms
182 | -------------------------
183 | Current exponent: 96855122 / 387420489 ( 25.00%)
184 | Allocating... 64 ms
185 | Size: 2398621 (approx 46211640 decimals, 256 MiB)
186 | Transform length: 8388608
187 | Convolution bits: 151
188 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
189 | Forward... 5804 ms
190 | Pointwise multiply...  132 ms
191 | Reverse... 6632 ms
192 | CRT... 3004 ms
193 | -------------------------
194 | Current exponent: 193710244 / 387420489 ( 50.00%)
195 | Allocating... 128 ms
196 | Size: 4797242 (approx 92423280 decimals, 512 MiB)
197 | Transform length: 16777216
198 | Convolution bits: 152
199 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
200 | Forward... 12980 ms
201 | Pointwise multiply...  256 ms
202 | Reverse... 14684 ms
203 | CRT... 5992 ms
204 | -------------------------
205 | Current exponent: 387420489 / 387420489 (100.00%)
206 | Allocating... 256 ms
207 | Size: 9594484 (approx 184846559 decimals, 1024 MiB)
208 | Transform length: 33554432
209 | Convolution bits: 153
210 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
211 | Forward... 28728 ms
212 | Pointwise multiply...  504 ms
213 | Reverse... 31356 ms
214 | CRT... 12008 ms
215 | Evaluation took:
216 |   136.233 seconds of real time
217 |   331.953620 seconds of total run time (331.722013 user, 0.231607 system)
218 |   [ Run times consist of 0.154 seconds GC time, and 331.800 seconds non-GC time. ]
219 |   243.67% CPU
220 |   22 lambdas converted
221 |   395,605,407,318 processor cycles
222 |   23,256,528,992 bytes consed
223 |   
224 | #<MPZ 1228093895 bits* {101E3E36E3}>
225 | 


--------------------------------------------------------------------------------
/logs/8sept19-999-ram.txt:
--------------------------------------------------------------------------------
  1 | * (in-package :hypergeometrica)
  2 | #<PACKAGE "HYPERGEOMETRICA">
  3 | * (setq *verbose* t)
  4 | T
  5 | * (load "scratch")
  6 | T
  7 | * (time (x^x^x/ram 9))
  8 | ---------------------------- Exponent left: 387420489
  9 | ---------------------------- Exponent left: 193710244
 10 | ---------------------------- Exponent left: 96855122
 11 | ---------------------------- Exponent left: 48427561
 12 | ---------------------------- Exponent left: 24213780
 13 | ---------------------------- Exponent left: 12106890
 14 | ---------------------------- Exponent left: 6053445
 15 | ---------------------------- Exponent left: 3026722
 16 | ---------------------------- Exponent left: 1513361
 17 | ---------------------------- Exponent left: 756680
 18 | ---------------------------- Exponent left: 378340
 19 | ---------------------------- Exponent left: 189170
 20 | Allocating... 0 ms
 21 | Size: 102 (approx 1965 decimals, 0 MiB)
 22 | Transform length: 256
 23 | Convolution bits: 136
 24 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 25 | Forward... 72 ms
 26 | Pointwise multiply...  5 ms
 27 | Reverse... 1 ms
 28 | CRT... 0 ms
 29 | ---------------------------- Exponent left: 94585
 30 | Allocating... 0 ms
 31 | Size: 203 (approx 3911 decimals, 0 MiB)
 32 | Transform length: 512
 33 | Convolution bits: 137
 34 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 35 | Forward... 0 ms
 36 | Pointwise multiply...  0 ms
 37 | Reverse... 0 ms
 38 | CRT... 0 ms
 39 | ---------------------------- Exponent left: 47292
 40 | Allocating... 0 ms
 41 | Size: 406 (approx 7822 decimals, 0 MiB)
 42 | Transform length: 1024
 43 | Convolution bits: 138
 44 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 45 | Forward... 1 ms
 46 | Pointwise multiply...  0 ms
 47 | Reverse... 0 ms
 48 | CRT... 0 ms
 49 | ---------------------------- Exponent left: 23646
 50 | Allocating... 1 ms
 51 | Size: 812 (approx 15644 decimals, 0 MiB)
 52 | Transform length: 2048
 53 | Convolution bits: 139
 54 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 55 | Forward... 0 ms
 56 | Pointwise multiply...  0 ms
 57 | Reverse... 1 ms
 58 | CRT... 1 ms
 59 | ---------------------------- Exponent left: 11823
 60 | Allocating... 0 ms
 61 | Allocating... 0 ms
 62 | Size: 1844 (approx 35526 decimals, 0 MiB)
 63 | Transform length: 2048
 64 | Convolution bits: 139
 65 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 66 | Forward... 1 ms
 67 | Pointwise multiply... 10 ms
 68 | Reverse... 0 ms
 69 | CRT... 1 ms
 70 | Allocating... 0 ms
 71 | Size: 1624 (approx 31288 decimals, 0 MiB)
 72 | Transform length: 4096
 73 | Convolution bits: 140
 74 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 75 | Forward... 1 ms
 76 | Pointwise multiply...  0 ms
 77 | Reverse... 1 ms
 78 | CRT... 2 ms
 79 | ---------------------------- Exponent left: 5911
 80 | Allocating... 0 ms
 81 | Allocating... 0 ms
 82 | Size: 5090 (approx 98064 decimals, 0 MiB)
 83 | Transform length: 8192
 84 | Convolution bits: 141
 85 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 86 | Forward... 2 ms
 87 | Pointwise multiply... 0 ms
 88 | Reverse... 3 ms
 89 | CRT... 4 ms
 90 | Allocating... 0 ms
 91 | Size: 3247 (approx 62556 decimals, 0 MiB)
 92 | Transform length: 8192
 93 | Convolution bits: 141
 94 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
 95 | Forward... 3 ms
 96 | Pointwise multiply...  1 ms
 97 | Reverse... 2 ms
 98 | CRT... 5 ms
 99 | ---------------------------- Exponent left: 2955
100 | Allocating... 0 ms
101 | Allocating... 0 ms
102 | Size: 11582 (approx 223138 decimals, 0 MiB)
103 | Transform length: 16384
104 | Convolution bits: 142
105 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
106 | Forward... 6 ms
107 | Pointwise multiply... 0 ms
108 | Reverse... 5 ms
109 | CRT... 25 ms
110 | Allocating... 0 ms
111 | Size: 6493 (approx 125094 decimals, 0 MiB)
112 | Transform length: 16384
113 | Convolution bits: 142
114 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
115 | Forward... 18 ms
116 | Pointwise multiply...  3 ms
117 | Reverse... 5 ms
118 | CRT... 5 ms
119 | ---------------------------- Exponent left: 1477
120 | Allocating... 1 ms
121 | Allocating... 0 ms
122 | Size: 24566 (approx 473287 decimals, 0 MiB)
123 | Transform length: 32768
124 | Convolution bits: 143
125 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
126 | Forward... 10 ms
127 | Pointwise multiply... 3 ms
128 | Reverse... 9 ms
129 | CRT... 10 ms
130 | Allocating... 0 ms
131 | Size: 12985 (approx 250168 decimals, 1 MiB)
132 | Transform length: 32768
133 | Convolution bits: 143
134 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
135 | Forward... 35 ms
136 | Pointwise multiply...  6 ms
137 | Reverse... 10 ms
138 | CRT... 9 ms
139 | ---------------------------- Exponent left: 738
140 | Allocating... 2 ms
141 | Size: 25969 (approx 500317 decimals, 2 MiB)
142 | Transform length: 65536
143 | Convolution bits: 144
144 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
145 | Forward... 20 ms
146 | Pointwise multiply...  3 ms
147 | Reverse... 36 ms
148 | CRT... 20 ms
149 | ---------------------------- Exponent left: 369
150 | Allocating... 2 ms
151 | Allocating... 1 ms
152 | Size: 76502 (approx 1473881 decimals, 1 MiB)
153 | Transform length: 131072
154 | Convolution bits: 145
155 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
156 | Forward... 73 ms
157 | Pointwise multiply... 17 ms
158 | Reverse... 35 ms
159 | CRT... 34 ms
160 | Allocating... 1 ms
161 | Size: 51937 (approx 1000614 decimals, 4 MiB)
162 | Transform length: 131072
163 | Convolution bits: 145
164 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
165 | Forward... 93 ms
166 | Pointwise multiply...  9 ms
167 | Reverse... 38 ms
168 | CRT... 42 ms
169 | ---------------------------- Exponent left: 184
170 | Allocating... 2 ms
171 | Size: 103873 (approx 2001209 decimals, 8 MiB)
172 | Transform length: 262144
173 | Convolution bits: 146
174 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
175 | Forward... 138 ms
176 | Pointwise multiply...  18 ms
177 | Reverse... 70 ms
178 | CRT... 83 ms
179 | ---------------------------- Exponent left: 92
180 | Allocating... 4 ms
181 | Size: 207745 (approx 4002399 decimals, 16 MiB)
182 | Transform length: 524288
183 | Convolution bits: 147
184 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
185 | Forward... 137 ms
186 | Pointwise multiply...  19 ms
187 | Reverse... 244 ms
188 | CRT... 174 ms
189 | ---------------------------- Exponent left: 46
190 | Allocating... 9 ms
191 | Size: 415489 (approx 8004778 decimals, 32 MiB)
192 | Transform length: 1048576
193 | Convolution bits: 148
194 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
195 | Forward... 339 ms
196 | Pointwise multiply...  46 ms
197 | Reverse... 342 ms
198 | CRT... 318 ms
199 | ---------------------------- Exponent left: 23
200 | Allocating... 17 ms
201 | Allocating... 4 ms
202 | Size: 907478 (approx 17483398 decimals, 7 MiB)
203 | Transform length: 1048576
204 | Convolution bits: 148
205 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
206 | Forward... 410 ms
207 | Pointwise multiply... 34 ms
208 | Reverse... 345 ms
209 | CRT... 351 ms
210 | Allocating... 17 ms
211 | Size: 830977 (approx 16009536 decimals, 64 MiB)
212 | Transform length: 2097152
213 | Convolution bits: 149
214 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
215 | Forward... 758 ms
216 | Pointwise multiply...  60 ms
217 | Reverse... 847 ms
218 | CRT... 813 ms
219 | ---------------------------- Exponent left: 11
220 | Allocating... 49 ms
221 | Allocating... 24 ms
222 | Size: 2569432 (approx 49502471 decimals, 20 MiB)
223 | Transform length: 4194304
224 | Convolution bits: 150
225 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
226 | Forward... 2313 ms
227 | Pointwise multiply... 137 ms
228 | Reverse... 1788 ms
229 | CRT... 1260 ms
230 | Allocating... 56 ms
231 | Size: 1661954 (approx 32019072 decimals, 128 MiB)
232 | Transform length: 4194304
233 | Convolution bits: 150
234 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
235 | Forward... 1774 ms
236 | Pointwise multiply...  125 ms
237 | Reverse... 1867 ms
238 | CRT... 1442 ms
239 | ---------------------------- Exponent left: 5
240 | Allocating... 93 ms
241 | Allocating... 85 ms
242 | Size: 5893339 (approx 113540596 decimals, 45 MiB)
243 | Transform length: 8388608
244 | Convolution bits: 151
245 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
246 | Forward... 5326 ms
247 | Pointwise multiply... 142 ms
248 | Reverse... 3979 ms
249 | CRT... 2518 ms
250 | Allocating... 100 ms
251 | Size: 3323908 (approx 64038145 decimals, 256 MiB)
252 | Transform length: 8388608
253 | Convolution bits: 151
254 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
255 | Forward... 4420 ms
256 | Pointwise multiply...  188 ms
257 | Reverse... 4025 ms
258 | CRT... 2693 ms
259 | ---------------------------- Exponent left: 2
260 | Allocating... 213 ms
261 | Size: 6647815 (approx 128076270 decimals, 512 MiB)
262 | Transform length: 16777216
263 | Convolution bits: 152
264 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
265 | Forward... 9184 ms
266 | Pointwise multiply...  405 ms
267 | Reverse... 8664 ms
268 | CRT... 5127 ms
269 | Allocating... 412 ms
270 | Allocating... 187 ms
271 | Size: 19188968 (approx 369693118 decimals, 146 MiB)
272 | Transform length: 33554432
273 | Convolution bits: 153
274 | Moduli: #x6280000000000001, #x5700000000000001, #x4180000000000001
275 | Forward... 27367 ms
276 | Pointwise multiply... 255 ms
277 | Reverse... 18772 ms
278 | CRT... 8800 ms
279 | Evaluation took:
280 |   120.380 seconds of real time
281 |   404.413108 seconds of total run time (399.126501 user, 5.286607 system)
282 |   [ Run times consist of 0.398 seconds GC time, and 404.016 seconds non-GC time. ]
283 |   335.95% CPU
284 |   9 lambdas converted
285 |   277,390,485,842 processor cycles
286 |   35,199,897,440 bytes consed
287 | 
288 | #<MPZ 1228093895 bits* {103DF8BB73}>
289 | 
290 | 


--------------------------------------------------------------------------------
/src-manager/main.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; Copyright (c) 2024 Robert Smith
  2 | 
  3 | (in-package #:hypergeometrica-manager)
  4 | 
  5 | (sb-ext:defglobal **socket** nil)
  6 | (sb-ext:defglobal **socket-node** nil)
  7 | (sb-ext:defglobal **socket-thread** nil)
  8 | 
  9 | ;;; Socket
 10 | 
 11 | (defun write-form (stream form)
 12 |   (prin1 form stream)
 13 |   (terpri stream)
 14 |   (finish-output stream))
 15 | 
 16 | (defun make-socket-listener ()
 17 |   (let* ((server **socket**))
 18 |     (lambda ()
 19 |       (unwind-protect
 20 |            (loop
 21 |              (let* ((client (sb-bsd-sockets:socket-accept server))
 22 |                     (stream (sb-bsd-sockets:socket-make-stream
 23 |                              client
 24 |                              :input t
 25 |                              :output t
 26 |                              :element-type 'character
 27 |                              :buffering :line
 28 |                              :timeout 5))
 29 |                     (message (read stream nil '(:eof))))
 30 |                (handle-worker-message message stream)
 31 |                (sb-bsd-sockets:socket-close client)))
 32 |         (sb-bsd-sockets:socket-close **socket**)
 33 |         (delete-file **socket-node**)
 34 |         (setf **socket** nil
 35 |               **socket-node** nil
 36 |               **socket-thread** nil)))))
 37 | 
 38 | (defun start-socket-thread ()
 39 |   (when **socket-thread**
 40 |     (warn "Socket thread already started.")
 41 |     (bt:destroy-thread **socket-thread**))
 42 |   (setf **socket-thread** (bt:make-thread
 43 |                            (make-socket-listener)
 44 |                            :name "Hypergeometrica Socket Server")))
 45 | 
 46 | ;;; Worker Tracking
 47 | 
 48 | (defclass worker-status ()
 49 |   ((id :accessor worker-status-id
 50 |        :initarg :id)
 51 |    (last-heartbeat :accessor last-heartbeat
 52 |                    :initarg :last-heartbeat)))
 53 | 
 54 | (sb-ext:defglobal **max-workers** 1)
 55 | (sb-ext:defglobal **workers-lock** (bt:make-lock "**workers**"))
 56 | (sb-ext:defglobal **workers** nil)
 57 | 
 58 | (defun make-id ()
 59 |   (sleep 1.5)
 60 |   (get-universal-time))
 61 | 
 62 | (defun check-worker (id)
 63 |   (bt:with-lock-held (**workers-lock**)
 64 |     (let ((status (find id **workers** :key #'worker-status-id)))
 65 |       (cond
 66 |         (status
 67 |          (setf (last-heartbeat status) (get-internal-real-time))
 68 |          id)
 69 |         (t
 70 |          (warn "Unknown worker identified as #~D" id)
 71 |          nil)))))
 72 | 
 73 | ;;; Lifelines
 74 | 
 75 | (defun revive-lifelines (lifelines)
 76 |   (dolist (lifeline lifelines)
 77 |     (cond
 78 |       ((probe-file lifeline)
 79 |        (handler-case
 80 |            (let ((lifeline-socket (make-instance 'sb-bsd-sockets:local-socket :type :stream)))
 81 |              (unwind-protect
 82 |                   (progn
 83 |                     (sb-bsd-sockets:socket-connect lifeline-socket lifeline)
 84 |                     (let ((stream (sb-bsd-sockets:socket-make-stream
 85 |                                    lifeline-socket
 86 |                                    :element-type 'character
 87 |                                    :input t
 88 |                                    :output t
 89 |                                    :buffering ':line)))
 90 |                       (let ((new-id (make-id)))
 91 |                         (push
 92 |                          (make-instance 'worker-status
 93 |                                         :id new-id
 94 |                                         :last-heartbeat (get-internal-real-time))
 95 |                          **workers**)
 96 |                         (format t "Reviving: lifeline ~A -> ~S~%" lifeline new-id)
 97 |                         (write-form stream `(:revive :id ,new-id :socket ,**socket-node**))
 98 |                         ;; TODO: bookkeeping on current computational progress
 99 |                         )))
100 |                (sb-bsd-sockets:socket-close lifeline-socket)))
101 |          (sb-bsd-sockets:socket-error (c)
102 |            (declare (ignore c))
103 |            (warn "Error communicating with lifeline socket ~A... skipping" lifeline))))
104 |       (t
105 |        (warn "Invalid lifeline: ~A" lifeline)))))
106 | 
107 | ;;; Heartbeat
108 | 
109 | (defun make-heartbeat-checker (&optional (timeout 10))
110 |   (lambda ()
111 |     (loop
112 |       (sleep timeout)
113 |       (bt:with-lock-held (**workers-lock**)
114 |         (loop :for status :in **workers**
115 |               :if (< timeout (/ (- (get-internal-real-time)
116 |                                    (last-heartbeat status))
117 |                                 internal-time-units-per-second))
118 |                 :collect status :into evict
119 |               :else
120 |                 :collect status :into renew
121 |               :finally (progn
122 |                          (setf **workers** renew)
123 |                          (dolist (status evict)
124 |                            (warn "Evicting ~A due to timeout." (worker-status-id status)))))))))
125 | 
126 | (sb-ext:defglobal **heartbeat-checker-thread** nil)
127 | (defun start-heartbeat-checker-thread ()
128 |   (setf **heartbeat-checker-thread**
129 |         (bt:make-thread (make-heartbeat-checker) :name "Heartbeat Checker")))
130 | 
131 | ;;; Worker Message Handling
132 | 
133 | (defun handle-unknown-message (message)
134 |   (warn "Unknown message received: ~A" (prin1-to-string message))
135 |   nil)
136 | 
137 | (defun handle-worker-message (message stream)
138 |   (typecase message
139 |     (atom
140 |      (handle-unknown-message message))
141 |     ((cons keyword)
142 |      (alexandria:destructuring-case message
143 |        ((:eof)
144 |         (warn "Received EOF from client."))
145 |        ((:join)
146 |         (bt:with-lock-held (**workers-lock**)
147 |           (cond
148 |             ((> **max-workers** (length **workers**))
149 |              (let ((new-id (make-id)))
150 |                (push
151 |                 (make-instance 'worker-status
152 |                                :id new-id
153 |                                :last-heartbeat (get-internal-real-time))
154 |                 **workers**)
155 |                (write-form stream `(:welcome :id ,new-id))))
156 |             (t
157 |              (write-form stream '(:no-vacancy))))))
158 |        ((:status)
159 |         nil)
160 |        ((t &rest rest)
161 |         (declare (ignore rest))
162 |         (handle-unknown-message message))))
163 |     (t
164 |      (let ((from (car message)))
165 |        (when (check-worker from)
166 |          (alexandria:destructuring-case (cdr message)
167 |            ((:ping)
168 |             (format t "Ping from client ~D~%" from)
169 |             (write-form stream '(:pong))
170 |             (finish-output stream))
171 |            ((:heartbeat)
172 |             (format t "Heartbeat from worker #~D~%" from))
173 |            ((t &rest rest)
174 |             (declare (ignore rest))
175 |             (handle-unknown-message message))))))))
176 | 
177 | ;;; CLI
178 | 
179 | (defun cli-options ()
180 |   (list
181 |    (clingon:make-option
182 |     :integer
183 |     :required t
184 |     :description "maximum number of workers"
185 |     :long-name "max-workers"
186 |     :key :max-workers)
187 |    (clingon:make-option
188 |     :list
189 |     :description "lifelines to restart work with"
190 |     :long-name "lifeline"
191 |     :key :lifelines)))
192 | 
193 | (defun cli-command ()
194 |   (clingon:make-command
195 |    :name "hypergeometrica-manager"
196 |    :options (cli-options)
197 |    :handler #'cli-handler))
198 | 
199 | (defun cli-handler (cmd)
200 |   (let ((pid (sb-posix:getpid)))
201 |     (setf **max-workers** (clingon:getopt cmd ':max-workers)
202 |           **socket** (make-instance 'sb-bsd-sockets:local-socket
203 |                                     :type :stream)
204 |           **socket-node** (merge-pathnames
205 |                            (format nil "manager-~D" pid)
206 |                            "/tmp/"))
207 |     (sb-bsd-sockets:socket-bind **socket** (namestring **socket-node**))
208 |     (sb-bsd-sockets:socket-listen **socket** 8)
209 |     (start-heartbeat-checker-thread)
210 |     (revive-lifelines (clingon:getopt cmd ':lifelines))
211 |     (start-socket-thread)
212 |     (format t "Started socket on: ~A~%" **socket-node**)
213 |     (format t "Waiting for socket thread to end.~%")
214 |     (finish-output)
215 |     (bt:join-thread **socket-thread**)))
216 | 
217 | (defun main ()
218 |   (sb-ext:disable-debugger)
219 |   (let ((app (cli-command)))
220 |     (clingon:run app)))
221 | 


--------------------------------------------------------------------------------
/src-manager/package.lisp:
--------------------------------------------------------------------------------
1 | ;;;; Copyright (c) 2024 Robert Smith
2 | 
3 | (defpackage #:hypergeometrica-manager
4 |   (:use #:cl)
5 |   (:export #:main))
6 | 


--------------------------------------------------------------------------------
/src-worker/main.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; Copyright (c) 2024 Robert Smith
  2 | 
  3 | (in-package #:hypergeometrica-worker)
  4 | 
  5 | (sb-ext:defglobal **id** nil)
  6 | (sb-ext:defglobal **manager-address** nil)
  7 | (sb-ext:defglobal **manager-lock** (bt:make-lock))
  8 | 
  9 | (defun cli-options ()
 10 |   (list
 11 |    (clingon:make-option
 12 |     :string
 13 |     :description "Address of open socket."
 14 |     :short-name #\s
 15 |     :long-name "socket-address"
 16 |     :key :socket-address)
 17 |    ))
 18 | 
 19 | (defun cli-command ()
 20 |   (clingon:make-command
 21 |    :name "hypergeometrica-worker"
 22 |    :options (cli-options)
 23 |    :handler #'cli-handler))
 24 | 
 25 | (sb-ext:defglobal **communication-error-occured** nil)
 26 | 
 27 | (defun await-manager-reconnection ()
 28 |   (format t "Awaiting reconnection to a manager...")
 29 |   (let* ((lifeline (make-instance 'sb-bsd-sockets:local-socket :type :stream))
 30 |          (lifeline-node (merge-pathnames
 31 |                          (format nil "worker-~A-lifeline" **id**)
 32 |                          "/tmp/")))
 33 |     (sb-bsd-sockets:socket-bind lifeline (namestring lifeline-node))
 34 |     (sb-bsd-sockets:socket-listen lifeline 8)
 35 |     (format t "~2%!!! LIFELINE !!! Provide manager node to negotiate with to: ~S~2%" (namestring lifeline-node))
 36 |     (finish-output)
 37 |     (loop :named :REVIVED :do
 38 |       (let* ((client (sb-bsd-sockets:socket-accept lifeline))
 39 |              (stream (sb-bsd-sockets:socket-make-stream
 40 |                       client
 41 |                       :input t
 42 |                       :output t
 43 |                       :element-type 'character
 44 |                       :buffering :line))
 45 |              (message (read-form stream)))
 46 |         (format t "Lifeline message received: ~S~%" message)
 47 |         (sb-bsd-sockets:socket-close client)
 48 |         (when (typep message '(cons (member :revive)))
 49 |           (alexandria:destructuring-case message
 50 |             ((:revive &key id socket)
 51 |              (setf **id** id
 52 |                    **manager-address** (namestring socket))
 53 |              ;; TODO: We need to make sure we the manager knows what
 54 |              ;; state we are in before we carry on.
 55 |              (return-from :REVIVED))))))
 56 |     (sb-bsd-sockets:socket-close lifeline)))
 57 | 
 58 | (defmacro with-manager-io ((stream) &body body)
 59 |   (alexandria:with-gensyms (manager)
 60 |     `(bt:with-lock-held (**manager-lock**)
 61 |        (let ((,manager (make-instance 'sb-bsd-sockets:local-socket
 62 |                                       :type :stream)))
 63 |          (unwind-protect
 64 |               (tagbody
 65 |                :RETRY
 66 |                  (handler-case
 67 |                      (progn
 68 |                        (sb-bsd-sockets:socket-connect ,manager **manager-address**)
 69 |                        (let ((,stream (sb-bsd-sockets:socket-make-stream
 70 |                                        ,manager
 71 |                                        :element-type 'character
 72 |                                        :input t
 73 |                                        :output t
 74 |                                        :buffering ':line)))
 75 |                          ,@body))
 76 |                    (sb-bsd-sockets:socket-error (c)
 77 |                      (setf **comunication-error-occured** c)
 78 |                      (await-manager-reconnection)
 79 |                      (go :RETRY))))
 80 |            (when (sb-bsd-sockets:socket-open-p ,manager)
 81 |              (sb-bsd-sockets:socket-close ,manager)))))))
 82 | 
 83 | (defun write-form (stream form)
 84 |   (prin1 form stream)
 85 |   (terpri stream)
 86 |   (finish-output stream))
 87 | 
 88 | (defun read-form (stream)
 89 |   (read stream nil '(:eof)))
 90 | 
 91 | ;;; Heartbeat
 92 | 
 93 | (defun make-heartbeat (&optional (period 5))
 94 |   (lambda ()
 95 |     (loop
 96 |       (with-manager-io (stream)
 97 |         (write-form stream `(,**id** :heartbeat))
 98 |         (finish-output stream))
 99 |       (sleep period))))
100 | 
101 | (sb-ext:defglobal **heartbeat-thread** nil)
102 | (defun start-heartbeat-thread ()
103 |   (setf **heartbeat-thread** (bt:make-thread (make-heartbeat)
104 |                                              :name "Heartbeat Thread")))
105 | 
106 | ;;; Request ID
107 | 
108 | (defun request-id (stream)
109 |   (write-form stream '(:join))
110 |   (alexandria:destructuring-ecase (read-form stream)
111 |     ((:welcome &key id)
112 |      (format t "Got an ID: ~A~%" id)
113 |      id)
114 |     ((:no-vacancy)
115 |      (format t "No vacancy.~%")
116 |      nil)))
117 | 
118 | ;;; Main
119 | 
120 | (defun cli-handler (cmd)
121 |   (let ((manager-address (clingon:getopt cmd ':socket-address)))
122 |     ;; Set the manager address.
123 |     (unless (probe-file manager-address)
124 |       (error "Manager address ~A not found." manager-address))
125 |     (setf **manager-address** manager-address)
126 | 
127 |     ;; Get an ID.
128 |     (with-manager-io (stream)
129 |       (format t "Requesting ID.~%")
130 |       (let ((id? (request-id stream)))
131 |         (cond
132 |           (id?
133 |            (format t "Received ID: ~D~%" id?)
134 |            (setf **id** id?))
135 |           (t
136 |            (uiop:quit 1)))))
137 | 
138 |     ;; Start the heartbeat so we don't get kicked off.
139 |     (start-heartbeat-thread)
140 | 
141 |     ;; Request and dispatch work.
142 |     (with-manager-io (stream)
143 |       (format t "#~D connected to socket.~%" **id**)
144 |       (write-form stream `(,**id** :ping))
145 |       (format t "#~D: received ~S~%" **id** (read-form stream))
146 |       (finish-output))
147 |     (bt:join-thread **heartbeat-thread**)))
148 | 
149 | (defun main ()
150 |   (let ((app (cli-command)))
151 |     (clingon:run app)))
152 | 


--------------------------------------------------------------------------------
/src-worker/package.lisp:
--------------------------------------------------------------------------------
1 | ;;;; Copyright (c) 2024 Robert Smith
2 | 
3 | (defpackage #:hypergeometrica-worker
4 |   (:use #:cl)
5 |   (:export #:main))
6 | 


--------------------------------------------------------------------------------
/src/bbp.lisp:
--------------------------------------------------------------------------------
 1 | ;;; bbp.lisp
 2 | 
 3 | (in-package #:hypergeometrica)
 4 | 
 5 | ;;; Sketched out implementation of the BBP algorithm using normal Lisp
 6 | ;;; INTEGERs.
 7 | 
 8 | (defun modpow (a n m)                   ; EXPT-MOD without type restrictions
 9 |   (let ((result 1))
10 |     (loop
11 |       (when (oddp n)
12 |         (setf result (mod (* result a) m)))
13 |       (setf n (floor n 2))
14 |       (when (zerop n)
15 |         (return-from modpow result))
16 |       (setf a (mod (* a a) m)))))
17 | 
18 | (defun bbp (n &key (precision 14))
19 |   (let* ((bits (* 4 precision)))
20 |     (labels ((mask (x)
21 |                (ldb (byte bits 0) x))
22 |              (s (j n)
23 |                (let ((sum 0))
24 |                  (loop :for k :below n
25 |                        :for r := (+ j (* 8 k))
26 |                        :do (incf sum (floor
27 |                                       (ash (modpow 16 (- n k 1) r) bits)
28 |                                       r))
29 |                            (setf sum (mask sum)))
30 | 
31 |                  (loop :for k :from n
32 |                        :for i :from (1- precision) :downto 0
33 |                        :for d := (floor (expt 16 i)
34 |                                         (+ j (* 8 k)))
35 |                        :until (zerop d)
36 |                        :do (incf sum d))
37 |                  sum)))
38 |       (mask
39 |        (- (* 4 (s 1 n))
40 |           (* 2 (s 4 n))
41 |           (* 1 (s 5 n))
42 |           (* 1 (s 6 n)))))))
43 | 


--------------------------------------------------------------------------------
/src/binary-splitting.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; binary-splitting.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2012-2019, 2022-2023 Robert Smith
  4 | 
  5 | ;;; Canonical reference: https://www.ginac.de/CLN/binsplit.pdf
  6 | 
  7 | (in-package #:hypergeometrica)
  8 | 
  9 | ;;; Representation of a hypergeometric series
 10 | 
 11 | (defun one (n)
 12 |   (declare (type integer n)
 13 |            (ignore n))
 14 |   (integer-mpz 1 'mpz/ram))
 15 | 
 16 | (defun int (n)
 17 |   (integer-mpz n 'mpz/ram))
 18 | 
 19 | (defstruct (series (:predicate series?))
 20 |   "A representation of the series
 21 | 
 22 |        ===
 23 |        \    a(k)   p(0) p(1) ... p(k)
 24 |         >  ------ --------------------
 25 |        /    b(k)   q(0) q(1) ... q(k)
 26 |        ===
 27 |        k>=0
 28 | 
 29 | Each of the function a, b, p, and q are integer-valued.
 30 | "
 31 |   (a #'one :type function :read-only t)
 32 |   (b #'one :type function :read-only t)
 33 |   (p #'one :type function :read-only t)
 34 |   (q #'one :type function :read-only t))
 35 | 
 36 | (defmethod print-object ((obj series) stream)
 37 |   (print-unreadable-object (obj stream :type t :identity t)))
 38 | 
 39 | #+ignore
 40 | (defun product (f lower upper)
 41 |   "Compute the product
 42 | 
 43 |   F(LOWER) * F(LOWER + 1) * ... * F(UPPER - 1)."
 44 |   (declare (type fixnum lower upper))
 45 |   (labels ((rec (current accum)
 46 |              (if (>= current upper)
 47 |                  accum
 48 |                  (rec (1+ current)
 49 |                       (* accum (funcall f current))))))
 50 |     (rec lower 1)))
 51 | 
 52 | #+ignore
 53 | (defun sum-series-direct (series lower upper)
 54 |   (declare (type series series)
 55 |            (type fixnum lower upper))
 56 |   (assert (> upper lower))
 57 |   (loop :with a := (series-a series)
 58 |         :with b := (series-b series)
 59 |         :with p := (series-p series)
 60 |         :with q := (series-q series)
 61 |         :for n :from lower :below upper
 62 |         :sum (/ (* (funcall a n) (product p lower n))
 63 |                 (* (funcall b n) (product q lower n)))))
 64 | 
 65 | 
 66 | ;;; Partial sums
 67 | 
 68 | (defstruct (partial (:predicate partial?))
 69 |   "A partial sum of a series for LOWER <= k < UPPER."
 70 |   (lower nil :type fixnum :read-only t)
 71 |   (upper nil :type fixnum :read-only t)
 72 |   (p nil :type mpz/ram :read-only t)
 73 |   (q nil :type mpz/ram :read-only t)
 74 |   (b nil :type mpz/ram :read-only t)
 75 |   (r nil :type mpz/ram :read-only t))
 76 | 
 77 | (defmethod print-object ((obj partial) stream)
 78 |   (print-unreadable-object (obj stream :type t :identity nil)
 79 |     (format stream "[~D, ~D)"
 80 |             (partial-lower obj)
 81 |             (partial-upper obj))))
 82 | 
 83 | (defun partial-numerator (x)
 84 |   (declare (type partial x))
 85 |   (partial-r x))
 86 | 
 87 | (defun partial-denominator (x)
 88 |   (declare (type partial x))
 89 |   (mpz-* (partial-b x) (partial-q x)))
 90 | 
 91 | ;;; for debugging
 92 | (defun partial-digits (x digits)
 93 |   (declare (type partial x))
 94 |   (values (round (* (expt 10 digits) (mpz-integer (partial-numerator x)))
 95 |                  (mpz-integer (partial-denominator x)))))
 96 | 
 97 | 
 98 | ;;; Binary splitting
 99 | 
100 | (defun binary-split-base-case=1 (series lower &optional (upper (1+ lower)))
101 |   (declare (type series series)
102 |            (type fixnum lower upper))
103 |   (assert (= 1 (- upper lower)))
104 |   (let ((p (funcall (series-p series) lower)))
105 |     (make-partial :lower lower
106 |                   :upper upper
107 |                   :p p
108 |                   :q (funcall (series-q series) lower)
109 |                   :b (funcall (series-b series) lower)
110 |                   :r (mpz-* p (funcall (series-a series) lower)))))
111 | 
112 | (defun combine (left right)
113 |   (declare (type partial left right))
114 |   (assert (= (partial-upper left) (partial-lower right)))
115 |   (make-partial :lower (partial-lower left)
116 |                 :upper (partial-upper right)
117 |                 :p (mpz-* (partial-p left) (partial-p right))
118 |                 :q (mpz-* (partial-q left) (partial-q right))
119 |                 :b (mpz-* (partial-b left) (partial-b right))
120 |                 :r (mpz-+ (mpz-* (partial-b right) (mpz-* (partial-q right) (partial-r left)))
121 |                           (mpz-* (partial-b left)  (mpz-* (partial-p left)  (partial-r right))))))
122 | 
123 | (defun binary-split (series lower upper)
124 |   (declare (type series series)
125 |            (type fixnum lower upper))
126 |   (assert (> upper lower))
127 |   (let ((delta (- upper lower)))
128 |     (cond
129 |       ((= 1 delta) (binary-split-base-case=1 series lower upper))
130 |       (t (let* ((m     (floor (+ lower upper) 2))
131 |                 (left  (binary-split series lower m))
132 |                 (right (binary-split series m upper)))
133 |            (combine left right))))))
134 | 
135 | 
136 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Examples ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
137 | 
138 | ;;; Exponential eˣ
139 | 
140 | (defun make-exp-series (x)
141 |   (check-type x rational)
142 |   (let ((num (numerator x))
143 |         (den (denominator x)))
144 |     (make-series :a #'one
145 |                  :b #'one
146 |                  :p (lambda (n) (int (if (zerop n) 1 num)))
147 |                  :q (lambda (n) (int (if (zerop n) 1 (* n den)))))))
148 | 
149 | (defun make-e-series ()
150 |   (make-series :a #'one
151 |                :b #'one
152 |                :p #'one
153 |                :q (lambda (n) (int (if (zerop n) 1 n)))))
154 | 
155 | (defun compute-e (prec)
156 |   (let ((num-terms (+ 5 prec)))               ; way over-estimate
157 |     (partial-digits (binary-split (make-e-series) 0 num-terms) prec)))
158 | 
159 | ;;; Ramanujan's Series for pi
160 | 
161 | (defconstant +rama-decimals-per-term+ (log 96059301 10d0))
162 | (defconstant +rama-a+ 1103)
163 | (defconstant +rama-b+ 26390)
164 | (defconstant +rama-c+ 396)
165 | 
166 | (defun make-ramanujan-series ()
167 |   (flet ((a (n)
168 |            (+ +rama-a+ (* n +rama-b+)))
169 |          (p (n)
170 |            (if (zerop n)
171 |                1
172 |                ;; This is Horner's form of
173 |                ;; (2k - 1)*(4k - 3)*(4k - 1)
174 |                (+ -3 (* n (+ 22 (* n (+ -48 (* n 32))))))))
175 |          (q (n)
176 |            (if (zerop n)
177 |                1
178 |                (* (expt n 3)
179 |                   #.(/ (expt +rama-c+ 4) 8)))))
180 |     (make-series :a (alexandria:compose #'int #'a)
181 |                  :b #'one
182 |                  :p (alexandria:compose #'int #'p)
183 |                  :q (alexandria:compose #'int #'q))))
184 | 
185 | (defun compute-pi/ramanujan (prec)
186 |   (let* ((num-terms (floor (+ 2 (/ prec +rama-decimals-per-term+))))
187 |          (sqrt2 (isqrt (* 2 (expt 100 prec))))
188 |          (num (* 2 2))
189 |          (den (* (expt 99 2) sqrt2))
190 |          (comp (binary-split (make-ramanujan-series) 0 num-terms)))
191 |     (values (floor (* den (mpz-integer (partial-denominator comp)))
192 |                    (* num (mpz-integer (partial-numerator comp)))))))
193 | 
194 | 
195 | 
196 | 
197 | ;;; Catalan's Constant G
198 | ;;;
199 | ;;; This doesn't actually compute Catalan's constant G. It would
200 | ;;; compute G' such that
201 | ;;;
202 | ;;;          3        π
203 | ;;;     G = --- G' + --- log(2 + √3)
204 | ;;;          8        8
205 | ;;;
206 | (defun make-catalan-series ()
207 |   (make-series :a #'one
208 |                :b (lambda (n)
209 |                     (1+ (* 2 n)))
210 |                :p (lambda (n)
211 |                     (if (zerop n) 1 n))
212 |                :q (lambda (n)
213 |                     (if (zerop n)
214 |                         1
215 |                         (+ 2 (* 4 n))))))
216 | 
217 | 
218 | ;;; Apéry's Constant ζ(3)
219 | 
220 | (defun make-apery-series ()
221 |   (make-series :a (lambda (n)
222 |                     (+ 77 (* n (+ 250 (* n 205)))))
223 |                :b (lambda (n)
224 |                     (declare (ignore n))
225 |                     2)
226 |                :p (lambda (n)
227 |                     (if (zerop n)
228 |                         1
229 |                         (- (expt n 5))))
230 |                :q (lambda (n)
231 |                     (* 32 (expt (1+ (* 2 n)) 5)))))
232 | 


--------------------------------------------------------------------------------
/src/config.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; config.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | (defvar *verbose* nil)
 8 | (defvar *hypergeometrica-log-stream* *standard-output*)
 9 | 
10 | ;;; Enable parallelism in some routines. This could make it harder to
11 | ;;; debug or profile.
12 | 
13 | ;;; (push :hypergeometrica-parallel *features*)
14 | 
15 | #+ (and hypergeometrica-parallel lparallel)
16 | (unless (and (boundp 'lparallel:*kernel*)
17 |              (not (null lparallel:*kernel*)))
18 |   (setf lparallel:*kernel* (lparallel:make-kernel 8 :name "Hypergeometrica")))
19 | 
20 | 
21 | ;;; Enable assembly intrinsics.
22 | 
23 | ;; N.B.: Intrinsic functions are still compiled, even if they're not
24 | ;; used by Hypergeometrica.
25 | #+(and #:disable sbcl (or x86-64))
26 | (push :hypergeometrica-intrinsics *features*)
27 | 
28 | 
29 | ;;; Enable cheap-ish safety checks. This may slow down code, but help debug.
30 | 
31 | (push :hypergeometrica-safe *features*)
32 | 
33 | 
34 | ;;; Are you paranoid about the correctness of things? This is the
35 | ;;; feature for you.
36 | 
37 | (push :hypergeometrica-paranoid *features*)
38 | 
39 | 
40 | ;;; Enable explicit and ruthless initialization of objects. Don't
41 | ;;; trust "re-used" objects.
42 | 
43 | (push :hypergeometrica-hygiene *features*)
44 | 
45 | 
46 | ;;; Enable the use of floating point FFTs for smallish inputs.
47 | 
48 | ;; (push :hypergeometrica-floating-point *features*)
49 | 
50 | 
51 | ;;; Optimization qualities
52 | 
53 | ;; It is useful to change these when debugging.
54 | 
55 | (defparameter *optimize-extremely-safely* '(optimize (speed 0) safety debug (space 0) (compilation-speed 0)))
56 | 
57 | (defparameter *optimize-dangerously-fast* '(optimize speed (safety 0) (space 0) (space 0) (compilation-speed 0)))
58 | 
59 | 
60 | ;;; Storage constants
61 | 
62 | (defvar *maximum-file-size* (* 16 (expt 1024 3)) ; XXX: needed?
63 |   "The maximum size of a file in octets.")
64 | 
65 | (defvar *maximum-vector-size* (* 8 (expt 1024 2))
66 |   "The maximum size of a vector that can be stored in memory in octets.")
67 | 
68 | (defvar *default-file-directory* (uiop:ensure-directory-pathname "/tmp/"))
69 | 
70 | (defun hypergeometrica-work-directory ()
71 |   "Return the pathname of a directory where work files may be written to. This is controlled in order of priority by:
72 | 
73 |     Environment variable: HYPERGEOMETRICA_WORK
74 | 
75 |     Lisp variable: HYPERGEOMETRICA::*DEFAULT-FILE-DIRECTORY*
76 | "
77 |   (let ((env (uiop:getenv "HYPERGEOMETRICA_WORK")))
78 |     (if (null env)
79 |         *default-file-directory*
80 |         (uiop:ensure-directory-pathname env))))
81 | 


--------------------------------------------------------------------------------
/src/digit.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; digit.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | ;;; This file defines a DIGIT (the fundamental building block of an
  8 | ;;; arbitrary precision number) and associated operations.
  9 | 
 10 | (defconstant $digit-bits 64
 11 |   "The number of bits in a DIGIT.")
 12 | 
 13 | (defconstant $base (expt 2 $digit-bits)
 14 |   "The radix of all arbitrary precision arithmetic operations.")
 15 | 
 16 | ;; These two have the same value, but are used differently. The former
 17 | ;; is a numerical quantity. The latter is for bit twiddling.
 18 | (defconstant $max-digit (1- $base)
 19 |   "The maximum digit in radix $BASE.")
 20 | 
 21 | (defconstant $digit-ones (ldb (byte $digit-bits 0) -1)
 22 |   "A digit consisting of all binary ones.")
 23 | 
 24 | (defconstant $largest-power-of-10-exponent (floor (log $base 10.0d0))
 25 |   "The largest value n such that (< (expt 10 n) $max-digit).")
 26 | 
 27 | (defconstant $largest-power-of-10 (expt 10 $largest-power-of-10-exponent)
 28 |   "The largest power of 10 that can fit in a DIGIT. This is (expt 10 $largest-power-of-10-exponent).")
 29 | 
 30 | (deftype digit ()
 31 |   "A digit in an arbitrary precision number."
 32 |   `(unsigned-byte ,$digit-bits))
 33 | 
 34 | (deftype sign ()
 35 |   "The sign of a number."
 36 |   '(member 1 -1))
 37 | 
 38 | (declaim (inline bytes-for-digits))
 39 | (defun bytes-for-digits (num-digits)
 40 |   "The number of bytes required to store NUM-DIGITS digits."
 41 |   (ceiling (* $digit-bits num-digits) 8))
 42 | 
 43 | (defmacro define-fx-op (op-name (base-op &rest args))
 44 |   `(progn
 45 |      (declaim (inline ,op-name))
 46 |      (defun ,op-name ,args
 47 |        (declare (type digit ,@args)
 48 |                 (optimize speed (safety 0) (debug 0) (space 0) (compilation-speed 0)))
 49 |        (the digit (mod (,base-op ,@args) ,$base)))))
 50 | 
 51 | (define-fx-op fx+ (+ a b))
 52 | (define-fx-op fx- (- a b))
 53 | (define-fx-op fx1+ (1+ a))
 54 | (define-fx-op fx1- (1- a))
 55 | (define-fx-op fxneg (- a))
 56 | (define-fx-op fx* (* a b))
 57 | (define-fx-op fx/ (floor a b))
 58 | 
 59 | #+hypergeometrica-intrinsics
 60 | (macrolet ((define-intrinsic (name args)
 61 |              `(defun ,name ,args
 62 |                 (,name ,@args))))
 63 |   (define-intrinsic %%ub64/2 (x))
 64 |   (define-intrinsic %%add64  (x y))
 65 |   (define-intrinsic %%add128 (alo ahi blo bhi))
 66 |   (define-intrinsic %%sub128 (alo ahi blo bhi))
 67 |   (define-intrinsic %%mul128 (x y))
 68 |   (define-intrinsic %%div128 (dividend-lo dividend-hi divisor)))
 69 | 
 70 | #+hypergeometrica-intrinsics
 71 | (declaim (inline ub64/2 add64 add128 sub128 mul128 div128))
 72 | 
 73 | (declaim (ftype (function ((unsigned-byte 64))
 74 |                           (values (unsigned-byte 64) &optional))
 75 |                 ub64/2))
 76 | (defun ub64/2 (x)
 77 |   #+hypergeometrica-intrinsics
 78 |   (%%ub64/2 x)
 79 |   #-hypergeometrica-intrinsics
 80 |   (ash x -1))
 81 | 
 82 | ;; 64 x 64 -> 128
 83 | (declaim (ftype (function ((unsigned-byte 64) (unsigned-byte 64))
 84 |                           (values (unsigned-byte 64) bit &optional))
 85 |                 add64))
 86 | (defun add64 (x y)
 87 |   #+hypergeometrica-intrinsics
 88 |   (%%add64 x y)
 89 |   #-hypergeometrica-intrinsics
 90 |   (let ((s (+ x y)))
 91 |     (values (ldb (byte 64 0) s)
 92 |             (ldb (byte 1 64) s))))
 93 | 
 94 | (declaim (ftype (function ((unsigned-byte 64) (unsigned-byte 64))
 95 |                           (values (unsigned-byte 64) (unsigned-byte 64) &optional))
 96 |                 mul128))
 97 | (defun mul128 (x y)
 98 |   #+hypergeometrica-intrinsics
 99 |   (%%mul128 x y)
100 |   #-hypergeometrica-intrinsics
101 |   (let ((r (* x y)))
102 |     (values (ldb (byte 64 0) r)
103 |             (ldb (byte 64 64) r))))
104 | 
105 | (declaim (ftype (function ((unsigned-byte 64) (unsigned-byte 64) (unsigned-byte 64))
106 |                           (values (unsigned-byte 64) (unsigned-byte 64) &optional))
107 |                 div128))
108 | (defun div128 (dividend-lo dividend-hi divisor)
109 |   #+hypergeometrica-intrinsics
110 |   (%%div128 dividend-lo dividend-hi divisor)
111 |   #-hypergeometrica-intrinsics
112 |   (truncate (dpb dividend-hi (byte 64 64) dividend-lo) divisor))
113 | 
114 | (declaim (ftype (function ((unsigned-byte 64) (unsigned-byte 64) (unsigned-byte 64) (unsigned-byte 64))
115 |                           (values (unsigned-byte 64) (unsigned-byte 64) &optional))
116 |                 add128 sub128))
117 | (defun add128 (alo ahi blo bhi)
118 |   "Compute
119 | 
120 |     (alo + ahi*2^64) + (blo + bhi*2^64)
121 | "
122 |   #+hypergeometrica-intrinsics
123 |   (%%add128 alo ahi blo bhi)
124 |   #-hypergeometrica-intrinsics
125 |   (let ((sum (+ alo blo (* (expt 2 64) (+ ahi bhi)))))
126 |     (values (ldb (byte 64 0) sum)
127 |             (ldb (byte 64 64) sum))))
128 | 
129 | (defun sub128 (alo ahi blo bhi)
130 |   "Compute
131 | 
132 |     (alo + ahi*2^64) - (blo + bhi*2^64)
133 | 
134 | for A >= B.
135 | "
136 |   #+hypergeometrica-intrinsics
137 |   (%%sub128 alo ahi blo bhi)
138 |   #-hypergeometrica-intrinsics
139 |   (let ((sum (+ (- alo blo) (* (expt 2 64) (- ahi bhi)))))
140 |     ;; FIXME Issue #22
141 |     ;;
142 |     ;; (assert (not (minusp sum)))
143 |     (values (ldb (byte 64 0) sum)
144 |             (ldb (byte 64 64) sum))))
145 | 
146 | 
147 | 
148 | (declaim (inline complement-digit))
149 | (defun complement-digit (n)
150 |   "Invert the bits of a digit N."
151 |   (declare (type digit n))
152 |   (logxor n $digit-ones))
153 | 


--------------------------------------------------------------------------------
/src/disk-vec.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; disk-vec.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2021 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | ;;; VECs that are stored to disk.
 8 | 
 9 | (defun generate-work-filename ()
10 |   (merge-pathnames (format nil "hypergeo-r~X-t~D"
11 |                            (random most-positive-fixnum)
12 |                            (get-internal-real-time))
13 |                    (hypergeometrica-work-directory)))
14 | 
15 | (defun write-zeros-to-file (filename n)
16 |   (assert (uiop:file-pathname-p filename))
17 |   (check-type n alexandria:array-length)
18 |   (with-open-file (s filename :direction ':output
19 |                               :if-does-not-exist ':create
20 |                               :if-exists ':supersede
21 |                               :element-type 'digit)
22 |     (loop :repeat n :do (write-byte 0 s)))
23 |   nil)
24 | 
25 | (defclass disk-vec ()
26 |   ((mmap-data :initarg :mmap-data
27 |               :accessor disk-vec.mmap-data)
28 |    (filename :initarg :filename
29 |              :reader disk-vec.filename)
30 |    (length   :initarg :length
31 |              :reader vec-digit-length
32 |              :writer disk-vec.set-length)))
33 | 
34 | (defmethod vec-digit-pointer ((vec disk-vec))
35 |   (mmap-data-pointer (disk-vec.mmap-data vec)))
36 | 
37 | (defun disk-vec-finalizer (disk-vec)
38 |   (let ((filename (disk-vec.filename disk-vec))
39 |         (mmap-data (disk-vec.mmap-data disk-vec)))
40 |     (lambda ()
41 |       (when (uiop:file-exists-p filename)
42 |         (munmap mmap-data)
43 |         (delete-file filename)))))
44 | 
45 | (defmethod free-vec ((vec disk-vec))
46 |   (funcall (disk-vec-finalizer vec))
47 |   nil)
48 | 
49 | (defmethod copy-vec ((vec disk-vec))
50 |   (let* ((n (vec-digit-length vec))
51 |          (new-vec (make-disk-vec n)))
52 |     (with-vecs (vec vec_ new-vec new-vec_)
53 |       (dotimes (i n new-vec)
54 |         (setf (vec_ i) (new-vec_ i))))))
55 | 
56 | ;;; VEC-REF implemented by default
57 | 
58 | (defmethod resize-vec-by ((vec disk-vec) n)
59 |   (let* ((old-length (vec-digit-length vec))
60 |          (new-length (+ n old-length))
61 |          (new-size-bytes (bytes-for-digits new-length)))
62 |     (cond
63 |       ((minusp new-length)
64 |        (error "can't resize to a negative size..."))
65 |       ((zerop n)
66 |        vec)
67 |       (t
68 |        (tg:cancel-finalization vec)
69 |        (munmap (disk-vec.mmap-data vec))
70 |        ;; TRUNCATE will write 0 to extra bytes.
71 |        (sb-posix:truncate (disk-vec.filename vec) new-size-bytes)
72 |        (setf (disk-vec.mmap-data vec) (mmap (disk-vec.filename vec) new-size-bytes))
73 |        (disk-vec.set-length new-length vec)
74 |        (when *auto-free-vecs*
75 |         (tg:finalize vec (disk-vec-finalizer vec)))
76 |        vec))))
77 | 
78 | (defun make-disk-vec (n)
79 |   (let ((filename (generate-work-filename)))
80 |     (dbg ";;; writing disk vec ~A" filename)
81 |     (write-zeros-to-file filename n)
82 |     (make-disk-vec-from-file n filename)))
83 | 
84 | (defun make-disk-vec-from-file (n filename)
85 |   (let ((vec (make-instance 'disk-vec
86 |                :mmap-data (mmap filename (bytes-for-digits n))
87 |                :filename filename
88 |                :length n)))
89 |       (when *auto-free-vecs*
90 |         (tg:finalize vec (disk-vec-finalizer vec)))
91 |       vec))
92 | 


--------------------------------------------------------------------------------
/src/divrem.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; divrem.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2022-2023 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | ;;; We do integer division using MPDs.
 8 | ;;;
 9 | ;;; N.B. We could also compute floor(a/b) as
10 | ;;;
11 | ;;;     a * ceiling(2^k/b) >> k
12 | ;;;
13 | ;;; where a*b <= 2^k. This would allow pure integer operations, and
14 | ;;; may also be more efficient, but it would lead to some code
15 | ;;; duplication.
16 | 
17 | (defun mpz-divrem (a b)
18 |     "Compute the quotient and remainder of the integer division of A and B.
19 | 
20 | If (Q, R) = mpz-divrem(A, B), then A = Q*B + R."
21 |   (let* ((abs-a (mpz-abs a))
22 |          (abs-b (mpz-abs b))
23 |          (q     (%mpz-div abs-a abs-b))
24 |          (r    (mpz-- abs-a (mpz-* abs-b q))))
25 |     ;; q may be off by one.
26 |     (cond
27 |       ((mpz-minusp r)
28 |        (setf q (mpz-1- q)
29 |              r (mpz-+ r abs-b)))
30 |       ((mpz->= r abs-b)
31 |        (setf q (mpz-1+ q)
32 |              r (mpz-- r abs-b))))
33 | 
34 |     ;; Fix sign
35 |     ;;
36 |     ;; TODO: Sign of remainder?
37 |     (when (minusp (* (sign a) (sign b)))
38 |       (mpz-negate! q))
39 | 
40 |     ;; Return
41 |     (values q r)))
42 | 
43 | (defun %mpz-div (a b)
44 |   (cond
45 |     ((mpz-zerop b)
46 |      (error "Cannot divide by zero."))
47 |     ((mpz-< a b)
48 |      (integer-mpz 0 'mpz/ram))
49 |     ((mpz-= a b)
50 |      (integer-mpz 1 'mpz/ram))
51 |     ;; TODO: add branch for size-1 / size-1 division
52 |     ;;
53 |     ;; TODO: more testing of the below algorithm...
54 |     (t
55 |      ;; Note that due to the use of Newton's method, we might actually
56 |      ;; do a division that has a non-terminating decimal expansion,
57 |      ;; and as such could be an under-estimate. We will fix this up in
58 |      ;; the call above.
59 |      (let* ((bits-needed (+ (* 2 $digit-bits) ; slack
60 |                             (- (mpz-bit-size a)
61 |                                (mpz-bit-size b))))
62 |             (a (mpz-mpd a))
63 |             (b (mpz-mpd b)))
64 |        (mpd-integer-part (mpd-* a (mpd-reciprocal b bits-needed)))))))
65 | 


--------------------------------------------------------------------------------
/src/fft-multiply.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; fft-multiply.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | (deftype complex-double ()
  8 |   '(complex double-float))
  9 | 
 10 | (deftype fft-work ()
 11 |   '(simple-array complex-double (*)))
 12 | 
 13 | (declaim (type (member 2 4 8 16 32)
 14 |                +fft-coefficient-bits+
 15 |                +fft-overhead-factor+))
 16 | (defconstant +fft-coefficient-bits+ 16)
 17 | (defconstant +fft-overhead-factor+ (/ 64 +fft-coefficient-bits+))
 18 | 
 19 | (defun make-fft-work (mpz result-size)
 20 |   (declare (type mpz mpz)
 21 |            (type alexandria:array-length result-size))
 22 |   (let* ((raw-mpz (raw-storage mpz))
 23 |          (size (mpz-size mpz))
 24 |          ;; FFTs will use digits that are 16-bits! So quadruple the size.
 25 |          (fft-length (* +fft-overhead-factor+ result-size))
 26 |          (work (make-array fft-length :element-type 'complex-double :initial-element #C(0.0d0 0.0d0))))
 27 |     (declare (type fft-work work))
 28 |     (dotimes (i size work)
 29 |       (let ((digit (aref raw-mpz i)))
 30 |         (dotimes (k +fft-overhead-factor+)
 31 |           (let* ((digit-part (ldb (byte +fft-coefficient-bits+ (* k +fft-coefficient-bits+)) digit)))
 32 |             (declare (type digit digit)
 33 |                      (type (unsigned-byte #.+fft-coefficient-bits+) digit-part))
 34 |             (setf (aref work (+ (* +fft-overhead-factor+ i) k)) (coerce digit-part 'complex-double))))))))
 35 | 
 36 | ;;;   length-bits + 2*transform-bits <= 50
 37 | (defconstant +fft-length-limit+ (2^ (- 50 (* 2 +fft-coefficient-bits+))))
 38 | 
 39 | (defun mpz-*/fft (x y)
 40 |   (declare (optimize speed))
 41 |   (let* ((size (+ (mpz-size x) (mpz-size y)))
 42 |          (length (least-power-of-two->= (1+ size))) ; Enough room for a carry
 43 |          (fft-x (make-fft-work x length))
 44 |          (fft-y (make-fft-work y length))
 45 |          (result (make-storage length)) 
 46 |          (report-time (let ((start-time (get-internal-real-time)))
 47 |                         (lambda ()
 48 |                           (when *verbose*
 49 |                             (format t " ~D ms~%" (round (* 1000 (- (get-internal-real-time) start-time)) internal-time-units-per-second))
 50 |                             (setf start-time (get-internal-real-time))
 51 |                             (finish-output))))))
 52 |     (declare (type fft-work fft-x fft-y)
 53 |              (type storage result)
 54 |              (type alexandria:array-length size length))
 55 |     ;; The double-float mantissa is 52 bits, and we want 2 bits for
 56 |     ;; correct rounding, leaving us a limit of 50 bits.
 57 |     (when (> length +fft-length-limit+)
 58 |       (error "Transform of length ~D will overflow a 52-bit mantissa. Sorry." length))
 59 |     (when *verbose*
 60 |       (format t "~&Size: ~D (approx ~D decimal~:P)~%"
 61 |               size
 62 |               (round (* size $digit-bits)
 63 |                      (log 10.0d0 2.00)))
 64 |       (format t "Transform length (~D-bit~:P): ~D (x2 = ~D MiB)~%"
 65 |               +fft-coefficient-bits+
 66 |               #1=(* +fft-overhead-factor+ length)
 67 |               (round (/ (* 128 #1#) 8 1024 1024)))
 68 | 
 69 |       (format t "Forward..."))
 70 |     (with-parallel-work ()
 71 |       ;; These also work with
 72 |       ;;
 73 |       ;;   (dif-forward fft-x)
 74 |       ;;   (dif-forward fft-y)
 75 |       (with-task ()
 76 |         (napa-fft:fft fft-x :dst fft-x :in-order nil))
 77 |       (with-task ()
 78 |         (napa-fft:fft fft-y :dst fft-y :in-order nil)))
 79 |     (funcall report-time)
 80 | 
 81 |     ;; Pointwise multiply. The NTT work for X is mutated.
 82 |     (when *verbose*
 83 |       (format t "Pointwise multiply..."))
 84 |     (#+hypergeometrica-parallel lparallel:pdotimes
 85 |      #-hypergeometrica-parallel dotimes (i (* 4 length))
 86 |       (setf (aref fft-x i) (* (aref fft-x i) (aref fft-y i))))
 87 |     (funcall report-time)
 88 | 
 89 |     ;; Inverse transform
 90 |     (when *verbose*
 91 |       (format t "Reverse..."))
 92 |     ;; This also works with
 93 |     ;;
 94 |     ;;   (dit-reverse fft-x)
 95 |     (napa-fft:ifft fft-x :dst fft-x :in-order nil)
 96 |     (funcall report-time)
 97 |     
 98 |     (when *verbose*
 99 |       (loop :for z :across fft-x
100 |             :maximize (realpart z) :into re
101 |             :maximize (imagpart z) :into im
102 |             :maximize (abs z)      :into a
103 |             :finally (progn
104 |                        (format t "theory  = ~A~%" (* length (expt (1- (2^ 16)) 2)))
105 |                        (format t "max(re) = ~A~%" re)
106 |                        (format t "max(im) = ~A~%" im)
107 |                        (format t "max(ab) = ~A~%" a))))
108 | 
109 |     ;; Unpack the result. This is effectively done by doing something
110 |     ;; like:
111 |     ;;
112 |     ;;     extract(X, 0 == i mod (+fft-overhead-factor+))) << 0 * (+fft-coefficient-bits+)
113 |     ;;   + extract(X, 1 == i mod (+fft-overhead-factor+)) << 1 * (+fft-coefficient-bits+)
114 |     ;;   + extract(X, 2 == i mod (+fft-overhead-factor+)) << 2 * (+fft-coefficient-bits+)
115 |     ;;   + extract(X, 3 == i mod (+fft-overhead-factor+)) << 3 * (+fft-coefficient-bits+)
116 |     ;;   + ...
117 |     ;;
118 |     ;; We use the addition routine to take care of carries for us.
119 |     (when *verbose*
120 |       (format t "Unpacking..."))
121 |     (let ((temp (raw-storage-of-storage (make-storage (length result)))))
122 |       (declare (type raw-storage temp))
123 |       ;; First, copy all of the 0 (mod 4) indexed elements into the
124 |       ;; result array.
125 |       ;;
126 |       ;; These coefficients are designed to make the full use of the
127 |       ;; floating point mantissa So we unpack into 64-bit words.
128 |       ;;
129 |       ;; We don't lift this LET out because resizing may occur.
130 |       (let ((raw (raw-storage-of-storage result)))
131 |         (#+hypergeometrica-parallel lparallel:pdotimes
132 |          #-hypergeometrica-parallel dotimes (i length)
133 |          (let ((coef (aref fft-x (* +fft-overhead-factor+ i))))
134 |            (declare (type complex-double coef))
135 |            #+hypergeometrica-safe
136 |            (unless (< (imagpart coef) 5.0d0)
137 |              (error "Bad imaginary part x[~D]: ~A" (* +fft-overhead-factor+ i) coef))
138 |            (let ((re-coef (sb-ext:truly-the (signed-byte 64) (round (realpart coef)))))
139 |              (declare (type (unsigned-byte 64) re-coef))
140 |              (setf (aref raw i) re-coef)))))
141 |       ;; Now we copy all of the other elements and add them into our array.
142 |       (do-range (k 1 +fft-overhead-factor+)
143 |         (declare (type (integer 1 #.+fft-overhead-factor+) k))
144 |         (when (> k 1)
145 |           (fill temp 0))
146 | 
147 |         (let* ((hi-bits (* +fft-coefficient-bits+ k))
148 |                (lo-bits (- 64 hi-bits)))
149 |           (declare (type (integer (0) (64)) lo-bits hi-bits))
150 |           (#+hypergeometrica-parallel lparallel:pdotimes
151 |            #-hypergeometrica-parallel dotimes (i size)
152 |            (declare (type alexandria:array-index i))
153 |            (let ((coef (aref fft-x (+ k (sb-ext:truly-the alexandria:array-index (* +fft-overhead-factor+ i))))))
154 |              (declare (type complex-double coef))
155 |              #+hypergeometrica-safe
156 |              (unless (< (imagpart coef) 5.0d0)
157 |                (error "Bad imaginary part x[~D]: ~A"
158 |                       (+ k (* +fft-overhead-factor+ i))
159 |                       (imagpart coef))) 
160 |              (let* ((re-coef (sb-ext:truly-the (signed-byte 64) (round (realpart coef))))
161 |                     (lo (ldb (byte lo-bits 0) re-coef))
162 |                     (hi (ldb (byte hi-bits lo-bits) re-coef)))
163 |                (declare (type (unsigned-byte 64) re-coef)
164 |                         (type (unsigned-byte 64) lo hi))
165 |                ;; SBCL wouldn't optimize DPB because it didn't believe
166 |                ;; me.
167 |                (setf (aref temp i)      (logior (aref temp i)
168 |                                                 (sb-ext:truly-the digit (ash lo hi-bits)))
169 |                      (aref temp (1+ i)) hi)))))
170 |         ;; Add them together
171 |         (%add-storages/unsafe result (raw-storage-of-storage result)
172 |                               (length (raw-storage-of-storage result))
173 |                               temp
174 |                               (length temp))))
175 |     (funcall report-time)
176 |     (make-mpz (* (sign x) (sign y)) result)))
177 | 


--------------------------------------------------------------------------------
/src/fixed-width-arithmetic.lisp:
--------------------------------------------------------------------------------
 1 | ;;; fixed-width-arithmetic.lisp
 2 | ;;;
 3 | ;;; Copyright (c) 2014-2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | ;;;;;;;;;;;;;;;;;;;;;;; Fixed-Width Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;
 8 | 
 9 | (declaim (inline split-byte))
10 | (defun split-byte (x bits)
11 |   "Split the non-negative integer X into two values X0 and X1 such that
12 | 
13 |     X = X1 << bits + X0."
14 |   (values (ldb (byte bits 0) x)
15 |           (ash x (- bits))))
16 | 
17 | (declaim (inline join-bytes))
18 | (defun join-bytes (x0 x1 bits)
19 |   "Join the bytes X0 and X1 into a value X such that
20 | 
21 |     X = X1 << bits + X0.
22 | 
23 | Ideally BITS is greater than the size of X0."
24 |   (+ x0 (ash x1 bits)))
25 | 
26 | (defun fixed-width-add (a b width &aux (width/2 (ash width -1)))
27 |   "Add the numbers A and B of width no more than WIDTH bits, using temporary values whose width do not exceed WIDTH bits.
28 | 
29 | Return two values S0 and S1 such that
30 | 
31 |     A+B = S0 + S1 << WIDTH."
32 |   (multiple-value-bind (a0 a1) (split-byte a width/2)
33 |     (multiple-value-bind (b0 b1) (split-byte b width/2)
34 |       (multiple-value-bind (low carry) (split-byte (+ a0 b0) width/2)
35 |         (multiple-value-bind (high carry) (split-byte (+ carry a1 b1) width/2)
36 |           (values (join-bytes low high width/2) carry))))))
37 | 
38 | (defun fixed-width-multiply (a b width &aux (width/2 (ash width -1)))
39 |   "Multiply the numbers A and B of width no more than WIDTH bits, using temporary values whose width do not wxceed WIDTH bits.
40 | 
41 | Return two values P0 and P1 such that
42 | 
43 |     A*B = P0 + P1 << WIDTH."
44 |   ;; Split operands into half-width components.
45 |   (multiple-value-bind (a0 a1) (split-byte a width/2)
46 |     (multiple-value-bind (b0 b1) (split-byte b width/2)
47 |       ;; Compute partial products. If W = 2^WIDTH and W' = W/2, then
48 |       ;;
49 |       ;;   A   = A0 + A1*W'
50 |       ;;   B   = B0 + B1*W'
51 |       ;;
52 |       ;;   A*B = (A0 + A1*W')*(B0 + B1*W')
53 |       ;;       = A0*B0 + (A0*B1 + A1*B0)*W' + A1*B1*W
54 |       ;;
55 |       ;; Each of these sub-A*B products are of width WIDTH, and are
56 |       ;; broken into half-width components as above, except for the
57 |       ;; product C3 = A1*B1.
58 |       (multiple-value-bind (c0-lo c0-hi) (split-byte (* a0 b0) width/2)
59 |         (multiple-value-bind (c1a-lo c1a-hi) (split-byte (* a0 b1) width/2)
60 |           (multiple-value-bind (c1b-lo c1b-hi) (split-byte (* a1 b0) width/2)
61 |             (let ((c3 (* a1 b1)))
62 |               ;; Compute columns and carries as in longhand
63 |               ;; multiplication. Each column tracks WIDTH/2 bits.
64 |               ;;
65 |               ;; Column 0   = C0-LO
66 |               ;; Column 1   = C0-HI + C1A-LO + C1B-LO
67 |               ;; Column 2,3 = C1A-HI + C1B-HI + C3 + COL1-CARRY
68 |               (multiple-value-bind (col11 col1-carry1) (fixed-width-add c1a-lo c1b-lo width/2)
69 |                 (multiple-value-bind (col1 col1-carry2) (fixed-width-add col11 c0-hi width/2)
70 |                   (let ((col1-carry (+ col1-carry1 col1-carry2)))
71 |                     (values (join-bytes c0-lo col1 width/2)
72 |                             (+ c1a-hi
73 |                                c1b-hi
74 |                                c3
75 |                                col1-carry))))))))))))
76 | 


--------------------------------------------------------------------------------
/src/logging.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; logging.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2022 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | (defun dbg (control &rest args)
 8 |   (when (boundp '*hypergeometrica-log-stream*)
 9 |     (fresh-line *hypergeometrica-log-stream*)
10 |     (format *hypergeometrica-log-stream* control args)
11 |     (terpri *hypergeometrica-log-stream*)
12 |     nil))
13 | 


--------------------------------------------------------------------------------
/src/math-utilities.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; math-utilities.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | (defmacro do-range ((var from to &optional result) &body body)
 8 |   (alexandria:once-only (from to)
 9 |     `(do ((,var ,from (1+ ,var)))
10 |          ((= ,var ,to) ,result)
11 |        ,@body)))
12 | 
13 | (defun power-of-two-p (n)
14 |   "Is N a power-of-two?"
15 |   (and (plusp n)
16 |        (zerop (logand n (1- n)))))
17 | 
18 | (defun next-power-of-two (n)
19 |   "Find the minimum K such that N <= 2^K."
20 |   (if (power-of-two-p n)
21 |       (1- (integer-length n))
22 |       (integer-length n)))
23 | 
24 | (defun least-power-of-two->= (n)
25 |   "What is the least power-of-two greater than or equal to N?"
26 |   (if (power-of-two-p n)
27 |       n
28 |       (ash 1 (integer-length n))))
29 | 
30 | (declaim (inline lg))
31 | (defun lg (n)
32 |   (1- (integer-length n)))
33 | 
34 | (declaim (inline 2^))
35 | (defun 2^ (n)
36 |   (expt 2 n))
37 | 
38 | (defun count-trailing-zeroes (n)
39 |   "Count the number of trailing zeros in the binary representation of the positive integer N."
40 |   (assert (plusp n))
41 |   (loop :for z :from 0
42 |         :for x := n :then (ash x -1)
43 |         :while (evenp x)
44 |         :finally (return z)))
45 | 
46 | (defun coprimep (a b)
47 |   "Are the integers A and B coprime?"
48 |   (= 1 (gcd a b)))
49 | 
50 | (defun pairwise-coprimep (seq)
51 |   "Are all integers in the sequence SEQ pairwise coprime?"
52 |   (etypecase seq
53 |     (list
54 |      (loop :for m1 :on seq :do
55 |        (loop :for m2 :in (rest m1) :do
56 |          (unless (coprimep (first m1) m2)
57 |            (return-from pairwise-coprimep nil)))))
58 |     (vector
59 |      (let ((len (length seq)))
60 |        (loop :for i :below len :do
61 |          (loop :for j :from (1+ i) :below len :do
62 |            (unless (coprimep (aref seq i) (aref seq j))
63 |              (return-from pairwise-coprimep nil)))))))
64 |   ;; Otherwise, everything must be coprime.
65 |   t)
66 | 


--------------------------------------------------------------------------------
/src/mmap.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; mmap.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2021 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | (defvar *mmaps* (tg:make-weak-hash-table :test 'eq :weakness ':key))
 8 | 
 9 | (defstruct (mmap-data (:copier nil)
10 |                       (:constructor mmap-data (pointer fd size)))
11 |   (pointer nil :read-only t)
12 |   (fd      nil :read-only t)
13 |   (size    nil :read-only t)
14 |   ;; XXX: maybe keep the path here too...
15 |   (unmapped nil))
16 | 
17 | (defmethod print-object ((o mmap-data) stream)
18 |   (print-unreadable-object (o stream :type t :identity nil)
19 |     (format stream "~D byte~:P" (mmap-data-size o))))
20 | 
21 | (defun mmap (path size)
22 |   ;; XXX: should we create the file first instead of relying on MMAP?
23 |   (let ((data (multiple-value-call #'mmap-data
24 |                 (mmap:mmap path :open '(:read :write :create :file-sync)
25 |                                 :protection '(:read :write)
26 |                                 :mmap '(:shared)
27 |                                 :size size))))
28 |     (unless (<= size (mmap-data-size data))
29 |       (munmap data)
30 |       (error "bad mmap"))
31 |     (madvise-random data)               ; We'll try this out...
32 |     (setf (gethash data *mmaps*) (get-universal-time))
33 |     (values data path)))
34 | 
35 | (defun munmap (data)
36 |   (check-type data mmap-data)
37 |   (unless (mmap-data-unmapped data)
38 |     (mmap:munmap (mmap-data-pointer data)
39 |                  (mmap-data-fd data)
40 |                  (mmap-data-size data))
41 |     (setf (mmap-data-unmapped data) t)
42 |     (remhash data *mmaps*)
43 |     nil))
44 | 
45 | (defun madvise-normal (data)
46 |   (check-type data mmap-data)
47 |   #+linux
48 |   (mmap:madvise (mmap-data-pointer data)
49 |                 (mmap-data-size data)
50 |                 ':normal)
51 |   nil)
52 | 
53 | (defun madvise-random (data)
54 |   (check-type data mmap-data)
55 |   #+linux
56 |   (mmap:madvise (mmap-data-pointer data)
57 |                 (mmap-data-size data)
58 |                 ':random)
59 |   nil)
60 | 
61 | (defun madvise-sequential (data)
62 |   (check-type data mmap-data)
63 |   #+linux
64 |   (mmap:madvise (mmap-data-pointer data)
65 |                 (mmap-data-size data)
66 |                 ':sequential)
67 |   nil)
68 | 


--------------------------------------------------------------------------------
/src/modular-arithmetic.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; modular-arithmetic.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | ;;;;;;;;;;;;;;;;;;;;;;;;; Modular Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;
  8 | 
  9 | ;;; These two variables are for efficiency reasons only. They're
 10 | ;;; changeable at compile-time, and affect the moduli that are found
 11 | ;;; in #'DEFAULT-MODULI.
 12 | (defconstant +modulus-bits+ 63
 13 |   "The number of bits that each modulus must have.")
 14 | (defconstant +lg-modulus+ (1- +modulus-bits+))
 15 | 
 16 | 
 17 | (defconstant $max-modulus (1- $base))
 18 | (deftype modulus ()
 19 |   `(integer 2 ,$max-modulus))
 20 | 
 21 | ;; These are NOTINLINE'd below.
 22 | (declaim (ftype (function (digit digit modulus) digit) m+ m- m*))
 23 | (declaim (inline  m+ m- m* m/ m1+ m1- negate-mod inv-mod expt-mod))
 24 | 
 25 | (defun m- (a b m)
 26 |   "Compute A - B (mod M).
 27 | 
 28 | Assumes 0 <= A,B < M."
 29 |   (declare (optimize speed (safety 0) (debug 0) (space 0)))
 30 |   #+hypergeometrica-paranoid
 31 |   (assert (and (<= 0 a (1- m))
 32 |                (<= 0 b (1- m))))
 33 |   (if (< a b)
 34 |       (the digit (+ (the digit (- m b)) a))
 35 |       (the digit (- a b))))
 36 | 
 37 | (defun m+ (a b m)
 38 |   "Compute A + B (mod M).
 39 | 
 40 | Assumes 0 <= A,B < M."
 41 |   (declare (optimize speed (safety 0) (debug 0) (space 0))
 42 |            (inline m-))
 43 |   #+hypergeometrica-paranoid
 44 |   (assert (and (<= 0 a (1- m))
 45 |                (<= 0 b (1- m))))
 46 |   (if (zerop b)
 47 |       a
 48 |       (m- a (the digit (- m b)) m)))
 49 | 
 50 | (defun m1+ (a m)
 51 |   "Increment A modulo M.
 52 | 
 53 | Assumes 0 <= A < M."
 54 |   #+hypergeometrica-paranoid
 55 |   (<= 0 a (1- m))
 56 |   (let ((a (1+ a)))
 57 |     (if (= a m)
 58 |         0
 59 |         a)))
 60 | 
 61 | (defun m1- (a m)
 62 |   "Decrement A modulo M.
 63 | 
 64 | Assumes 0 <= A < M."
 65 |   #+hypergeometrica-paranoid
 66 |   (<= 0 a (1- m))
 67 |   (if (zerop a)
 68 |       (1- m)
 69 |       (1- a)))
 70 | 
 71 | (defun negate-mod (a m)
 72 |   "Negate A modulo M.
 73 | 
 74 | Assumes 0 <= A < M."
 75 |   #+hypergeometrica-paranoid
 76 |   (<= 0 a (1- m))
 77 |   (if (zerop a)
 78 |       0
 79 |       (- m a)))
 80 | 
 81 | (defun m* (a b m)
 82 |   (declare (type modulus m)
 83 |            (type digit a b))
 84 |   #+sbcl
 85 |   (multiple-value-bind (lo hi) (mul128 a b)
 86 |     (the digit (nth-value 1 (div128 lo hi m))))
 87 |   #-sbcl
 88 |   (mod (* a b) m))
 89 | 
 90 | (defun inv-mod (x m)
 91 |   "Compute X^-1 (mod M)."
 92 |   (labels ((egcd (x b a u)
 93 |              (if (zerop x)
 94 |                  (if (= 1 b)
 95 |                      (mod a m)
 96 |                      (error "~D is not invertible in Z/~DZ" x m)) ; X divides M
 97 |                  (multiple-value-bind (q r) (floor b x)
 98 |                    (egcd r x u (- a (* u q)))))))
 99 |     (egcd x m 0 1)))
100 | 
101 | (defun inv-mod/unsafe (x m)
102 |   "Compute X^-1 (mod M). Assumes X is invertible."
103 |   (declare (type digit x)
104 |            (type modulus m)
105 |            (optimize speed (safety 0) (debug 0) (space 0)))
106 |   (labels ((egcd (x b a u)
107 |              (declare (type digit b a u x))
108 |              (if (zerop x)
109 |                  (mod a m)
110 |                  (multiple-value-bind (q r) (floor b x)
111 |                    (egcd r x u (the digit (- a (the digit (* u q)))))))))
112 |     (egcd x m 0 1)))
113 | 
114 | (defun m/ (a b m)
115 |   "Compute A / B = A * B^-1 (mod M)."
116 |   (m* a (inv-mod b m) m))
117 | 
118 | (defun expt-mod (a n m)
119 |   "Compute A ^ N (mod M) for integer N."
120 |   (declare (type digit a)
121 |            (type alexandria:non-negative-fixnum n)
122 |            (type modulus m)
123 |            (inline m*)
124 |            (optimize speed (safety 0) (debug 0) (space 0)))
125 |   (let ((result 1))
126 |     (declare (type digit result))
127 |     (loop
128 |       (when (oddp n)
129 |         (setf result (m* result a m)))
130 |       (setf n (floor n 2))
131 |       (when (zerop n)
132 |         (return-from expt-mod result))
133 |       (setf a (m* a a m)))))
134 | 
135 | (declaim (inline expt-mod/2^n))
136 | (defun expt-mod/2^n (a n m)
137 |   "Compute A ^ (2 ^ N) (mod M) for integer N."
138 |   (declare (type digit a)
139 |            (type alexandria:non-negative-fixnum n)
140 |            (type modulus m)
141 |            (inline m*)
142 |            (optimize speed (safety 0) (debug 0) (space 0)))
143 |   (dotimes (i n a)
144 |     (setf a (m* a a m))))
145 | 
146 | (defun expt-mod/safe (a n m)
147 |   "Compute A ^ N (mod M) for integer N."
148 |   (when (minusp n)
149 |     (setf a (inv-mod a m)
150 |           n (- n)))
151 | 
152 |   (let ((result 1))
153 |     (loop
154 |       (when (oddp n)
155 |         (setf result (mod (* result a) m)))
156 |       (setf n (floor n 2))
157 |       (when (zerop n)
158 |         (return-from expt-mod/safe result))
159 |       (setf a (mod (* a a) m)))))
160 | 
161 | (declaim (notinline m+ m- m* m/ m1+ m1- negate-mod inv-mod expt-mod))
162 | 
163 | ;;; Mega-Fast Multiplication
164 | 
165 | ;;; Has a precondition that  < 2^(64 + min-modulus-length)
166 | (declaim (inline mod128/fast m*/fast))
167 | (defun mod128/fast (lo hi m m-inv)
168 |   "Reduce
169 | 
170 |     lo + hi*2^64 (mod m)
171 | 
172 | using m and its inverse m-inv."
173 |   (declare (type (unsigned-byte 64) lo hi m m-inv)
174 |            (optimize speed (safety 0) debug (space 0)))
175 |   #+hypergeometrica-paranoid
176 |   (assert (< (+ lo (ash hi 64)) (ash 1 (+ 64 +lg-modulus+))))
177 |   (let* ((a1 (dpb (ldb (byte +lg-modulus+ 0) hi)
178 |                   (byte +lg-modulus+ (- 64 +lg-modulus+))
179 |                   (ash lo (- +lg-modulus+))))
180 |          (q (nth-value 1 (mul128 a1 m-inv))))
181 |     ;; r = r - q*m - m*2
182 |     (multiple-value-bind (slo shi) (mul128 q m)
183 |       (multiple-value-setq (lo hi) (sub128 lo hi slo shi))
184 |       ;; Note that 2*M will always fit in 64 bits.
185 |       (multiple-value-setq (lo hi) (sub128 lo hi (fx* 2 m) 0)))
186 | 
187 |     (multiple-value-setq (lo hi) (add128 lo hi (logand m (ub64/2 hi)) 0))
188 |     (fx+ lo (logand m hi))))
189 | 
190 | (defun m*/fast (a b m m-inv)
191 |   "Reduce
192 | 
193 |     a*b (mod m)
194 | 
195 | using m and its inverse m-inv."
196 |   (declare (type (unsigned-byte 64) a b m m-inv)
197 |            (optimize speed (safety 0) (debug 0) (space 0)))
198 |   #+hypergeometrica-paranoid
199 |   (assert (and (<= 0 a (1- m))
200 |                (<= 0 b (1- m))))
201 |   (multiple-value-bind (lo hi) (mul128 a b)
202 |     (mod128/fast lo hi m m-inv)))
203 | 
204 | ;;; B and its companion B-INV is intended to be calculated once, which
205 | ;;; affords you fast and easy modular arithmetic.
206 | (declaim (inline calc-b-inv))
207 | (defun calc-b-inv (b m)
208 |   (nth-value 0 (div128 0 b m)))
209 | 
210 | (declaim (inline m*/fast2-unreduced m*/fast2))
211 | (defun m*/fast2-unreduced (b b-inv a m)
212 |   (declare (type (unsigned-byte 64) a b m b-inv)
213 |            (optimize speed (safety 0) (debug 0) (space 0)))
214 |   #+hypergeometrica-paranoid
215 |   (assert (<= 0 b (1- m)))
216 |   (let* ((q (nth-value 1 (mul128 a b-inv))))
217 |     (declare (type (unsigned-byte 64) q))
218 |     (fx- (fx* a b) (fx* q m))))
219 | 
220 | (defun m*/fast2 (b b-inv a m)
221 |   (declare (type (unsigned-byte 64) a b m b-inv)
222 |            (optimize speed (safety 0) (debug 0) (space 0)))
223 |   (let ((r (m*/fast2-unreduced b b-inv a m)))
224 |     (declare (type (unsigned-byte 64) r))
225 |     (when (>= r m)
226 |       (decf r m))
227 |     
228 |     #+hypergeometrica-paranoid
229 |     (assert (= (mod r m) (mod (* a b) m)))
230 |     
231 |     r))
232 | 
233 | ;;; Garner's Algorithm to compute CRT
234 | 
235 | (defun garner (moduli values)
236 |   "Given a vector of moduli MODULI = m1, ..., mk and a vector of values VALUES = v1, ... vk, compute the value of w such that
237 | 
238 |     w = v1 (mod m1)
239 |     w = v2 (mod m2)
240 |     ...
241 |     w = vk (mod mk).
242 | 
243 | The return value w will be represented in a mixed-radix representation (w1, ..., wk) such that
244 | 
245 |     w = w1 + w2*(m1) + w3*(m1*m2) + ... + wk(m1*m2*...*m(k-1)).
246 | 
247 | W will be a vector.
248 | "
249 |   #+hypergeometrica-paranoid
250 |   (progn
251 |     ;; Same number of moduli and values
252 |     (assert (= (length moduli) (length values)))
253 |     ;; Each thing is a modulus
254 |     (assert (every (lambda (m) (typep m 'modulus)) moduli))
255 |     ;; Each thing is less than the modulus (not strictly required)
256 |     (assert (every (lambda (x m) (and (not (minusp x)) (< x m))) values moduli))
257 |     ;; Moduli must be pairwise coprime
258 |     (assert (pairwise-coprimep moduli)))
259 | 
260 |   (let* ((k (length moduli))
261 |          (x (make-array k)))
262 |     (labels ((M (i)   (aref moduli i))
263 |              (R (i j) (inv-mod (M i) (M j))))
264 |       (replace x values)                ; Initialize x[i]
265 |       (dotimes (i k x)
266 |         (dotimes (j i)
267 |           (setf (aref x i) (m* (R j i) (m- (aref x i) (aref x j) (M i)) (M i))))))))
268 | 
269 | ;;; used for testing
270 | (defun to-congruence-relations (moduli integer)
271 |   (let* ((k (length moduli))
272 |          (values (make-array k)))
273 |     (loop :for i :below k
274 |           :for m :across moduli
275 |           :do (setf (aref values i) (mod integer m)))
276 |     values))
277 | 
278 | ;;; used for testing
279 | (defun reconstruct-from-garner (moduli mixed-radix)
280 |   (assert (= (length moduli) (length mixed-radix)))
281 |   (loop :with ans := (aref mixed-radix 0)
282 |         :with prod := 1
283 |         :for i :from 1 :below (length moduli)
284 |         :for m :across moduli
285 |         :do (setf prod (* prod m)
286 |                   ans (+ ans (* prod (aref mixed-radix i))))
287 |         :finally (return ans)))
288 | 


--------------------------------------------------------------------------------
/src/mpd-reciprocal.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; mpd-reciprocal.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2022-2023 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | (defun estimate-reciprocal (a)
 8 |   (assert (not (mpz-zerop (mantissa a))))
 9 |   (let ((m (mantissa a))
10 |         (e (exponent a)))
11 |     (let* ((size (mpz-size m))
12 |            (msb (with-vec (m m_)
13 |                   (m_ (1- size)))))
14 |       ;; 1/(a*2^e)
15 |       ;; = (1/a)2^(-e)
16 |       ;; ~ sign(a) * (1/msb(|a|) * 1/2^(len(a))) * 2^(-e)
17 |       ;; ~ sign(a) * 1/msb(|a|) * 2^(-n-len(a))
18 |       (let ((recip-mantissa-estimate
19 |               (integer-mpz (round $base msb) 'mpz/ram)))
20 |         (when (mpz-minusp m)
21 |           (mpz-negate! recip-mantissa-estimate))
22 |         (make-instance 'mpd
23 |           :mantissa recip-mantissa-estimate
24 |           :exponent (+ (- e)
25 |                        (- size)))))))
26 | 
27 | (defun mpd-reciprocal (a bits)
28 |   ;; Newton's method on f(x) = 1/x - a.
29 |   ;;
30 |   ;; Ordinarily the iteration is:
31 |   ;;
32 |   ;;     x(n+1) = 2*x(n) - a*x(n)^2
33 |   ;;
34 |   ;; We rewrite it by introducing a term:
35 |   ;;
36 |   ;;     h(n) = 1 - a*x(n)
37 |   ;;
38 |   ;;     x(n+1) = x(n) + x(n)*h(n).
39 |   ;;
40 |   ;; Since h(n) tends to zero as n tends to infinity, it serves as a
41 |   ;; good estimate for the accuracy of x(n), allowing us to
42 |   ;; confidently terminate the iteration.
43 |   (loop :with digits := (max 1 (ceiling bits $digit-bits))
44 |         :with x := (estimate-reciprocal a)
45 |         :for iter-number :from 1
46 |         :do (let ((h (mpd-- (integer-mpd 1)
47 |                             (mpd-* a x))))
48 |               (multiple-value-bind (zeros zero?) (mpd-oom h)
49 |                 (setf zeros (- zeros))  ; Want a positive number.
50 |                 (cond
51 |                   ;; Did we converge exactly?
52 |                   (zero?
53 |                    (loop-finish))
54 |                   ;; Have we exceeded the desired precision?
55 |                   ((>= zeros digits)
56 |                    ;; TODO: Truncate?
57 |                    (loop-finish))
58 |                   ;; We haven't reached out desired precision. We'll
59 |                   ;; need to do another iteration.
60 |                   (t
61 |                    (when *verbose*
62 |                      (format t "~&MPD-RECIPROCAL: iter=~D : ~D/~D digit~:P, h = ~A, est size = ~A~%"
63 |                              iter-number
64 |                              zeros
65 |                              digits
66 |                              (mpd-mpfr h)
67 |                              (vec-digit-length (storage (mantissa x)))))
68 |                    
69 |                    ;; TODO: Should we truncate?
70 |                   
71 |                    ;; Perform the next iteration on x.
72 |                    (setf x (mpd-+ x (mpd-* x h)))))))
73 |         :finally (return x)))
74 | 
75 | 
76 | (defun test-recip (digits &optional (v 3))
77 |   (let ((bits (ceiling (* digits (log 10.0d0 2.0d0)))))
78 |     (sb-mpfr:set-precision (max (+ 8 bits) 250))
79 |     (let ((x (mpd-reciprocal (integer-mpd v) bits)))
80 |       (format t "~D digit~:P 1/~D = ~A~%" digits v (mpd-mpfr x)))))
81 | 
82 | 


--------------------------------------------------------------------------------
/src/mpd-sqrt.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; mpd-sqrt.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2022-2023 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | (defun estimate-inv-sqrt (a)
 8 |   (let ((m (mantissa a))
 9 |         (e (exponent a)))
10 |     (assert (not (mpz-minusp m)))
11 |     (let* ((size (mpz-size m))
12 |            (msb (with-vec (m m_)
13 |                   (m_ (1- size)))))
14 |       ;; 1/sqrt(a*B^e)
15 |       ;; = 1/sqrt(a) * 1/sqrt(B^e)
16 |       ;; = 1/sqrt(a) * B^(-e/2)
17 |       ;; ~ 1/sqrt(msb(a) * B^(len(a) - 1)) * B^(-e/2)
18 |       ;; = 1/sqrt(msb(a)) * 1/B^((len(a)-1)/2) * B^(-e/2)
19 |       ;; = 1/sqrt(msb(a)) * B^([1 - len(a) - e]/2)
20 |       ;; = B/sqrt(msb(a)) * B^([-1 - len(a) - e]/2)
21 |       ;;
22 |       ;; we make sure the B^*
23 |       ;; exponent is even so that we
24 |       ;; can divide by two
25 |       ;; perfectly.
26 |       (let* ((double-new-exp (+ -1 (- size) (- e)))
27 |              (inv-sqrt-mantissa-estimate
28 |                (if (oddp double-new-exp)
29 |                    (integer-mpz (round $base (sqrt (* $base msb))) 'mpz/ram)
30 |                    (integer-mpz (round $base (sqrt msb)) 'mpz/ram))))
31 |         (when (oddp double-new-exp)
32 |           (incf double-new-exp))
33 |         (assert (evenp double-new-exp))
34 |         (when (mpz-minusp m)
35 |           (mpz-negate! inv-sqrt-mantissa-estimate))
36 |         (make-instance 'mpd
37 |           :mantissa inv-sqrt-mantissa-estimate
38 |           :exponent (/ double-new-exp 2))))))
39 | 
40 | (defun mpd-inv-sqrt (a bits)
41 |   ;; Newton's iteration for f(x) = 1/x^2 - a.
42 |   ;;
43 |   ;; This is ordinarily
44 |   ;;
45 |   ;;     x(n+1) = (3*x(n) - A*x(n)^3)/2
46 |   ;;
47 |   ;; but as with the reciprocal formula, we write it
48 |   ;;
49 |   ;;     h(n) = 1 - a*x(n)^2
50 |   ;;
51 |   ;;     x(n+1) = [2*x(n) + x(n)*h(n)]/2
52 |   ;;
53 |   ;; and estimate the error by the cancellation of h(n).
54 |   (loop :with digits := (max 1 (ceiling bits $digit-bits))
55 |         :with x := (estimate-inv-sqrt a)
56 |         :for iter-number :from 1
57 |         :do (let ((h (mpd-- (integer-mpd 1)
58 |                             (mpd-* a (mpd-* x x)))))
59 |               (multiple-value-bind (zeros zero?) (mpd-oom h)
60 |                 (setf zeros (- zeros))  ; Want a positive number.
61 |                 (cond
62 |                   ;; Did we converge exactly?
63 |                   (zero?
64 |                    (loop-finish))
65 |                   ;; Have we exceeded the desired precision?
66 |                   ((>= zeros digits)
67 |                    ;; TODO: Truncate?
68 |                    (loop-finish))
69 |                   ;; We haven't reached out desired precision. We'll
70 |                   ;; need to do another iteration.
71 |                   (t
72 |                    (when *verbose*
73 |                      (format t "~&MPD-INV-SQRT: iter=~D : ~D/~D digit~:P, h = ~A, est size = ~A~%"
74 |                              iter-number
75 |                              zeros
76 |                              digits
77 |                              (mpd-mpfr h)
78 |                              (vec-digit-length (storage (mantissa x)))))
79 | 
80 |                    ;; TODO: Should we truncate?
81 |                    
82 |                    ;; Perform the next iteration.
83 |                    (setf x (mpd-+
84 |                             (mpd-+ x x)
85 |                             (mpd-* x h)))
86 |                    (mpd-half! x)))))
87 |         :finally (return x)))
88 | 
89 | (defun mpd-sqrt (a bits)
90 |   (mpd-* a (mpd-inv-sqrt a bits)))
91 | 
92 | (defun test-sqrt (digits &optional (v 2))
93 |   (let ((bits (ceiling (* digits (log 10.0d0 2.0d0)))))
94 |     (sb-mpfr:set-precision (max (+ 8 bits) 250))
95 |     (let ((x (mpd-sqrt (integer-mpd v) bits)))
96 |       (format t "~D digit~:P sqrt(~D) = ~A~%" digits v (mpd-mpfr x)))))
97 | 
98 | 


--------------------------------------------------------------------------------
/src/mpd.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; mpd.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2022 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | (defclass mpd ()
  8 |   ((mantissa :initarg :mantissa
  9 |              :accessor mantissa)
 10 |    (exponent :initarg :exponent
 11 |              :accessor exponent))
 12 |   (:documentation "The number MANTISSA * $BASE^EXPONENT. There are no assumptions about the sizes or lengths of these quantities."))
 13 | 
 14 | (defun mpz-mpd (mpz)
 15 |   (make-instance 'mpd
 16 |     :mantissa mpz
 17 |     :exponent 0))
 18 | 
 19 | (defun mpd-zerop (mpd)
 20 |   "The MPD equal to zero?"
 21 |   (mpz-zerop (mantissa mpd)))
 22 | 
 23 | (defun mpd-oom (mpd)
 24 |   ;; OOM = Order of Magnitude
 25 |   ;;
 26 |   ;; This is defined as:
 27 |   ;;
 28 |   ;;            { -n   if x = 0.000[n digit zeros]00y
 29 |   ;;   oom(x) = {  
 30 |   ;;            {  0   otherwise
 31 |   ;;
 32 |   ;; Second value is a boolean indicating whether the number is zero.
 33 |   ;;
 34 |   ;; TODO FIXME: Is this correct?
 35 |   (cond
 36 |     ((mpd-zerop mpd)
 37 |      (values 0 t))
 38 |     (t
 39 |      (let ((s (mpz-size (mantissa mpd)))
 40 |            (e (exponent mpd)))
 41 |        (values (min 0 (+ s e)) nil)))))
 42 | 
 43 | (defmethod sign ((a mpd))
 44 |   (sign (mantissa a)))
 45 | 
 46 | (defun integer-mpd (n &optional (mantissa-class 'mpz/ram))
 47 |   (mpz-mpd (integer-mpz n mantissa-class)))
 48 | 
 49 | (defun mpd-rational (mpd)
 50 |   (* (mpz-integer (mantissa mpd))
 51 |      (expt $base (exponent mpd))))
 52 | 
 53 | (defun mpd-mpfr (x)
 54 |   (sb-mpfr:coerce (mpd-rational x) 'sb-mpfr:mpfr-float))
 55 | 
 56 | (defun mpd-truncate! (mpd &key to-digits to-bits)
 57 |   "Truncate the MPD to have at least TO-DIGITS digits or TO-BITS bits."
 58 |   (let* ((m (mantissa mpd))
 59 |          (s (mpz-size m))
 60 |          (digits
 61 |            (cond
 62 |              (to-digits to-digits)
 63 |              (to-bits (ceiling to-bits $digit-bits))
 64 |              (t (error "need to specify TO-DIGITS or TO-BITS")))))
 65 |     (unless (>= digits s)
 66 |       (let ((d (- s digits)))
 67 |         (left-displace-vec (storage m) d)
 68 |         (resize-vec-by (storage m) (- d))
 69 |         (incf (exponent mpd) d)
 70 |         nil))))
 71 | 
 72 | (defun mpd-negate (mpd)
 73 |   (make-instance 'mpd 
 74 |     :mantissa (mpz-negate (mantissa mpd))
 75 |     :exponent (exponent mpd)))
 76 | 
 77 | (defun mpd-half! (a)
 78 |   (check-type a mpd)
 79 |   ;; XXX: This assumes $base is divisible by 2.
 80 |   (mpz-multiply-by-digit! (/ $base 2) (mantissa a))
 81 |   (decf (exponent a)))
 82 | 
 83 | (defun mpd-* (a b)
 84 |   (make-instance 'mpd
 85 |     :mantissa (mpz-* (mantissa a)
 86 |                      (mantissa b))
 87 |     :exponent (+ (exponent a)
 88 |                  (exponent b))))
 89 | 
 90 | (defun mpd-+ (a b)
 91 |   (let ((a (mantissa a))
 92 |         (m (exponent a))
 93 |         (b (mantissa b))
 94 |         (n (exponent b)))
 95 |     (let ((min (min m n)))
 96 |       (make-instance 'mpd
 97 |         :mantissa (mpz-+ (mpz-left-shift a (* $digit-bits (- m min)))
 98 |                          (mpz-left-shift b (* $digit-bits (- n min))))
 99 |         :exponent min))))
100 | 
101 | (defun mpd-- (a b)
102 |   (mpd-+ a (mpd-negate b)))
103 | 
104 | (defun mpd-integer-part (mpd)
105 |   (let* ((e (exponent mpd))
106 |          (m (mantissa mpd))
107 |          (m-size (mpz-size m)))
108 |     (cond
109 |       ((not (plusp (+ m-size e)))
110 |        (integer-mpz 0 'mpz/ram))
111 |       ((zerop e)
112 |        m)
113 |       ((minusp e)
114 |        (mpz-right-shift m (* $digit-bits (- e))))
115 |       (t
116 |        (mpz-left-shift m (* $digit-bits e))))))
117 | 
118 | 


--------------------------------------------------------------------------------
/src/mpz-protocol.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; mpz-protocol.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2022 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | ;;; PRINT-OBJECT is a required protocol method.
 8 | 
 9 | (defgeneric mpz-integer (mpz)
10 |   (:documentation "Convert an MPZ to a Common Lisp integer."))
11 | 
12 | (defgeneric integer-mpz (n class-name)
13 |   (:documentation "Convert the Common Lisp integer N to an MPZ-like object of class CLASS-NAME."))
14 | 
15 | (defgeneric mpz-size (mpz)
16 |   (:documentation "The number of digits required to mathematically represent MPZ. If the MPZ is 0, then this is 0."))
17 | 
18 | (defgeneric mpz-bit-size (mpz)
19 |   (:documentation "The number of bits required to mathematically represent MPZ in two's complement."))
20 | 
21 | (defun mpz-integer-length (mpz)
22 |   (mpz-bit-size mpz))
23 | 
24 | (defgeneric optimize-mpz (mpz)
25 |   (:documentation "Optimize the storage used to represent the MPZ."))
26 | 
27 | (defgeneric mpz-digit (mpz n)
28 |   (:documentation "Get the Nth least-significant digit from MPZ."))
29 | 
30 | (defgeneric mpz-set-zero! (mpz)
31 |   (:documentation "Mutate the MPZ so that it is equal to zero."))
32 | 
33 | (defgeneric mpz-zerop (mpz)
34 |   (:documentation "Is MPZ zero?"))
35 | 
36 | (defgeneric mpz-plusp (mpz)
37 |   (:documentation "Is MPZ positive?"))
38 | 
39 | (defgeneric mpz-minusp (mpz)
40 |   (:documentation "Is MPZ negative?"))
41 | 
42 | (defgeneric mpz-abs (mpz)
43 |   (:documentation "Return the absolute value of MPZ. Shares storage."))
44 | 
45 | (defgeneric mpz-negate (mpz)
46 |   (:documentation "Return the negative of MPZ."))
47 | 
48 | (defgeneric mpz-negate! (mpz)
49 |   (:documentation "Mutate MPZ so that it is negative."))
50 | 
51 | (defgeneric mpz-= (a b)
52 |   (:documentation "Are the MPZs A and B mathematically equal?"))
53 | 
54 | (defun mpz-/= (a b)
55 |   (not (mpz-= a b)))
56 | 
57 | (defgeneric mpz-> (a b)
58 |   (:documentation "Is the MPZ A greater than the MPZ B?"))
59 | 
60 | (defun mpz->= (a b)
61 |   (or (mpz-= a b)
62 |       (mpz-> a b)))
63 | 
64 | (defun mpz-< (a b)
65 |   (not (mpz->= a b)))
66 | 
67 | (defun mpz-<= (a b)
68 |   (not (mpz-> a b)))
69 | 
70 | (defgeneric mpz-+ (a b))
71 | (defgeneric mpz-1+ (a))
72 | (defgeneric mpz-- (a b))
73 | (defgeneric mpz-1- (a))
74 | (defgeneric mpz-* (a b))
75 | (defgeneric mpz-left-shift (a n))
76 | (defgeneric mpz-right-shift (a n))
77 | (defgeneric mpz-multiply-by-digit! (d mpz))
78 | (defgeneric mpz-multiply-by-s64! (d mpz))
79 | (defgeneric mpz-debug (mpz &optional stream))
80 | 


--------------------------------------------------------------------------------
/src/mpz-string.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; mpz-string.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2022 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | ;;; ALGORITHM Out
  8 | ;;;
  9 | ;;; INPUT: integer A of N digits
 10 | ;;;
 11 | ;;; OUTPUT: string S in base B
 12 | ;;;
 13 | ;;; if A < B
 14 | ;;;     return char(A)
 15 | ;;; else
 16 | ;;;     find k such that B^(2k-2) <= A < B^(2k)
 17 | ;;;     Q, R = divrem(A, B^k)
 18 | ;;;     r = Out(R)
 19 | ;;;     return Out(Q) ++ ["0" * (k - len(r))] ++ r
 20 | 
 21 | 
 22 | 
 23 | (defgeneric write-number (x stream))
 24 | 
 25 | (defmethod write-number ((x integer) stream)
 26 |   (format stream "~D" x))
 27 | 
 28 | (defmethod write-number ((x mpz/ram) stream)
 29 |   (cond
 30 |     ((mpz-minusp x)
 31 |      (write-string "-" stream)
 32 |      (write-number (mpz-abs x) stream)
 33 |      nil)
 34 |     ((mpz-zerop x)
 35 |      (write-string "0" stream)
 36 |      nil)
 37 |     ((= 1 (mpz-size x))
 38 |      (write-number (mpz-digit x 0) stream)
 39 |      nil)
 40 |     (t
 41 |      (%write-big-number x stream)
 42 |      nil)))
 43 | 
 44 | (defun %write-big-number (x stream)
 45 |   ;;(format t "~& x = ~D~%" (mpz-integer x))
 46 |   (cond
 47 |     ((mpz-zerop x)
 48 |      (write-char #\0 stream))
 49 |     ((and (= 1 (mpz-size x))
 50 |           (< (mpz-digit x 0) $largest-power-of-10))
 51 |      (write-number (mpz-digit x 0) stream))
 52 |     (t
 53 |      (let* ((l (biggest-power-of-10<= x))
 54 |             (k (floor (+ l 2) 2)))
 55 |        (multiple-value-bind (quo rem)
 56 |            (mpz-divrem x (mpz-expt (integer-mpz 10 'mpz/ram) k))
 57 |          ;;(format t "quo = ~D~%" (mpz-integer quo))
 58 |          ;;(format t "rem = ~D~%" (mpz-integer rem))
 59 |          (let ((r (with-output-to-string (s)
 60 |                     (%write-big-number rem s))))
 61 |            (%write-big-number quo stream)
 62 |            (loop :repeat (- k (length r)) :do (write-char #\0 stream))
 63 |            (write-string r stream)))))))
 64 | 
 65 | (defun biggest-power-of-10<= (x)
 66 |   #+hypergeometrica-safe
 67 |   (assert (not (mpz-zerop x)))
 68 |   (let* ((bits (mpz-bit-size x))
 69 |          ;; This division is possibly an *OVER* estimate.
 70 |          (est (floor (/ (max 0 (1- bits)) (log 10.0d0 2.0d0))))
 71 |          (10^n (mpz-expt (integer-mpz 10 'mpz/ram) est)))
 72 |     #+hypergeometrica-paranoid
 73 |     (assert (mpz-<= 10^n x)
 74 |         ()
 75 |         "expected ~D <= ~D"
 76 |         (mpz-integer 10^n)
 77 |         (mpz-integer x))
 78 | 
 79 |     (loop :do
 80 |       (mpz-multiply-by-digit! 10 10^n)
 81 |       (incf est)
 82 |       (when (mpz-> 10^n x)
 83 |         (decf est)
 84 |         (loop-finish)))
 85 |     
 86 |     est))
 87 | 
 88 | (defun largest10 (n)
 89 |   (loop :with i := 0
 90 |         :while (plusp n)
 91 |         :do (setf n (floor n 10))
 92 |             (incf i)
 93 |         :finally (return (1- i))))
 94 | 
 95 | (defun largest10* (bits)
 96 |   (floor (/ bits (log 10.0d0 2.0d0))))
 97 | 
 98 | (defun pow-2-10 (n)
 99 |   (loop :for i :below n :do
100 |     (format t "2^~A > 10^~A = ~A = ~A?~%"
101 |             i
102 |             (largest10 (expt 2 i))
103 |             (largest10* i)
104 |             (biggest-power-of-10<= (integer-mpz (expt 2 i) 'mpz/ram))
105 |             ))
106 |   )
107 | 


--------------------------------------------------------------------------------
/src/multiply.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; multiply.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | ;;; Multiplication Driver
 8 | 
 9 | (defparameter *ntt-multiply-threshold* 100
10 |   "Up to how many digits can the smaller number of a multiplication have before NTT multiplication is used?")
11 | 
12 | (defmethod mpz-* ((x mpz/ram) (y mpz/ram))
13 |   (optimize-mpz x)
14 |   (optimize-mpz y)
15 |   (when (< (mpz-size x) (mpz-size y))
16 |     (rotatef x y))
17 | 
18 |   ;; Now the size of X is guaranteed greater-or-equal Y.
19 |   (optimize-mpz
20 |    (cond
21 |      ((mpz-zerop y)
22 |       (integer-mpz 0 'mpz/ram))
23 |      ((= 1 (mpz-size y))
24 |       (let ((d (vec-ref (storage y) 0)))
25 |         (cond
26 |           ((= 1 d)
27 |            (if (= -1 (sign y))
28 |                (mpz-negate x)
29 |                x))
30 |           (t
31 |            (let ((r (make-instance 'mpz/ram
32 |                       :sign (* (sign x) (sign y))
33 |                       :storage (make-storage (+ 2 (mpz-size x))))))
34 |              (vec-replace/unsafe (storage r) (storage x))
35 |              (mpz-multiply-by-digit! d r)
36 |              r)))))
37 |      ((<= (mpz-size y) *ntt-multiply-threshold*)
38 |       (make-instance 'mpz/ram
39 |         :sign (* (sign x) (sign y))
40 |         :storage (%multiply-storage/schoolboy
41 |                   (storage x) (mpz-size x)
42 |                   (storage y) (mpz-size y))))
43 |      ((eq x y)
44 |       (mpz-square x))
45 |      #+hypergeometrica-floating-point
46 |      ((< (least-power-of-two->= (+ 1 (mpz-size x) (mpz-size y))) +fft-length-limit+)
47 |       (mpz-*/fft x y))
48 |      (t
49 |       (mpz-*/ntt x y)))))
50 | 
51 | (defun f-expt (a n one multiply)
52 |   "Exponentiate A (any object) to the power of N (a non-negative integer), where ONE is multiplicative identity and MULTIPLY is the binary multiplication function."
53 |   (check-type n (integer 0))
54 |   (cond
55 |     ((zerop n) one)
56 |     ((= 1 n)   a)
57 |     (t
58 |      (let ((k (integer-length n))
59 |            (x a))
60 |        (loop :for bit :from (- k 2) :downto 0
61 |              :do (progn
62 |                    (when *verbose*
63 |                      (let* ((current-exponent (ldb (byte (- k bit) bit) n))
64 |                             (total-exponent   n)
65 |                             (percentage-complete (* 100.0d0
66 |                                                     (/ current-exponent total-exponent))))
67 |                        (format t "~&-------------------------~%")
68 |                        (format t "~&Current exponent: ~D / ~D (~6,2F%)~%"
69 |                                current-exponent
70 |                                total-exponent
71 |                                percentage-complete)))
72 |                    (setf x (funcall multiply x x))
73 |                    (when  (logbitp bit n)
74 |                      (setf x (funcall multiply x a))))
75 |              :finally (return x))))))
76 | 
77 | (defun mpz-expt (a n)
78 |   "Raise an MPZ A to the power of a non-negative integer N."
79 |   (f-expt a n (integer-mpz 1 (class-name (class-of a))) #'mpz-*))
80 | 


--------------------------------------------------------------------------------
/src/ntt-multiply.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; ntt-multiply.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | 
  8 | (global-vars:define-global-parameter **scheme** (make-modular-scheme (default-moduli)))
  9 | 
 10 | (defun add-big-digit (big-digit storage i)
 11 |   "Add the BIG-DIGIT (an UNSIGNED-BYTE) to the STORAGE (a RAW-STORAGE) beginning at the Ith digit."
 12 |   (declare (type unsigned-byte big-digit)
 13 |            (type storage storage)
 14 |            (type alexandria:array-index i)
 15 |            (optimize speed))
 16 |   (with-vec (storage storage_)
 17 |     (cond
 18 |       ((zerop big-digit) storage)
 19 |       ((>= i (vec-digit-length storage)) (error "Trying to add ~D at index ~D. ~
 20 |                                                  This is unexpected and indicates ~
 21 |                                                  a grave inconsistency."
 22 |                                                 big-digit i))
 23 |       ((typep big-digit 'fixnum)
 24 |        (multiple-value-bind (x carry) (add64 big-digit (storage_ i))
 25 |          (declare (type bit carry))
 26 |          (setf (storage_ i) x)
 27 |          (add-big-digit carry storage (1+ i))))
 28 |       ;; Specially written to not cons.
 29 |       #+sbcl
 30 |       (t (dotimes (j (sb-bignum:%bignum-length big-digit) storage)
 31 |            (let* ((offset (+ i j))
 32 |                   (si (storage_ offset))
 33 |                   (bi (sb-bignum:%bignum-ref big-digit j)))
 34 |              (declare (type alexandria:array-index offset)
 35 |                       (type (unsigned-byte 64) si bi))
 36 |              (multiple-value-bind (x carry) (add64 si bi)
 37 |                (setf (storage_ offset) x)
 38 |                (add-big-digit carry storage (1+ offset))))))
 39 |       ;; Otherwise, we gotta cons...
 40 |       #-sbcl
 41 |       (t (let ((si (storage_ i))
 42 |                (digit-lo64 (ldb (byte $digit-bits 0) big-digit)))
 43 |            (declare (type (unsigned-byte 64) si digit-lo64))
 44 |            (multiple-value-bind (x carry) (add64 si digit-lo64)
 45 |              (declare (type bit carry))
 46 |              (setf (storage_ i) x)
 47 |              (let ((quo (ash big-digit #.(- $digit-bits))))
 48 |                ;; Do QUO + CARRY in two separate steps to avoid a bignum
 49 |                ;; addition on the Lisp side.
 50 |                (add-big-digit quo storage (1+ i))
 51 |                (add-big-digit carry storage (1+ i)))))))))
 52 | 
 53 | (defun iterate (f x n)
 54 |   (assert (not (minusp n)))
 55 |   (if (zerop n)
 56 |       x
 57 |       (iterate f (funcall f x) (1- n))))
 58 | 
 59 | 
 60 | (defmacro with-rebind ((&rest vars) &body body)
 61 |   `(let ,(loop :for var :in vars :collect (list var var))
 62 |      ,@body))
 63 | 
 64 | (defmacro with-task ((&rest vars) &body work)
 65 |   `(with-rebind ,vars
 66 |      ,@work))
 67 | 
 68 | (defmacro with-parallel-work (() &body body)
 69 |   #+hypergeometrica-parallel
 70 |   (alexandria:with-gensyms (ch num-items i work vars)
 71 |     `(let ((,num-items 0)
 72 |            (,ch (lparallel:make-channel)))
 73 |        (declare (type fixnum ,num-items))
 74 |        (macrolet ((with-task ((&rest ,vars) &body ,work)
 75 |                     `(with-rebind ,,vars
 76 |                        (incf ,',num-items)
 77 |                        (lparallel:submit-task ,',ch
 78 |                                               (lambda ()
 79 |                                                 ,@,work)))))
 80 |          ,@body)
 81 |        ;; do work
 82 |        (dotimes (,i ,num-items)
 83 |          (lparallel:receive-result ,ch))))
 84 |   #-hypergeometrica-parallel
 85 |   `(progn
 86 |      ,@body))
 87 | 
 88 | (defun multiply-pointwise! (a b length scheme i)
 89 |   (declare (type storage a b)
 90 |            (type alexandria:array-length length)
 91 |            (type alexandria:array-index i)
 92 |            (type modular-scheme scheme)
 93 |            (optimize speed (safety 0) (debug 0) (space 0) (compilation-speed 0)))
 94 |   (with-vecs (a a_ b b_)
 95 |     (let ((m (aref (scheme-moduli scheme) i))
 96 |           (m~ (aref (scheme-inverses scheme) i)))
 97 |       (#+hypergeometrica-parallel lparallel:pdotimes
 98 |        #-hypergeometrica-parallel dotimes (i length)
 99 |        (setf (a_ i) (m*/fast (a_ i) (b_ i) m m~))))))
100 | 
101 | (defun mpz-square (x)
102 |   (let* ((size (mpz-size x))
103 |          (length (least-power-of-two->= (* 2 size)))
104 |          (bound-bits (integer-length (* length (expt (1- $base) 2))))
105 |          (num-moduli (num-moduli-needed-for-bits **scheme** bound-bits))
106 |          (ntts (make-ntt-work x length (scheme-moduli **scheme**)))
107 |          ;; TODO don't allocate
108 |          (result (make-storage length))
109 |          (report-time (let ((start-time (get-internal-real-time)))
110 |                         (lambda ()
111 |                           (when *verbose*
112 |                             (format t " ~D ms~%" (round (* 1000 (- (get-internal-real-time) start-time)) internal-time-units-per-second))
113 |                             (setf start-time (get-internal-real-time))
114 |                             (finish-output))))))
115 |     (when *verbose*
116 |       (format t "~&Size: ~D (approx ~D decimal~:P, ~D MiB)~%"
117 |               size
118 |               (round (* size $digit-bits)
119 |                      (log 10.0d0 2.00))
120 |               (round (/ (* (1+ num-moduli) length $digit-bits) 8 1024 1024)))
121 |       (format t "Transform length: ~D~%" length)
122 |       (format t "Convolution bits: ~D~%" bound-bits)
123 |       (format t "Moduli: ~{#x~16X~^, ~}~%" (coerce (scheme-moduli **scheme**) 'list))
124 | 
125 |       (format t "Forward..."))
126 |     (with-parallel-work ()
127 |       (loop :for i :below num-moduli
128 |             :for a :in ntts
129 |             :do (with-task (a i)
130 |                   (ntt-forward a **scheme** i))))
131 |     (funcall report-time)
132 | 
133 |     ;; Pointwise multiply
134 |     (when *verbose*
135 |       (format t "Pointwise multiply... "))
136 |     (loop :for i :below num-moduli
137 |           :for m := (aref (scheme-moduli **scheme**) i)
138 |           :for mi := (aref (scheme-inverses **scheme**) i)
139 |           :for a :in ntts
140 |           :do (with-vec (a a_)
141 |                 (#+hypergeometrica-parallel lparallel:pdotimes
142 |                  #-hypergeometrica-parallel dotimes (i length)
143 |                  (let ((ai (a_ i)))
144 |                    (setf (a_ i) (m*/fast ai ai m mi))))))
145 |     (funcall report-time)
146 | 
147 |     ;; Inverse transform
148 |     (when *verbose*
149 |       (format t "Reverse..."))
150 |     (with-parallel-work ()
151 |       (loop :for i :below num-moduli
152 |             :for a :in ntts
153 |             :do (with-task (a i)
154 |                   (ntt-reverse a **scheme** i))))
155 |     (funcall report-time)
156 | 
157 |     ;; Unpack the result.
158 |     (when *verbose*
159 |       (format t "CRT..."))
160 |     (let* ((moduli      (subseq (scheme-moduli **scheme**) 0 num-moduli))
161 |            (composite   (reduce #'* moduli))
162 |            (complements (map 'list (lambda (m) (/ composite m)) moduli))
163 |            (inverses    (map 'list #'inv-mod complements moduli))
164 |            (factors     (map 'list #'* complements inverses)))
165 |       (dotimes (i length)
166 |         (loop :for a :in ntts
167 |               :for f :in factors
168 |               ;; TODO: optimize
169 |               :sum (* f (vec-ref a i)) :into result-digit
170 |               :finally (add-big-digit (mod result-digit composite) result i)))
171 |       (funcall report-time))
172 |     (make-instance 'mpz/ram :sign 1 :storage result)))
173 | 
174 | (defun mpz-*/ntt (x y)
175 |   (let* ((size (+ (mpz-size x) (mpz-size y)))
176 |          (length (least-power-of-two->= size))
177 |          (bound-bits (integer-length (* length (expt (1- $base) 2))))
178 |          (num-moduli (num-moduli-needed-for-bits **scheme** bound-bits))
179 |          (ntts-x (make-ntt-work x length (scheme-moduli **scheme**)))
180 |          (ntts-y (make-ntt-work y length (scheme-moduli **scheme**)))
181 |          ;; By the time we write to RESULT, NTTS-Y will be done.
182 |          ;;
183 |          ;; However (!), we will need to remember to clear it.
184 |          (result (first ntts-y))
185 |          (report-time (let ((start-time (get-internal-real-time)))
186 |                         (lambda ()
187 |                           (when *verbose*
188 |                             (format t " ~D ms~%" (round (* 1000 (- (get-internal-real-time) start-time)) internal-time-units-per-second))
189 |                             (setf start-time (get-internal-real-time))
190 |                             (finish-output))))))
191 |     (when *verbose*
192 |       (format t "~&Size: ~D (approx ~D decimal~:P, ~D MiB)~%"
193 |               size
194 |               (round (* size $digit-bits)
195 |                      (log 10.0d0 2.00))
196 |               (round (/ (* size $digit-bits) 8 1024 1024)))
197 |       (format t "Transform length: ~D~%" length)
198 |       (format t "Convolution bits: ~D~%" bound-bits)
199 |       (format t "Moduli: ~{#x~16X~^, ~}~%" (coerce (scheme-moduli **scheme**) 'list))
200 | 
201 |       (format t "Forward..."))
202 |     (with-parallel-work ()
203 |       (loop :for i :below num-moduli
204 |             :for ax :in ntts-x
205 |             :for ay :in ntts-y
206 |             :do (with-task (i ax)
207 |                   (ntt-forward ax **scheme** i))
208 |             :do (with-task (i ay)
209 |                   (ntt-forward ay **scheme** i))))
210 |     (funcall report-time)
211 | 
212 |     ;; Pointwise multiply. The NTT work for X is mutated.
213 |     (when *verbose*
214 |       (format t "Pointwise multiply..."))
215 |     (with-parallel-work ()
216 |       (loop :for i :below num-moduli
217 |             :for ax :in ntts-x
218 |             :for ay :in ntts-y
219 |             :do (with-task (i ax ay)
220 |                   (multiply-pointwise! ax ay length **scheme** i))))
221 |     (funcall report-time)
222 | 
223 |     ;; Tell the garbage collector we don't need no vectors anymore.
224 |     (setf ntts-y nil)
225 |     (vec-fill result 0)
226 | 
227 |     ;; Inverse transform
228 |     (when *verbose*
229 |       (format t "Reverse..."))
230 |     (with-parallel-work ()
231 |       (loop :for i :below num-moduli
232 |             :for ax :in ntts-x
233 |             :do (with-task (i ax)
234 |                   (ntt-reverse ax **scheme** i))))
235 |     (funcall report-time)
236 | 
237 |     ;; Unpack the result.
238 |     ;;
239 |     ;; This allocates a lot, but seems to be fast in practice.
240 |     (when *verbose*
241 |       (format t "CRT..."))
242 |     (let* ((moduli      (subseq (scheme-moduli **scheme**) 0 num-moduli))
243 |            (composite   (reduce #'* moduli))
244 |            (complements (map 'list (lambda (m) (/ composite m)) moduli))
245 |            (inverses    (map 'list #'inv-mod complements moduli))
246 |            (factors     (map 'list #'* complements inverses)))
247 |       (dotimes (i length)
248 |         (loop :for a :in ntts-x
249 |               :for f :in factors
250 |               ;; TODO: optimize
251 |               :sum (* f (vec-ref a i)) :into result-digit
252 |               :finally (add-big-digit (mod result-digit composite) result i)))
253 |       (funcall report-time))
254 |     (make-instance 'mpz/ram :sign (* (sign x) (sign y)) :storage result)))
255 | 
256 | 
257 | 
258 | 


--------------------------------------------------------------------------------
/src/number-theoretic-transform.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; number-theoretic-transform.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2014-2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | (defun %make-ntt-work (vec length modulus)
  8 |   (declare (type storage vec)
  9 |            (type alexandria:array-length length)
 10 |            (type modulus modulus))
 11 |   #-hypergeometric-safe
 12 |   (declare (optimize speed (safety 0) (space 0)))
 13 |   #+hypergeometrica-safe
 14 |   (assert (and (>= length (vec-digit-length vec))
 15 |                (power-of-two-p length)))
 16 |   (let ((a (make-storage length)))
 17 |     (with-vecs (vec vec_ a a_)
 18 |       ;; NB. LENGTH is the total power-of-two length, not
 19 |       ;; the length of the mpz!
 20 |       (dotimes (i (vec-digit-length vec) a)
 21 |         (declare (type alexandria:array-index i))
 22 |         (setf (a_ i) (mod (vec_ i) modulus))))))
 23 | 
 24 | (defun make-ntt-work (mpz length moduli)
 25 |   (declare (type mpz/ram mpz)
 26 |            (type alexandria:array-length length)
 27 |            (type (simple-array digit (*)) moduli))
 28 |   (when *verbose*
 29 |     (format t "Allocating..."))
 30 |   (let ((start-time (get-internal-real-time)))
 31 |     (prog1 (loop :for m :of-type modulus :across moduli
 32 |                  :collect (%make-ntt-work (storage mpz) length m))
 33 |       (when *verbose*
 34 |         (format t " ~D ms~%" (round (* 1000 (- (get-internal-real-time) start-time)) internal-time-units-per-second))))))
 35 | 
 36 | ;;;;;;;;;;;;;;;;;;;;; Number-Theoretic Transform ;;;;;;;;;;;;;;;;;;;;;
 37 | 
 38 | ;;; Decimation-in-frequency algorithm.
 39 | 
 40 | (defun ntt-forward (a scheme mod-num)
 41 |   "Compute the forward number-theoretic transform of the array of integers A, with modulus M and primitive root W. If they are not provided, a suitable one will be computed.
 42 | 
 43 | M and W are extracted from the MODULAR-SCHEME SCHEME based off of their index MOD-NUM.
 44 | 
 45 | The array must have a power-of-two length.
 46 | 
 47 | The resulting array (a mutation of the input) will be in bit-reversed order."
 48 |   (declare (type storage a)
 49 |            (type modular-scheme scheme)
 50 |            (type alexandria:array-index mod-num)
 51 |            (inline m+ m- m*/fast m*/fast2 m*/fast2-unreduced)
 52 |            (optimize speed (safety 0) debug (space 0) (compilation-speed 0)))
 53 |   #+hypergeometrica-safe
 54 |   (assert (power-of-two-p (vec-digit-length a)))
 55 |   (let* ((m     (aref (scheme-moduli scheme)   mod-num))
 56 |          (m~    (aref (scheme-inverses scheme) mod-num))
 57 |          (N     (vec-digit-length a))
 58 |          (ln    (lg N))
 59 |          (roots (scheme-primitive-roots scheme)))
 60 |     (declare (type modulus m m~)
 61 |              (type alexandria:array-length N)
 62 |              (type alexandria:non-negative-fixnum ln))
 63 |     #+hypergeometrica-paranoid
 64 |     (assert (vec-every (lambda (x) (< x m)) a))
 65 |     (with-vec (a a_)
 66 |       (loop :for lsubn :from ln :downto 2 :do
 67 |         (let* ((subn (ash 1 lsubn))
 68 |                (subn/2 (floor subn 2))
 69 |                (dw (aref roots ln mod-num (- ln lsubn) 0))
 70 |                (dw~ (aref roots ln mod-num (- ln lsubn) 1))
 71 |                (w^j 1))
 72 |           (loop :for j :below subn/2 :do
 73 |             (loop :for r :from 0 :to (- n subn) :by subn :do
 74 |               (let* ((r+j (+ r j))
 75 |                      (r+j+subn/2 (+ r+j subn/2))
 76 |                      (u (a_ r+j))
 77 |                      (v (a_ r+j+subn/2)))
 78 |                 (declare (type alexandria:array-index r+j r+j+subn/2))
 79 |                 (setf (a_ r+j)        (m+ u v m)
 80 |                       (a_ r+j+subn/2) (m*/fast w^j (m- u v m) m m~))))
 81 |             (setf w^j (m*/fast2 dw dw~ w^j m)))))
 82 | 
 83 |       (when (plusp ln)
 84 |         (loop :for r :below N :by 2 :do
 85 |           (symbol-macrolet ((u (a_ r))
 86 |                             (v (a_ (1+ r))))
 87 |             (psetf u (m+ u v m)
 88 |                    v (m- u v m)))))))
 89 | 
 90 |   a)
 91 | 
 92 | ;;; Decimation-in-time algorithm.
 93 | (defun ntt-reverse (a scheme mod-num)
 94 |   "Compute the inverse number-theoretic transform of the array of integers A, with modulus M and primitive root W. If they are not provided, a suitable one will be computed.
 95 | 
 96 | M and W are extracted from the MODULAR-SCHEME SCHEME based off of their index MOD-NUM.
 97 | 
 98 | The array must have a power-of-two length.
 99 | 
100 | The input must be in bit-reversed order."
101 |   (declare (type storage a)
102 |            (type modular-scheme scheme)
103 |            (type alexandria:array-index mod-num)
104 |            (inline m+ m- m* m*/fast m*/fast2 m*/fast2-unreduced)
105 |            (optimize speed (safety 0) debug (space 0) (compilation-speed 0)))
106 |   (let* ((m     (aref (scheme-moduli scheme)   mod-num))
107 |          (m~    (aref (scheme-inverses scheme) mod-num))
108 |          (N     (vec-digit-length a))
109 |          (ldn   (lg N))
110 |          (roots (scheme-inverse-primitive-roots scheme)))
111 |     #+hypergeometrica-safe
112 |     (assert (power-of-two-p (vec-digit-length a)))
113 |     #+hypergeometrica-paranoid
114 |     (assert (vec-every (lambda (x) (< x m)) a))
115 |     (with-vec (a a_)
116 |       (let ((1/N  (aref (scheme-inverse-transform-lengths scheme) ldn mod-num 0))
117 |             (1/N~ (aref (scheme-inverse-transform-lengths scheme) ldn mod-num 1)))
118 |         #+hypergeometrica-paranoid
119 |         (assert (= 1/N (inv-mod N m)))
120 |         (when (plusp ldn)
121 |           (loop :for r :below N :by 2 :do
122 |             (symbol-macrolet ((u (a_ r))
123 |                               (v (a_ (1+ r))))
124 |               (psetf u (m*/fast2 1/N 1/N~ (m+ u v m) m)
125 |                      v (m*/fast2 1/N 1/N~ (m- u v m) m))))))
126 |       (loop :for ldm :from 2 :to ldn :do
127 |         (let* ((subn (ash 1 ldm))
128 |                (subn/2 (floor subn 2))
129 |                (dw (aref roots ldn mod-num (- ldn ldm) 0))
130 |                (dw~ (aref roots ldn mod-num (- ldn ldm) 1))
131 |                (w^j 1))
132 |           (loop :for j :below subn/2 :do
133 |             (loop :for r :from 0 :to (- n subn) :by subn :do
134 |               (let* ((r+j (+ r j))
135 |                      (r+j+subn/2 (+ r+j subn/2))
136 |                      (u (a_ r+j))
137 |                      (v (m*/fast w^j (a_ r+j+subn/2) m m~)))
138 |                 (declare (type alexandria:array-index r+j r+j+subn/2))
139 |                 (setf (a_ r+j)        (m+ u v m)
140 |                       (a_ r+j+subn/2) (m- u v m))))
141 |             (setf w^j (m*/fast2 dw dw~ w^j m)))))))
142 | 
143 |   a)
144 | 
145 | 
146 | ;;;;;;;;;;;;;;;;;;;; Reference DIF FFT algorithm ;;;;;;;;;;;;;;;;;;;;;
147 | 
148 | (defun dif-forward (a)
149 |   "Compute the radix-2 decimation-in-frequency FFT of the complex vector A.
150 | 
151 | The vector must have a power-of-two length."
152 |   (let* ((N   (length a))
153 |          (ldn (1- (integer-length N))))
154 |     (loop :for ldm :from ldn :downto 2 :do
155 |       (let* ((m (ash 1 ldm))
156 |              (m/2 (floor m 2)))
157 |         (loop :for j :below m/2
158 |               :for w^j := (cis (/ (* 2 pi j) m)) :do
159 |                 (loop :for r :from 0 :to (- n m) :by m :do
160 |                   (let* ((r+j (+ r j))
161 |                          (r+j+m/2 (+ r+j m/2))
162 |                          (u (aref a r+j))
163 |                          (v (aref a r+j+m/2)))
164 |                     (setf (aref a r+j)     (+ u v)
165 |                           (aref a r+j+m/2) (* w^j (- u v))))))))
166 | 
167 |     (loop :for r :below N :by 2 :do
168 |       (psetf (aref a r)      (+ (aref a r) (aref a (1+ r)))
169 |              (aref a (1+ r)) (- (aref a r) (aref a (1+ r))))))
170 | 
171 |   a)
172 | 
173 | (defun dif-reverse (a)
174 |   "Compute the radix-2 decimation-in-frequency inverse FFT of the complex vector A.
175 | 
176 | The vector must have a power-of-two length."
177 |   (let* ((N   (length a))
178 |          (ldn (1- (integer-length N))))
179 |     (loop :for ldm :from ldn :downto 2 :do
180 |       (let* ((m (ash 1 ldm))
181 |              (m/2 (floor m 2)))
182 |         (loop :for j :below m/2
183 |               :for w^j := (cis (/ (* -2 pi j) m)) :do
184 |                 (loop :for r :from 0 :to (- n m) :by m :do
185 |                   (let* ((r+j (+ r j))
186 |                          (r+j+m/2 (+ r+j m/2))
187 |                          (u (aref a r+j))
188 |                          (v (aref a r+j+m/2)))
189 |                     (setf (aref a r+j)     (+ u v)
190 |                           (aref a r+j+m/2) (* w^j (- u v))))))))
191 | 
192 |     (loop :for r :below N :by 2 :do
193 |       (psetf (aref a r)      (/ (+ (aref a r) (aref a (1+ r))) N)
194 |              (aref a (1+ r)) (/ (- (aref a r) (aref a (1+ r))) N))))
195 | 
196 |   a)
197 | 
198 | (defun dit-forward (a)
199 |   "Compute the radix-2 decimation-in-time FFT of the complex vector A.
200 | 
201 | The vector must have a power-of-two length."
202 |   (let* ((N   (length a))
203 |          (ldn (1- (integer-length N))))
204 |     (loop :for r :below N :by 2 :do
205 |       (psetf (aref a r)      (+ (aref a r) (aref a (1+ r)))
206 |              (aref a (1+ r)) (- (aref a r) (aref a (1+ r)))))
207 |     (loop :for ldm :from 2 :to ldn :do
208 |       (let* ((m (ash 1 ldm))
209 |              (m/2 (floor m 2)))
210 |         (loop :for j :below m/2
211 |               :for w^j := (cis (/ (* 2 pi j) m)) :do
212 |                 (loop :for r :from 0 :to (- n m) :by m :do
213 |                   (let* ((r+j (+ r j))
214 |                          (r+j+m/2 (+ r+j m/2))
215 |                          (u (aref a r+j))
216 |                          (v (* w^j (aref a r+j+m/2))))
217 |                     (setf (aref a r+j)     (+ u v)
218 |                           (aref a r+j+m/2) (- u v))))))))
219 | 
220 |   a)
221 | 
222 | (defun dit-reverse (a)
223 |   "Compute the radix-2 decimation-in-time inverse FFT of the complex vector A.
224 | 
225 | Input must be in bit-reversed order.
226 | 
227 | The vector must have a power-of-two length."
228 |   (let* ((N   (length a))
229 |          (ldn (1- (integer-length N))))
230 |     (loop :for r :below N :by 2 :do
231 |       (psetf (aref a r)      (/ (+ (aref a r) (aref a (1+ r))) N)
232 |              (aref a (1+ r)) (/ (- (aref a r) (aref a (1+ r))) N)))
233 |     (loop :for ldm :from 2 :to ldn :do
234 |       (let* ((m (ash 1 ldm))
235 |              (m/2 (floor m 2)))
236 |         (loop :for j :below m/2
237 |               :for w^j := (cis (/ (* -2 pi j) m)) :do
238 |                 (loop :for r :from 0 :to (- n m) :by m :do
239 |                   (let* ((r+j (+ r j))
240 |                          (r+j+m/2 (+ r+j m/2))
241 |                          (u (aref a r+j))
242 |                          (v (* w^j (aref a r+j+m/2))))
243 |                     (setf (aref a r+j)     (+ u v)
244 |                           (aref a r+j+m/2) (- u v))))))))
245 | 
246 |   a)
247 | 


--------------------------------------------------------------------------------
/src/package.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; package.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (defpackage #:hypergeometrica
 6 |   (:use #:cl)
 7 |   ;; digit.lisp
 8 |   (:export
 9 |    #:$digit-bits                        ; CONSTANT
10 |    #:$base                              ; CONSTANT
11 |    #:$max-digit                         ; CONSTANT
12 |    #:$digit-ones                        ; CONSTANT
13 |    #:$largest-power-of-10-exponent      ; CONSTANT
14 |    #:$largest-power-of-10               ; CONSTANT
15 | 
16 |    #:digit                              ; TYPE
17 |    #:sign                               ; TYPE
18 | 
19 |    #:bytes-for-digits                   ; FUNCTION
20 | 
21 |    ;; wraparound arithmetic in radix $BASE
22 |    #:fx+                                ; FUNCTION
23 |    #:fx-                                ; FUNCTION
24 |    #:fx1+                               ; FUNCTION
25 |    #:fx1-                               ; FUNCTION
26 |    #:fxneg                              ; FUNCTION
27 |    #:fx*                                ; FUNCTION
28 |    #:fx/                                ; FUNCTION
29 | 
30 |    #:ub64/2                             ; FUNCTION
31 |    #:add64                              ; FUNCTION
32 |    #:mul128                             ; FUNCTION
33 |    #:div128                             ; FUNCTION
34 |    #:add128                             ; FUNCTION
35 |    #:sub128                             ; FUNCTION
36 | 
37 |    #:complement-digit                   ; FUNCTION
38 |    )
39 | 
40 |   ;; modular-arithmetic.lisp
41 |   (:export
42 |    #:m+                                 ; FUNCTION
43 |    #:m-                                 ; FUNCTION
44 |    #:m1+                                ; FUNCTION
45 |    #:m1-                                ; FUNCTION
46 |    #:negate-mod                         ; FUNCTION
47 |    #:inv-mod                            ; FUNCTION
48 |    #:inv-mod/unsafe                     ; FUNCTION
49 |    #:m/                                 ; FUNCTION
50 |    #:expt-mod                           ; FUNCTION
51 |    #:expt-mod/2^n                       ; FUNCTION
52 |    #:expt-mod/safe                      ; FUNCTION
53 |    #:mod128/fast                        ; FUNCTION
54 |    #:m*/fast                            ; FUNCTION
55 |    #:garner                             ; FUNCTION
56 |    )
57 | 
58 |   ;; vec.lisp
59 |   (:export
60 |    #:vec-digit-pointer                  ; GENERIC
61 |    #:vec-digit-length                   ; GENERIC
62 |    #:copy-vec                           ; GENERIC
63 |    #:resize-vec-by                      ; GENERIC
64 |    #:free-vec                           ; GENERIC
65 |    #:vec-ref                            ; GENERIC
66 | 
67 |    #:with-vec                           ; MACRO
68 |    #:with-vecs                          ; MACRO
69 |    #:vec->vector                        ; FUNCTION
70 |    #:do-digits                          ; MACRO
71 | 
72 |    #:vec=                               ; FUNCTION
73 |    #:vec-compare                        ; FUNCTION
74 |    #:vec-fill                           ; FUNCTION
75 |    #:vec-every                          ; FUNCTION
76 |    #:vec-into                           ; FUNCTION
77 |    #:vec-replace/unsafe                 ; FUNCTION
78 | 
79 |    #:vec-leading-zeros                  ; FUNCTION
80 |    #:vec-trailing-zeros                 ; FUNCTION
81 |    #:left-displace-vec                  ; FUNCTION
82 |    #:vec-digit-length*                  ; FUNCTION
83 |    )
84 | 
85 |   ;; ram-vec.lisp
86 |   (:export
87 |    #:ram-vec                            ; CLASS
88 |    #:make-ram-vec                       ; FUNCTION
89 |    #:sequence->ram-vec                  ; FUNCTION
90 |    )
91 | 
92 |   ;; disk-vec.lisp
93 |   (:export
94 |    #:disk-vec                           ; CLASS
95 |    #:make-disk-vec                      ; FUNCTION
96 |    #:make-disk-vec-from-file            ; FUNCTION
97 |    )
98 |   )
99 | 


--------------------------------------------------------------------------------
/src/pi.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; pi.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2023 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | ;;;; Chudnovsky's Series for pi
 8 | ;;;;
 9 | ;;;; We actually calculate pi/10 so that the result is in [0,1).
10 | 
11 | (defconstant +chud-decimals-per-term+ (log 151931373056000 10d0))
12 | (defconstant +chud-bits-per-term+ (log 151931373056000 2d0))
13 | (defconstant +chud-a+ 13591409)
14 | (defconstant +chud-b+ 545140134)
15 | (defconstant +chud-c+ 640320)
16 | 
17 | ;; We compute pi/10 instead of pi so the result is in [0,1). Change
18 | ;; this constant to 1 if you want True Pi.
19 | ;;
20 | ;; Note that it is 10 and not 1/10 since the series of Chudnovsky
21 | ;; calculates 1/pi.
22 | (defconstant +chud-prefactor+ 10)
23 | 
24 | (defun make-chudnovsky-series ()
25 |   (flet ((a (n)
26 |            (+ #.(* +chud-a+ (numerator +chud-prefactor+))
27 |               (* n #.(* +chud-b+ (numerator +chud-prefactor+)))))
28 |          (p (n)
29 |            (if (zerop n)
30 |                1
31 |                ;; This is Horner's form of
32 |                ;; -(6n - 5)*(2n - 1)*(6n - 1)
33 |                (+ 5 (* n (+ -46 (* n (+ 108 (* n -72))))))))
34 |          (q (n)
35 |            (if (zerop n)
36 |                1
37 |                (* (expt n 3)
38 |                   #.(/ (expt +chud-c+ 3) 24)))))
39 |     (make-series :a (alexandria:compose #'int #'a)
40 |                  :b (constantly (int #.(* (denominator +chud-prefactor+)
41 |                                           (/ (* 8 +chud-c+) 12))))
42 |                  :p (alexandria:compose #'int #'p)
43 |                  :q (alexandria:compose #'int #'q))))
44 | 
45 | (defun compute-pi/chudnovsky (prec)
46 |   (let* ((num-terms (floor (+ 2 (/ prec +chud-decimals-per-term+))))
47 |          ;; √640320 = 8√10005
48 |          (sqrt-c    (isqrt (* 10005 (expt 100 prec))))
49 |          (comp      (binary-split (make-chudnovsky-series) 0 num-terms)))
50 |     (values (floor (* sqrt-c (mpz-integer (partial-denominator comp)))
51 |                    (mpz-integer (partial-numerator comp))))))
52 | 
53 | (defun mpd-pi (prec-bits)
54 |   (let* ((guard-bits (+ prec-bits $digit-bits))
55 |          (num-terms (+ 2 (floor guard-bits +chud-bits-per-term+)))
56 |          ;; intermediate steps:
57 |          comp sqrt recip final)
58 |     (with-stopwatch (tim :log t)
59 |       (format t "~2&terms = ~A~%" num-terms)
60 |       (setf comp (binary-split (make-chudnovsky-series) 0 num-terms))
61 |       (tim "split")
62 | 
63 |       (setf sqrt (mpd-sqrt (integer-mpd 10005) guard-bits))
64 |       (setf final sqrt)
65 |       (tim "sqrt")
66 | 
67 |       (setf recip (mpd-reciprocal (mpz-mpd (partial-numerator comp)) guard-bits))
68 |       (setf final (mpd-* final recip))
69 |       (tim "recip")
70 | 
71 |       (setf final (mpd-* final (mpz-mpd (partial-denominator comp))))
72 |       ;;(mpd-truncate! final :to-bits prec-bits) ; prec, not guard!
73 |       (tim "final"))
74 |     final))
75 | 


--------------------------------------------------------------------------------
/src/ram-vec.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; ram-vec.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2021 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | ;;; Some C stuff first...
  8 | 
  9 | (cffi:defcfun malloc :pointer
 10 |   (n-bytes :size))
 11 | 
 12 | (cffi:defcfun realloc :pointer
 13 |   (src     :pointer)
 14 |   (n-bytes :size))
 15 | 
 16 | (cffi:defcfun calloc :pointer
 17 |   (num-elts :size)
 18 |   (elt-size :size))
 19 | 
 20 | (cffi:defcfun free :void
 21 |   (ptr :pointer))
 22 | 
 23 | (cffi:defcfun memcpy :pointer
 24 |   (dest      :pointer)
 25 |   (src       :pointer)
 26 |   (num-bytes :size))
 27 | 
 28 | 
 29 | ;;; Now the RAM-VEC implementation.
 30 | 
 31 | (defclass ram-vec ()
 32 |   ((allocated-size :initarg :allocated-size
 33 |                    :reader ram-vec.allocated-size
 34 |                    :documentation "The allocation size in bytes.")
 35 |    (base-pointer   :initarg :base-pointer
 36 |                    :reader ram-vec.base-pointer
 37 |                    :reader vec-digit-pointer
 38 |                    :documentation "The base pointer that was allocated. This is a pointer that would be able to be freed.")
 39 |    ;; TODO: This can be derived from ALLOCATED-SIZE
 40 |    (length         :initarg :length
 41 |                    :reader vec-digit-length
 42 |                    :documentation "The number of digits this VEC holds.")
 43 |    (finalizer-cons :initarg :finalizer-cons
 44 |                    :reader ram-vec.finalizer-cons
 45 |                    :documentation "A CONS whose CAR will be read in order to free the memory of the RAM-VEC. This needed indirection is used in case the BASE-POINTER changes."))
 46 |   (:documentation "Digits allocated in RAM."))
 47 | 
 48 | (defmethod print-object ((vec ram-vec) stream)
 49 |   (print-unreadable-object (vec stream :type t :identity t)
 50 |     (format stream "~D digit~:P" (vec-digit-length vec))))
 51 | 
 52 | (defun alloc (num-bytes)
 53 |   "Allocated NUM-BYTES bytes, initialized to zero."
 54 |   #+hypergeometrica-safe
 55 |   (check-type num-bytes alexandria:array-length)
 56 |   (let ((pointer (calloc num-bytes 1)))
 57 |     (when (cffi:null-pointer-p pointer)
 58 |       (error "CALLOC failed."))
 59 |     pointer))
 60 | 
 61 | (defun uninitialized-alloc (num-bytes)
 62 |   "Allocated NUM-BYTES bytes."
 63 |   #+hypergeometrica-safe
 64 |   (check-type num-bytes alexandria:array-length)
 65 |   (let ((pointer (malloc num-bytes)))
 66 |     (when (cffi:null-pointer-p pointer)
 67 |       (error "MALLOC failed"))
 68 |     pointer))
 69 | 
 70 | (defun resize-alloc (pointer old-size-bytes new-size-bytes)
 71 |   "Re-allocate POINTER with NUM-BYTES bytes."
 72 |   #+hypergeometrica-safe
 73 |   (check-type old-size-bytes alexandria:array-length)
 74 |   #+hypergeometrica-safe
 75 |   (check-type new-size-bytes alexandria:array-length)
 76 |   (let ((new-pointer (realloc pointer new-size-bytes)))
 77 |     (cond
 78 |       ((cffi:null-pointer-p new-pointer)
 79 |        (setf new-pointer (uninitialized-alloc new-size-bytes))
 80 |        (memcpy new-pointer pointer (min old-size-bytes new-size-bytes))
 81 |        new-pointer)
 82 |       (t
 83 |        new-pointer))))
 84 | 
 85 | (defun make-ram-vec (n)
 86 |   "Make a new RAM-VEC of N digits, initialized to zero."
 87 |   (check-type n alexandria:array-length)
 88 |   (let* ((num-bytes (bytes-for-digits n))
 89 |          (pointer (alloc num-bytes))
 90 |          (finalizer-cons (cons pointer nil))
 91 |          (vec (make-instance 'ram-vec :allocated-size num-bytes
 92 |                                       :base-pointer pointer
 93 |                                       :length n
 94 |                                       :finalizer-cons finalizer-cons)))
 95 |     (when *auto-free-vecs*
 96 |       (tg:finalize vec (lambda () (free (car finalizer-cons)))))
 97 |     vec))
 98 | 
 99 | (defmethod copy-vec ((vec ram-vec))
100 |   (let* ((bytes (ram-vec.allocated-size vec))
101 |          (pointer (uninitialized-alloc bytes))
102 |          (finalizer-cons (cons pointer nil))
103 |          (copy (make-instance 'ram-vec :allocated-size bytes
104 |                                        :base-pointer pointer
105 |                                        :length (vec-digit-length vec)
106 |                                        :finalizer-cons finalizer-cons)))
107 |     (memcpy pointer (ram-vec.base-pointer vec) bytes)
108 |     (when *auto-free-vecs*
109 |       (tg:finalize copy (lambda () (free (car finalizer-cons)))))
110 |     copy))
111 | 
112 | (defmethod resize-vec-by ((vec ram-vec) n-digits)
113 |   (unless (zerop n-digits)
114 |     (let* ((old-length (vec-digit-length vec))
115 |            (new-length (+ n-digits old-length))
116 |            (new-allocated-size (bytes-for-digits new-length)))
117 |       #+hypergeometrica-safe
118 |       (check-type new-length unsigned-byte)
119 |       (let* ((old-pointer (ram-vec.base-pointer vec))
120 |              (new-pointer (resize-alloc old-pointer
121 |                                         (ram-vec.allocated-size vec)
122 |                                         new-allocated-size)))
123 |         (setf (slot-value vec 'length)         new-length
124 |               (slot-value vec 'allocated-size) new-allocated-size)
125 |         (unless (cffi:pointer-eq old-pointer new-pointer)
126 |           (setf (slot-value vec 'base-pointer) new-pointer)
127 |           (rplaca (ram-vec.finalizer-cons vec) new-pointer)))
128 |       ;; Fill with zeros
129 |       ;;
130 |       ;; TODO: use memset?
131 |       (when (plusp n-digits)
132 |         (with-vec (vec vec_)
133 |           (loop :for i :from old-length :below new-length
134 |                 :do (setf (vec_ i) 0))))
135 |       ;; Return nothing
136 |       nil)))
137 | 
138 | (defmethod free-vec ((vec ram-vec))
139 |   (when (plusp (ram-vec.allocated-size vec))
140 |     (free (ram-vec.base-pointer vec))
141 |     (setf (slot-value vec 'base-pointer) (cffi:null-pointer)
142 |           (slot-value vec 'allocated-size) 0
143 |           (slot-value vec 'length) 0)
144 |     (rplaca (ram-vec.finalizer-cons vec) (cffi:null-pointer))
145 |     (tg:cancel-finalization vec))
146 |   nil)
147 | 
148 | (defun sequence->ram-vec (seq)
149 |   "Convert a sequence of digits SEQ to a RAM-VEC."
150 |   (let* ((n (length seq))
151 |          (vec (make-ram-vec n)))
152 |     (with-vec (vec vec_)
153 |       (let ((i -1))
154 |         (map nil (lambda (x)
155 |                    (setf (vec_ (incf i)) x))
156 |              seq)
157 |         vec))))
158 | 


--------------------------------------------------------------------------------
/src/sbcl-intrinsics-ppc64el.lisp:
--------------------------------------------------------------------------------
1 | ;;;; sbcl-intrinsics-ppc64el.lisp
2 | ;;;;
3 | ;;;; Copyright (c) 2019 Robert Smith
4 | 
5 | (in-package #:hypergeometrica)
6 | 
7 | (in-package #:sb-vm)
8 | 


--------------------------------------------------------------------------------
/src/sbcl-intrinsics-x86-64.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; sbcl-intrinsics-x86-64.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | (in-package #:sb-vm)
  8 | 
  9 | (define-vop (hypergeometrica::%%ub64/2)
 10 |   (:translate hypergeometrica::%%ub64/2)
 11 |   (:policy :fast-safe)
 12 |   (:args (x :scs (unsigned-reg) :target r))
 13 |   (:arg-types unsigned-num)
 14 |   (:results (r :scs (unsigned-reg) :from (:argument 0)))
 15 |   (:result-types unsigned-num)
 16 |   (:generator 6
 17 |     (move r x)
 18 |     (inst sar r 1)))
 19 | 
 20 | (define-vop (hypergeometrica::%%add64)
 21 |   (:translate hypergeometrica::%%add64)
 22 |   (:policy :fast-safe)
 23 |   (:args (x :scs (unsigned-reg) :target sum)
 24 |          (y :scs (unsigned-reg unsigned-stack) :to :save))
 25 |   (:arg-types unsigned-num
 26 |               unsigned-num)
 27 |   (:results (sum   :scs (unsigned-reg) :from (:argument 0))
 28 |             (carry :scs (unsigned-reg)))
 29 |   (:result-types unsigned-num
 30 |                  unsigned-num)
 31 |   (:generator 6
 32 |     (zeroize carry)
 33 |     (move sum x)
 34 |     (inst add sum y)
 35 |     (inst set :c carry)))
 36 | 
 37 | (define-vop (hypergeometrica::%%mul128)
 38 |   (:translate hypergeometrica::%%mul128)
 39 |   (:policy :fast-safe)
 40 |   (:args (x :scs (unsigned-reg) :target rax)
 41 |          (y :scs (unsigned-reg unsigned-stack)))
 42 |   (:arg-types unsigned-num
 43 |               unsigned-num)
 44 |   (:temporary (:sc unsigned-reg :offset rax-offset :target r-lo
 45 |                :from (:argument 0) :to (:result 0))
 46 |               rax)
 47 |   (:temporary (:sc unsigned-reg :offset rdx-offset :target r-hi
 48 |                :from :eval :to (:result 1))
 49 |               rdx)
 50 |   (:results (r-lo :scs (unsigned-reg) :from (:argument 0))
 51 |             (r-hi :scs (unsigned-reg) :from (:argument 1)))
 52 |   (:result-types unsigned-num
 53 |                  unsigned-num)
 54 |   (:generator 6
 55 |     (move rax x)
 56 |     (inst mul rax y)
 57 |     (move r-lo rax)
 58 |     (move r-hi rdx)))
 59 | 
 60 | ;;; XXX: This won't properly detect overflow.
 61 | (define-vop (hypergeometrica::%%div128)
 62 |   (:translate hypergeometrica::%%div128)
 63 |   (:policy :fast-safe)
 64 |   (:args (dividend-lo :scs (unsigned-reg) :target rax)
 65 |          (dividend-hi :scs (unsigned-reg) :target rdx)
 66 |          (divisor :scs (unsigned-reg unsigned-stack)))
 67 |   (:arg-types unsigned-num
 68 |               unsigned-num
 69 |               unsigned-num)
 70 |   (:temporary (:sc unsigned-reg :offset rax-offset :target quotient
 71 |                :from (:argument 0) :to (:result 0))
 72 |               rax)
 73 |   (:temporary (:sc unsigned-reg :offset rdx-offset :target remainder
 74 |                :from (:argument 1) :to (:result 1))
 75 |               rdx)
 76 |   (:results (quotient :scs (unsigned-reg))
 77 |             (remainder :scs (unsigned-reg)))
 78 |   (:result-types unsigned-num
 79 |                  unsigned-num)
 80 |   (:generator 6
 81 |     (move rax dividend-lo)
 82 |     (move rdx dividend-hi)
 83 |     (inst div rax divisor)
 84 |     (move quotient rax)
 85 |     (move remainder rdx)))
 86 | 
 87 | (define-vop (hypergeometrica::%%add128)
 88 |   (:translate hypergeometrica::%%add128)
 89 |   (:policy :fast-safe)
 90 |   (:args (a-lo :scs (unsigned-reg) :target c-lo)
 91 |          (a-hi :scs (unsigned-reg) :target c-hi)
 92 |          (b-lo :scs (unsigned-reg))
 93 |          (b-hi :scs (unsigned-reg)))
 94 |   (:arg-types unsigned-num
 95 |               unsigned-num
 96 |               unsigned-num
 97 |               unsigned-num)
 98 |   (:results (c-lo :scs (unsigned-reg) :from (:argument 0))
 99 |             (c-hi :scs (unsigned-reg) :from (:argument 1)))
100 |   (:result-types unsigned-num
101 |                  unsigned-num)
102 |   (:generator 6
103 |     (move c-lo a-lo)
104 |     (move c-hi a-hi)
105 |     (inst add c-lo b-lo)
106 |     (inst adc c-hi b-hi)))
107 | 
108 | (define-vop (hypergeometrica::%%sub128)
109 |   (:translate hypergeometrica::%%sub128)
110 |   (:policy :fast-safe)
111 |   (:args (a-lo :scs (unsigned-reg) :target c-lo)
112 |          (a-hi :scs (unsigned-reg) :target c-hi)
113 |          (b-lo :scs (unsigned-reg))
114 |          (b-hi :scs (unsigned-reg)))
115 |   (:arg-types unsigned-num
116 |               unsigned-num
117 |               unsigned-num
118 |               unsigned-num)
119 |   (:results (c-lo :scs (unsigned-reg) :from (:argument 0))
120 |             (c-hi :scs (unsigned-reg) :from (:argument 1)))
121 |   (:result-types unsigned-num
122 |                  unsigned-num)
123 |   (:generator 6
124 |     (move c-lo a-lo)
125 |     (move c-hi a-hi)
126 |     (inst sub c-lo b-lo)
127 |     (inst sbb c-hi b-hi)))
128 | 


--------------------------------------------------------------------------------
/src/sbcl-intrinsics.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; sbcl-intrinsics.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | ;;; In this file, we just define known functions for SBCL.
 8 | 
 9 | (sb-c:defknown %%ub64/2 ((unsigned-byte 64))
10 |     (unsigned-byte 64)
11 |     (sb-c:foldable sb-c:flushable sb-c:movable)
12 |   :overwrite-fndb-silently t)
13 | 
14 | (sb-c:defknown %%add64 ((unsigned-byte 64) (unsigned-byte 64))
15 |     (values (unsigned-byte 64) bit)
16 |     (sb-c:foldable sb-c:flushable sb-c:movable)
17 |   :overwrite-fndb-silently t)
18 | 
19 | (sb-c:defknown %%add128 ((unsigned-byte 64) (unsigned-byte 64)
20 |                          (unsigned-byte 64) (unsigned-byte 64))
21 |     (values (unsigned-byte 64) (unsigned-byte 64))
22 |     (sb-c:foldable sb-c:flushable sb-c:movable)
23 |   :overwrite-fndb-silently t)
24 | 
25 | (sb-c:defknown %%sub128 ((unsigned-byte 64) (unsigned-byte 64)
26 |                          (unsigned-byte 64) (unsigned-byte 64))
27 |     (values (unsigned-byte 64) (unsigned-byte 64))
28 |     (sb-c:foldable sb-c:flushable sb-c:movable)
29 |   :overwrite-fndb-silently t)
30 | 
31 | (sb-c:defknown %%mul128 ((unsigned-byte 64) (unsigned-byte 64))
32 |     (values (unsigned-byte 64) (unsigned-byte 64))
33 |     (sb-c:foldable sb-c:flushable sb-c:movable)
34 |   :overwrite-fndb-silently t)
35 | 
36 | (sb-c:defknown %%div128 ((unsigned-byte 64) (unsigned-byte 64) (unsigned-byte 64))
37 |     (values (unsigned-byte 64) (unsigned-byte 64))
38 |     (sb-c:foldable sb-c:flushable sb-c:movable)
39 |   :overwrite-fndb-silently t)
40 | 


--------------------------------------------------------------------------------
/src/solinas.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; solinas.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | ;;; "Generalized Mersenne Primes" by Jerome Solinas
 8 | 
 9 | (defun congruence-matrix (p)
10 |   "Given a polynomial P as a vector, generate the congruence relations of
11 | 
12 |     t^(k + deg p)
13 | 
14 | for 0 <= k < deg p. This will be a matrix M[k, i] where i is the polynomial coefficient index for relation k. (This is the matrix that Solinas produces with a linear-feedback shift register)"
15 |   (let* ((deg (1- (length p)))
16 |          (c (make-array (list deg deg) :initial-element 0)))
17 |     ;; Calculate the first congruence
18 |     (let ((pre  (aref p deg)))
19 |       (assert (= 1 pre))
20 |       (loop :for i :below deg
21 |             :do (setf (aref c 0 i) (* (/ pre) (- (aref p i))))))
22 |     ;; Multiply everything else out
23 |     (do-range (r 1 deg c)
24 |       ;; Shift all terms (degreee < DEG) by t
25 |       (do-range (i 1 deg)
26 |         (setf (aref c r i) (aref c (1- r) (1- i))))
27 |       ;; Add in the t^DEG term
28 |       (let ((coef (aref c (1- r) (1- deg))))
29 |         (dotimes (i deg)
30 |           (incf (aref c r i) (* coef (aref c 0 i))))))))
31 | 
32 | (defun mod-matrix! (mat hom)
33 |   (loop :for i :below (array-total-size mat)
34 |         :do (setf #1=(row-major-aref mat i) (funcall hom #1#))
35 |         :finally (return mat)))
36 | 
37 | (defun update-equations (p b a)
38 |   "Produce update equations from the Solinas polynomial P and variables B and A. Updates will be on B from values of A."
39 |   (check-type b symbol)
40 |   (check-type a symbol)
41 |   (check-type p vector)
42 |   (let* ((c (congruence-matrix p))
43 |          (deg (array-dimension c 0)))
44 |     `(progn
45 |        ,@(loop :for i :below deg
46 |                :for bi := `(aref ,b ,i)
47 |                :collect `(setf ,bi (+ (aref ,a ,i)
48 |                                       ,@(loop :for j :below deg
49 |                                               :for cji := (aref c j i)
50 |                                               :for aj := `(aref ,a ,(+ deg j))
51 |                                               :unless (zerop cji)
52 |                                                 :collect (cond
53 |                                                            ((= 1 cji) aj)
54 |                                                            ((= -1 cji) `(- ,aj))
55 |                                                            (t `(* ,cji ,aj))))))))))
56 | 
57 | (defun solinas-polynomial (a m k)
58 |   "Generate a Solinas polynomial from the prime a*2^m + 1 with t = 2^k."
59 |   (let* ((p (1+ (* a (2^ m))))
60 |          (bits (integer-length p)))
61 |     (check-type p (unsigned-byte 64))
62 |     (assert (primep p))
63 |     (let* ((total-groups (ceiling bits k))
64 |            (length (if (zerop (mod bits k))
65 |                        (1+ total-groups)
66 |                        total-groups))
67 |            (poly (make-array length :initial-element 0)))
68 |       ;; We always have a 2^bits term and a 1 term 
69 |       (setf (aref poly 0) 1)
70 |       (setf (aref poly (1- length)) 1)
71 |       ;; Now we need a minus term. We combine this with 'a'.
72 |       (multiple-value-bind (zero-groups slack-zeros) (floor m k)
73 |         ;; polynomial indices < ZERO-GROUPS will be zero, except the first.
74 |         ;;
75 |         ;; SLACK-ZEROS is the number of zeros before A is considered
76 |         (let ((az (ash a slack-zeros))
77 |               (nontrivial-groups (- total-groups zero-groups)))
78 |           ;; Now AZ := A * 2^(SLACK) accommodates the remaining
79 |           ;; NONTRIVIAL-GROUPS. Go through all but the last one.
80 |           (loop :repeat (1- nontrivial-groups)
81 |                 :for i :from zero-groups
82 |                 :do (setf (aref poly i) (ldb (byte k 0) az))
83 |                     (setf az (ash az (- k))))
84 |           (assert (<= 0 az (1- (2^ k))))
85 |           ;; AZ contains the MSB of the original A of K bits. Now we
86 |           ;; need to make this the one negative term.
87 |           ;; 2^64 - c*2^48 = a*2^m
88 |           ;;
89 |           ;; 2^64 - a*2^m = c*2^48
90 |           ;;
91 |           ;; 2^16 - a*2^(m - 48) = c
92 |           (setf (aref poly (- length 2)) (- az (2^ k)))))
93 |       ;; return the poly
94 |       poly)))
95 | 
96 | (defun evaluate-solinas-polynomial (poly k)
97 |   (loop :for i :below (length poly)
98 |         :sum (* (aref poly i) (2^ (* k i)))))
99 | 


--------------------------------------------------------------------------------
/src/strandh-elster-reversal.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; strandh-elster-reversal.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | ;;;; This is an implementation of the Strandh-Elster bit reversal
  8 | ;;;; algorithm. It is described in the following reference:
  9 | ;;;;
 10 | ;;;;     Robert Strandh and Anne C. Elster, "A Very Fast Recursive
 11 | ;;;;     Bit-Reversal Algorithm", SIAM CSE'00: First SIAM Conference
 12 | ;;;;     on Computational Science and Engineering, Washington, D.C.,
 13 | ;;;;     Sep 21-24, 2000
 14 | ;;;;
 15 | ;;;; Since I do not have access to it, Prof. Strandh described it with
 16 | ;;;; a reference implementation in the following link:
 17 | ;;;;
 18 | ;;;;     http://metamodular.com/bit-reversal.lisp
 19 | 
 20 | 
 21 | (defmacro with-reverted-operations (bindings &body body)
 22 |   (flet ((reverted-op (form)
 23 |            (ecase (car form)
 24 |              ;; ASH is not totally reversible. (We can shift off
 25 |              ;; values that can't be recovered without saving them.)
 26 |              ((ash) `(ash ,(second form) (- ,(third form))))
 27 |              ((+) `(- ,@(rest form)))
 28 |              ((-) `(+ ,@(rest form)))
 29 |              ;; LOGIOR is not totally reversible. (We can LOGIOR 1
 30 |              ;; into 1, which which can't be recovered without saving
 31 |              ;; them.)
 32 |              ((logior) `(logxor ,@(rest form))))))
 33 |     `(progn
 34 |        ,@(loop :for (var value-form) :in bindings
 35 |                :collect `(setf ,var ,value-form))
 36 |        ,@body
 37 |        ,@(loop :for (var value-form) :in bindings
 38 |                :collect `(setf ,var ,(reverted-op value-form)))
 39 |        (values))))
 40 | 
 41 | (defmacro do-non-symmetric-bit-reversals ((i j width &optional return) &body body)
 42 |   "Call the binary function F on numbers all numbers A and B such that:
 43 | 
 44 |     * A < B;
 45 |     * The bits of B are the reversal of the bits of A;
 46 |     * A and B are N or fewer bits wide.
 47 | 
 48 | Symmetric A and B are not included, and are not needed for most bit-reversal applications."
 49 |   (alexandria:with-gensyms (n b1 b2 all greater user)
 50 |     (multiple-value-bind (body decls doc) (alexandria:parse-body body :documentation nil)
 51 |       (declare (ignore doc))
 52 |       `(let* ((,n ,width)
 53 |               (,i 0)
 54 |               (,j 0)
 55 |               (,b1 0)
 56 |               (,b2 0))
 57 |          (declare (type (integer 0 64) ,n)
 58 |                   (type (unsigned-byte 64) ,i ,j ,b1 ,b2))
 59 |          (labels
 60 |              ((,user ()
 61 |                 ,@decls
 62 |                 ,@body
 63 |                 (values))
 64 |               (,all ()
 65 |                 (declare (optimize speed (safety 0) (debug 0) (space 0) (compilation-speed 0)))
 66 |                 (if (zerop ,n)
 67 |                     (,user)
 68 |                     (with-reverted-operations ((,b1 (ash ,b1 1))
 69 |                                                (,b2 (ash ,b2 -1)))
 70 |                       (if (= ,n 1)
 71 |                           (with-reverted-operations ((,n (- ,n 1)))
 72 |                             (,all)
 73 |                             (with-reverted-operations ((,i (logior ,i ,b1))
 74 |                                                        (,j (logior ,j ,b2)))
 75 |                               (,all)))
 76 |                           (with-reverted-operations ((,n (- ,n 2)))
 77 |                             (,all)
 78 |                             
 79 |                             ;; We avoid using WITH-REVERTED-OPERATIONS here for
 80 |                             ;; efficiency reasons. If we use that macro, then
 81 |                             ;; some of the reverted operations are then
 82 |                             ;; re-applied needlessly. We coalesce those here.
 83 |                             (locally ()
 84 |                               (setf ,i (logior ,i ,b1)
 85 |                                     ,j (logior ,j ,b2))
 86 |                               (,all)
 87 |                               
 88 |                               (setf ,i (logior ,i ,b2)
 89 |                                     ,j (logior ,j ,b1))
 90 |                               (,all)
 91 |                               (setf ,i (logxor ,i ,b1)
 92 |                                     ,j (logxor ,j ,b2))
 93 | 
 94 |                               (,all)
 95 |                               (setf ,i (logxor ,i ,b2)
 96 |                                     ,j (logxor ,j ,b1)))
 97 |                             #+#:equivalent
 98 |                             (progn
 99 |                               (with-reverted-operations ((,i (logior ,i ,b1))
100 |                                                          (,j (logior ,j ,b2)))
101 |                                 (,all))
102 | 
103 |                               (with-reverted-operations ((,i (logior ,i ,b1 ,b2))
104 |                                                          (,j (logior ,j ,b1 ,b2)))
105 |                                 (,all))
106 | 
107 |                               (with-reverted-operations ((,i (logior ,i ,b2))
108 |                                                          (,j (logior ,j ,b1)))
109 |                                 (,all))))))))
110 |               (,greater ()
111 |                 (declare (optimize speed (safety 0) (debug 0) (space 0) (compilation-speed 0)))
112 |                 (with-reverted-operations ((,b1 (ash ,b1 1))
113 |                                            (,b2 (ash ,b2 -1)))
114 |                   (if (< ,n 4)
115 |                       (with-reverted-operations ((,i (logior ,i ,b1))
116 |                                                  (,j (logior ,j ,b2))
117 |                                                  (,n (- ,n 2)))
118 |                         (,all))
119 |                       (with-reverted-operations ((,n (- ,n 2)))
120 |                         (,greater)
121 |                         
122 |                         ;; We avoid using WITH-REVERTED-OPERATIONS here for
123 |                         ;; efficiency reasons. If we use that macro, then
124 |                         ;; some of the reverted operations are then
125 |                         ;; re-applied needlessly. We coalesce those here.
126 |                         (progn
127 |                           (setf ,i (logior ,i ,b1)
128 |                                 ,j (logior ,j ,b2))
129 |                           (,all)
130 |                           
131 |                           (setf ,i (logior ,i ,b2)
132 |                                 ,j (logior ,j ,b1))
133 |                           (,greater)
134 |                           
135 |                           (setf ,i (logxor ,i ,b1 ,b2)
136 |                                 ,j (logxor ,j ,b1 ,b2)))
137 |                         #+#:equivalent
138 |                         (progn
139 |                           (with-reverted-operations ((,i (logior ,i ,b1))
140 |                                                      (,j (logior ,j ,b2)))
141 |                             (,all))
142 |                           (with-reverted-operations ((,i (logior ,i ,b1 ,b2))
143 |                                                      (,j (logior ,j ,b1 ,b2)))
144 |                             (,greater))))))))
145 |            (declare (dynamic-extent (function ,greater)
146 |                                     (function ,all)
147 |                                     (function ,user))
148 |                     (optimize speed (safety 0) (debug 0) (space 0) (compilation-speed 0)))
149 |            (cond
150 |              ((= 2 ,n)
151 |               (setf ,i 1 ,j 2)
152 |               (,user))
153 |              ((= 3 ,n)
154 |               (setf ,i 1 ,j 4)
155 |               (,user)
156 |               (setf ,i 3 ,j 6)
157 |               (,user))
158 |              ((< 3 ,n)
159 |               ;; Avoid repeated reverted subtraction-by-2.
160 |               (decf ,n 2)
161 |               
162 |               ;; Prepare for first call to ,GREATER.
163 |               (setf ,b1 1
164 |                     ,b2 (ash 1 (1+ ,n))
165 |                     ,i 0
166 |                     ,j 0)
167 |               
168 |               (,greater)
169 | 
170 |               ;; Prepare for call to ALL.
171 |               (setf ,b1 1
172 |                     ,b2 (ash 1 (1+ ,n))
173 |                     ,i ,b1
174 |                     ,j ,b2)
175 |               
176 |               (,all)
177 | 
178 |               ;; Prepare for second call to ,GREATER.
179 |               (setf ,b1 1
180 |                     ,b2 (ash 1 (1+ ,n))
181 |                     ,i (logior ,b1 ,b2)
182 |                     ,j ,i)
183 |               
184 |               (,greater)))
185 |                          
186 |            ;; Return.
187 |            ,return)))))
188 | 
189 | (defun bit-reversed-permute! (x)
190 |   "Permute the simple vector X of length 2^N in bit reversed order."
191 |   (let* ((length (length x))
192 |          (bits (integer-length (max 0 (1- length)))))
193 |     ;; Check that this is a power of two.
194 |     (assert (zerop (logand length (1- length))))
195 |     (do-non-symmetric-bit-reversals (a b bits x)
196 |       (rotatef (aref x a) (aref x b)))))
197 | 
198 | (defun bit-reversed-permute-vec! (x)
199 |   "Permute the simple vector X of length 2^N in bit reversed order."
200 |   (let* ((length (vec-digit-length x))
201 |          (bits (integer-length (max 0 (1- length)))))
202 |     ;; Check that this is a power of two.
203 |     (assert (zerop (logand length (1- length))))
204 |     (with-vec (x x_)
205 |       (do-non-symmetric-bit-reversals (a b bits x)
206 |         (rotatef (x_ a) (x_ b))))))
207 | 


--------------------------------------------------------------------------------
/src/timing-utilities.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; timing-utilities.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2022 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica)
 6 | 
 7 | (defun delta-ms (start end)
 8 |   (round (* 1000 (- end start))
 9 |          internal-time-units-per-second))
10 | 
11 | (defmacro time* (&body body)
12 |   (alexandria:with-gensyms (start result end)
13 |     `(let* ((,start (get-internal-real-time))
14 |             (,result (progn ,@body))
15 |             (,end   (get-internal-real-time)))
16 |        (values ,result (delta-ms ,start ,end)))))
17 | 
18 | (defmacro time! (place &body body)
19 |   (alexandria:with-gensyms (delta result)
20 |     `(multiple-value-bind (,result ,delta)
21 |          (time* ,@body)
22 |        (setf ,place ,delta)
23 |        ,result)))
24 | 
25 | (defmacro with-stopwatch ((k &key (log '*verbose*) (stream '*standard-output*) (label "")) &body body)
26 |   (alexandria:with-gensyms (start last gstream now glabel)
27 |     `(let ((,gstream ,stream)
28 |            (,glabel ,label)
29 |            ,start ,last)
30 |        (flet ((,k (message &rest args)
31 |                 (when ,log
32 |                   (let ((,now (get-internal-real-time)))
33 |                     (fresh-line ,gstream)
34 |                     (format ,gstream "[~A~D Δ~D] "
35 |                             ,glabel
36 |                             (delta-ms ,start ,now)
37 |                             (delta-ms ,last ,now))
38 |                     (apply #'format ,gstream message args)
39 |                     (terpri ,gstream)
40 |                     (finish-output ,gstream)
41 |                     (setf ,last ,now)
42 |                     nil))))
43 |          (setf ,start (get-internal-real-time)
44 |                ,last  ,start)
45 |          ,@body))))
46 | 


--------------------------------------------------------------------------------
/src/vec.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; vec.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2021 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica)
  6 | 
  7 | ;;; This file defines a protocol for VECs. A VEC is a data structure
  8 | ;;; to hold digits. That's basically it. There's really no
  9 | ;;; mathematical meaning ascribed to them.
 10 | ;;;
 11 | ;;; All VECs are assumed to be able to be pointed to by a pointer of
 12 | ;;; some sort.
 13 | 
 14 | 
 15 | ;;; VEC Protocol
 16 | 
 17 | (declaim (ftype (function (t) cffi:foreign-pointer) vec-digit-pointer))
 18 | (defgeneric vec-digit-pointer (vec)
 19 |   (:documentation "Retrieve a pointer to readable/writable digits for VEC."))
 20 | 
 21 | (declaim (ftype (function (t) alexandria:array-length) vec-digit-length))
 22 | (defgeneric vec-digit-length (vec)
 23 |   (:documentation "Retrieve the number of digits held by VEC."))
 24 | 
 25 | (defgeneric copy-vec (vec)
 26 |   (:documentation "Produce a copy of VEC."))
 27 | 
 28 | (defgeneric resize-vec-by (vec n-digits)
 29 |   (:documentation "Change the capacity of VEC by N-DIGITS digits."))
 30 | 
 31 | (defgeneric free-vec (vec)
 32 |   (:documentation "Free all memory associated with VEC."))
 33 | 
 34 | (defgeneric vec-ref (vec i)
 35 |   (:documentation "Reference the Ith element of a vector VEC. It is advised to use WITH-VEC when possible instead of this."))
 36 | 
 37 | (defparameter *auto-free-vecs* t
 38 |   "Automatically free VECs during garbage collection?")
 39 | 
 40 | 
 41 | ;;; Convenient VEC Access & Other Pleasantries
 42 | 
 43 | (declaim (inline read-digit-pointer write-digit-pointer))
 44 | (defun read-digit-pointer (digit-pointer index)
 45 |   (declare (type alexandria:array-index index)
 46 |            (type cffi:foreign-pointer digit-pointer)
 47 |            #.*optimize-dangerously-fast*
 48 |            #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
 49 |   (ecase $digit-bits
 50 |     (8  (cffi:mem-aref digit-pointer ':uint8 index))
 51 |     (16 (cffi:mem-aref digit-pointer ':uint16 index))
 52 |     (32 (cffi:mem-aref digit-pointer ':uint32 index))
 53 |     (64 (cffi:mem-aref digit-pointer ':uint64 index))))
 54 | 
 55 | (defun write-digit-pointer (digit-pointer index value)
 56 |   (declare (type alexandria:array-index index)
 57 |            (type cffi:foreign-pointer digit-pointer)
 58 |            (type digit value)
 59 |            #.*optimize-dangerously-fast*
 60 |            #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
 61 |   (ecase $digit-bits
 62 |     (8  (setf (cffi:mem-aref digit-pointer ':uint8 index) value))
 63 |     (16 (setf (cffi:mem-aref digit-pointer ':uint16 index) value))
 64 |     (32 (setf (cffi:mem-aref digit-pointer ':uint32 index) value))
 65 |     (64 (setf (cffi:mem-aref digit-pointer ':uint64 index) value))))
 66 | 
 67 | (defmacro with-vec ((vec accessor) &body body)
 68 |   (alexandria:with-gensyms (pointer i new-digit)
 69 |     (alexandria:once-only (vec)
 70 |       `(let ((,pointer (vec-digit-pointer ,vec)))
 71 |          (labels ((,accessor (,i)
 72 |                     #+(and hypergeometrica-paranoid sbcl)
 73 |                     (declare (sb-ext:muffle-conditions sb-ext:compiler-note))
 74 |                     #+hypergeometrica-paranoid
 75 |                     (assert (<= 0 ,i (vec-digit-length ,vec)))
 76 |                     (read-digit-pointer ,pointer ,i))
 77 |                   ((setf ,accessor) (,new-digit ,i)
 78 |                     #+(and hypergeometrica-paranoid sbcl)
 79 |                     (declare (sb-ext:muffle-conditions sb-ext:compiler-note))
 80 |                     #+hypergeometrica-paranoid
 81 |                     (assert (<= 0 ,i (vec-digit-length ,vec)))
 82 |                     (write-digit-pointer ,pointer ,i ,new-digit)))
 83 |            (declare (inline ,accessor (setf ,accessor))
 84 |                     (ignorable #',accessor #'(setf ,accessor)))
 85 |            ,@body)))))
 86 | 
 87 | (defmethod vec-ref (vec i)
 88 |   (with-vec (vec vec_)
 89 |     (vec_ i)))
 90 | 
 91 | (defmacro with-vecs (vec-accessors &body body)
 92 |   (cond
 93 |     ((null vec-accessors)
 94 |      `(progn ,@body))
 95 |     ((null (rest vec-accessors))
 96 |      (error "bad WITH-VECS syntax"))
 97 |     (t
 98 |      `(with-vec (,(first vec-accessors) ,(second vec-accessors))
 99 |         (with-vecs ,(cddr vec-accessors)
100 |           ,@body)))))
101 | 
102 | (defun vec->vector (vec &optional (start 0) (end (vec-digit-length vec)))
103 |   "Convert a VEC to a Common Lisp VECTOR, starting from index START and bounded above by index END."
104 |   (assert (and (<= 0 start)
105 |                (< start end)
106 |                (<= end (vec-digit-length vec))))
107 |   (let ((vector (make-array (- end start) :element-type 'digit
108 |                                           :initial-element 0)))
109 |     (with-vec (vec vec_)
110 |       (loop :for i :of-type alexandria:array-index
111 |             :from start :below end :do
112 |               (setf (aref vector i) (vec_ i)))
113 |       vector)))
114 | 
115 | (defmacro do-digits ((i digit vec &optional result) &body body)
116 |   (alexandria:with-gensyms (accessor)
117 |     (alexandria:once-only (vec)
118 |       `(with-vec (,vec ,accessor)
119 |          (dotimes (,i (vec-digit-length ,vec) ,result)
120 |            (let* ((,digit (,accessor ,i))
121 |                   (,i ,i))
122 |              ,@body))))))
123 | 
124 | (defun %vec=-upto-unsafe (a b n)
125 |   #+hypergeometrica-paranoid
126 |   (let ((a-length (vec-digit-length a))
127 |         (b-length (vec-digit-length b)))
128 |     (assert (<= n a-length))
129 |     (assert (<= n b-length)))
130 |   (with-vecs (a a_ b b_)
131 |     (loop :for i :below n
132 |           :always (= (a_ i) (b_ i)))))
133 | 
134 | (defun vec= (a b)
135 |   "Are VECs A and B precisely the same?"
136 |   (let ((a-length (vec-digit-length a))
137 |         (b-length (vec-digit-length b)))
138 |     (and (= a-length b-length)
139 |          (%vec=-upto-unsafe a b a-length))))
140 | 
141 | (defun vec-compare (a b &key (start1 0) (start2 0) (num-elements
142 |                                                     (min (vec-digit-length a)
143 |                                                          (vec-digit-length b))))
144 |   "Compare exactly NUM-ELEMENTS elements from VECs A and B, with A starting at START1 and B starting at START2."
145 |   #+hypergeometrica-safe
146 |   (assert (not (or (minusp start1)
147 |                    (minusp start2)
148 |                    (minusp num-elements))))
149 |   #+hypergeometrica-safe
150 |   (assert (<= (- num-elements start1) (vec-digit-length a)))
151 |   #+hypergeometrica-safe
152 |   (assert (<= (- num-elements start2) (vec-digit-length b)))
153 |   (with-vecs (a a_ b b_)
154 |     (loop :repeat num-elements
155 |           :for i :from start1
156 |           :for j :from start2
157 |           :always (= (a_ i) (b_ j)))))
158 | 
159 | (defun vec-fill (vec digit &key (start 0) (end (vec-digit-length vec)))
160 |   (with-vec (vec vec_)
161 |     (loop :for i :from start :below end :do
162 |       (setf (vec_ i) digit))))
163 | 
164 | (defun vec-every (fun vec &key (start 0) (end (vec-digit-length vec)))
165 |   (with-vec (vec vec_)
166 |     (loop :for i :from start :below end
167 |           :always (funcall fun (vec_ i)))))
168 | 
169 | (defun vec-into (vec fun &key (start 0) (end (vec-digit-length vec)))
170 |   (with-vec (vec vec_)
171 |     (loop :for i :from start :below end :do
172 |       (setf (vec_ i) (funcall fun)))))
173 | 
174 | (defun vec-replace/unsafe (dst src &key (start1 0))
175 |   (let ((written-length
176 |           (min (- (vec-digit-length dst) start1)
177 |                (vec-digit-length src))))
178 |     (cond
179 |       ((minusp written-length)
180 |        (warn "Attempting VEC-REPLACE/UNSAFE with OOB offset"))
181 |       (t
182 |        (memcpy (cffi:inc-pointer (vec-digit-pointer dst) (bytes-for-digits start1))
183 |                (vec-digit-pointer src)
184 |                (bytes-for-digits written-length))))))
185 | 
186 | (defun vec-leading-zeros (vec)
187 |   "How many leading zeros does VEC have? (In ideal circumstances, there are zero leading zeros.)"
188 |   (do-digits (i digit vec (vec-digit-length vec))
189 |     (unless (zerop digit)
190 |       (return-from vec-leading-zeros i))))
191 | 
192 | (defun vec-trailing-zeros (vec)
193 |   "How many trailing zeros does VEC have? (In ideal circumstances, there are zero trailing zeros.)
194 | 
195 | See also: VEC-DIGIT-LENGTH*
196 | "
197 |   (with-vec (vec vec_)
198 |     (loop :with n := (vec-digit-length vec)
199 |           :for i :from (1- n) :downto 0
200 |           :for count :from 0
201 |           :for vi := (vec_ i)
202 |           :while (zerop vi)
203 |           :finally (return count))))
204 | 
205 | (defun left-displace-vec (vec k)
206 |   "Displace the elements of VEC to the left (toward negative indexes) by K spots."
207 |   (let ((n (vec-digit-length vec)))
208 |     (cond
209 |       ((>= k n)
210 |        (vec-fill vec 0 :start 0))
211 |       (t
212 |        (with-vec (vec vec_)
213 |          (dotimes (i (- n k))
214 |            (setf (vec_ i) (vec_ (+ i k)))))
215 |        (vec-fill vec 0 :start (- n k))))
216 |     vec))
217 | 
218 | (declaim (ftype (function (t) alexandria:array-length) vec-digit-length*))
219 | (defun vec-digit-length* (vec)
220 |   "What is the value of N such that all elements following the first N elements are zero?
221 | 
222 | This function is like VEC-DIGIT-LENGTH except it ignores trailing zeros.
223 | 
224 | For (1 0 2 0 0), N would be 3."
225 |   (with-vec (vec vec_)
226 |     (loop :for i :from (1- (vec-digit-length vec)) :downto 0
227 |           :unless (zerop (vec_ i))
228 |             :do (return (1+ i))
229 |           :finally (return 0))))
230 | 


--------------------------------------------------------------------------------
/src/xxx.lisp:
--------------------------------------------------------------------------------
 1 | (in-package :hypergeometrica)
 2 | 
 3 | (defun x^x^x/ram (x)
 4 |   (mpz-expt (integer-mpz x) (expt x x)))
 5 | 
 6 | (defun x^x^x/disk (x)
 7 |   (let ((*maximum-vector-size* 0))
 8 |     (mpz-expt (integer-mpz x) (expt x x))))
 9 | 
10 | (defun test-it (x)
11 |   (let ((*verbose* t))
12 |     (let (ram disk)
13 |       (format t "~&RAM &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&~%")
14 |       (setf ram (time (x^x^x/ram x)))
15 |       (format t "~&DISK &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&~%")
16 |       (setf disk (time (x^x^x/disk x)))
17 |       (sb-ext:gc :full t)
18 |       (mpz-= ram disk))))
19 | 
20 | 


--------------------------------------------------------------------------------
/tests/arithmetic.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; arithmetic.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (deftest test-arith-intrinsics ()
 8 |   "Test the arithmetic intrinsics."
 9 |   (multiple-value-bind (sum carry) (h::add64 0 0)
10 |     (is (= sum   0))
11 |     (is (= carry 0)))
12 |   (multiple-value-bind (sum carry) (h::add64 1 1)
13 |     (is (= sum   2))
14 |     (is (= carry 0)))
15 |   (multiple-value-bind (sum carry) (h::add64 (1- (expt 2 64)) 0)
16 |     (is (= sum   (1- (expt 2 64))))
17 |     (is (= carry 0)))
18 |   (multiple-value-bind (sum carry) (h::add64 (1- (expt 2 64)) 1)
19 |     (is (= sum   0))
20 |     (is (= carry 1)))
21 |   (multiple-value-bind (sum carry) (h::add64 (1- (expt 2 64)) 2)
22 |     (is (= sum   1))
23 |     (is (= carry 1)))
24 |   ;; random testing
25 |   (loop :repeat 1000
26 |         :for little-r := (1+ (random 1000))
27 |         :for a64 := (1+ (random (1- (expt 2 64))))
28 |         :for b64 := (1+ (random (1- (expt 2 64))))
29 |         :for r128 := (1+ (random (1- (expt 2 128))))
30 |         :do (let ((ab (* a64 b64)))
31 |               (multiple-value-bind (lo hi) (h::mul128 a64 b64)
32 |                 (is (= lo (ldb (byte 64 0) ab)))
33 |                 (is (= hi (ldb (byte 64 64) ab))))
34 |               (multiple-value-bind (quo rem) (h::div128 (1+ (ldb (byte 64 0) ab))
35 |                                                         (ldb (byte 64 64) ab)
36 |                                                         a64)
37 |                 (is (= quo b64))
38 |                 (is (= rem 1)))
39 |               (multiple-value-bind (lo hi) (h::add128 a64 b64 b64 a64)
40 |                 (let ((p (+ (+ a64 (ash b64 64))
41 |                             (+ b64 (ash a64 64)))))
42 |                   (is (= lo (ldb (byte 64 0) p)))
43 |                   (is (= hi (ldb (byte 64 64) p)))))
44 |               ;; TODO: test SUB128
45 |               )))
46 | 
47 | (defun %test-m* (n low)
48 |   (flet ((r (&optional (high h::$base))
49 |            (+ low (random (- high low)))))
50 |     (loop :repeat n
51 |           :for m := (r h::$max-modulus)
52 |           :for a := (r m)
53 |           :for b := (r m)
54 |           :for x := (h::m* a b m)
55 |           :for y := (mod (* a b) m)
56 |           :do (is (= x y)))))
57 | 
58 | (deftest test-m* ()
59 |   ;; TODO test all mod functions
60 |   (%test-m* 10000 0)
61 |   (%test-m* 10000 1000)
62 |   (%test-m* 10000 1000000)
63 |   (%test-m* 10000 1000000000)
64 |   (%test-m* 10000 1000000000000))
65 | 


--------------------------------------------------------------------------------
/tests/debug/debug-routines.lisp:
--------------------------------------------------------------------------------
1 | (in-package #:hypergeometrica-debug)
2 | 
3 | (defun dd (mpd)
4 |   "Print out an MPD using MPFR."
5 |   (format t "~A~%" (sb-mpfr:coerce (h::mpd-rational mpd) 'sb-mpfr:mpfr-float))
6 |   mpd)
7 | 


--------------------------------------------------------------------------------
/tests/divrem.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; divrem.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2023 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (deftest test-divrem-powers-of-two ()
 8 |   (let* ((two (h::integer-mpz 2 'h::mpz/ram))
 9 |          (h (h::mpz-expt two 100)))
10 |     (loop :for zeros :from 100 :downto 1 :do
11 |       (setf h (h::mpz-+ h (h::integer-mpz 1 'h::mpz/ram)))
12 |       ;; Check we are going in with the right number.
13 |       (is (= (h::mpz-integer h) (1+ (expt 2 zeros))))
14 |       ;; Divide by two.
15 |       (multiple-value-bind (q r)
16 |           (h::mpz-divrem h two)
17 |         (is (= 1 (h::mpz-integer r)))
18 |         (setf h q)))))
19 | 
20 | (deftest test-divrem-third ()
21 |   (let* ((three (h::integer-mpz 3 'h::mpz/ram))
22 |          (ten   (h::integer-mpz 10 'h::mpz/ram)))
23 |     (loop :for power :from 1 :to 100000 :by 10000 :do
24 |       (let ((ten (h::mpz-expt ten power)))
25 |         (multiple-value-bind (q r)
26 |             (h::mpz-divrem ten three)
27 |           (multiple-value-bind (qlisp rlisp)
28 |               (floor (expt 10 power) 3)
29 |             (is (= qlisp (h::mpz-integer q)))
30 |             (is (= rlisp (h::mpz-integer r)))))))))
31 | 


--------------------------------------------------------------------------------
/tests/moduli.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; moduli.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (deftest test-scheme-is-sufficient ()
 8 |   (is (<= 3 (length (h::scheme-moduli h::**scheme**))))
 9 |   (is (<= 50 (h::scheme-max-transform-length h::**scheme**)))
10 |   (is (<= (+ 64 64 50) (reduce #'+ (h::scheme-moduli h::**scheme**) :key #'h::lg))))
11 | 
12 | (deftest test-finding-moduli ()
13 |   (loop :for k :from 2 :to 55
14 |         :for moduli := (h::find-suitable-moduli (expt 2 k) :count 5)
15 |         :do (is (every #'h::primep moduli))
16 |             (is (every (lambda (m)
17 |                          (<= k (nth-value 1 (h::factor-out (1- m) 2))))
18 |                        moduli))))
19 | 
20 | (deftest test-primitive-root ()
21 |   (let* ((N (expt 2 5))
22 |          (moduli (h::find-suitable-moduli N :count 100))
23 |          (generators  (mapcar #'h::find-finite-field-generator moduli))
24 |          (roots (mapcar (lambda (g m) (h::primitive-root-from-generator g N m))
25 |                         generators
26 |                         moduli)))
27 |     (is (every #'h::naive-generator-p generators moduli))
28 |     (flet ((primitive-nth-root-p (w m)
29 |              (h::naive-primitive-root-p w N m)))
30 |       (is (every #'primitive-nth-root-p roots moduli)))))
31 | 


--------------------------------------------------------------------------------
/tests/mpz.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; mpz.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (deftest test-mpz-size ()
 8 |   (is (zerop (h::mpz-size (h::integer-mpz 0 'h::mpz/ram))))
 9 |   (is (= 1 (h::mpz-size (h::integer-mpz 1 'h::mpz/ram))))
10 |   (is (= 2 (h::mpz-size (h::integer-mpz h::$base 'h::mpz/ram)))))
11 | 
12 | (deftest test-mpz-integer-idempotence ()
13 |   (dotimes (i 100)
14 |     (is (= i (h::mpz-integer (h::integer-mpz i 'h::mpz/ram)))))
15 |   (loop :repeat 10
16 |         :for n := (* (expt -1 (random 2))
17 |                      (expt (random most-positive-fixnum) (random 10000)))
18 |         :do (is (= n (h::mpz-integer (h::integer-mpz n 'h::mpz/ram))))))
19 | 
20 | (deftest test-plus-minus-zero ()
21 |   (is (h::mpz-plusp (h::integer-mpz 1 'h::mpz/ram)))
22 |   (is (h::mpz-zerop (h::integer-mpz 0 'h::mpz/ram)))
23 |   (is (h::mpz-minusp (h::integer-mpz -1 'h::mpz/ram))))
24 | 
25 | (deftest test-mpz-mult-simple ()
26 |   (let* ((k (expt 2 128))
27 |          (n (h::integer-mpz k 'h::mpz/ram)))
28 |     (is (= k (h::mpz-integer n)))       ; Sanity check
29 |     (is (= (h::mpz-integer (h::mpz-* n n))
30 |            (* k k)))))
31 | 
32 | (deftest test-mpz-plus-minus-times-randomly ()
33 |   (flet ((r ()
34 |            (- (floor (expt 10 100) 2) (random (* 2 (expt 10 100))))))
35 |     (loop :repeat 10
36 |           :for a := (r)
37 |           :for za := (h::integer-mpz a 'h::mpz/ram)
38 |           :for b := (r)
39 |           :for zb := (h::integer-mpz b 'h::mpz/ram)
40 |           :do (is (= (+ a b) (h::mpz-integer (h::mpz-+ za zb))))
41 |               (is (= (- a b) (h::mpz-integer (h::mpz-- za zb))))
42 |               (is (= (* a a) (h::mpz-integer (h::mpz-square za))))
43 |               (let ((ab (* a b)))
44 |                 (let ((h::*ntt-multiply-threshold* most-positive-fixnum))
45 |                   (is (= ab (h::mpz-integer (h::mpz-* za zb)))))
46 |                 (let ((h::*ntt-multiply-threshold* 0))
47 |                   (is (= ab (h::mpz-integer (h::mpz-* za zb)))))))))
48 | 
49 | (deftest test-s64*mpz ()
50 |   (flet ((test (a b)
51 |            (= (* a b)
52 |               (let ((x (h::integer-mpz b 'h::mpz/ram)))
53 |                 (h::mpz-multiply-by-s64! a x)
54 |                 (h::mpz-integer x)))))
55 |     (is (test 0 0))
56 |     (is (test 1 1))
57 |     (is (test 1 most-positive-fixnum))
58 |     (is (test -2 2))
59 |     (is (test 2 -2))
60 |     (is (test most-positive-fixnum 1))
61 |     (is (test most-positive-fixnum most-positive-fixnum))
62 |     (is (test 1 (expt most-positive-fixnum 3)))
63 |     (is (test 2 (expt most-positive-fixnum 3)))
64 |     (is (test most-positive-fixnum (expt most-positive-fixnum 3)))
65 |     (is (test (- (random most-positive-fixnum) (floor most-positive-fixnum 2))
66 |               (expt (+ most-positive-fixnum (random 31337)) (+ 2 (random 10)))))))
67 | 


--------------------------------------------------------------------------------
/tests/multiplication.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; multiplication.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica-tests)
  6 | 
  7 | ;;;;;;;;;;;;;;;; Testing fast multiplication ;;;;;;;;;;;;;;;;
  8 | 
  9 | (deftest test-m*/fast ()
 10 |   (let ((moduli (h::scheme-moduli h::**scheme**)))
 11 |     (dotimes (nmod (length moduli))
 12 |       (let* ((m (aref moduli nmod))
 13 |              (mi (aref (h::scheme-inverses h::**scheme**) nmod)))
 14 |         (loop :repeat 100000 :do
 15 |                 (let ((a (random m))
 16 |                       (b (random m)))
 17 |                   (is (= (mod (* a b) m)
 18 |                          (h::m*/fast a b m mi)))))))))
 19 | 
 20 | ;;;;;;;;;;;;;;;; Testing Bare-Bones NTT Multiplication ;;;;;;;;;;;;;;;;
 21 | 
 22 | (defun vector->storage (vector)
 23 |   (let* ((n (length vector))
 24 |          (vec (h::make-storage n)))
 25 |     (h::with-vec (vec vec_)
 26 |       (dotimes (i n vec)
 27 |         (setf (vec_ i) (aref vector i))))))
 28 | 
 29 | (defun digit-count (n)
 30 |   "How many decimal digits does it take to write N?"
 31 |   (if (zerop n)
 32 |       1
 33 |       (let* ((approx   (ceiling (integer-length n) (log 10.0d0 2)))
 34 |              (exponent (expt 10 (1- approx))))
 35 |         (if (> exponent n)
 36 |             (1- approx)
 37 |             approx))))
 38 | 
 39 | (defun digits (n &key (size (digit-count n)))
 40 |   "Make an array of the decimal digits of N."
 41 |   (loop :with v := (make-array size :initial-element 0 :element-type 'h::digit)
 42 |         :for i :from 0
 43 |         :while (plusp n) :do
 44 |           (multiple-value-bind (div rem) (floor n 10)
 45 |             (setf (aref v i) rem)
 46 |             (setf n div))
 47 |         :finally (return v)))
 48 | 
 49 | (defun carry (v)
 50 |   "Perform carry propagation on the vector V."
 51 |   (loop :for i :below (1- (length v))
 52 |         :when (<= 10 (aref v i))
 53 |           :do (multiple-value-bind (div rem) (floor (aref v i) 10)
 54 |                 (setf (aref v i)      rem)
 55 |                 (incf (aref v (1+ i)) div))
 56 |         :finally (return v)))
 57 | 
 58 | (defun undigits (v)
 59 |   "Take an array of decimal digits V and create a number N."
 60 |   (loop :for x :across (carry v)
 61 |         :for b := 1 :then (* 10 b)
 62 |         :sum (* b x)))
 63 | 
 64 | ;;; A convolution whose length-N input sequences have a maximum value
 65 | ;;; of M will have values bounded by N*M^2. Our prime must be larger
 66 | ;;; than this.
 67 | (defun simplistic-ntt-multiply (a b)
 68 |   (let* ((a-count (digit-count a))
 69 |          (b-count (digit-count b))
 70 |          (length (h::least-power-of-two->= (+ 1 a-count b-count)))
 71 |          (a-digits (vector->storage (digits a :size length)))
 72 |          (b-digits (vector->storage (digits b :size length)))
 73 |          (m (aref (h::scheme-moduli h::**scheme**) 0))
 74 |          (scheme (h::make-modular-scheme (list m))))
 75 |     (when h::*verbose*
 76 |       (format t "Multiplying ~D * ~D = ~D~%" a b (* a b))
 77 | 
 78 |       (format t "A's digits: ~A~%" a-digits)
 79 |       (format t "B's digits: ~A~%" b-digits))
 80 | 
 81 |     (setf a-digits (h::ntt-forward a-digits scheme 0))
 82 |     (setf b-digits (h::ntt-forward b-digits scheme 0))
 83 | 
 84 |     (when h::*verbose*
 85 |       (format t "NTT(A): ~A~%" a-digits)
 86 |       (format t "NTT(B): ~A~%" b-digits))
 87 | 
 88 |     (h::with-vecs (a-digits a_ b-digits b_)
 89 |       (dotimes (i length)
 90 |         (setf (a_ i) (h::m* (a_ i) (b_ i) m))))
 91 | 
 92 |     (when h::*verbose*
 93 |       (format t "C = NTT(A)*NTT(B) mod ~D = ~A~%" m a-digits))
 94 | 
 95 |     (setf a-digits (h::ntt-reverse a-digits scheme 0))
 96 | 
 97 |     (when h::*verbose*
 98 |       (format t "NTT^-1(C): ~A~%" a-digits))
 99 | 
100 |     (undigits (h::vec->vector a-digits))))
101 | 
102 | (defun simplistic-fft-multiply (a b)
103 |   "Multiply two non-negative integers A and B using FFTs."
104 |   (let* ((a-count (digit-count a))
105 |          (b-count (digit-count b))
106 |          (length (h::least-power-of-two->= (+ 1 a-count b-count)))
107 |          (a-digits (coerce (digits a :size length) 'simple-vector))
108 |          (b-digits (coerce (digits b :size length) 'simple-vector)))
109 |     (when h::*verbose*
110 |       (format t "Multiplying ~D * ~D = ~D~%" a b (* a b))
111 | 
112 |       (format t "A's digits: ~A~%" a-digits)
113 |       (format t "B's digits: ~A~%" b-digits))
114 | 
115 |     (setf a-digits (h::dif-forward a-digits))
116 |     (setf b-digits (h::dif-forward b-digits))
117 | 
118 |     (when h::*verbose*
119 |       (format t "FFT(A): ~A~%" a-digits)
120 |       (format t "FFT(B): ~A~%" b-digits))
121 | 
122 |     (setf a-digits (map-into a-digits #'* a-digits b-digits))
123 | 
124 |     (when h::*verbose*
125 |       (format t "C = FFT(A)*FFT(B) = ~A~%" a-digits))
126 | 
127 |     (setf a-digits (h::dit-reverse a-digits))
128 | 
129 |     (when h::*verbose*
130 |       (format t "FFT^-1(C): ~A~%" a-digits))
131 | 
132 |     (undigits (map 'vector (lambda (z)
133 |                              (round (realpart z)))
134 |                    a-digits))))
135 | 
136 | (deftest test-simplistic-ntt-multiply ()
137 |   (loop :repeat 1000
138 |         :for a := (random most-positive-fixnum)
139 |         :for b := (random most-positive-fixnum)
140 |         :do (is (= (* a b) (simplistic-ntt-multiply a b)))))
141 | 
142 | (deftest test-simplistic-fft-multiply ()
143 |   (loop :repeat 1000
144 |         :for a := (random most-positive-fixnum)
145 |         :for b := (random most-positive-fixnum)
146 |         :do (is (= (* a b) (simplistic-fft-multiply a b)))))
147 | 
148 | 


--------------------------------------------------------------------------------
/tests/ntt.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; ntt.lisp
  2 | ;;;;
  3 | ;;;; Copyright (c) 2019 Robert Smith
  4 | 
  5 | (in-package #:hypergeometrica-tests)
  6 | 
  7 | ;;;;;;; Tests for parts of the NTT implementation ;;;;;;;
  8 | ;; Tests for NTT multiplication itself are in tests/multiplication.lisp
  9 | 
 10 | ;;;;;;; These are various NTT implementations used for testing ;;;;;;;
 11 | 
 12 | (defun ntt-forward-matrix (N m w)
 13 |   "Compute the NTT matrix of size N x N over Z/mZ using the primitive Mth root of unity W of order N."
 14 |   (let ((matrix (make-array (list N N) :initial-element 1)))
 15 |     (loop :for row :from 0 :below N :do
 16 |       (loop :for col :from 0 :below N
 17 |             :for exponent := (* col row)
 18 |             :do (setf (aref matrix row col)
 19 |                       (h::expt-mod/safe w exponent m)))
 20 |           :finally (return matrix))))
 21 | 
 22 | (defun ntt-reverse-matrix (N m w)
 23 |   "Compute the inverse NTT matrix of size N x N over Z/mZ using the primitive Mth root of unity W of order N.
 24 | 
 25 | This is just the conjugate-transpose of the NTT matrix, scaled by N."
 26 |   (labels ((conjugate-transpose (in)
 27 |              (let ((out (make-array (array-dimensions in))))
 28 |                (loop :for row :below (array-dimension out 0) :do
 29 |                  (loop :for col :below (array-dimension out 1) :do
 30 |                    (setf (aref out col row)
 31 |                          (h::m/ (h::inv-mod (aref in row col) m) N m))))
 32 |                out)))
 33 |     (conjugate-transpose (ntt-forward-matrix N m w))))
 34 | 
 35 | (defun matmul (A B modulus)
 36 |   "Multiply the matrices A and B over Z/mZ for modulus MODULUS."
 37 |   (let* ((m (array-dimension A 0))
 38 |          (n (array-dimension A 1))
 39 |          (l (array-dimension B 1))
 40 |          (C (make-array `(,m ,l) :initial-element 0)))
 41 |     (loop :for i :below m :do
 42 |       (loop :for k :below l :do
 43 |         (setf (aref C i k)
 44 |               (mod (loop :for j :below n
 45 |                          :sum (* (aref A i j)
 46 |                                  (aref B j k)))
 47 |                    modulus))))
 48 |     C))
 49 | 
 50 | (defun matvecmul (matrix v m)
 51 |   "Multiply the matrix MATRIX by the column vector V over Z/mZ."
 52 |   (let* ((N (length v))
 53 |          (result (copy-seq v)))
 54 |     (loop :for row :below N
 55 |           :do (setf (aref result row)
 56 |                     (loop :for col :below N
 57 |                           :for x := (aref matrix row col)
 58 |                           :for y := (aref v col)
 59 |                           :for x*y := (h::m* x y m)
 60 |                           :for s := x*y :then (h::m+ s x*y m)
 61 |                           :finally (return s)))
 62 |           :finally (return result))))
 63 | 
 64 | (defun ntt-forward-direct (in m w)
 65 |   "Compute the NTT of the vector IN over Z/mZ using the primitive Mth root of unity W of order (LENGTH IN)."
 66 |   (let* ((N (length in))
 67 |          (out (make-array N :initial-element 0)))
 68 |     (loop :for k :below N
 69 |           :for w^k := (h::expt-mod/safe w k m)
 70 |           :do (setf (aref out k)
 71 |                     (loop :for j :below N
 72 |                           :for w^jk := (h::expt-mod/safe w^k j m)
 73 |                           :sum (h::m* w^jk (aref in j) m) :into s
 74 |                           :finally (return (mod s m))))
 75 |           :finally (return out))))
 76 | 
 77 | (defun ntt-reverse-direct (in m w)
 78 |   "Compute the inverse NTT of the vector IN over Z/mZ using the primitive Mth root of unity W of order (LENGTH IN)."
 79 |   (setf w (h::inv-mod w m))
 80 |   (let* ((N (length in))
 81 |          (out (make-array N :initial-element 0)))
 82 |     (loop :for k :below N
 83 |           :for w^k := (h::expt-mod w k m)
 84 |           :do (setf (aref out k)
 85 |                     (loop :for j :below N
 86 |                           :for w^jk := (h::expt-mod/safe w^k j m)
 87 |                           :sum (* w^jk (aref in j)) :into s
 88 |                           :finally (return (h::m/ (mod s m) N m))))
 89 |           :finally (return out))))
 90 | 
 91 | ;;; Tests start HERE!
 92 | (defun test-inversion/matrix (v m w)
 93 |   "Tests inversion property of matrix method."
 94 |   (declare (type vector v))
 95 |   (let* ((N (length v))
 96 |          (eye (matmul
 97 |                (ntt-reverse-matrix N m w)
 98 |                (ntt-forward-matrix N m w)
 99 |                m)))
100 |     (is (loop :for i :below N
101 |               :always (loop :for j :below N
102 |                             :always (= (aref eye i j)
103 |                                        (if (= i j)
104 |                                            1
105 |                                            0)))))))
106 | 
107 | (defun test-inversion/direct (v m w)
108 |   "Tests inversion property of the direct NTTs."
109 |   (declare (type vector v))
110 |   (is (equalp v
111 |               (ntt-reverse-direct (ntt-forward-direct v m w)
112 |                                   m w))))
113 | 
114 | (defun test-inversion/ntt (v scheme i)
115 |   "Tests inversion property of the fast NTTs."
116 |   (declare (type h::storage v))
117 |   (is (h::vec= v
118 |                (h::ntt-reverse (h::ntt-forward (h::copy-vec v) scheme i) scheme i))))
119 | 
120 | 
121 | (deftest test-inversion-properties ()
122 |   "Test that the forward and reverse transforms are actually inverses."
123 |   (let ((N (expt 2 6)))
124 |     (dolist (m (coerce (h::scheme-moduli h::**scheme**) 'list))
125 |       (let* ((v (h::make-storage N))
126 |              (scheme (h::make-modular-scheme (list m)))
127 |              (w (aref (h::scheme-primitive-roots scheme) (h::lg N) 0 0 0)))
128 |         (h::vec-into v (lambda () (random m)))
129 |         (test-inversion/matrix (h::vec->vector v) m w)
130 |         (test-inversion/direct (h::vec->vector v) m w)
131 |         (test-inversion/ntt v scheme 0)))))
132 | 
133 | (deftest test-ntt-from-various-definitions ()
134 |   "Test that the NTTs agree in their transforms."
135 |   (let ((N (expt 2 8)))
136 |     (dolist (m (coerce (h::scheme-moduli h::**scheme**) 'list))
137 |       (let* ((v (h::make-storage N))
138 |              (scheme (h::make-modular-scheme (list m)))
139 |              (w (aref (h::scheme-primitive-roots scheme) (h::lg N) 0 0 0)))
140 |         (h::vec-into v (lambda () (random m)))
141 |         (let* ((v-vector (h::vec->vector v))
142 |                (a (matvecmul (ntt-forward-matrix N m w) v-vector m))
143 |                (b (ntt-forward-direct v-vector m w))
144 |                (c (h::ntt-forward (h::copy-vec v) scheme 0)))
145 |           (h::bit-reversed-permute-vec! c)
146 |           (is (equalp a b))
147 |           (is (equalp b (h::vec->vector c))))
148 |         (let* ((v-vector (h::vec->vector v))
149 |                (a (matvecmul (ntt-reverse-matrix N m w) v-vector m))
150 |                (b (ntt-reverse-direct v-vector m w))
151 |                (c (let ((v (h::copy-vec v)))
152 |                     (h::bit-reversed-permute-vec! v)
153 |                     (h::ntt-reverse v scheme 0))))
154 |           (is (equalp a b))
155 |           (is (equalp b (h::vec->vector c))))))))
156 | 


--------------------------------------------------------------------------------
/tests/package.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; package.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (fiasco:define-test-package #:hypergeometrica-tests
 6 |   (:local-nicknames (#:h #:hypergeometrica))
 7 |   (:export
 8 |    #:test-hypergeometrica))
 9 | 
10 | (cl:defpackage #:hypergeometrica-debug
11 |   (:use #:cl)
12 |   (:local-nicknames (#:h #:hypergeometrica))
13 |   (:export
14 |    #:dd))
15 | 


--------------------------------------------------------------------------------
/tests/pi.lisp:
--------------------------------------------------------------------------------
 1 | ;;; pi.lisp
 2 | ;;;
 3 | ;;; Copyright (c) 2023 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (defun %test-pi (n &key (from 0)
 8 |                         (check (constantly t)))
 9 |   (loop :for k :from from :below n
10 |         :for bits := (expt 10 k)
11 |         :do
12 |            (let (x-pi r-pi true-pi)
13 |              (h::with-stopwatch (tim :log t :label "pi: ")
14 |                (format t "~&Calculating pi with Hypergeometrica:~%")
15 |                (setf x-pi (h::mpd-pi bits))
16 |                (tim "hypergeometrica pi")
17 | 
18 |                (format t "~&Converting Hypergeometrica pi to MPFR:~%")
19 |                (sb-mpfr:set-precision (+ bits 8))
20 |                (setf r-pi (h::mpd-mpfr x-pi))
21 |                (tim "hypergeometrica -> mpfr")
22 | 
23 |                (format t "~&Calculating MPFR pi:~%")
24 |                ;; Recall that we calculate pi/10.
25 |                (setf true-pi (sb-mpfr:div (sb-mpfr:const-pi) 10))
26 | 
27 |                (format t "~&Calculated ~D bits [~D digits]~%" bits (round (* bits (log 2.0d0 10.0d0))))
28 | 
29 |                ;; TODO: make more efficient by comparing bits.
30 |                (let* ((diff (sb-mpfr:abs (sb-mpfr:sub true-pi r-pi)))
31 |                       (err (cond
32 |                              ;; If it's a perfect match, we still say
33 |                              ;; we have an error of 2^(-bits).
34 |                              ((sb-mpfr:zerop diff) (- bits))
35 |                              ;; Or it's not a perfect match...
36 |                              (t
37 |                               (sb-mpfr:set-precision 64)
38 |                               (round (sb-mpfr:coerce (sb-mpfr:log2 diff) 'rational))))))
39 |                  (funcall check err bits)
40 |                  (finish-output))))))
41 | 
42 | (deftest test-pi ()
43 |   (%test-pi 5 :from 1
44 |               :check (lambda (err-bits desired-bits)
45 |                        ;; ERR-BITS means the error was 2^(ERR-BITS),
46 |                        ;; so it will be negative in "good" cases.
47 |                        (is (and (minusp err-bits)
48 |                                 (<= desired-bits (abs err-bits)))))))
49 | 


--------------------------------------------------------------------------------
/tests/primes.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; primes.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (deftest test-garner ()
 8 |   (loop :repeat 50000 :do
 9 |     (let* ((moduli #(2 3 5 7 11 13 17 19 23 29 31))
10 |            (n (random (reduce #'* moduli)))
11 |            (vals (h::to-congruence-relations moduli n))
12 |            (g (h::garner moduli vals))
13 |            (n* (h::reconstruct-from-garner moduli g)))
14 |       (is (= n n*)))))
15 | 
16 | (deftest test-factor-out ()
17 |   (flet ((test-it (p k)
18 |            (multiple-value-bind (n pow) (h::factor-out (* (expt p k) 7) p)
19 |              (is (and (= 7 n)
20 |                       (= k pow))))))
21 |     (test-it 2 1)
22 |     (test-it 2 2)
23 |     (test-it 2 3)
24 |     (test-it 3 1)
25 |     (test-it 3 2)
26 |     (test-it 3 3)
27 |     (test-it 6 1)
28 |     (test-it 6 2)
29 |     (test-it 6 3)))
30 | 
31 | (deftest test-primep ()
32 |   (let ((primes-from-the-internet
33 |           '(2      3      5      7     11     13     17     19     23     29
34 |             31     37     41     43     47     53     59     61     67     71
35 |             73     79     83     89     97    101    103    107    109    113
36 |             127    131    137    139    149    151    157    163    167    173
37 |             179    181    191    193    197    199    211    223    227    229
38 |             233    239    241    251    257    263    269    271    277    281
39 |             283    293    307    311    313    317    331    337    347    349
40 |             353    359    367    373    379    383    389    397    401    409
41 |             419    421    431    433    439    443    449    457    461    463
42 |             467    479    487    491    499    503    509    521    523    541
43 |             547    557    563    569    571    577    587    593    599    601
44 |             607    613    617    619    631    641    643    647    653    659
45 |             661    673    677    683    691    701    709    719    727    733
46 |             739    743    751    757    761    769    773    787    797    809
47 |             811    821    823    827    829    839    853    857    859    863
48 |             877    881    883    887    907    911    919    929    937    941
49 |             947    953    967    971    977    983    991    997)))
50 |     (is (every #'h::primep primes-from-the-internet))
51 |     (loop :for p :in primes-from-the-internet
52 |           :for p-next :in (rest primes-from-the-internet)
53 |           :do (is (= p-next (h::next-prime p))))))
54 | 
55 | (deftest test-factorize ()
56 |   (loop :repeat 100
57 |         :for n := (1+ (random 100000))
58 |         :for factorization := (h::factorize n)
59 |         :do (loop :with x := 1
60 |                   :for (f . k) :in factorization
61 |                   :do (setf x (* x (expt f k)))
62 |                   :finally (is (= x n)))))
63 | 


--------------------------------------------------------------------------------
/tests/suite.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; suite.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (defun test-hypergeometrica ()
 8 |   (let ((h::*verbose* nil))
 9 |     (run-package-tests :package ':hypergeometrica-tests
10 |                        :verbose nil
11 |                        :describe-failures t
12 |                        :interactive t)))
13 | 


--------------------------------------------------------------------------------
/tests/sundries.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; sundries.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | ;;;; Tests which don't fit anywhere else ;;;;
 8 | 
 9 | (deftest test-sundries ()
10 |   ;; POWER-OF-TWO-P
11 |   (is (not (h::power-of-two-p 0)))
12 |   (is (h::power-of-two-p 1))
13 |   (is (h::power-of-two-p 2))
14 |   (is (not (h::power-of-two-p 3)))
15 |   (is (h::power-of-two-p 4))
16 |   (loop :for i :from 1 :to 25
17 |         :do (is (h::power-of-two-p (expt 2 i)))
18 |             (is (not (h::power-of-two-p (+ 3 (expt 2 i))))))
19 |   ;; NEXT-POWER-OF-TWO
20 |   (loop :for i :from 1 :to 25
21 |         :for j := (expt 2 i)
22 |         :do (is (= i (h::next-power-of-two j))))
23 | 
24 |   (loop :for i :from 2 :to 25
25 |         :for j := (+ 3 (expt 2 i))
26 |         :do (is (= (+ 1 i) (h::next-power-of-two j))))
27 | 
28 |   (is (h::coprimep 2 3))
29 |   (is (h::coprimep 4 9))
30 |   (is (not (h::coprimep 4 20)))
31 | 
32 |   (is (h::pairwise-coprimep '(2 3 5 7 11)))
33 |   (is (h::pairwise-coprimep #(2 3 5 7 11)))
34 | 
35 |   (is (not (h::pairwise-coprimep '(2 3 5 7 9 11))))
36 |   (is (not (h::pairwise-coprimep #(2 3 5 7 9 11)))))
37 | 


--------------------------------------------------------------------------------
/tests/vec.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; vec.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2019 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (deftest test-create-ram-vec ()
 8 |   (loop :for length :below 8192
 9 |         :for length/2 := (floor length 2)
10 |         :for vec := (h::make-ram-vec length)
11 |         :do (is (= length (h::vec-digit-length vec)))
12 |             (h::resize-vec-by vec length/2)
13 |             (is (= (+ length length/2) (h::vec-digit-length vec)))
14 |             (h::resize-vec-by vec (- length/2))
15 |             (is (= length (h::vec-digit-length vec)))
16 |             (h::resize-vec-by vec (- length/2))
17 |             (is (= (- length length/2) (h::vec-digit-length vec)))))
18 | 


--------------------------------------------------------------------------------
/tests/write-number.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; write-number.lisp
 2 | ;;;;
 3 | ;;;; Copyright (c) 2023 Robert Smith
 4 | 
 5 | (in-package #:hypergeometrica-tests)
 6 | 
 7 | (deftest test-write-number-randomly ()
 8 |   (loop :repeat 150
 9 |         :for base := (+ 2 (random 98))
10 |         :for expt := (+ 100 (random 100))
11 |         :for hype-result := (h::mpz-expt (h::integer-mpz base 'h::mpz/ram) expt)
12 |         :for lisp-result := (expt base expt)
13 |         :do
14 |            (is (string=
15 |                 (prin1-to-string lisp-result)
16 |                 (with-output-to-string (s)
17 |                   (h::write-number hype-result s))))))
18 | 


--------------------------------------------------------------------------------