├── .gitignore
├── .conda
├── conda_build_config.yaml
└── meta.yaml
├── pysrc
└── xprec
│ ├── __init__.py
│ └── linalg.py
├── .dev
└── container
│ ├── Dockerfile
│ └── devcontainer.json
├── .github
└── workflows
│ ├── conda.yml
│ ├── wheels.yml
│ └── pytest.yml
├── LICENSE.txt
├── test
├── test_dtype.py
├── test_whitespace.py
├── test_linalg.py
├── test_ufunc.py
└── test_mpmath.py
├── README.md
├── csrc
├── dd_linalg.h
├── dd_linalg.c
├── dd_arith.h
├── _dd_linalg.c
├── dd_arith.c
└── _dd_ufunc.c
├── QD-LICENSE.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .*
2 | *~
3 | \#*\#
4 |
5 | *.pyc
6 | __pycache__/
7 | build/
8 | dist/
9 | *.o
10 | *.so
11 | *.egg-info/
12 |
13 | notebooks/*.ipynb
14 |
15 | !.gitignore
16 | !/.github/
17 | !/.editorconfig
18 |
--------------------------------------------------------------------------------
/.conda/conda_build_config.yaml:
--------------------------------------------------------------------------------
1 | python:
2 | - 3.13
3 | - 3.12
4 | - 3.11
5 |
6 | numpy:
7 | # 1.18 does not build with Python 3.9
8 | #- 1.19
9 | #- 1.20
10 | - 2.1
11 | - 2.2
12 |
13 | pin_run_as_build:
14 | numpy: x.x
15 |
--------------------------------------------------------------------------------
/.conda/meta.yaml:
--------------------------------------------------------------------------------
1 | {% set data = load_setup_py_data(setup_file='../setup.py', from_recipe_dir=True) %}
2 | {% set name = "xprec" %}
3 | {% set version = data.get("version") %}
4 |
5 | package:
6 | name: "{{ name|lower }}"
7 | version: "{{ version }}"
8 |
9 | source:
10 | path: ../
11 |
12 | build:
13 | number: 0
14 | script: "{{ PYTHON }} -m pip install . -vv"
15 |
16 | requirements:
  build:
18 | - python {{ python }}
19 | - numpy {{ numpy }}
20 | host:
21 | - python {{ python }}
22 | - numpy {{ numpy }}
23 | run:
24 | - python {{ python }}
25 | - numpy {{ numpy }}
26 |
27 | about:
28 | home: "https://github.com/tuwien-cms/xprec"
29 | license: MIT
30 | summary: "xprec precision numpy extension"
31 |
32 | extra:
33 | recipe-maintainers:
34 | - shinaoka
35 |
--------------------------------------------------------------------------------
/pysrc/xprec/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Markus Wallerberger and others
2 | # SPDX-License-Identifier: MIT
3 | """
4 | Extension module for numpy providing the `ddouble` data type.
5 |
6 | Loading this module registers an additional scalar data type `ddouble` with
numpy implementing double-double arithmetic. You can use the data type
8 | by passing `dtype=xprec.ddouble` to numpy functions.
9 |
10 | Example:
11 |
12 | import numpy as np
13 | from xprec import ddouble
14 |
15 | x = np.arange(5, dtype=ddouble)
16 | print(2 * x)
17 |
18 | """
19 | __version__ = "1.4.7"
20 |
21 | import numpy as _np
22 |
23 | from . import _dd_ufunc
24 | from . import _dd_linalg # needed for matmul
25 |
26 | ddouble = _dd_ufunc.dtype
27 |
28 |
def finfo(dtype):
    """Return machine limits for *dtype*, honouring a ``__finfo__`` hook.

    Scalar types that define a ``__finfo__`` classmethod (such as the
    ddouble type registered by this package) supply their own limits
    object; every other dtype is forwarded to ``numpy.finfo``.
    """
    np_dtype = _np.dtype(dtype)
    custom_finfo = getattr(np_dtype.type, "__finfo__", None)
    if custom_finfo is None:
        return _np.finfo(np_dtype)
    return custom_finfo()
37 |
--------------------------------------------------------------------------------
/.dev/container/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/anaconda3
2 | #FROM continuumio/anaconda3:2020.02
3 |
4 | ENV PYTHONUNBUFFERED=1
5 |
6 | RUN apt-get update && \
7 | DEBIAN_FRONTEND=noninteractive apt-get install -y \
8 | build-essential \
9 | curl \
10 | ca-certificates \
11 | git \
12 | zip \
13 | vim \
14 | cmake pkg-config gfortran \
15 | sudo \
16 | && \
17 | apt-get clean && rm -rf /var/cache/apt/archives/* /var/lib/apt/lists/* # clean up
18 |
19 | #RUN mkdir /opt/conda/pkgs
20 | #RUN chown 1000:1000 /opt/conda
21 |
22 | # Create non-root user
23 | ARG NB_USER=vscode
24 | ARG NB_UID=1000
25 | RUN useradd -u $NB_UID -m $NB_USER -s /bin/bash && \
26 | echo 'vscode ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
27 | USER $NB_USER
28 | ENV PATH "/home/${NB_USER}/.local/bin:${PATH}"
ENV PYTHONPATH "/home/${NB_USER}/work/src:${PYTHONPATH}"
30 |
31 | # for vscode
32 | RUN mkdir /home/${NB_USER}/work
33 |
34 | RUN conda config --add pkgs_dirs /home/vscode/.conda/pkgs
--------------------------------------------------------------------------------
/.github/workflows/conda.yml:
--------------------------------------------------------------------------------
1 | name: Build and upload conda packages
2 |
# Triggered when a new tag starting with "v" is pushed
4 | on:
5 | push:
6 | tags:
7 | - 'v*'
8 |
9 | jobs:
10 | build:
    runs-on: ${{ matrix.os }}
12 | strategy:
13 | matrix:
14 | # https://github.com/s-weigand/setup-conda/issues/432
15 | os: [ubuntu-latest, windows-2019, macos-latest]
16 |
17 | steps:
18 | - uses: actions/checkout@v4
19 | - uses: conda-incubator/setup-miniconda@v3
20 | with:
21 | auto-update-conda: true
22 | - name: Conda info
23 | shell: bash -el {0}
24 | run: conda info
25 | - name: Install dependencies
26 | run: |
27 | conda install conda-build anaconda-client -y
28 |
      - name: Build and upload
30 | env:
31 | ANACONDA_API_TOKEN: ${{secrets.ANACONDA_TOKEN}}
32 | run: |
33 | python3 --version
34 | conda config --set anaconda_upload yes
35 | conda build .conda --user SpM-lab
36 |
--------------------------------------------------------------------------------
/.github/workflows/wheels.yml:
--------------------------------------------------------------------------------
1 | name: Build and upload to PyPI
2 |
# Triggered when a new tag starting with "v" is pushed
4 | on:
5 | push:
6 | tags:
7 | - 'v*'
8 |
9 | jobs:
10 | build_sdist:
11 | name: Build distribution
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v4
15 |
16 | - name: Examine system
17 | run: pip freeze --all
18 |
19 | - name: Build sdist
20 | run: python setup.py sdist
21 |
22 | - uses: actions/upload-artifact@v4
23 | with:
24 | name: dist
25 | path: dist/xprec-*.tar.gz
26 |
27 | upload_pypi:
28 | name: Upload to PyPI
29 | needs: [build_sdist]
30 | runs-on: ubuntu-latest
31 | steps:
32 | - uses: actions/download-artifact@v4
33 | with:
34 | name: dist
35 | path: dist
36 |
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
          skip-existing: true
42 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021 Markus Wallerberger and others
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 |
--------------------------------------------------------------------------------
/test/test_dtype.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Markus Wallerberger and others
2 | # SPDX-License-Identifier: MIT
3 | import numpy as np
4 | import pytest
5 |
6 | from xprec import ddouble
7 |
8 |
9 | COMPATIBLE_DTYPES = [
10 | np.int8, np.int16, np.int32, np.int64, np.bool_, np.float32, np.float64,
11 | np.uint8, np.uint16, np.uint32, np.uint64,
12 | ]
13 |
14 |
@pytest.mark.parametrize('other', COMPATIBLE_DTYPES)
def test_cast_from(other):
    """Casting *into* ddouble is value-preserving, so any cast is allowed."""
    for casting in ('unsafe', 'safe'):
        assert np.can_cast(other, ddouble, casting)

    mat = np.eye(3, dtype=other)
    assert (mat == mat.astype(ddouble)).all()
23 |
24 |
@pytest.mark.parametrize('other', COMPATIBLE_DTYPES)
def test_cast_to(other):
    """Casting *out of* ddouble may lose precision: only unsafe is allowed."""
    assert np.can_cast(ddouble, other, 'unsafe')
    assert not np.can_cast(ddouble, other, 'safe')

    mat = np.eye(3, dtype=ddouble)
    assert (mat == mat.astype(other)).all()
33 |
34 |
def test_i64():
    """64-bit integers beyond 2**53 must survive a round trip via ddouble."""
    big = np.int64((1 << 62) + 1)
    assert big == big.astype(ddouble).astype(big.dtype)

    neg = -big
    assert (neg + 1) == (neg.astype(ddouble) + 1).astype(big.dtype)

    as_unsigned = big.astype(np.uint64)
    assert as_unsigned == as_unsigned.astype(ddouble).astype(as_unsigned.dtype)
44 |
--------------------------------------------------------------------------------
/.github/workflows/pytest.yml:
--------------------------------------------------------------------------------
1 | name: xprec python package
2 |
on:
  push:
    branches:
      - mainline
  pull_request:
    branches:
      - mainline
10 |
11 | jobs:
12 | build:
13 | runs-on: ${{ matrix.os }}
14 | strategy:
15 | matrix:
16 | include:
17 | - os: ubuntu-latest
18 | numpy-version: auto
19 | python-version: 3.9
20 | - os: ubuntu-latest
21 | numpy-version: 2.0
22 | python-version: 3.11
23 | - os: windows-latest
24 | numpy-version: auto
25 | python-version: 3.9
26 | - os: macos-latest
27 | numpy-version: auto
28 | python-version: 3.11
29 | steps:
30 | - uses: actions/checkout@v4
31 |
32 | - name: Set up python ${{ matrix.python-version }}
33 | uses: actions/setup-python@v5
34 | with:
35 | python-version: ${{ matrix.python-version }}
36 |
37 | - name: Install numpy ${{ matrix.numpy-version }}
38 | if: ${{ matrix.numpy-version != 'auto' }}
39 | run: |
40 | pip install numpy==${{ matrix.numpy-version }}
41 |
42 | - name: Install package with testing dependencies
43 | run: |
44 | pip install -v .[test]
45 |
46 | - name: Test with pytest
47 | run: |
48 | pytest
49 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Library for double-double arithmetic calculation
2 | ================================================
3 |
4 | Extension module for numpy providing the `ddouble` data type.
5 |
6 | Loading this module registers an additional scalar data type `ddouble` with
numpy implementing double-double arithmetic. You can use the data type
8 | by passing `dtype=xprec.ddouble` to numpy functions.
9 |
10 | The `xprec.linalg` module provides some linear algebra subroutines, in
11 | particular QR, RRQR, SVD and truncated SVD.
12 |
13 | Installation
14 | ------------
15 |
16 | $ pip install xprec
17 |
18 | Quickstart
19 | ----------
20 |
21 | import numpy as np
22 | x = np.linspace(0, np.pi)
23 |
24 | # import double-double precision data type
25 | from xprec import ddouble
26 | x = x.astype(ddouble)
27 | y = x * x + 1
28 | z = np.sin(x)
29 |
30 | # do some linalg
31 | import xprec.linalg
32 | A = np.vander(np.linspace(-1, 1, 80, dtype=ddouble), 150)
33 | U, s, VT = xprec.linalg.svd(A)
34 |
Troubleshooting
---
37 |
38 | * icc
39 | You may suffer from a long runtime when xprec is built with icc. If you encounter this problem, please try the following:
40 |
41 | ```
42 | CFLAGS="-fp-model=precise" pip install xprec
43 | ```
44 |
45 | Licence
46 | -------
47 | The xprec library is
48 | Copyright (C) 2021 Markus Wallerberger.
49 | Licensed under the MIT license (see LICENSE.txt).
50 |
51 | Contains code from the QD library, which is
52 | Copyright (C) 2012 Yozo Hida, Xiaoye S. Li, David H. Bailey.
53 | Released under a modified BSD license (see QD-LICENSE.txt).
54 |
--------------------------------------------------------------------------------
/.dev/container/devcontainer.json:
--------------------------------------------------------------------------------
1 | // For format details, see https://aka.ms/vscode-remote/devcontainer.json or the definition README at
2 | // https://github.com/microsoft/vscode-dev-containers/tree/master/containers/docker-existing-dockerfile
3 | {
4 | "name": "Existing Dockerfile",
5 | // Sets the run context to one level up instead of the .devcontainer folder.
6 | "context": "..",
7 | // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
8 | "dockerFile": "./Dockerfile",
9 | // The optional 'runArgs' property can be used to specify additional runtime arguments.
10 | "runArgs": [],
11 | // Use 'settings' to set *default* container specific settings.json values on container create.
12 | // You can edit these settings after create using File > Preferences > Settings > Remote.
13 | // Uncomment the next line if you want to publish any ports.
14 | // "appPort": [],
15 | // Uncomment the next line to run commands after the container is created - for example installing git.
16 | // "postCreateCommand": "apt-get update && apt-get install -y git",
17 | // Add the IDs of extensions you want installed when the container is created in the array below.
18 | "extensions": [
19 | "ms-azuretools.vscode-docker",
20 | "mutantdino.resourcemonitor",
21 | "shardulm94.trailing-spaces",
22 | "cliffordfajardo.hightlight-selections-vscode",
23 | "wdawson.better-kill-ring",
24 | "oderwat.indent-rainbow",
25 | "github.vscode-pull-request-github",
26 | "mhutchie.git-graph",
27 | "donjayamanne.githistory",
28 | "eamodio.gitlens",
29 | "bungcip.better-toml",
30 | "usernamehw.errorlens",
31 | "ms-vscode.live-server",
32 | "christian-kohler.path-intellisense",
33 | "ms-python.python",
34 | ],
35 | "remoteUser": "vscode",
36 | "workspaceFolder": "/home/vscode/work",
37 | "workspaceMount": "src=${localWorkspaceFolder},dst=/home/vscode/work,type=bind",
38 | }
39 |
--------------------------------------------------------------------------------
/test/test_whitespace.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | HEREPATH = os.path.abspath(os.path.dirname(__file__))
4 | print("HEREPATH", HEREPATH)
5 | ROOTDIR = os.path.abspath(os.path.join(HEREPATH, os.path.pardir))
6 | PYSRCDIR = os.path.join(ROOTDIR, "pysrc", "xprec")
7 | CSCRDIR = os.path.join(ROOTDIR, "csrc")
8 |
def check_whitespace(files):
    """Check source files for whitespace hygiene; raise on any violation.

    Parameters:
        files: iterable of path-like objects carrying a ``.name`` attribute
            (e.g. ``os.DirEntry`` or ``pathlib.Path``), used in messages.

    Raises:
        ValueError: listing every violation found — trailing whitespace,
            tabs, CR line endings, overlong lines (> 90 chars), a missing
            final newline, and superfluous trailing blank lines.
    """
    errors = []
    blank = 0
    lineno = 0
    line = ""

    def add_error(fmt, *params):
        errors.append((fname, lineno, line, fmt.format(*params)))

    for fname in files:
        with open(fname, "r") as file:
            # Reset per-file state: previously `blank`/`lineno` leaked from
            # one file into the next (an empty file would inherit the
            # previous file's trailing-blank-line count).
            blank = 0
            lineno = 0
            line = ""
            for lineno, line in enumerate(file, start=1):
                if line[-1:] != '\n':
                    # only possible on the very last line of the file
                    add_error("file must end in newline")
                line = line[:-1]
                if line:
                    blank = 0
                else:
                    blank += 1
                if line[-1:] == '\r':
                    add_error("file must only have unix line endings")
                if line[-1:] == ' ':
                    add_error("line ends in whitespace")
                if '\t' in line:
                    add_error("line contains tab characters")
                if len(line) > 90:
                    add_error("line is too long: {:d} chars", len(line))
            # end of file
            if blank != 0:
                add_error("file has {:d} superfluous blank lines", blank)

    msg = ""
    for fname, lineno, line, lmsg in errors:
        msg += "{}:{}: {}\n".format(fname.name, lineno, lmsg)
    if msg:
        raise ValueError("Whitespace errors\n" + msg)
45 |
46 |
def all_files(path, ext):
    """Yield the ``os.DirEntry`` of every regular file directly inside
    *path* (non-recursive) whose name ends with *ext*."""
    yield from (entry for entry in os.scandir(path)
                if entry.is_file() and entry.name.endswith(ext))
51 |
52 |
def test_ws_testdir():
    # Lint every .py file in the test directory itself.
    check_whitespace(all_files(HEREPATH, ".py"))
55 |
56 |
def test_ws_setup():
    # Lint top-level .py files (i.e. setup.py); all_files does not recurse.
    check_whitespace(all_files(ROOTDIR, ".py"))
59 |
60 |
def test_ws_pysrcdir():
    # Lint the pure-Python package sources.
    check_whitespace(all_files(PYSRCDIR, ".py"))
63 |
64 |
def test_ws_csrcdir():
    # Apply the same whitespace rules to the C sources and headers.
    check_whitespace(all_files(CSCRDIR, ".c"))
    check_whitespace(all_files(CSCRDIR, ".h"))
68 |
--------------------------------------------------------------------------------
/csrc/dd_linalg.h:
--------------------------------------------------------------------------------
1 | /* Double-double linear algebra library
2 | *
3 | * Implementations were partly inspired by LAPACK, partly from Fredrik
4 | * Johansson's excellent MPMATH library.
5 | *
6 | * Copyright (C) 2021 Markus Wallerberger and others
7 | * SPDX-License-Identifier: MIT
8 | */
9 | #pragma once
10 | #include "dd_arith.h"
11 |
12 | /**
13 | * Apply Givens rotation to vector:
14 | *
15 | * [ a ] = [ c s ] [ x ]
16 | * [ b ] [ -s c ] [ y ]
17 | */
static inline void lmul_givensq(
    ddouble *a, ddouble *b, ddouble c, ddouble s, ddouble x, ddouble y)
{
    /* (a, b) <- [c s; -s c] @ (x, y); plain rotation, no normalization */
    *a = addww(mulww(c, x), mulww(s, y));
    *b = subww(mulww(c, y), mulww(s, x));
}
24 |
25 | /** Compute 2-norm of a vector */
26 | ddouble normw(const ddouble *x, long nn, long sxn);
27 |
28 | /**
29 | * Perform a rank-one update of a `ii` times `jj` matrix:
30 | *
31 | * A[i, j] += v[i] * w[j]
32 | */
33 | void rank1updateq(ddouble *a, long ais, long ajs, const ddouble *v, long vs,
34 | const ddouble *w, long ws, long ii, long jj);
35 |
36 | /**
37 | * Compute Givens rotation `R` matrix that satisfies:
38 | *
39 | * [ c s ] [ f ] [ r ]
40 | * [ -s c ] [ g ] = [ 0 ]
41 | */
42 | void givensw(ddouble f, ddouble g, ddouble *c, ddouble *s, ddouble *r);
43 |
44 | /**
45 | * Compute Householder reflector `H[tau, v]`, defined as:
46 | *
47 | * H[tau, v] = I - tau * v @ v.T
48 | *
49 | * that, when applied to a given `x`, zeros out all but the first component.
50 | * The scaling factor `tau` is returned, while `v` is written.
51 | */
52 | ddouble householderw(const ddouble *x, ddouble *v, long nn, long sx, long sv);
53 |
54 | /**
55 | * Perform the SVD of an arbitrary two-by-two matrix:
56 | *
57 | * [ a11 a12 ] = [ cu -su ] [ smax 0 ] [ cv sv ]
58 | * [ a21 a22 ] [ su cu ] [ 0 smin ] [ -sv cv ]
59 | */
60 | void svd_2x2(ddouble a11, ddouble a12, ddouble a21, ddouble a22, ddouble *smin,
61 | ddouble *smax, ddouble *cv, ddouble *sv, ddouble *cu, ddouble *su);
62 |
63 |
64 |
65 | ddouble jacobi_sweep(ddouble *u, long sui, long suj, ddouble *vt, long svi,
66 | long svj, long ii, long jj);
67 |
68 |
69 | void golub_kahan_chaseq(ddouble *d, long sd, ddouble *e, long se, long ii,
70 | ddouble *rot);
71 |
--------------------------------------------------------------------------------
/QD-LICENSE.txt:
--------------------------------------------------------------------------------
1 | Contains code from the QD library, which is:
2 | Copyright (C) 2012 Yozo Hida, Xiaoye S. Li, David H. Bailey.
3 |
4 | This License Agreement is entered into by The Regents of the University of
5 | California, Department of Energy contract-operators of the Lawrence Berkeley
6 | National Laboratory, 1 Cyclotron Road, Berkeley, CA 94720 (“Berkeley Lab”),
7 | and the entity listed below (“you” or "Licensee").
8 |
9 | 1. Redistribution and use in source and binary forms, with or without
10 | modification, are permitted provided that the following conditions are met:
11 |
12 | (1) Redistributions of source code must retain the copyright notice, this
13 | list of conditions and the following disclaimer.
14 |
15 | (2) Redistributions in binary form must reproduce the copyright notice,
16 | this list of conditions and the following disclaimer in the
17 | documentation and/or other materials provided with the distribution.
18 |
19 | (3) Neither the name of the University of California, Lawrence Berkeley
20 | National Laboratory, U.S. Dept. of Energy nor the names of its
21 | contributors may be used to endorse or promote products derived from
22 | this software without specific prior written permission.
23 |
24 | 2. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 |
36 | 3. You are under no obligation whatsoever to provide any bug fixes, patches,
37 | or upgrades to the features, functionality or performance of the source
38 | code ("Enhancements") to anyone; however, if you choose to make your
39 | Enhancements available either publicly, or directly to Lawrence Berkeley
40 | National Laboratory, without imposing a separate written license agreement
41 | for such Enhancements, then you hereby grant the following license: a
42 | non-exclusive, royalty-free perpetual license to install, use, modify,
43 | prepare derivative works, incorporate into other computer software,
44 | distribute, and sublicense such enhancements or derivative works thereof,
45 | in binary and source code form.
46 |
--------------------------------------------------------------------------------
/test/test_linalg.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Markus Wallerberger and others
2 | # SPDX-License-Identifier: MIT
3 | import numpy as np
4 |
5 | import xprec
6 | import xprec.linalg
7 | from xprec import ddouble
8 |
9 |
def test_householder_vec():
    """Applying H[beta, v] to x must annihilate all but the first entry."""
    rng = np.random.RandomState(4711)
    vec = np.array(rng.random_sample(20), dtype=ddouble)

    beta, refl = xprec.linalg.householder(vec)
    residue = vec - beta * refl * (refl @ vec)
    np.testing.assert_allclose(residue[1:].astype(float), 0, atol=1e-31)
18 |
19 |
def test_bidiag():
    # Bidiagonalization: the factors Q, B, RT must reconstruct A.
    rng = np.random.RandomState(4711)
    m, n = 7, 5
    A = rng.normal(size=(m,n)).astype(ddouble)
    Q, B, RT = xprec.linalg.bidiag(A)
    diff = Q @ B @ RT - A

    # FIXME: too large precision goals
    np.testing.assert_allclose(diff.astype(float), 0, atol=1e-29)
29 |
30 |
def test_svd():
    # Thin SVD in ddouble must reconstruct A, and its singular values must
    # agree with numpy's double-precision SVD to ~1e-14 relative.
    rng = np.random.RandomState(4711)
    A = rng.randn(100, 84)

    U, s, VT = xprec.linalg.svd(A.astype(xprec.ddouble), full_matrices=False)
    R = U * s @ VT - A
    np.testing.assert_allclose(R.astype(float), 0, atol=5e-29, rtol=0)

    _, sx, _ = np.linalg.svd(A.astype(float), full_matrices=False)
    np.testing.assert_allclose(s, sx, atol=1e-14 * sx[0], rtol=0)
41 |
42 |
def test_givens_rotation():
    """Check givens_rotation: R @ (f, g) must equal (r, 0).

    Renamed from ``test_givens``: a second function of that name later in
    this file shadowed this one, so it silently never ran.
    """
    f, g = np.array([3.0, -2.0], dtype=ddouble)
    c, s, r = xprec.linalg.givens_rotation(f, g)

    R = np.reshape([c, s, -s, c], (2, 2))
    v = np.hstack([f, g])
    w = np.hstack([r, np.zeros_like(r)])
    res = R @ v - w
    np.testing.assert_allclose(res.astype(float), 0, atol=1e-31)
52 |
53 |
def test_givens():
    # givens(a) returns r and the rotation matrix G with G @ a == r.
    a = np.array([3.0, -2.0], dtype=ddouble)
    r, G = xprec.linalg.givens(a)
    diff = r - G @ a
    np.testing.assert_allclose(diff.astype(float), 0, atol=1e-31)
59 |
60 |
def test_qr():
    # QR of a tall Vandermonde system: Q must be orthogonal (Q Q^T = I)
    # and the factors must reconstruct A.
    A = np.vander(np.linspace(-1, 1, 60), 80).astype(ddouble)
    Q, R = xprec.linalg.qr(A)
    I_m = np.eye(60)
    D = Q @ Q.T - I_m
    np.testing.assert_allclose(D.astype(float), 0, atol=4e-30)
    D = Q @ R - A
    np.testing.assert_allclose(D.astype(float), 0, atol=4e-30)
69 |
70 |
def test_qr_pivot():
    # Rank-revealing QR: orthogonal Q, reconstruction of the column-permuted
    # A, and non-increasing magnitudes along R's diagonal.
    A = np.vander(np.linspace(-1, 1, 60), 80).astype(ddouble)
    Q, R, piv = xprec.linalg.rrqr(A)
    I_m = np.eye(60)
    D = Q @ Q.T - I_m
    np.testing.assert_allclose(D.astype(float), 0, atol=4e-30)

    D = Q @ R - A[:,piv]
    np.testing.assert_allclose(D.astype(float), 0, atol=4e-30)

    Rdiag = np.abs(R.diagonal())
    assert (Rdiag[1:] <= Rdiag[:-1]).all()
83 |
84 |
def test_jacobi():
    """svd_trunc (Jacobi SVD) must reconstruct A from its factors."""
    A = np.vander(np.linspace(-1, 1, 60), 80).astype(ddouble)
    U, s, VT = xprec.linalg.svd_trunc(A)
    # Cast the residual to float before comparing, consistent with every
    # other test in this file.
    np.testing.assert_allclose(((U * s) @ VT - A).astype(float), 0.0,
                               atol=5e-30)
89 |
--------------------------------------------------------------------------------
/test/test_ufunc.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Markus Wallerberger and others
2 | # SPDX-License-Identifier: MIT
3 | import numpy as np
4 | import xprec
5 |
6 |
def _compare_ufunc(ufunc, *args, ulps=1):
    """Apply *ufunc* in double and in ddouble; demand agreement to `ulps`.

    The ddouble result is rounded back to double for the comparison, so
    this checks the extended-precision routine is at least as accurate as
    the double-precision one.
    """
    fx_d = ufunc(*args)
    fx_q = ufunc(*(a.astype(xprec.ddouble) for a in args)).astype(float)

    # Ensure relative accuracy to within `ulps` units in the last place
    np.testing.assert_array_almost_equal_nulp(fx_d, fx_q, ulps)
13 |
14 |
15 | def test_log():
16 | x = np.geomspace(1e-300, 1e300, 1953)
17 | _compare_ufunc(np.log, x)
18 |
19 | zeroq = xprec.ddouble.type(0)
20 | assert np.isinf(np.log(zeroq))
21 |
22 |
23 | def test_sqrt():
24 | x = np.geomspace(1e-300, 1e300, 1953)
25 | _compare_ufunc(np.sqrt, x)
26 |
27 |
28 | def test_exp():
29 | x = np.geomspace(1e-300, 700, 4953)
30 | x = np.hstack([-x[::-1], 0, x])
31 | _compare_ufunc(np.exp, x)
32 |
33 | # Unfortunately, on Windows expm1 is less precise, so we need to increase
34 | # the tolerance slightly
35 | _compare_ufunc(np.expm1, x, ulps=2)
36 |
37 |
38 | def test_cosh():
39 | x = np.geomspace(1e-300, 700, 4953)
40 | x = np.hstack([-x[::-1], 0, x])
41 | _compare_ufunc(np.cosh, x)
42 | _compare_ufunc(np.sinh, x)
43 |
44 | thousand = xprec.ddouble.type(1000)
45 | assert np.isinf(np.cosh(thousand))
46 | assert np.isinf(np.cosh(-thousand))
47 |
48 |
49 | def test_hypot():
50 | x = np.geomspace(1e-300, 1e260, 47)
51 | x = np.hstack([-x[::-1], 0, x])
52 | _compare_ufunc(np.hypot, x[:,None], x[None,:])
53 |
54 |
55 | def test_modf():
56 | ulps = 1
57 | x = np.linspace(-100, 100, 100)
58 | x_d = x.astype(xprec.ddouble)
59 |
60 | fx_d = np.modf(x)
61 | fx_q = np.modf(x_d)
62 |
63 | # Ensure relative accuracy of 1 ulp
64 | np.testing.assert_array_almost_equal_nulp(fx_d[0], fx_q[0].astype(float), ulps)
65 | np.testing.assert_array_almost_equal_nulp(fx_d[1], fx_q[1].astype(float), ulps)
66 |
67 |
68 | def test_power():
69 | x = np.linspace(0, 100, 100)
70 | _compare_ufunc(np.power, x[:,None], x[None,:])
71 |
72 |
73 | def test_arctan2():
74 | x = np.linspace(-100, 100, 100)
75 | _compare_ufunc(np.arctan2, x[:,None], x[None,:], ulps=2)
76 |
77 |
78 | def test_arcsin():
79 | x = np.linspace(-1, 1, 100)
80 | _compare_ufunc(np.arcsin, x, ulps=2)
81 |
82 |
83 | def test_arccos():
84 | x = np.linspace(-1, 1, 100)
85 | _compare_ufunc(np.arccos, x)
86 |
87 |
88 | def test_arctan():
89 | x = np.linspace(-100, 100, 100)
90 | _compare_ufunc(np.arctan, x)
91 |
92 |
93 | def test_arccosh():
94 | x = np.linspace(1, 100, 100)
95 | _compare_ufunc(np.arccosh, x)
96 |
97 |
98 | def test_arcsinh():
99 | x = np.linspace(-100, 100, 100)
100 | _compare_ufunc(np.arcsinh, x)
101 |
102 |
103 | def test_arctanh():
104 | x = np.linspace(-0.99, 0.99, 100)
105 | _compare_ufunc(np.arctanh, x)
106 |
--------------------------------------------------------------------------------
/test/test_mpmath.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Markus Wallerberger and others
2 | # SPDX-License-Identifier: MIT
3 | import numpy as np
4 | import pytest
5 |
6 | import xprec
7 |
8 | EPS = xprec.finfo(xprec.ddouble).eps
9 |
# mpmath provides the arbitrary-precision reference values; without it,
# this whole module is skipped.
try:
    import mpmath
except ImportError:
    pytest.skip("No mpmath library available", allow_module_level=True)
else:
    # 120 bits of working precision — comfortably above ddouble's ~106
    mpmath.mp.prec = 120
16 |
17 |
def mpf_for_xprec(x):
    """Converts xprec.ddouble array to array of mpmath mpf scalars"""
    x = np.asarray(x)
    if x.dtype != xprec.ddouble:
        raise ValueError("dtype shall be ddouble")

    # A ddouble is the unevaluated sum of two doubles: recover the high
    # part by rounding to float, the low part as the remainder, then add
    # both in mpmath's extended precision.
    x_flat = x.ravel()
    x_hi = x_flat.astype(float)
    x_lo = (x_flat - x_hi).astype(float)
    x_mpf = np.array(list(map(mpmath.mpf, x_hi)))
    x_mpf += x_lo
    return x_mpf.reshape(x.shape)
30 |
31 |
def map_mpmath(fn, x):
    """Apply the scalar function *fn* elementwise over *x*, returning an
    object array of the same shape."""
    arr = np.asarray(x)
    mapped = [fn(item) for item in arr.ravel()]
    return np.array(mapped, dtype=object).reshape(arr.shape)
38 |
39 |
def check_unary(mpmath_fn, numpy_fn, x, rtol):
    # Compare numpy_fn evaluated in ddouble against the mpmath reference,
    # elementwise, to relative tolerance rtol.  On failure, report the
    # offending inputs as a small table (truncated to 10 rows by the
    # range(10) term in the zip below).
    y_ref = map_mpmath(mpmath_fn, x)
    y_our = numpy_fn(x.astype(xprec.ddouble))
    y_float = y_ref.astype(float)

    diff = (y_ref - mpf_for_xprec(y_our)).astype(float)
    ok = np.abs(diff) <= rtol * np.abs(y_float)
    if not ok.all():
        x = x[~ok]
        y_float = y_float[~ok]
        y_our = y_our[~ok]
        diff = diff[~ok]
        reldiff = diff / np.abs(y_float)

        msg = f"{'x':>13s} {'mpmath':>13s} {'xprec':>13s} {'rel diff':>13s}\n"
        msg += "\n".join(f"{xi:13g} {y_refi:13g} {y_ouri:13g} {reldiffi:13g}"
                         for xi, y_refi, y_ouri, reldiffi, _
                         in zip(x, y_float, y_our, reldiff, range(10))
                         )
        raise ValueError(f"not equal to rtol = {rtol:3g}\n" + msg)
60 |
61 |
62 |
63 | def test_sqrt():
64 | # Once the low part of the ddouble becomes a denormal number, we
65 | # are in trouble, so we truncate the lower end of the range by
66 | # another 16 digits
67 | x = np.geomspace(1e-292, 1e307, 1953)
68 | check_unary(mpmath.sqrt, np.sqrt, x, 2*EPS)
69 |
70 |
71 | def test_log():
72 | x = np.reciprocal(np.geomspace(1e-292, 1e307, 1953))
73 | check_unary(mpmath.log, np.log, x, 70 * EPS)
74 |
75 |
76 | def test_exp():
77 | x = np.geomspace(1e-280, 670, 1511)
78 | x = np.hstack([-x[::-1], 0, x])
79 | check_unary(mpmath.exp, np.exp, x, 60 * EPS)
80 | check_unary(mpmath.expm1, np.expm1, x, 60 * EPS)
81 |
82 | check_unary(mpmath.sinh, np.sinh, x, 60 * EPS)
83 | check_unary(mpmath.cosh, np.cosh, x, 60 * EPS)
84 | check_unary(mpmath.tanh, np.tanh, x, 60 * EPS)
85 |
86 |
87 | def test_sincos():
88 | x = np.geomspace(1e-280, 4.8 * np.pi, 1511)
89 | x = np.hstack([-x[::-1], 0, x])
90 | check_unary(mpmath.sin, np.sin, x, 2 * EPS)
91 | check_unary(mpmath.cos, np.cos, x, 2 * EPS)
92 |
--------------------------------------------------------------------------------
/csrc/dd_linalg.c:
--------------------------------------------------------------------------------
1 | /* Double-double linear algebra library
2 | *
3 | * Implementations were partly inspired by LAPACK, partly from Fredrik
4 | * Johansson's excellent MPMATH library.
5 | *
6 | * Copyright (C) 2021 Markus Wallerberger and others
7 | * SPDX-License-Identifier: MIT
8 | */
9 | #include "dd_linalg.h"
10 |
11 | // 2**500 and 2**(-500);
12 | static const double LARGE = 3.273390607896142e+150;
13 | static const double INV_LARGE = 3.054936363499605e-151;
14 |
/* 2-norm of the nn-element vector x with stride sxn, where every element
 * is first multiplied by `scaling` (a power of two) so the squares do not
 * over-/underflow; the result is scaled back by 1/scaling on return.
 */
static ddouble normq_scaled(const ddouble *x, long nn, long sxn,
        double scaling)
{
    ddouble sum = Q_ZERO;
    for (long n = 0; n < nn; ++n, x += sxn) {
        ddouble curr = mul_pwr2(*x, scaling);
        sum = addww(sum, sqrw(curr));
    };
    return mul_pwr2(sqrtw(sum), 1.0/scaling);
}
25 |
/* Robust 2-norm of a strided vector: try the unscaled accumulation first,
 * and if the result is so large or so small that the intermediate squares
 * may have over-/underflowed, recompute with pre-scaling by 2^-500 resp.
 * 2^+500 (see LARGE/INV_LARGE above).
 */
ddouble normw(const ddouble *x, long nn, long sxn)
{
    ddouble sum = normq_scaled(x, nn, sxn, 1.0);

    // fall back to other routines in case of over/underflow
    if (sum.hi > LARGE)
        return normq_scaled(x, nn, sxn, INV_LARGE);
    else if (sum.hi < INV_LARGE)
        return normq_scaled(x, nn, sxn, LARGE);
    else
        return sum;
}
38 |
/* Compute the Householder reflector H[tau, v] = I - tau * v v^T that zeros
 * all but the first component of x (see dd_linalg.h).  Returns tau and
 * writes v (with v[0] == 1).
 *
 * NOTE(review): on the early returns (nn == 0, or x already of the desired
 * form) tau == 0 is returned and v is left untouched — callers must not
 * read v in that case.
 */
ddouble householderw(const ddouble *x, ddouble *v, long nn, long sx, long sv)
{
    if (nn == 0)
        return Q_ZERO;

    ddouble norm_x = normw(x + sx, nn - 1, sx);
    if (iszerow(norm_x))
        return Q_ZERO;

    ddouble alpha = *x;
    /* beta = sign(alpha) * ||x||; same sign as alpha to avoid cancellation
     * in diff = beta - alpha below */
    ddouble beta = copysignww(hypotww(alpha, norm_x), alpha);

    ddouble diff = subww(beta, alpha);
    ddouble tau = divww(diff, beta);
    ddouble scale = reciprocalw(negw(diff));

    v[0] = Q_ONE;
    for (long n = 1; n != nn; ++n)
        v[n * sv] = mulww(scale, x[n * sx]);
    return tau;
}
60 |
61 | void rank1updateq(ddouble *a, long ais, long ajs, const ddouble *v, long vs,
62 | const ddouble *w, long ws, long ii, long jj)
63 | {
64 | #pragma omp parallel for collapse(2)
65 | for (long i = 0; i < ii; ++i) {
66 | for (long j = 0; j < jj; ++j) {
67 | ddouble tmp = mulww(v[i * vs], w[j * ws]);
68 | a[i * ais + j * ajs] = addww(a[i * ais + j * ajs], tmp);
69 | }
70 | }
71 | }
72 |
73 | void givensw(ddouble f, ddouble g, ddouble *c, ddouble *s, ddouble *r)
74 | {
75 | /* ACM Trans. Math. Softw. 28(2), 206, Alg 1 */
76 | if (iszerow(g)) {
77 | *c = Q_ONE;
78 | *s = Q_ZERO;
79 | *r = f;
80 | } else if (iszerow(f)) {
81 | *c = Q_ZERO;
82 | *s = (ddouble) {signbitw(g), 0.0};
83 | *r = absw(g);
84 | } else {
85 | *r = copysignww(hypotww(f, g), f);
86 |
87 | /* This may come at a slight loss of precision, however, we should
88 | * not really have to care ...
89 | */
90 | ddouble inv_r = reciprocalw(*r);
91 | *c = mulww(f, inv_r);
92 | *s = mulww(g, inv_r);
93 | }
94 | }
95 |
/* Singular value decomposition of the 2x2 upper-triangular matrix
 *
 *     [ f  g ]   [ cu  -su ] [ smax    0 ] [  cv  sv ]
 *     [ 0  h ] = [ su   cu ] [    0 smin ] [ -sv  cv ]
 *
 * (cf. LAPACK's dlasv2).  The rotations are only computed when cv is
 * non-NULL; pass NULL for all four rotation outputs to get the singular
 * values alone.
 */
static void svd_tri2x2(
    ddouble f, ddouble g, ddouble h, ddouble *smin, ddouble *smax,
    ddouble *cv, ddouble *sv, ddouble *cu, ddouble *su)
{
    ddouble fa = absw(f);
    ddouble ga = absw(g);
    ddouble ha = absw(h);
    bool compute_uv = cv != NULL;

    if (lessww(fa, ha)) {
        /* Ensure |f| >= |h| by transposing the problem:
         * switch h <-> f, cu <-> sv, cv <-> su */
        svd_tri2x2(h, g, f, smin, smax, su, cu, sv, cv);
        return;
    }
    if (iszerow(ga)) {
        // already diagonal
        *smin = ha;
        *smax = fa;
        if (compute_uv) {
            *cu = Q_ONE;
            *su = Q_ZERO;
            *cv = Q_ONE;
            *sv = Q_ZERO;
        }
        return;
    }
    if (fa.hi < Q_EPS.hi * ga.hi) {
        /* ga is very large: smax ~ |g|; order the smin computation to
         * avoid intermediate over-/underflow. */
        *smax = ga;
        if (ha.hi > 1.0)
            *smin = divww(fa, divww(ga, ha));
        else
            *smin = mulww(divww(fa, ga), ha);
        if (compute_uv) {
            *cu = Q_ONE;
            *su = divww(h, g);
            *cv = Q_ONE;
            *sv = divww(f, g);
        }
        return;
    }
    /* Normal case; the difference fa - ha is formed explicitly, the rest
     * of the formulas avoid further cancellation. */
    ddouble fmh = subww(fa, ha);
    ddouble d = divww(fmh, fa);
    ddouble q = divww(g, f);
    ddouble s = subdw(2.0, d);
    ddouble spw = hypotww(q, s);
    ddouble dpw = hypotww(d, q);
    ddouble a = mul_pwr2(addww(spw, dpw), 0.5);
    *smin = absw(divww(ha, a));
    *smax = absw(mulww(fa, a));

    if (compute_uv) {
        ddouble tmp = addww(divww(q, addww(spw, s)),
                            divww(q, addww(dpw, d)));
        tmp = mulww(tmp, adddw(1.0, a));
        ddouble tt = hypotwd(tmp, 2.0);
        *cv = divdw(2.0, tt);
        *sv = divww(tmp, tt);
        *cu = divww(addww(*cv, mulww(*sv, q)), a);
        *su = divww(mulww(divww(h, f), *sv), a);
    }
}
159 |
/* Singular value decomposition of a general 2x2 matrix [a11 a12; a21 a22].
 *
 * A Givens rotation first eliminates a21, then the triangular routine
 * svd_tri2x2 is applied and its left rotation is merged with the Givens
 * rotation.  Rotations are only computed when cv is non-NULL.
 */
void svd_2x2(ddouble a11, ddouble a12, ddouble a21, ddouble a22, ddouble *smin,
             ddouble *smax, ddouble *cv, ddouble *sv, ddouble *cu, ddouble *su)
{
    bool compute_uv = cv != NULL;
    if(iszerow(a21))
        return svd_tri2x2(a11, a12, a22, smin, smax, cv, sv, cu, su);

    /* First, we use a givens rotation Rx
     * [ cx sx ] [ a11 a12 ] = [ rx a12' ]
     * [ -sx cx ] [ a21 a22 ] [ 0 a22' ]
     */
    ddouble cx, sx, rx;
    givensw(a11, a21, &cx, &sx, &rx);
    a11 = rx;
    a21 = Q_ZERO;
    lmul_givensq(&a12, &a22, cx, sx, a12, a22);

    /* Next, use the triangular routine
     * [ f g ] = [ cu -su ] [ smax 0 ] [ cv sv ]
     * [ 0 h ] [ su cu ] [ 0 smin ] [ -sv cv ]
     */
    svd_tri2x2(a11, a12, a22, smin, smax, cv, sv, cu, su);

    /* Finally, update the LHS (U) transform as follows:
     * [ cx -sx ] [ cu -su ] = [ cu' -su' ]
     * [ sx cx ] [ su cu ] [ su' cu' ]
     */
    if (compute_uv)
        lmul_givensq(cu, su, cx, negw(sx), *cu, *su);
}
190 |
/* One sweep of one-sided Jacobi SVD over the (ii, jj) matrix u, with the
 * accumulated right rotations applied to vt (jj, jj) in place.
 *
 * For each column pair (i, j) the 2x2 Gram matrix H = [u_i u_j]^T [u_i u_j]
 * is diagonalized and the rotation applied to both u and vt.  Returns the
 * root of the accumulated squared off-diagonal elements H_ij before
 * rotation (a convergence measure), or NaN for a wide matrix (ii < jj),
 * which is not supported.
 */
ddouble jacobi_sweep(ddouble *u, long sui, long suj, ddouble *vt, long svi,
                     long svj, long ii, long jj)
{
    ddouble _cu, _su, cv, sv, _smin, _smax;
    ddouble offd = Q_ZERO;

    if (ii < jj)
        return nanw();

    // Note that the inner loop only runs over the square portion!
    for (long i = 0; i < jj - 1; ++i) {
        for (long j = i + 1; j < jj; ++j) {
            // Construct the matrix to be diagonalized
            ddouble Hii = Q_ZERO, Hij = Q_ZERO, Hjj = Q_ZERO;
            for (long k = 0; k != ii; ++k) {
                ddouble u_ki = u[k * sui + i * suj];
                ddouble u_kj = u[k * sui + j * suj];
                Hii = addww(Hii, mulww(u_ki, u_ki));
                Hij = addww(Hij, mulww(u_ki, u_kj));
                Hjj = addww(Hjj, mulww(u_kj, u_kj));
            }
            offd = addww(offd, sqrw(Hij));

            // diagonalize
            svd_2x2(Hii, Hij, Hij, Hjj, &_smin, &_smax, &cv, &sv, &_cu, &_su);

            // apply rotation to VT
            for (long k = 0; k < jj; ++k) {
                ddouble *vt_ik = &vt[i * svi + k * svj];
                ddouble *vt_jk = &vt[j * svi + k * svj];
                lmul_givensq(vt_ik, vt_jk, cv, sv, *vt_ik, *vt_jk);
            }

            // apply transposed rotation to U
            for (long k = 0; k < ii; ++k) {
                ddouble *u_ki = &u[k * sui + i * suj];
                ddouble *u_kj = &u[k * sui + j * suj];
                lmul_givensq(u_ki, u_kj, cv, sv, *u_ki, *u_kj);
            }
        }
    }
    offd = sqrtw(offd);
    return offd;
}
235 |
236 | static ddouble gk_shift(ddouble d1, ddouble e1, ddouble d2)
237 | {
238 | /* Get singular values of 2x2 triangular matrix formed from the lower
239 | * right corner in the array:
240 | *
241 | * [ d[ii-2] e[ii-2] ]
242 | * [ 0 d[ii-1] ]
243 | */
244 | ddouble smin, smax;
245 | svd_tri2x2(d1, e1, d2, &smin, &smax, NULL, NULL, NULL, NULL);
246 |
247 | ddouble smin_dist = absw(subww(smin, d2));
248 | ddouble smax_dist = absw(subww(smax, d2));
249 | return lessww(smin_dist, smax_dist) ? smin : smax;
250 | }
251 |
/* One implicit-shift QR ("bulge chasing") step on an upper bidiagonal
 * matrix with diagonal d (ii elements, stride sd) and superdiagonal e
 * (ii-1 elements, stride se), updated in place.
 *
 * For each of the ii-1 chase positions, first a right then a left Givens
 * rotation is generated; their (cos, sin) pairs are appended to rot in
 * that order (4 ddoubles per position), so callers can replay them on the
 * singular-vector factors.
 */
void golub_kahan_chaseq(ddouble *d, long sd, ddouble *e, long se, long ii,
                        ddouble *rot)
{
    if (ii < 2)
        return;

    /* Shift from the trailing 2x2 block; f, g seed the first rotation. */
    ddouble shift = gk_shift(d[(ii-2)*sd], e[(ii-2)*se], d[(ii-1)*sd]);
    ddouble g = e[0];
    ddouble f = addww(copysigndw(1.0, d[0]), divww(shift, d[0]));
    f = mulww(f, subww(absw(d[0]), shift));

    for (long i = 0; i < (ii - 1); ++i) {
        /* Right rotation annihilating g; acts on columns i, i+1. */
        ddouble r, cosr, sinr;
        givensw(f, g, &cosr, &sinr, &r);
        if (i != 0)
            e[(i-1)*se] = r;

        lmul_givensq(&f, &e[i*se], cosr, sinr, d[i*sd], e[i*se]);
        lmul_givensq(&g, &d[(i+1)*sd], cosr, sinr, Q_ZERO, d[(i+1)*sd]);
        *(rot++) = cosr;
        *(rot++) = sinr;

        /* Left rotation chasing the bulge; acts on rows i, i+1. */
        ddouble cosl, sinl;
        givensw(f, g, &cosl, &sinl, &r);
        d[i*sd] = r;
        lmul_givensq(&f, &d[(i+1)*sd], cosl, sinl, e[i*se], d[(i+1)*sd]);
        if (i < ii - 2) {
            lmul_givensq(&g, &e[(i+1)*se], cosl, sinl, Q_ZERO, e[(i+1)*se]);
        }
        *(rot++) = cosl;
        *(rot++) = sinl;
    }
    e[(ii-2)*se] = f;
}
286 |
--------------------------------------------------------------------------------
/pysrc/xprec/linalg.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Markus Wallerberger and others
2 | # SPDX-License-Identifier: MIT
3 | #
4 | # Some of the code in this module is adapted from the LAPACK reference
5 | # implementation.
6 | import numpy as np
7 | from warnings import warn
8 |
9 | from . import ddouble
10 | from . import _dd_linalg
11 |
# Re-export the compiled double-double linear algebra kernels.
norm = _dd_linalg.norm
givens = _dd_linalg.givens
householder = _dd_linalg.householder
rank1update = _dd_linalg.rank1update
16 |
17 |
def qr(A, reflectors=False):
    """QR decomposition without pivoting.

    Decomposes a `(m, n)` matrix `A` into the product:

        A == Q @ R

    where `Q` is an `(m, m)` orthogonal matrix and `R` is a `(m, n)` upper
    triangular matrix.  No pivoting is used.  With `reflectors=True`, `Q`
    is returned in factored (Householder reflector) form.
    """
    R = np.array(A)
    m, n = R.shape
    steps = min(m, n)

    Q = np.zeros((steps, m), A.dtype)
    for i in range(steps):
        householder_update(R[i:, i:], Q[i:, i:])
    if reflectors:
        return Q, R
    return householder_apply(Q, np.eye(m, dtype=A.dtype)), R
39 |
40 |
def rrqr(A, tol=5e-32, reflectors=False):
    """Truncated rank-revealing QR decomposition with full column pivoting.

    Decomposes a `(m, n)` matrix `A` into the product:

        A[:,piv] == Q @ R

    where `Q` is an `(m, k)` isometric matrix, `R` is a `(k, n)` upper
    triangular matrix, `piv` is a permutation vector, and `k` is chosen such
    that the relative tolerance `tol` is met in the equality above.

    Passing `tol=None` disables truncation; with `reflectors=True`, `Q` is
    returned in factored (Householder reflector) form.
    """
    R = np.array(A)
    m, n = R.shape
    k = min(m, n)

    Q = np.zeros((m, k), A.dtype)
    jpvt = np.arange(n)           # column permutation (piv)
    norms = norm(A.T)             # running partial column norms
    xnorms = norms.copy()         # column norms at the last full recompute
    # Threshold below which a downdated norm is recomputed from scratch
    # (same role as tol3z in LAPACK's dlaqp2/dlaqps).
    TOL3Z = np.finfo(float).eps
    for i in range(k):
        # Pivot the trailing column with the largest partial norm to i.
        pvt = i + np.argmax(norms[i:])
        if i != pvt:
            R[:,[i, pvt]] = R[:,[pvt, i]]
            jpvt[[i, pvt]] = jpvt[[pvt, i]]
            norms[pvt] = norms[i]
            xnorms[pvt] = xnorms[i]

        # Zero column i below the diagonal; reflector is stored in Q.
        householder_update(R[i:,i:], Q[i:,i:])

        # Downdate the partial norms of the remaining nonzero columns:
        # norm_j**2 -= R[i,j]**2, guarded against cancellation.
        js = (i + 1) + norms[i + 1:].nonzero()[0]
        temp = np.abs(R[i,js]) / norms[js]
        temp = np.fmax(0.0, (1 + temp)*(1 - temp))
        temp2 = temp * np.square(norms[js] / xnorms[js])

        # Where cancellation made the downdate unreliable, recompute the
        # norm of the trailing part of the column exactly.
        wheresmall = temp2 < TOL3Z
        jsmall = js[wheresmall]
        upd_norms = norm(R[i+1:,jsmall].T)
        norms[jsmall] = upd_norms
        xnorms[jsmall] = upd_norms
        jbig = js[~wheresmall]
        norms[jbig] *= np.sqrt(temp[~wheresmall])

        # Truncate once the diagonal has decayed below tol (relative to
        # the largest diagonal element R[0,0]).
        if tol is not None:
            acc = np.abs(R[i,i] / R[0,0])
            if acc < tol:
                k = i + 1
                Q = Q[:,:k]
                R = R[:k,:]
                break

    if not reflectors:
        I = np.eye(m, k, dtype=A.dtype)
        Q = householder_apply(Q, I)
    return Q, R, jpvt
96 |
97 |
def svd(A, full_matrices=True):
    """Singular value decomposition via Golub-Kahan bidiagonalization.

    Decomposes a `(m, n)` matrix `A` into the product:

        A == U @ (s[:,None] * VT)

    where `U` is a `(m, k)` matrix with orthogonal columns, `VT` is a `(k, n)`
    matrix with orthogonal rows and `s` are the singular values, a set of `k`
    nonnegative numbers in non-ascending order and `k = min(m, n)`.
    """
    A = np.asarray(A)
    m, n = A.shape
    if m < n:
        # Work on the tall transpose and swap the roles of U and V.
        U, s, VT = svd(A.T, full_matrices)
        return VT.T, s, U.T

    Q, B, RT = bidiag(A)
    converged = False
    for _ in range(20 * n):
        if svd_bidiag_step(Q, B, RT):
            converged = True
            break
    if not converged:
        warn("Did not converge")

    U, s, VH = svd_normalize(Q, B.diagonal(), RT)
    if not full_matrices:
        U = U[:, :n]
    return U, s, VH
126 |
127 |
def svd_trunc(A, tol=5e-32, method='jacobi', max_iter=20):
    """Truncated singular value decomposition.

    Decomposes a `(m, n)` matrix `A` into the product:

        A == U @ (s[:,None] * VT)

    where `U` is a `(m, k)` matrix with orthogonal columns, `VT` is a `(k, n)`
    matrix with orthogonal rows and `s` are the singular values, a set of `k`
    nonnegative numbers in non-ascending order.  The SVD is truncated in the
    sense that singular values below `tol` are discarded.
    """
    # RRQR is an excellent preconditioner for Jacobi; the SVD proper is
    # then performed on the transpose of the triangular factor.
    Q, R, perm = rrqr(A, tol)
    if method == 'jacobi':
        U, s, VT = svd_jacobi(R.T, tol, max_iter)
    elif method == 'golub-kahan':
        U, s, VT = svd(R.T, full_matrices=False)
    else:
        raise ValueError("invalid method")

    # Undo the transposition and the column permutation.
    return Q @ VT.T, s, U.T[:, perm.argsort()]
154 |
155 |
def bidiag(A, reflectors=False, force_structure=False):
    """Bidiagonalizes an arbitrary rectangular matrix.

    Decomposes a `(m, n)` matrix `A` into the product:

        A == Q @ B @ RT

    where `Q` is a `(m, m)` orthogonal matrix, `RT` is a `(n, n)` orthogonal
    matrix, and `B` is a bidiagonal matrix, where the upper diagonal is
    nonzero for `m >= n` and the lower diagonal is nonzero for `m < n`.

    With `force_structure=True`, elements of `B` outside the two diagonals
    are explicitly zeroed out; with `reflectors=True`, `Q` and `RT` are
    returned in factored (Householder reflector) form.
    """
    A = np.asarray(A)
    m, n = A.shape
    if m < n:
        # Work on the tall transpose and swap the roles of Q and RT.
        # BUGFIX: force_structure used to be silently dropped here.
        Q, B, RT = bidiag(A.T, reflectors, force_structure)
        return RT.T, B.T, Q.T

    # For a square matrix the last column needs no left reflection.
    rq = n - (m == n)
    B = A.copy()
    Q = np.zeros_like(B)
    R = np.zeros_like(B[:n,:n])

    # Alternate left reflections (zeroing column j below the diagonal)
    # with right reflections (zeroing row j right of the superdiagonal).
    for j in range(n-2):
        householder_update(B[j:,j:], Q[j:,j:])
        householder_update(B[j:,j+1:].T, R[j+1:,j+1:])
    for j in range(n-2, rq):
        householder_update(B[j:,j:], Q[j:,j:])

    if force_structure:
        # Scrub numerical garbage outside the two diagonals.
        d = B.diagonal().copy()
        e = B.diagonal(1).copy()
        B[...] = 0
        i = np.arange(n)
        B[i, i] = d
        B[i[:-1], i[:-1]+1] = e
    if not reflectors:
        Q = householder_apply(Q, np.eye(m, dtype=B.dtype))
        R = householder_apply(R, np.eye(n, dtype=B.dtype))
    return Q, B, R.T
195 |
196 |
def svd_jacobi(A, tol=5e-32, max_iter=20):
    """Singular value decomposition using one-sided Jacobi rotations.

    Sweeps are repeated until the off-diagonal measure drops below `tol`
    relative to the Frobenius norm of the leading square block, or
    `max_iter` sweeps have been performed (then a warning is issued).
    """
    U = A.copy()
    m, n = U.shape
    if m < n:
        raise RuntimeError("expecting tall matrix")

    VT = np.eye(n, dtype=U.dtype)
    offd = np.empty((), ddouble)

    threshold = tol * np.linalg.norm(U[:n, :n], 'fro')
    converged = False
    for _ in range(max_iter):
        _dd_linalg.jacobi_sweep(U, VT, out=(U, VT, offd))
        if offd <= threshold:
            converged = True
            break
    if not converged:
        warn("Did not converge")

    s = norm(U.T)
    return U / s, s, VT
218 |
219 |
def householder_update(A, Q):
    """Reflect the zeroth column of `A` onto a multiple of the unit vector.

    The reflection is applied to `A` in place; the scalar factor and the
    reflector vector are stored in the zeroth column of `Q`.
    """
    beta, v = householder(A[:, 0])
    update = -beta * (A.T @ v)
    rank1update(A, v, update, out=A)
    Q[0, 0] = beta
    Q[1:, 0] = v[1:]
227 |
228 |
def householder_apply(H, Q):
    """Apply a set of Householder reflectors stored in `H` to matrix `Q`.

    Reflectors are applied in reverse order; a zero diagonal element in
    `H` marks a no-op reflector.  Returns a new matrix; `Q` is unchanged.
    """
    H = np.asarray(H)
    Q = Q.copy()
    m, r = H.shape
    if Q.shape[0] != m or Q.shape[1] < r:
        raise ValueError("invalid shape")
    for j in reversed(range(r)):
        beta = H[j, j]
        if np.equal(beta, 0):
            continue
        # Reconstruct the reflector vector: implicit leading 1, then the
        # stored subdiagonal part of column j.
        v = np.empty_like(H[j:, 0])
        v[0] = 1
        v[1:] = H[j+1:, j]
        block = Q[j:, j:]
        w = -beta * (block.T @ v)
        rank1update(block, v, w, out=block)
    return Q
249 |
250 |
def svd_normalize(U, d, VH):
    """Given an SVD-like decomposition, normalize it.

    Makes the singular values `d` non-negative (flipping rows of `VH`
    in place as needed) and sorts them in non-ascending order, permuting
    the leading columns of a copy of `U` and the rows of `VH` to match.
    """
    n = d.size
    # Absorb negative signs of d into the corresponding rows of VH.
    negative = np.signbit(d)
    VH[negative] = -VH[negative]
    d = np.abs(d)

    # Sort singular values in non-ascending order.
    order = np.argsort(d)[::-1]
    d = d[order]
    VH = VH[order]
    U = U.copy()
    U[:, :n] = U[:, order]
    return U, d, VH
265 |
266 |
def svd_bidiag_step(Q, B, RT):
    """Perform a single implicit-shift QR step on a bidiagonal matrix.

    `B` must be upper bidiagonal; `Q` and `RT` accumulate the applied
    left/right rotations in place.  Returns True once `B` has decoupled
    completely (iteration converged), False otherwise.
    """
    d = B.diagonal().copy()
    e = np.hstack([B.diagonal(1), 0.0])

    # Restrict the work to the trailing unreduced block B[p:q, p:q].
    p, q = bidiag_partition(d, e)
    if q <= 1:
        return True

    # Chase the bulge through the unreduced block; the applied rotations
    # come back in rot, one (cos_r, sin_r, cos_l, sin_l) row per position.
    d_part = d[p:q]
    e_part = e[p:q]
    rot = np.empty((d_part.size, 4), d.dtype)
    _dd_linalg.golub_kahan_chase(d_part, e_part, out=(d_part, e_part, rot))

    # Write the updated diagonals back into B.
    i = np.arange(p, q)
    B[i, i] = d_part
    B[i[:-1], i[:-1]+1] = e_part[:-1]

    # Replay the left rotations on Q and the right rotations on RT.
    rot_Q = rot[:, 2:]
    rot_R = rot[:, :2]
    QT_part = Q[:, p:q].T
    RT_part = RT[p:q, :]
    _dd_linalg.givens_seq(rot_Q, QT_part, out=QT_part)
    _dd_linalg.givens_seq(rot_R, RT_part, out=RT_part)
    return False
292 |
293 |
def bidiag_partition(d, e, eps=5e-32):
    """Partition a bidiagonal matrix into blocks for implicit QR.

    Return `p, q` which partition a bidiagonal matrix `B` into three blocks:

      - B[0:p, 0:p], an arbitrary bidiagonal matrix
      - B[p:q, p:q], a matrix with all off-diagonal elements nonzero
      - B[q:, q:], a diagonal matrix

    Off-diagonal elements of `e` that are negligible relative to their
    neighbourhood are flushed to zero in place.
    """
    negligible = np.abs(e) <= eps * (np.abs(d) + np.abs(e))
    e[negligible] = 0

    q = _find_last(~negligible) + 1
    if q <= 0:
        # All off-diagonal elements vanish: B is already diagonal.
        return 0, 0
    p = _find_last(negligible[:q]) + 1
    return p, q + 1
313 |
314 |
315 | def _find_last(a, axis=-1):
316 | a = a.astype(bool)
317 | maxloc = a.shape[axis] - 1 - a[::-1].argmax(axis)
318 | return np.where(a[maxloc], maxloc, -1)
319 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Setup script - embracing the setuptools madness.
2 | #
3 | # Copyright (C) 2021 Markus Wallerberger and others
4 | # SPDX-License-Identifier: MIT
5 | import io
6 | import os.path
7 | import os
8 | import platform
9 | import re
10 |
11 | from setuptools import setup, find_packages
12 | from setuptools.extension import Extension
13 | from setuptools.command.build_ext import build_ext as BuildExt
14 |
15 |
def readfile(*parts):
    """Return contents of file with path relative to script directory"""
    herepath = os.path.abspath(os.path.dirname(__file__))
    fullpath = os.path.join(herepath, *parts)
    # Pin the encoding: the platform default may not be UTF-8, but the
    # files read here (README.md, __init__.py) are.
    with io.open(fullpath, 'r', encoding='utf-8') as f:
        return f.read()
22 |
23 |
def extract_version(*parts):
    """Extract value of __version__ variable by parsing python script"""
    pattern = re.compile(r"(?m)^__version__\s*=\s*['\"]([^'\"]*)['\"]")
    return pattern.search(readfile(*parts)).group(1)
30 |
31 |
def rebase_links(text, base_url):
    """Rebase links to doc/ directory to ensure they work online."""
    doclink_re = re.compile(
        r"(?m)^\s*\[\s*([^\]\n\r]+)\s*\]:\s*(doc/[./\w]+)\s*$")
    replacement = r"[\1]: %s/\2" % base_url
    return doclink_re.sub(replacement, text)
38 |
39 |
def append_if_absent(list, arg):
    """Append argument to list if absent.

    NOTE(review): the first parameter shadows the builtin ``list``; it is
    kept as-is since renaming it would change the keyword interface.
    """
    if arg not in list:
        list.append(arg)
44 |
45 |
def get_flags_dict(exec):
    """Split a compiler invocation into a (binary, flags dict) pair.

    Cleans up the mess of compiler options a little bit: flags move into
    a dictionary (removing the myriad of duplicates).  A flag's value is
    taken from ``-flag=value``, from a separate following token, or is
    None for a bare flag.
    """
    cc_so, *raw_flags = exec
    flags = {}
    pending = None
    for token in raw_flags:
        if token.startswith("-"):
            # A new flag terminates any flag still waiting for a value.
            if pending is not None:
                flags[pending] = None
                pending = None
            key, sep, value = token.partition("=")
            if sep:
                flags[key] = value
            else:
                pending = key
        else:
            # Bare token: must be the value of the preceding flag.
            if pending is None:
                raise ValueError("expected flag" + str(exec))
            flags[pending] = token
            pending = None
    if pending is not None:
        flags[pending] = None

    return cc_so, flags
72 |
73 |
def make_exec_string(cc_so, cflags_so):
    """Reassemble a (binary, flags dict) pair into an argument list."""
    args = [cc_so]
    for key, value in cflags_so.items():
        args.append(key if value is None else "%s=%s" % (key, value))
    return args
79 |
80 |
class OptionsMixin:
    """Mixin adding xprec-specific build options to setuptools commands."""
    _convert_to_bool = {"true": True, "false": False}
    user_options = [
        ("with-openmp=", None, "use openmp to build (default: false)"),
        ("opt-arch=", None, "optimized for architecture"),
        ("numpy-include-dir=", None, "numpy include directory"),
    ]

    def initialize_options(self):
        super().initialize_options()
        self.with_openmp = None
        self.numpy_include_dir = None
        self.opt_arch = None

    def finalize_options(self):
        # setuptools hands options over as strings; translate the boolean
        # ones and validate the include directory.
        if self.with_openmp is not None:
            self.with_openmp = self._convert_to_bool[self.with_openmp.lower()]
        if self.opt_arch is not None:
            self.opt_arch = self._convert_to_bool[self.opt_arch.lower()]
        if self.numpy_include_dir is not None \
                and not os.path.isdir(self.numpy_include_dir):
            raise ValueError("include directory must exist")
        super().finalize_options()
104 |
105 |
class BuildExtWithNumpy(OptionsMixin, BuildExt):
    """Wrapper class for building numpy extensions.

    Injects the numpy include directory and optional OpenMP/architecture
    flags into each extension right before compilation.
    """
    user_options = BuildExt.user_options + OptionsMixin.user_options

    def build_extensions(self):
        """Modify paths according to options"""
        # This must be deferred to build time, because that is when
        # self.compiler starts being a compiler instance (before, it is
        # a flag) *slow-clap*
        # compiler type is either 'unix', 'msvc' or 'mingw'
        compiler_type = self.compiler.compiler_type

        # Guess the compiler family from the binary name; used below for
        # compiler-specific flags (e.g. clang needs -lomp for OpenMP).
        compiler_binary = getattr(self.compiler, 'compiler', [''])[0]
        compiler_binary = os.path.basename(compiler_binary)
        compiler_make = ''
        if 'gcc' in compiler_binary or 'g++' in compiler_binary:
            compiler_make = 'gcc'
        elif 'clang' in compiler_binary:
            compiler_make = 'clang'
        elif 'icc' in compiler_binary:
            compiler_make = 'icc'
        elif compiler_type == 'msvc':
            # See msvccompiler.py:206 - a comment worth reading in its
            # entirety. distutils sets up an abstraction which it immediately
            # breaks with its own derived classes. *slow-clap*
            compiler_make = 'msvc'

        if compiler_type != 'msvc':
            new_flags = {"-Wextra": None, "-std": "c11"}
            # We do not optimize for the architecture by default,
            # because this is harmful when building a binary package.
            if self.opt_arch:
                new_flags["-mtune"] = new_flags["-march"] = "native"

            cc_so, flags_dict = get_flags_dict(self.compiler.compiler_so)

            # Replace arch with march
            if "-arch" in flags_dict:
                flags_dict["-march"] = flags_dict.pop("-arch")

            # Remove any existing -mtune, -march, -arch flags if not self.opt_arch
            if not self.opt_arch:
                for key in ["-mtune", "-march", "-arch"]:
                    if key in flags_dict:
                        del flags_dict[key]

            flags_dict.update(new_flags)
            self.compiler.compiler_so = make_exec_string(cc_so, flags_dict)

        # clang on 14.4.1 fails to include C header files...
        if platform.system() == 'Darwin':
            sdk_path = (
                "/Applications/Xcode.app/Contents/Developer/Platforms/"
                "MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include"
            )
            current_cpath = os.environ.get('CPATH', '')
            os.environ['CPATH'] = f"{sdk_path}:{current_cpath}"

        # This has to be set to false because MacOS does not ship openmp
        # by default.
        if self.with_openmp is None:
            self.with_openmp = platform.system() == 'Linux'

        # Numpy headers: numpy must be imported here rather than
        # globally, because otherwise it may not be available at the time
        # when the setup script is run. *slow-cl ... ah, f*ck it.
        if self.numpy_include_dir is None:
            import numpy
            self.numpy_include_dir = numpy.get_include()

        for ext in self.extensions:
            append_if_absent(ext.include_dirs, self.numpy_include_dir)
            if self.with_openmp:
                append_if_absent(ext.extra_compile_args, '-fopenmp')
                append_if_absent(ext.extra_link_args, '-fopenmp')
                if compiler_make == 'clang':
                    append_if_absent(ext.extra_link_args, '-lomp')

        super().build_extensions()

    def get_source_files(self):
        """Return list of files to include in source dist"""
        # Specifying include_dirs= argument in Extension adds headers from that
        # directory to the sdist ... on some machines. On others, not. Note
        # that overriding sdist will not save you, since this is not called
        # from sdist.add_defaults(), as you might expect. (With setuptools, it
        # is never what you expect.) Instead, sdist requires egg_info, which
        # hooks into a hidden manifest_maker class derived from sdist, where
        # add_defaults() called, the list passed back to sdist, sidestepping
        # the method in the orginal class. Kudos.
        #
        # Really, if you have monkeys type out 1000 pages on typewriters, use
        # the result as toilet paper for a month, unfold it, scan it at 20 dpi,
        # and run it through text recognition software, it would still yield
        # better code than setuptools.
        source_files = super().get_source_files()
        header_regex = re.compile(r"\.(?:h|hh|hpp|hxx|H|HH|HPP|HXX)$")

        # Manually collect header files from all include directories.
        include_dirs = set()
        for ext in self.extensions:
            include_dirs.update(ext.include_dirs)
        for dir in include_dirs:
            for entry in os.scandir(dir):
                if not entry.is_file():
                    continue
                if not header_regex.search(entry.name):
                    continue
                source_files.append(entry.path)

        return source_files
216 |
217 |
# Derive the version from the package itself, and rebase README links onto
# the matching tagged tree so they work on PyPI.
VERSION = extract_version('pysrc', 'xprec', '__init__.py')
REPO_URL = "https://github.com/tuwien-cms/xprec"
DOCTREE_URL = "%s/tree/v%s" % (REPO_URL, VERSION)
LONG_DESCRIPTION = rebase_links(readfile('README.md'), DOCTREE_URL)

setup(
    name='xprec',
    version=VERSION,

    description='xprec precision numpy extension',
    long_description=LONG_DESCRIPTION,
    long_description_content_type='text/markdown',
    keywords=' '.join([
        'double-double'
    ]),
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Science/Research',
        'Intended Audience :: Developers',
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
    ],

    url=REPO_URL,
    author=', '.join([
        'Markus Wallerberger'
    ]),
    author_email='markus.wallerberger@tuwien.ac.at',

    python_requires='>=3',
    install_requires=[
        # we need matmul to be an ufunc -> 1.16
        'numpy>=1.16',
    ],
    extras_require={
        'test': ['pytest', 'mpmath'],
    },

    ext_modules=[
        Extension("xprec._dd_ufunc",
                  ["csrc/_dd_ufunc.c", "csrc/dd_arith.c"],
                  include_dirs=["csrc"]),
        Extension("xprec._dd_linalg",
                  ["csrc/_dd_linalg.c", "csrc/dd_arith.c", "csrc/dd_linalg.c"],
                  include_dirs=["csrc"]),
    ],
    setup_requires=[
        'numpy>=1.16'
    ],
    cmdclass={
        'build_ext': BuildExtWithNumpy
    },

    package_dir={'': 'pysrc'},
    packages=find_packages(where='pysrc'),
)
274 |
--------------------------------------------------------------------------------
/csrc/dd_arith.h:
--------------------------------------------------------------------------------
1 | /* Double-double arithmetic library
2 | *
3 | * Part of the functions are modified from the QD library for U.C. Berkeley
4 | * and licensed under a modified BSD license (see QD-LICENSE.txt)
5 | *
6 | * Some of the algorithms were updated according to the findings in
7 | * M. Joldes, et al., ACM Trans. Math. Softw. 44, 1-27 (2018)
8 | * (Algorithm numbers in the code)
9 | *
10 | * Copyright (C) 2012 Yozo Hida, Xiaoye S. Li, David H. Bailey
11 | * Copyright (C) 2021 Markus Wallerberger and others
12 | * SPDX-License-Identifier: MIT and Modified-BSD
13 | */
14 | #pragma once
#include <math.h>
#include <stdbool.h>
#include <stdlib.h>
18 |
/**
 * Type for double-double calculations.
 *
 * A number is represented as the unevaluated sum hi + lo of two doubles,
 * yielding roughly twice the significand precision of a plain double.
 */
typedef struct {
    double hi;
    double lo;
} ddouble;
26 |
/* Error-free sum of two doubles ("Fast2Sum"): returns (s, lo) with
 * s = fl(a + b) and s + lo == a + b exactly, valid when |a| >= |b|.
 * NOTE: the statement order is essential; do not reassociate.
 */
static inline ddouble two_sum_quick(double a, double b)
{
    double s = a + b;
    double lo = b - (s - a);
    return (ddouble){.hi = s, .lo = lo};
}
33 |
/* Error-free sum of two doubles ("2Sum"): like two_sum_quick, but with
 * no precondition on the relative magnitudes of a and b.
 */
static inline ddouble two_sum(double a, double b)
{
    double s = a + b;
    double v = s - a;
    double lo = (a - (s - v)) + (b - v);
    return (ddouble){.hi = s, .lo = lo};
}
41 |
/* Error-free difference of two doubles: returns (s, lo) with
 * s = fl(a - b) and s + lo == a - b exactly.
 */
static inline ddouble two_diff(double a, double b)
{
    double s = a - b;
    double v = s - a;
    double lo = (a - (s - v)) - (b + v);
    return (ddouble){.hi = s, .lo = lo};
}
49 |
/* Error-free product of two doubles: s = fl(a * b), and the fma picks up
 * the exact rounding error, so s + lo == a * b exactly.
 */
static inline ddouble two_prod(double a, double b)
{
    double s = a * b;
    double lo = fma(a, b, -s);
    return (ddouble){.hi = s, .lo = lo};
}
56 |
57 | /* -------------------- Combining quad/double ------------------------ */
58 |
/* Accurate ddouble + double addition. */
static inline ddouble addwd(ddouble x, double y)
{
    ddouble s = two_sum(x.hi, y);
    double v = x.lo + s.lo;
    return two_sum_quick(s.hi, v);
}
65 |
/* Accurate ddouble - double subtraction. */
static inline ddouble subwd(ddouble x, double y)
{
    ddouble s = two_diff(x.hi, y);
    double v = x.lo + s.lo;
    return two_sum_quick(s.hi, v);
}
72 |
/* Accurate ddouble * double multiplication. */
static inline ddouble mulwd(ddouble x, double y)
{
    ddouble c = two_prod(x.hi, y);
    double v = fma(x.lo, y, c.lo);
    return two_sum_quick(c.hi, v);
}
79 |
/* Accurate ddouble / double division. */
static inline ddouble divwd(ddouble x, double y)
{
    /* Alg 14 of Joldes et al.: one approximate quotient plus an exact
     * residual correction. */
    double t_hi = x.hi / y;
    ddouble pi = two_prod(t_hi, y);
    double d_hi = x.hi - pi.hi;
    double d_lo = x.lo - pi.lo;
    double t_lo = (d_hi + d_lo) / y;
    return two_sum_quick(t_hi, t_lo);
}
90 |
91 | /* -------------------- Combining double/quad ------------------------- */
92 |
93 | static inline ddouble negw(ddouble);
94 | static inline ddouble reciprocalw(ddouble);
95 |
/* double + ddouble addition; commutes to addwd. */
static inline ddouble adddw(double x, ddouble y)
{
    return addwd(y, x);
}
100 |
/* double - ddouble subtraction, via x + (-y). */
static inline ddouble subdw(double x, ddouble y)
{
    /* TODO: Probably not ideal */
    return addwd(negw(y), x);
}
106 |
/* double * ddouble multiplication; commutes to mulwd. */
static inline ddouble muldw(double x, ddouble y)
{
    return mulwd(y, x);
}
111 |
/* double / ddouble division, via x * (1/y). */
static inline ddouble divdw(double x, ddouble y)
{
    /* TODO: Probably not ideal */
    return mulwd(reciprocalw(y), x);
}
117 |
/* Multiply by b, which MUST be a power of two: both component products
 * are then exact, so no renormalization is needed. */
static inline ddouble mul_pwr2(ddouble a, double b) {
    return (ddouble){a.hi * b, a.lo * b};
}
121 |
122 | /* -------------------- Combining quad/quad ------------------------- */
123 |
/* Accurate ddouble + ddouble addition. */
static inline ddouble addww(ddouble x, ddouble y)
{
    ddouble s = two_sum(x.hi, y.hi);
    ddouble t = two_sum(x.lo, y.lo);
    ddouble v = two_sum_quick(s.hi, s.lo + t.hi);
    ddouble z = two_sum_quick(v.hi, t.lo + v.lo);
    return z;
}
132 |
/* Accurate ddouble - ddouble subtraction. */
static inline ddouble subww(ddouble x, ddouble y)
{
    ddouble s = two_diff(x.hi, y.hi);
    ddouble t = two_diff(x.lo, y.lo);
    ddouble v = two_sum_quick(s.hi, s.lo + t.hi);
    ddouble z = two_sum_quick(v.hi, t.lo + v.lo);
    return z;
}
141 |
/* Accurate ddouble * ddouble multiplication. */
static inline ddouble mulww(ddouble a, ddouble b)
{
    /* Alg 11 of Joldes et al.; the a.lo * b.lo cross term is dropped as
     * it is below the result precision. */
    ddouble c = two_prod(a.hi, b.hi);
    double t = a.hi * b.lo;
    t = fma(a.lo, b.hi, t);
    return two_sum_quick(c.hi, c.lo + t);
}
150 |
/* Accurate ddouble / ddouble division. */
static inline ddouble divww(ddouble x, ddouble y)
{
    /* Alg 17 of Joldes et al.: approximate quotient plus residual
     * correction against the full divisor. */
    double t_hi = x.hi / y.hi;
    ddouble r = mulwd(y, t_hi);
    double pi_hi = x.hi - r.hi;
    double d = pi_hi + (x.lo - r.lo);
    double t_lo = d / y.hi;
    return two_sum_quick(t_hi, t_lo);
}
161 |
162 | /* -------------------- Unary functions ------------------------- */
163 |
164 | static inline ddouble negw(ddouble a)
165 | {
166 | return (ddouble){-a.hi, -a.lo};
167 | }
168 |
169 | static inline ddouble posw(ddouble a)
170 | {
171 | return (ddouble){-a.hi, -a.lo};
172 | }
173 |
174 | static inline ddouble absw(ddouble a)
175 | {
176 | return signbit(a.hi) ? negw(a) : a;
177 | }
178 |
/* Accurate reciprocal 1 / y. */
static inline ddouble reciprocalw(ddouble y)
{
    /* Alg 17 with x = 1 */
    double t_hi = 1.0 / y.hi;
    ddouble r = mulwd(y, t_hi);
    double pi_hi = 1.0 - r.hi;
    double d = pi_hi - r.lo;
    double t_lo = d / y.hi;
    return two_sum_quick(t_hi, t_lo);
}
189 |
/* Accurate square of a ddouble; cheaper than mulww(a, a). */
static inline ddouble sqrw(ddouble a)
{
    /* Alg 11, specialised: the two cross terms coincide. */
    ddouble c = two_prod(a.hi, a.hi);
    double t = 2 * a.hi * a.lo;
    return two_sum_quick(c.hi, c.lo + t);
}
197 |
/* Round to the nearest integer, consulting the low word to break ties
 * of the high word. */
static inline ddouble roundw(ddouble a)
{
    double hi = round(a.hi);
    double lo;

    if (hi == a.hi) {
        /* High word is an integer already. Round the low word.*/
        lo = round(a.lo);

        /* Renormalize. This is needed if x[0] = some integer, x[1] = 1/2.*/
        return two_sum_quick(hi, lo);
    } else {
        /* High word is not an integer. */
        lo = 0.0;
        if (fabs(hi - a.hi) == 0.5 && a.lo < 0.0) {
            /* There is a tie in the high word, consult the low word
             * to break the tie.
             * NOTE: This does not cause INEXACT.
             */
            hi -= 1.0;
        }
        return (ddouble){hi, lo};
    }
}
222 |
223 | static inline ddouble floorw(ddouble a)
224 | {
225 | double hi = floor(a.hi);
226 | double lo = 0.0;
227 |
228 | if (hi == a.hi) {
229 | /* High word is integer already. Round the low word. */
230 | lo = floor(a.lo);
231 | return two_sum_quick(hi, lo);
232 | }
233 | return (ddouble){hi, lo};
234 | }
235 |
236 | static inline ddouble ceilw(ddouble a)
237 | {
238 | double hi = ceil(a.hi);
239 | double lo = 0.0;
240 |
241 | if (hi == a.hi) {
242 | /* High word is integer already. Round the low word. */
243 | lo = ceil(a.lo);
244 | return two_sum_quick(hi, lo);
245 | }
246 | return (ddouble){hi, lo};
247 | }
248 |
249 | static inline bool signbitw(ddouble x)
250 | {
251 | return signbit(x.hi);
252 | }
253 |
254 | static inline ddouble copysignww(ddouble x, ddouble y)
255 | {
256 | /* The sign is determined by the hi part, however, the sign of hi and lo
257 | * need not be the same, so we cannot merely broadcast copysign to both
258 | * parts.
259 | */
260 | return signbitw(x) != signbitw(y) ? negw(x) : x;
261 | }
262 |
263 | static inline ddouble copysignwd(ddouble x, double y)
264 | {
265 | return signbitw(x) != signbit(y) ? negw(x) : x;
266 | }
267 |
268 | static inline ddouble copysigndw(double x, ddouble y)
269 | {
270 | /* It is less surprising to return a ddouble here */
271 | double res = copysign(x, y.hi);
272 | return (ddouble) {res, 0.0};
273 | }
274 |
275 | static inline bool iszerow(ddouble x);
276 |
277 | static inline ddouble signw(ddouble x)
278 | {
279 | /* The numpy sign function does not respect signed zeros. We do. */
280 | if (iszerow(x))
281 | return x;
282 | return copysigndw(1.0, x);
283 | }
284 |
285 | /******************************** Constants *********************************/
286 |
287 | static inline ddouble nanw()
288 | {
289 | double nan = strtod("NaN", NULL);
290 | return (ddouble){nan, nan};
291 | }
292 |
293 | static inline ddouble infw()
294 | {
295 | double inf = strtod("Inf", NULL);
296 | return (ddouble){inf, inf};
297 | }
298 |
static const ddouble Q_ZERO = {0.0, 0.0};
static const ddouble Q_ONE = {1.0, 0.0};
/* Transcendental constants in double-double form: hi is the value rounded
 * to double, lo holds the remaining correction term. */
static const ddouble Q_2PI = {6.283185307179586232e+00, 2.449293598294706414e-16};
static const ddouble Q_PI = {3.141592653589793116e+00, 1.224646799147353207e-16};
static const ddouble Q_PI_2 = {1.570796326794896558e+00, 6.123233995736766036e-17};
static const ddouble Q_PI_4 = {7.853981633974482790e-01, 3.061616997868383018e-17};
static const ddouble Q_3PI_4 = {2.356194490192344837e+00, 9.1848509936051484375e-17};
static const ddouble Q_PI_16 = {1.963495408493620697e-01, 7.654042494670957545e-18};
static const ddouble Q_E = {2.718281828459045091e+00, 1.445646891729250158e-16};
static const ddouble Q_LOG2 = {6.931471805599452862e-01, 2.319046813846299558e-17};
static const ddouble Q_LOG10 = {2.302585092994045901e+00, -2.170756223382249351e-16};

/* Q_EPS: relative precision of the ddouble format, 2^-104 ~ 4.93e-32.
 * Q_MIN: ~2^-969, presumably the smallest value whose low word is still a
 *        normal double -- NOTE(review): confirm the intended meaning.
 * Q_MAX: largest finite double (DBL_MAX).
 * Q_TINY: smallest normal double (DBL_MIN). */
static const ddouble Q_EPS = {4.93038065763132e-32, 0.0};
static const ddouble Q_MIN = {2.0041683600089728e-292, 0.0};
static const ddouble Q_MAX = {1.79769313486231570815e+308, 0.0};
static const ddouble Q_TINY = {2.2250738585072014e-308, 0.0};
315 |
316 |
317 | static inline bool isfinitew(ddouble x)
318 | {
319 | return isfinite(x.hi);
320 | }
321 |
322 | static inline bool isinfw(ddouble x)
323 | {
324 | return isinf(x.hi);
325 | }
326 |
327 | static inline bool isnanw(ddouble x)
328 | {
329 | return isnan(x.hi);
330 | }
331 |
332 | /*********************** Comparisons q/q ***************************/
333 |
334 | static inline bool equalww(ddouble a, ddouble b)
335 | {
336 | return a.hi == b.hi && a.lo == b.lo;
337 | }
338 |
339 | static inline bool notequalww(ddouble a, ddouble b)
340 | {
341 | return a.hi != b.hi || a.lo != b.lo;
342 | }
343 |
344 | static inline bool greaterww(ddouble a, ddouble b)
345 | {
346 | return a.hi > b.hi || (a.hi == b.hi && a.lo > b.lo);
347 | }
348 |
349 | static inline bool lessww(ddouble a, ddouble b)
350 | {
351 | return a.hi < b.hi || (a.hi == b.hi && a.lo < b.lo);
352 | }
353 |
354 | static inline bool greaterequalww(ddouble a, ddouble b)
355 | {
356 | return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo);
357 | }
358 |
359 | static inline bool lessequalww(ddouble a, ddouble b)
360 | {
361 | return a.hi < b.hi || (a.hi == b.hi && a.lo <= b.lo);
362 | }
363 |
364 | /*********************** Comparisons q/d ***************************/
365 |
366 | static inline bool equalwd(ddouble a, double b)
367 | {
368 | return equalww(a, (ddouble){b, 0});
369 | }
370 |
371 | static inline bool notequalwd(ddouble a, double b)
372 | {
373 | return notequalww(a, (ddouble){b, 0});
374 | }
375 |
376 | static inline bool greaterwd(ddouble a, double b)
377 | {
378 | return greaterww(a, (ddouble){b, 0});
379 | }
380 |
381 | static inline bool lesswd(ddouble a, double b)
382 | {
383 | return lessww(a, (ddouble){b, 0});
384 | }
385 |
386 | static inline bool greaterequalwd(ddouble a, double b)
387 | {
388 | return greaterequalww(a, (ddouble){b, 0});
389 | }
390 |
391 | static inline bool lessequalwd(ddouble a, double b)
392 | {
393 | return lessequalww(a, (ddouble){b, 0});
394 | }
395 |
396 | /*********************** Comparisons d/q ***************************/
397 |
398 | static inline bool equaldw(double a, ddouble b)
399 | {
400 | return equalww((ddouble){a, 0}, b);
401 | }
402 |
403 | static inline bool notequaldw(double a, ddouble b)
404 | {
405 | return notequalww((ddouble){a, 0}, b);
406 | }
407 |
408 | static inline bool greaterdw(double a, ddouble b)
409 | {
410 | return greaterww((ddouble){a, 0}, b);
411 | }
412 |
413 | static inline bool lessdw(double a, ddouble b)
414 | {
415 | return lessww((ddouble){a, 0}, b);
416 | }
417 |
418 | static inline bool greaterequaldw(double a, ddouble b)
419 | {
420 | return greaterequalww((ddouble){a, 0}, b);
421 | }
422 |
423 | static inline bool lessequaldw(double a, ddouble b)
424 | {
425 | return lessequalww((ddouble){a, 0}, b);
426 | }
427 |
428 | /************************ Minimum/maximum ************************/
429 |
430 | static inline ddouble fminww(ddouble a, ddouble b)
431 | {
432 | return lessww(a, b) ? a : b;
433 | }
434 |
435 | static inline ddouble fmaxww(ddouble a, ddouble b)
436 | {
437 | return greaterww(a, b) ? a : b;
438 | }
439 |
440 | static inline ddouble fminwd(ddouble a, double b)
441 | {
442 | return lesswd(a, b) ? a : (ddouble) {b, 0};
443 | }
444 |
445 | static inline ddouble fmaxwd(ddouble a, double b)
446 | {
447 | return greaterwd(a, b) ? a : (ddouble) {b, 0};
448 | }
449 |
450 | static inline ddouble fmindw(double a, ddouble b)
451 | {
452 | return lessdw(a, b) ? (ddouble) {a, 0} : b;
453 | }
454 |
455 | static inline ddouble fmaxdw(double a, ddouble b)
456 | {
457 | return greaterdw(a, b) ? (ddouble) {a, 0} : b;
458 | }
459 |
460 | /************************** Unary tests **************************/
461 |
462 | static inline bool iszerow(ddouble x)
463 | {
464 | return x.hi == 0.0;
465 | }
466 |
467 | static inline bool isonew(ddouble x)
468 | {
469 | return x.hi == 1.0 && x.lo == 0.0;
470 | }
471 |
472 | static inline bool ispositivew(ddouble x)
473 | {
474 | return x.hi > 0.0;
475 | }
476 |
477 | static inline bool isnegativew(ddouble x)
478 | {
479 | return x.hi < 0.0;
480 | }
481 |
482 | /************************** Advanced math functions ********************/
483 |
484 | ddouble sqrtw(ddouble a);
485 |
486 | static inline ddouble ldexpw(ddouble a, int exp)
487 | {
488 | return (ddouble) {ldexp(a.hi, exp), ldexp(a.lo, exp)};
489 | }
490 |
491 | /************************* Binary functions ************************/
492 |
493 | ddouble _hypotqq_ordered(ddouble x, ddouble y);
494 |
495 | static inline ddouble hypotww(ddouble x, ddouble y)
496 | {
497 | x = absw(x);
498 | y = absw(y);
499 | if (x.hi < y.hi)
500 | return _hypotqq_ordered(y, x);
501 | else
502 | return _hypotqq_ordered(x, y);
503 | }
504 |
505 | static inline ddouble hypotdw(double x, ddouble y)
506 | {
507 | return hypotww((ddouble){x, 0}, y);
508 | }
509 |
510 | static inline ddouble hypotwd(ddouble x, double y)
511 | {
512 | return hypotww(x, (ddouble){y, 0});
513 | }
514 |
/* Computes the nearest integer to d.
 *
 * Exact integers are returned unchanged; otherwise the result is
 * floor(d + 1/2), so halfway cases round up (towards +infinity), unlike
 * roundw above which consults the low word to round to nearest. */
static inline ddouble nintw(ddouble d) {
    if (equalww(d, floorw(d))) {
        return d;
    }
    return floorw(addww(d, (ddouble){0.5, 0}));
}
522 |
523 | ddouble expw(ddouble a);
524 | ddouble expm1w(ddouble a);
525 | ddouble ldexpwi(ddouble a, int m);
526 | ddouble logw(ddouble a);
527 | ddouble sinw(ddouble a);
528 | ddouble cosw(ddouble a);
529 | ddouble tanw(ddouble a);
530 | ddouble sinhw(ddouble a);
531 | ddouble coshw(ddouble a);
532 | ddouble tanhw(ddouble a);
533 | ddouble atanw(ddouble a);
534 | ddouble acosw(ddouble a);
535 | ddouble asinw(ddouble a);
536 | ddouble atanhw(ddouble a);
537 | ddouble acoshw(ddouble a);
538 | ddouble asinhw(ddouble a);
539 | ddouble atan2wd(ddouble a, double b);
540 | ddouble atan2dw(double a, ddouble b);
541 | ddouble atan2ww(ddouble a, ddouble b);
542 | ddouble powww(ddouble a, ddouble b);
543 | ddouble powwd(ddouble a, double b);
544 | ddouble powdw(double a, ddouble b);
545 | ddouble modfww(ddouble a, ddouble *b);
546 |
--------------------------------------------------------------------------------
/csrc/_dd_linalg.c:
--------------------------------------------------------------------------------
1 | /* Python extension module for linear algebra functions.
2 | *
3 | * Copyright (C) 2021 Markus Wallerberger and others
4 | * SPDX-License-Identifier: MIT
5 | */
6 | #include "Python.h"
7 | #include "math.h"
8 | #include "stdio.h"
9 |
10 | #include "dd_arith.h"
11 | #include "dd_linalg.h"
12 |
13 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
14 | #include "numpy/ndarraytypes.h"
15 | #include "numpy/ufuncobject.h"
16 | #include "numpy/npy_3kcompat.h"
17 |
18 | /**
19 | * Allows parameter to be marked unused
20 | */
21 | #define MARK_UNUSED(x) do { (void)(x); } while(false)
22 |
23 | /************************ Linear algebra ***************************/
24 |
/* gufunc inner loop for ddouble matrix multiplication, core signature
 * (i,j),(j,k)->(i,k).
 *
 * args/dims/steps follow the numpy generalized-ufunc convention: dims[0]
 * is the outer (broadcast) loop length and steps[] are byte strides. The
 * core-dimension byte strides are divided by sizeof(ddouble) so the data
 * can be indexed as ddouble arrays. */
static void u_matmulw(char **args, const npy_intp *dims, const npy_intp* steps,
                      void *data)
{
    // signature (n;i,j),(n;j,k)->(n;i,k)
    const npy_intp nn = dims[0], ii = dims[1], jj = dims[2], kk = dims[3];
    const npy_intp _san = steps[0], _sbn = steps[1], _scn = steps[2],
                   _sai = steps[3], _saj = steps[4], _sbj = steps[5],
                   _sbk = steps[6], _sci = steps[7], _sck = steps[8];
    char *_a = args[0], *_b = args[1], *_c = args[2];

    /* Byte strides -> element strides for ddouble indexing. */
    const npy_intp sai = _sai / sizeof(ddouble), saj = _saj / sizeof(ddouble),
                   sbj = _sbj / sizeof(ddouble), sbk = _sbk / sizeof(ddouble),
                   sci = _sci / sizeof(ddouble), sck = _sck / sizeof(ddouble);

    for (npy_intp n = 0; n != nn; ++n, _a += _san, _b += _sbn, _c += _scn) {
        const ddouble *a = (const ddouble *)_a, *b = (const ddouble *)_b;
        ddouble *c = (ddouble *)_c;

        /* Each (i, k) output element is independent of the others. */
        #pragma omp parallel for collapse(2)
        for (npy_intp i = 0; i < ii; ++i) {
            for (npy_intp k = 0; k < kk; ++k) {
                ddouble val = Q_ZERO, tmp;
                for (npy_intp j = 0; j < jj; ++j) {
                    tmp = mulww(a[i * sai + j * saj], b[j * sbj + k * sbk]);
                    val = addww(val, tmp);
                }
                c[i * sci + k * sck] = val;
            }
        }
    }
    MARK_UNUSED(data);
}
57 |
58 | /****************************** Helper functions *************************/
59 |
60 | static void ensure_inplace_2(
61 | char *in, char *out, npy_intp n1, npy_intp si1, npy_intp so1,
62 | npy_intp n2, npy_intp si2, npy_intp so2)
63 | {
64 | if (in == out)
65 | return;
66 |
67 | char *in1 = in, *out1 = out;
68 | for (npy_intp i1 = 0; i1 != n1; ++i1, in1 += si1, out1 += so1) {
69 | char *in2 = in1, *out2 = out1;
70 | for (npy_intp i2 = 0; i2 != n2; ++i2, in2 += si2, out2 += so2) {
71 | char *inx = in2, *outx = out2;
72 | *(ddouble *)outx = *(ddouble *)inx;
73 | }
74 | }
75 | }
76 |
77 | static void ensure_inplace_3(
78 | char *in, char *out, npy_intp n1, npy_intp si1, npy_intp so1,
79 | npy_intp n2, npy_intp si2, npy_intp so2, npy_intp n3, npy_intp si3,
80 | npy_intp so3)
81 | {
82 | if (in == out)
83 | return;
84 |
85 | char *in1 = in, *out1 = out;
86 | for (npy_intp i1 = 0; i1 != n1; ++i1, in1 += si1, out1 += so1) {
87 | char *in2 = in1, *out2 = out1;
88 | for (npy_intp i2 = 0; i2 != n2; ++i2, in2 += si2, out2 += so2) {
89 | char *in3 = in2, *out3 = out2;
90 | for (npy_intp i3 = 0; i3 != n3; ++i3, in3 += si3, out3 += so3) {
91 | char *inx = in3, *outx = out3;
92 | *(ddouble *)outx = *(ddouble *)inx;
93 | }
94 | }
95 | }
96 | }
97 |
98 | /*************************** More complicated ***********************/
99 |
/* gufunc loop for the vector 2-norm, core signature (i)->().
 * The core byte stride is converted to a ddouble element stride for
 * normw; outer strides advance in bytes. */
static void u_normw(
        char **args, const npy_intp *dims, const npy_intp* steps, void *data)
{
    // signature (n;i)->(n;)
    const npy_intp nn = dims[0], ii = dims[1];
    const npy_intp san = steps[0], sbn = steps[1], _sai = steps[2];
    char *_a = args[0], *_b = args[1];

    for (npy_intp n = 0; n != nn; ++n, _a += san, _b += sbn) {
        *(ddouble *)_b = normw((const ddouble *)_a, ii, _sai / sizeof(ddouble));
    }
    MARK_UNUSED(data);
}
113 |
/* gufunc loop generating Householder reflectors, core signature
 * (i)->(),(i). For each outer element, householderw consumes the vector
 * in _a and writes a scalar to _b and the reflector vector to _c. */
static void u_householderw(
        char **args, const npy_intp *dims, const npy_intp* steps, void *data)
{
    // signature (n;i)->(n;),(n;i)
    const npy_intp nn = dims[0], ii = dims[1];
    const npy_intp _san = steps[0], _sbn = steps[1], _scn = steps[2],
                   _sai = steps[3], _sci = steps[4];
    char *_a = args[0], *_b = args[1], *_c = args[2];

    for (npy_intp n = 0; n != nn; ++n, _a += _san, _b += _sbn, _c += _scn) {
        *(ddouble *)_b = householderw(
                (const ddouble *)_a, (ddouble *)_c, ii,
                _sai / sizeof(ddouble), _sci / sizeof(ddouble));
    }
    MARK_UNUSED(data);
}
130 |
131 | static void u_rank1updateq(
132 | char **args, const npy_intp *dims, const npy_intp* steps, void *data)
133 | {
134 | // signature (n;i,j),(n;i),(n;j)->(n;i,j)
135 | const npy_intp nn = dims[0], ii = dims[1], jj = dims[2];
136 | const npy_intp _san = steps[0], _sbn = steps[1], _scn = steps[2],
137 | _sdn = steps[3], _sai = steps[4], _saj = steps[5],
138 | _sbi = steps[6], _scj = steps[7], _sdi = steps[8],
139 | _sdj = steps[9];
140 | char *_a = args[0], *_b = args[1], *_c = args[2], *_d = args[3];
141 |
142 | ensure_inplace_3(_a, _d, nn, _san, _sdn, ii, _sai, _sdi, jj, _saj, _sdj);
143 | for (npy_intp n = 0; n != nn; ++n, _a += _san, _b += _sbn, _c += _scn) {
144 | rank1updateq(
145 | (ddouble *)_d, _sai / sizeof(ddouble), _saj / sizeof(ddouble),
146 | (const ddouble *)_b, _sbi / sizeof(ddouble),
147 | (const ddouble *)_c, _scj / sizeof(ddouble), ii, jj);
148 | }
149 | MARK_UNUSED(data);
150 | }
151 |
/* gufunc loop for one sweep of one-sided Jacobi rotations, core signature
 * (i,j),(j,j)->(i,j),(j,j),().
 * The inputs a, b are first copied into the outputs c, d (unless numpy
 * passed the same buffers); jacobi_sweep then updates c and d in place
 * and returns a scalar that is stored in e (presumably a convergence
 * measure -- see jacobi_sweep). */
static void u_jacobisweepw(
        char **args, const npy_intp *dims, const npy_intp* steps, void *data)
{
    // signature (n;i,j),(n;i=j,j)->(n;i,j),(n;i=j,j);(n,)
    const npy_intp nn = dims[0], ii = dims[1], jj = dims[2];
    const npy_intp _san = steps[0], _sbn = steps[1], _scn = steps[2],
                   _sdn = steps[3], _sen = steps[4], _sai = steps[5],
                   _saj = steps[6], _sbi = steps[7], _sbj = steps[8],
                   _sci = steps[9], _scj = steps[10], _sdi = steps[11],
                   _sdj = steps[12];
    char *_a = args[0], *_b = args[1], *_c = args[2], *_d = args[3],
         *_e = args[4];

    /* b is square (i = j), hence jj is used for both of its dimensions. */
    ensure_inplace_3(_a, _c, nn, _san, _scn, ii, _sai, _sci, jj, _saj, _scj);
    ensure_inplace_3(_b, _d, nn, _sbn, _sdn, jj, _sbi, _sdi, jj, _sbj, _sdj);
    for (npy_intp n = 0; n != nn; ++n, _c += _scn, _d += _sdn, _e += _sen) {
        ddouble *c = (ddouble *)_c, *d = (ddouble *)_d, *e = (ddouble *)_e;
        const npy_intp
            sci = _sci / sizeof(ddouble), scj = _scj / sizeof(ddouble),
            sdi = _sdi / sizeof(ddouble), sdj = _sdj / sizeof(ddouble);

        *e = jacobi_sweep(c, sci, scj, d, sdi, sdj, ii, jj);
    }
    MARK_UNUSED(data);
}
177 |
/* gufunc loop generating Givens rotations, core signature (2)->(2),(2,2).
 * givensw turns the input pair (f, g) into (c, s, r); the loop stores
 * (r, 0) in the first output and the rotation matrix
 * [[c, s], [-s, c]] in the second. */
static void u_givensw(
        char **args, const npy_intp *dims, const npy_intp* steps, void *data)
{
    // signature (n;2)->(n;2),(n;2,2)
    const npy_intp nn = dims[0];
    const npy_intp san = steps[0], sbn = steps[1], scn = steps[2],
                   sai = steps[3], sbi = steps[4], sci = steps[5],
                   scj = steps[6];
    char *_a = args[0], *_b = args[1], *_c = args[2];

    for (npy_intp n = 0; n != nn; ++n, _a += san, _b += sbn, _c += scn) {
        ddouble f = *(ddouble *) _a;
        ddouble g = *(ddouble *) (_a + sai);

        ddouble c, s, r;
        givensw(f, g, &c, &s, &r);

        *(ddouble *)_b = r;
        *(ddouble *)(_b + sbi) = Q_ZERO;
        *(ddouble *)_c = c;
        *(ddouble *)(_c + scj) = s;
        *(ddouble *)(_c + sci) = negw(s);
        *(ddouble *)(_c + sci + scj) = c;
    }
    MARK_UNUSED(data);
}
204 |
/* gufunc loop applying a sequence of Givens rotations to a matrix, core
 * signature (i,2),(i,j)->(i,j). The matrix input is first copied to the
 * output (unless numpy passed the same buffer), then rotation i is
 * applied to rows i and i+1 in place.
 * Note: jj = dims[3] because dims[2] is the fixed core dimension 2. */
static void u_givens_seqq(
        char **args, const npy_intp *dims, const npy_intp* steps, void *data)
{
    // signature (n;i,2),(n;i,j)->(n;i,j)
    const npy_intp nn = dims[0], ii = dims[1], jj = dims[3];
    const npy_intp _san = steps[0], _sbn = steps[1], _scn = steps[2],
                   _sai = steps[3], _saq = steps[4], _sbi = steps[5],
                   _sbj = steps[6], _sci = steps[7], _scj = steps[8];
    char *_a = args[0], *_b = args[1], *_c = args[2];

    ensure_inplace_3(_b, _c, nn, _sbn, _scn, ii, _sbi, _sci, jj, _sbj, _scj);
    for (npy_intp n = 0; n != nn; ++n, _a += _san, _c += _scn) {
        /* Successive rotations along i are interdependent, so we slice
         * the array in the other direction and parallelize over columns.
         */
        #pragma omp parallel for
        for (npy_intp j = 0; j < jj; ++j) {
            for (npy_intp i = 0; i < ii - 1; ++i) {
                ddouble *c_x = (ddouble *)(_c + i *_sci + j * _scj);
                ddouble *c_y = (ddouble *)(_c + (i + 1) *_sci + j * _scj);
                ddouble g_cos = *(ddouble *)(_a + i * _sai);
                ddouble g_sin = *(ddouble *)(_a + i * _sai + _saq);
                lmul_givensq(c_x, c_y, g_cos, g_sin, *c_x, *c_y);
            }
        }
    }
    MARK_UNUSED(data);
}
233 |
/* gufunc loop for the Golub-Kahan bidiagonal chase, core signature
 * (i),(i)->(i),(i),(i,4). The diagonal (a) and off-diagonal (b) are
 * copied into the outputs c, d and chased in place; the (i,4) rotation
 * record e must be C-contiguous. */
static void u_golub_kahan_chaseq(
        char **args, const npy_intp *dims, const npy_intp* steps, void *data)
{
    // signature (n;i),(n;i)->(n;i),(n;i),(n;i,4)
    const npy_intp nn = dims[0], ii = dims[1];
    const npy_intp _san = steps[0], _sbn = steps[1], _scn = steps[2],
                   _sdn = steps[3], _sen = steps[4], _sai = steps[5],
                   _sbi = steps[6], _sci = steps[7], _sdi = steps[8],
                   _sei = steps[9], _se4 = steps[10];
    char *_a = args[0], *_b = args[1], *_c = args[2], *_d = args[3],
         *_e = args[4];

    ensure_inplace_2(_a, _c, nn, _san, _scn, ii, _sai, _sci);
    ensure_inplace_2(_b, _d, nn, _sbn, _sdn, ii, _sbi, _sdi);
    if (_se4 != sizeof(ddouble) || _sei != 4 * sizeof(ddouble)) {
        /* NOTE(review): this bails out silently (stderr only) without
         * setting a Python error -- the outputs are left unmodified. */
        fprintf(stderr, "rot is not contiguous, but needs to be");
        return;
    }

    for (npy_intp n = 0; n != nn; ++n, _c += _scn, _d += _sdn, _e += _sen) {
        golub_kahan_chaseq((ddouble *)_c, _sci / sizeof(ddouble),
                           (ddouble *)_d, _sdi / sizeof(ddouble),
                           ii, (ddouble *)_e);
    }
    MARK_UNUSED(data);
}
260 |
/* gufunc loop for the SVD of 2x2 blocks, core signature
 * (2,2)->(2,2),(2),(2,2). svd_2x2 yields singular values (smin, smax)
 * and rotation pairs (cu, su), (cv, sv); these are assembled into the
 * left factor [[cu, -su], [su, cu]] in _b, the singular values
 * (smax, smin) in _c, and the right factor [[cv, sv], [-sv, cv]] in _d. */
static void u_svd_2x2(
        char **args, const npy_intp *dims, const npy_intp* steps, void *data)
{
    // signature (n;2,2)->(n;2,2),(n;2),(n;2,2)
    const npy_intp nn = dims[0];
    const npy_intp san = steps[0], sbn = steps[1], scn = steps[2],
                   sdn = steps[3], sai = steps[4], saj = steps[5],
                   sbi = steps[6], sbj = steps[7], sci = steps[8],
                   sdi = steps[9], sdj = steps[10];
    char *_a = args[0], *_b = args[1], *_c = args[2], *_d = args[3];

    for (npy_intp n = 0; n != nn;
                ++n, _a += san, _b += sbn, _c += scn, _d += sdn) {
        ddouble a11 = *(ddouble *) _a;
        ddouble a12 = *(ddouble *) (_a + saj);
        ddouble a21 = *(ddouble *) (_a + sai);
        ddouble a22 = *(ddouble *) (_a + sai + saj);

        ddouble smin, smax, cu, su, cv, sv;
        svd_2x2(a11, a12, a21, a22, &smin, &smax, &cv, &sv, &cu, &su);

        *(ddouble *)_b = cu;
        *(ddouble *)(_b + sbj) = negw(su);
        *(ddouble *)(_b + sbi) = su;
        *(ddouble *)(_b + sbi + sbj) = cu;

        *(ddouble *)_c = smax;
        *(ddouble *)(_c + sci) = smin;

        *(ddouble *)_d = cv;
        *(ddouble *)(_d + sdj) = sv;
        *(ddouble *)(_d + sdi) = negw(sv);
        *(ddouble *)(_d + sdi + sdj) = cv;
    }
    MARK_UNUSED(data);
}
297 |
/* gufunc loop for singular values of 2x2 blocks, core signature
 * (2,2)->(2). Same as u_svd_2x2 but the rotation outputs of svd_2x2 are
 * suppressed (NULL); only (smax, smin) is stored. */
static void u_svvals_2x2(
        char **args, const npy_intp *dims, const npy_intp* steps, void *data)
{
    // signature (n;2,2)->(n;2)
    const npy_intp nn = dims[0];
    const npy_intp san = steps[0], sbn = steps[1], sai = steps[2],
                   saj = steps[3], sbi = steps[4];
    char *_a = args[0], *_b = args[1];

    for (npy_intp n = 0; n != nn; ++n, _a += san, _b += sbn) {
        ddouble a11 = *(ddouble *) _a;
        ddouble a12 = *(ddouble *) (_a + saj);
        ddouble a21 = *(ddouble *) (_a + sai);
        ddouble a22 = *(ddouble *) (_a + sai + saj);

        ddouble smin, smax;
        svd_2x2(a11, a12, a21, a22, &smin, &smax, NULL, NULL, NULL, NULL);

        *(ddouble *)_b = smax;
        *(ddouble *)(_b + sbi) = smin;
    }
    MARK_UNUSED(data);
}
321 |
322 | /* ----------------------- Python stuff -------------------------- */
323 |
324 | static PyObject *module;
325 | static PyObject *numpy_module = NULL;
326 | static int type_num;
327 |
/* Create the bare `_dd_linalg` module object (no Python-level methods --
 * the gufuncs are attached later in PyInit__dd_linalg) and store it in
 * the global `module`. Returns the module, or NULL on failure. */
static PyObject *make_module()
{
    static PyMethodDef no_methods[] = {
        {NULL, NULL, 0, NULL} // No methods defined
    };
    static struct PyModuleDef module_def = {
        PyModuleDef_HEAD_INIT,
        "_dd_linalg",     /* m_name */
        NULL,             /* m_doc */
        -1,               /* m_size: no per-module state */
        no_methods,
        NULL,             /* m_slots */
        NULL,             /* m_traverse */
        NULL,             /* m_clear */
        NULL              /* m_free */
    };
    module = PyModule_Create(&module_def);
    return module;
}
347 |
348 | static int import_ddouble_dtype()
349 | {
350 | PyObject *dd_module = PyImport_ImportModule("xprec._dd_ufunc");
351 | if (dd_module == NULL)
352 | return -1;
353 |
354 | PyArray_Descr *dtype =
355 | (PyArray_Descr *)PyObject_GetAttrString(dd_module, "dtype");
356 | if (dtype == NULL)
357 | return -1;
358 |
359 | /* Let's pray at least this stays public */
360 | type_num = dtype->type_num;
361 | return 0;
362 | }
363 |
364 | static int gufunc(
365 | PyUFuncGenericFunction uloop, int nin, int nout,
366 | const char *signature, const char *name, const char *docstring,
367 | bool in_numpy)
368 | {
369 | PyUFuncObject *ufunc = NULL;
370 | int *arg_types = NULL, retcode = 0;
371 |
372 | if (in_numpy) {
373 | ufunc = (PyUFuncObject *)PyObject_GetAttrString(numpy_module, name);
374 | } else {
375 | ufunc = (PyUFuncObject *)PyUFunc_FromFuncAndDataAndSignature(
376 | NULL, NULL, NULL, 0, nin, nout, PyUFunc_None, name,
377 | docstring, 0, signature);
378 | }
379 | if (ufunc == NULL) goto error;
380 |
381 | int *dtypes = PyMem_New(int, nin + nout);
382 | if (dtypes == NULL) goto error;
383 |
384 | for (int i = 0; i != nin + nout; ++i)
385 | dtypes[i] = type_num;
386 |
387 | retcode = PyUFunc_RegisterLoopForType(ufunc, type_num,
388 | uloop, arg_types, NULL);
389 | if (retcode < 0) goto error;
390 |
391 | return PyModule_AddObject(module, name, (PyObject *)ufunc);
392 |
393 | error:
394 | if (!in_numpy)
395 | Py_XDECREF(ufunc);
396 | PyMem_Free(arg_types);
397 | return -1;
398 | }
399 |
400 | PyMODINIT_FUNC PyInit__dd_linalg(void)
401 | {
402 | if (!make_module())
403 | return NULL;
404 |
405 | /* Initialize numpy things */
406 | import_array();
407 | import_umath();
408 |
409 | numpy_module = PyImport_ImportModule("numpy");
410 | if (numpy_module == NULL)
411 | return NULL;
412 |
413 | if (import_ddouble_dtype() < 0)
414 | return NULL;
415 |
416 | gufunc(u_normw, 1, 1, "(i)->()",
417 | "norm", "Vector 2-norm", false);
418 | gufunc(u_matmulw, 2, 1, "(i?,j),(j,k?)->(i?,k?)",
419 | "matmul", "Matrix multiplication", true);
420 | gufunc(u_givensw, 1, 2, "(2)->(2),(2,2)",
421 | "givens", "Generate Givens rotation", false);
422 | gufunc(u_givens_seqq, 2, 1, "(i,2),(i,j?)->(i,j?)",
423 | "givens_seq", "apply sequence of givens rotation to matrix", false);
424 | gufunc(u_householderw, 1, 2, "(i)->(),(i)",
425 | "householder", "Generate Householder reflectors", false);
426 | gufunc(u_rank1updateq, 3, 1, "(i,j),(i),(j)->(i,j)",
427 | "rank1update", "Perform rank-1 update of matrix", false);
428 | gufunc(u_svd_2x2, 1, 3, "(2,2)->(2,2),(2),(2,2)",
429 | "svd2x2", "SVD of upper triangular 2x2 problem", false);
430 | gufunc(u_svvals_2x2, 1, 1, "(2,2)->(2)",
431 | "svvals2x2", "singular values of upper triangular 2x2 problem", false);
432 | gufunc(u_jacobisweepw, 2, 3, "(i,j),(j,j)->(i,j),(j,j),()",
433 | "jacobi_sweep", "Perform sweep of one-sided Jacobi rotations", false);
434 | gufunc(u_golub_kahan_chaseq, 2, 3, "(i),(i)->(i),(i),(i,4)",
435 | "golub_kahan_chase", "bidiagonal chase procedure", false);
436 |
437 | /* Make dtype */
438 | PyArray_Descr *dtype = PyArray_DescrFromType(NPY_CDOUBLE);
439 | PyModule_AddObject(module, "dtype", (PyObject *)dtype);
440 |
441 | /* Module is ready */
442 | return module;
443 | }
444 |
--------------------------------------------------------------------------------
/csrc/dd_arith.c:
--------------------------------------------------------------------------------
1 | /* Double-double arithmetic library
2 | *
3 | * Part of the functions are copied from the QD library for U.C. Berkeley
4 | * and licensed modified BSD (see QD-LICENSE.txt)
5 | *
6 | * Copyright (C) 2012 Yozo Hida, Xiaoye S. Li, David H. Bailey
7 | * Copyright (C) 2021 Markus Wallerberger and others
8 | * SPDX-License-Identifier: MIT and Modified-BSD
9 | */
10 | #include "./dd_arith.h"
11 | #include
12 |
13 | // 2**500 and 2**(-500);
14 | static const double LARGE = 3.273390607896142e+150;
15 | static const double INV_LARGE = 3.054936363499605e-151;
16 |
17 | static ddouble hypotqq_compute(ddouble x, ddouble y)
18 | {
19 | return sqrtw(addww(sqrw(x), sqrw(y)));
20 | }
21 |
22 | ddouble _hypotqq_ordered(ddouble x, ddouble y)
23 | {
24 | // assume that x >= y >= 0
25 | // special cases
26 | if (iszerow(y))
27 | return x;
28 |
29 | // if very large or very small, renormalize
30 | if (x.hi > LARGE) {
31 | x = mul_pwr2(x, INV_LARGE);
32 | y = mul_pwr2(y, INV_LARGE);
33 | return mul_pwr2(hypotqq_compute(x, y), LARGE);
34 | }
35 | if (x.hi < INV_LARGE) {
36 | x = mul_pwr2(x, LARGE);
37 | y = mul_pwr2(y, LARGE);
38 | return mul_pwr2(hypotqq_compute(x, y), INV_LARGE);
39 | }
40 |
41 | // normal case
42 | return hypotqq_compute(x, y);
43 | }
44 |
/* Square root of a ddouble.
 * For a == 0 this returns (+/-0, 0); for a < 0, sqrt() yields a NaN. */
ddouble sqrtw(ddouble a)
{
    /* Given approximation x to 1/sqrt(a), perform a single Newton step:
     *
     *    sqrt(a) = a*x + [a - (a*x)^2] * x / 2  (approx)
     *
     * The approximation is accurate to twice the accuracy of x.
     * Also, the multiplication (a*x) and [-]*x can be done with
     * only half the precision.
     * From: Karp, High Precision Division and Square Root, 1993
     */
    if (a.hi <= 0)
        return (ddouble){sqrt(a.hi), 0};

    double x = 1.0 / sqrt(a.hi);    /* double-precision seed for 1/sqrt(a) */
    double ax = a.hi * x;           /* ~ sqrt(a) to double precision */
    ddouble ax_sqr = sqrw((ddouble){ax, 0});
    double diff = subww(a, ax_sqr).hi * x * 0.5;   /* Newton correction */
    return two_sum(ax, diff);
}
65 |
66 | /* Inverse Factorials from 1/0!, 1/1!, 1/2!, asf. */
67 | static int _n_inv_fact = 18;
68 | static const ddouble _inv_fact[] = {
69 | {1.00000000000000000e+00, 0.00000000000000000e+00},
70 | {1.00000000000000000e+00, 0.00000000000000000e+00},
71 | {5.00000000000000000e-01, 0.00000000000000000e+00},
72 | {1.66666666666666657e-01, 9.25185853854297066e-18},
73 | {4.16666666666666644e-02, 2.31296463463574266e-18},
74 | {8.33333333333333322e-03, 1.15648231731787138e-19},
75 | {1.38888888888888894e-03, -5.30054395437357706e-20},
76 | {1.98412698412698413e-04, 1.72095582934207053e-22},
77 | {2.48015873015873016e-05, 2.15119478667758816e-23},
78 | {2.75573192239858925e-06, -1.85839327404647208e-22},
79 | {2.75573192239858883e-07, 2.37677146222502973e-23},
80 | {2.50521083854417202e-08, -1.44881407093591197e-24},
81 | {2.08767569878681002e-09, -1.20734505911325997e-25},
82 | {1.60590438368216133e-10, 1.25852945887520981e-26},
83 | {1.14707455977297245e-11, 2.06555127528307454e-28},
84 | {7.64716373181981641e-13, 7.03872877733453001e-30},
85 | {4.77947733238738525e-14, 4.39920548583408126e-31},
86 | {2.81145725434552060e-15, 1.65088427308614326e-31}
87 | };
88 |
89 | /**
90 | * For the exponential of `a`, return compute tuple `x, m` such that:
91 | *
92 | * exp(a) = ldexp(1 + x, m),
93 | *
94 | * where `m` is chosen such that `abs(x) < 1`. The value `x` is returned,
95 | * whereas the value `m` is given as an out parameter.
96 | */
static ddouble _exp_reduced(ddouble a, int *m)
{
    // Strategy: We first reduce the size of x by noting that
    //
    //     exp(k * r + m * log(2)) = 2^m * exp(r)^k
    //
    // where m and k are integers. By choosing m appropriately
    // we can make |k * r| <= log(2) / 2 = 0.347.
    const double k = 512.0;
    const double inv_k = 1.0 / k;
    double mm = floor(a.hi / Q_LOG2.hi + 0.5);
    ddouble r = mul_pwr2(subww(a, mulwd(Q_LOG2, mm)), inv_k);
    *m = (int)mm;

    // Now, evaluate exp(r) using the Taylor series, since reducing
    // the argument substantially speeds up the convergence. We omit order 0
    // and start at order 1:
    ddouble rpower = r;
    ddouble term = r;
    ddouble sum = term;

    // Order 2
    rpower = sqrw(r);
    term = mul_pwr2(rpower, 0.5);
    sum = addww(sum, term);

    // Order 3 and up, stopping early once a term drops below the target
    // accuracy (scaled by 1/k, since the result is raised to the k'th power)
    for (int i = 3; i < 9; i++) {
        rpower = mulww(rpower, r);
        term = mulww(rpower, _inv_fact[i]);
        sum = addww(sum, term);
        if (fabs(term.hi) <= inv_k * Q_EPS.hi)
            break;
    }

    // We now have that approximately exp(r) == 1 + sum. Raise that to
    // the k'th (512 = 2^9) power by squaring the binomial nine times,
    // using (1 + s)^2 = 1 + (2*s + s^2) to stay in "1 + x" form:
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    sum = addww(mul_pwr2(sum, 2.0), sqrw(sum));
    return sum;
}
145 |
146 | ddouble expw(ddouble a)
147 | {
148 | if (a.hi <= -709.0)
149 | return Q_ZERO;
150 | if (a.hi >= 709.0)
151 | return infw();
152 | if (iszerow(a))
153 | return Q_ONE;
154 | if (isonew(a))
155 | return Q_E;
156 |
157 | int m;
158 | ddouble sum = _exp_reduced(a, &m);
159 |
160 | /** Add back the one and multiply by 2 to the m */
161 | sum = addwd(sum, 1.0);
162 | return ldexpw(sum, (int)m);
163 | }
164 |
165 | ddouble expm1w(ddouble a)
166 | {
167 | if (a.hi <= -709.0)
168 | return (ddouble){-1.0, 0.0};
169 | if (a.hi >= 709.0)
170 | return infw();
171 | if (iszerow(a))
172 | return Q_ZERO;
173 |
174 | int m;
175 | ddouble sum = _exp_reduced(a, &m);
176 |
177 | /* Truncation case: simply return sum */
178 | if (m == 0)
179 | return sum;
180 |
181 | /* Non-truncation case: compute full exp, then remove the one */
182 | sum = addwd(sum, 1.0);
183 | sum = ldexpw(sum, (int)m);
184 | return subwd(sum, 1.0);
185 | }
186 |
187 | ddouble ldexpwi(ddouble a, int exp)
188 | {
189 | return ldexpw(a, exp);
190 | }
191 |
/* Natural logarithm of a ddouble.
 * Domain handling: log(1) = 0, log(0) = -inf, log(negative) = NaN. */
ddouble logw(ddouble a)
{
    /* Strategy. The Taylor series for log converges much more
     * slowly than that of exp, due to the lack of the factorial
     * term in the denominator. Hence this routine instead tries
     * to determine the root of the function
     *
     *     f(x) = exp(x) - a
     *
     * using Newton iteration. The iteration is given by
     *
     *     x' = x - f(x)/f'(x)
     *        = x - (1 - a * exp(-x))
     *        = x + a * exp(-x) - 1.
     *
     * Only one iteration is needed, since Newton's iteration
     * approximately doubles the number of digits per iteration.
     */
    if (isonew(a))
        return Q_ZERO;
    if (iszerow(a))
        return negw(infw());
    if (!ispositivew(a))
        return nanw();

    ddouble x = {log(a.hi), 0.0};  /* Initial approximation */
    /* One Newton step: x' = x + a * exp(-x) - 1. */
    x = subwd(addww(x, mulww(a, expw(negw(x)))), 1.0);
    return x;
}
221 |
/* Table of sin(k * pi/16) and cos(k * pi/16) for k = 1..4, each entry a
 * double-double {hi, lo} pair.  Used by the trig routines to undo the
 * pi/16 part of the argument reduction done in mod_pi16(). */
static const ddouble _sin_table[] = {
    {1.950903220161282758e-01, -7.991079068461731263e-18},
    {3.826834323650897818e-01, -1.005077269646158761e-17},
    {5.555702330196021776e-01, 4.709410940561676821e-17},
    {7.071067811865475727e-01, -4.833646656726456726e-17}
    };

static const ddouble _cos_table[] = {
    {9.807852804032304306e-01, 1.854693999782500573e-17},
    {9.238795325112867385e-01, 1.764504708433667706e-17},
    {8.314696123025452357e-01, 1.407385698472802389e-18},
    {7.071067811865475727e-01, -4.833646656726456726e-17}
    };
236 |
/* Taylor series for sin(a); only called with |a| <= pi/32, where the
 * alternating series converges quickly. */
static ddouble sin_taylor(ddouble a)
{
    // Use the Taylor series a - a^3/3! + a^5/5! + ...
    // Terms are relative to a, so the stop threshold scales with |a|.
    const double thresh = 0.5 * fabs(a.hi) * Q_EPS.hi;
    const ddouble minus_asquared = negw(sqrw(a));

    // First order:
    ddouble apow = a;
    ddouble term = a;
    ddouble sum = a;

    // Subsequent orders:
    for (int i = 3; i < _n_inv_fact; i += 2) {
        apow = mulww(apow, minus_asquared);
        term = mulww(apow, _inv_fact[i]);
        sum = addww(sum, term);
        if (fabs(term.hi) <= thresh)
            break;
    }
    return sum;
}
258 |
/* Taylor series for cos(a); only called with |a| <= pi/32. */
static ddouble cos_taylor(ddouble a)
{
    // Use Taylor series 1 - x^2/2! + x^4/4! + ...
    // The sum is O(1), so an absolute threshold of eps/2 suffices.
    const double thresh = 0.5 * Q_EPS.hi;
    const ddouble minus_asquared = negw(sqrw(a));

    // Zeroth and second order:
    ddouble apow = minus_asquared;
    ddouble term = mul_pwr2(apow, 0.5);
    ddouble sum = adddw(1.0, term);

    // From fourth order:
    for (int i = 4; i < _n_inv_fact; i += 2) {
        apow = mulww(apow, minus_asquared);
        term = mulww(apow, _inv_fact[i]);
        sum = addww(sum, term);
        if (fabs(term.hi) <= thresh)
            break;
    }
    return sum;
}
280 |
/* Compute sin(a) and cos(a) together for a reduced argument |a| <= pi/32.
 * The cosine is recovered from sqrt(1 - sin^2), which is valid here since
 * cos(a) > 0 on that interval. */
static void sincos_taylor(ddouble a, ddouble *sin_a, ddouble *cos_a)
{
    if (iszerow(a)) {
        *sin_a = Q_ZERO;
        *cos_a = Q_ONE;
    } else {
        *sin_a = sin_taylor(a);
        *cos_a = sqrtw(subdw(1.0, sqrw(*sin_a)));
    }
}
291 |
/**
 * To compute 2pi-periodic function, we reduce the argument `a` by
 * choosing integers z, -2 <= j <= 2 and -4 <= k <= 4 such that:
 *
 *      a == z * (2*pi) + j * (pi/2) + k * (pi/16) + t,
 *
 * where `abs(t) <= pi/32`.  Returns t and stores j, k.  The j/k bounds
 * are only approximate for huge or non-finite `a`; callers re-check them.
 */
static ddouble mod_pi16(ddouble a, int *j, int *k)
{
    static const ddouble pi_16 =
        {1.963495408493620697e-01, 7.654042494670957545e-18};

    // approximately reduce modulo 2*pi
    ddouble z = roundw(divww(a, Q_2PI));
    ddouble r = subww(a, mulww(Q_2PI, z));

    // approximately reduce modulo pi/2
    double q = floor(r.hi / Q_PI_2.hi + 0.5);
    ddouble t = subww(r, mulwd(Q_PI_2, q));
    *j = (int)q;

    // approximately reduce modulo pi/16.
    q = floor(t.hi / pi_16.hi + 0.5);
    t = subww(t, mulwd(pi_16, q));
    *k = (int)q;
    return t;
}
320 |
321 | ddouble sinw(ddouble a)
322 | {
323 | /* Strategy. To compute sin(x), we choose integers a, b so that
324 | *
325 | * x = s + a * (pi/2) + b * (pi/16)
326 | *
327 | * and |s| <= pi/32. Using the fact that
328 | *
329 | * sin(pi/16) = 0.5 * sqrt(2 - sqrt(2 + sqrt(2)))
330 | *
331 | * we can compute sin(x) from sin(s), cos(s). This greatly
332 | * increases the convergence of the sine Taylor series.
333 | */
334 | if (iszerow(a))
335 | return Q_ZERO;
336 |
337 | int j, k;
338 | ddouble t = mod_pi16(a, &j, &k);
339 | int abs_k = abs(k);
340 |
341 | if (j < -2 || j > 2)
342 | return nanw();
343 |
344 | if (abs_k > 4)
345 | return nanw();
346 |
347 | if (k == 0) {
348 | switch (j)
349 | {
350 | case 0:
351 | return sin_taylor(t);
352 | case 1:
353 | return cos_taylor(t);
354 | case -1:
355 | return negw(cos_taylor(t));
356 | default:
357 | return negw(sin_taylor(t));
358 | }
359 | }
360 |
361 | ddouble u = _cos_table[abs_k - 1];
362 | ddouble v = _sin_table[abs_k - 1];
363 | ddouble sin_x, cos_x, r;
364 | sincos_taylor(t, &sin_x, &cos_x);
365 | if (j == 0) {
366 | if (k > 0)
367 | r = addww(mulww(u, sin_x), mulww(v, cos_x));
368 | else
369 | r = subww(mulww(u, sin_x), mulww(v, cos_x));
370 | } else if (j == 1) {
371 | if (k > 0)
372 | r = subww(mulww(u, cos_x), mulww(v, sin_x));
373 | else
374 | r = addww(mulww(u, cos_x), mulww(v, sin_x));
375 | } else if (j == -1) {
376 | if (k > 0)
377 | r = subww(mulww(v, sin_x), mulww(u, cos_x));
378 | else if (k < 0) /* NOTE! */
379 | r = subww(mulww(negw(u), cos_x), mulww(v, sin_x));
380 | } else {
381 | if (k > 0)
382 | r = subww(mulww(negw(u), sin_x), mulww(v, cos_x));
383 | else
384 | r = subww(mulww(v, cos_x), mulww(u, sin_x));
385 | }
386 | return r;
387 | }
388 |
/* Compute cos(a) in double-double precision; same reduction strategy as
 * sinw(): quadrant (j * pi/2) plus table-driven k * pi/16 correction. */
ddouble cosw(ddouble a)
{
    if (iszerow(a))
        return Q_ONE;

    int j, k;
    ddouble t = mod_pi16(a, &j, &k);
    int abs_k = abs(k);

    /* Reduction out of range (huge or non-finite argument) */
    if (j < -2 || j > 2)
        return nanw();

    if (abs_k > 4)
        return nanw();

    /* No pi/16 correction needed: pure quadrant selection. */
    if (k == 0) {
        switch (j) {
        case 0:
            return cos_taylor(t);
        case 1:
            return negw(sin_taylor(t));
        case -1:
            return sin_taylor(t);
        default:
            return negw(cos_taylor(t));
        }
    }

    /* Angle-addition formulas with tabulated sin/cos of |k| * pi/16. */
    ddouble sin_x, cos_x, r;
    sincos_taylor(t, &sin_x, &cos_x);
    ddouble u = _cos_table[abs_k - 1];
    ddouble v = _sin_table[abs_k - 1];

    if (j == 0) {
        if (k > 0)
            r = subww(mulww(u, cos_x), mulww(v, sin_x));
        else
            r = addww(mulww(u, cos_x), mulww(v, sin_x));
    } else if (j == 1) {
        if (k > 0)
            r = subww(mulww(negw(u), sin_x), mulww(v, cos_x));
        else
            r = subww(mulww(v, cos_x), mulww(u, sin_x));
    } else if (j == -1) {
        if (k > 0)
            r = addww(mulww(u, sin_x), mulww(v, cos_x));
        else
            r = subww(mulww(u, sin_x), mulww(v, cos_x));
    } else {
        if (k > 0)
            r = subww(mulww(v, sin_x), mulww(u, cos_x));
        else
            r = subww(mulww(negw(u), cos_x), mulww(v, sin_x));
    }
    return r;
}
445 |
/* Compute sinh(a) in double-double precision.
 *
 * For |a| > 0.05 the exponential identity (exp(a) - exp(-a)) / 2 is used;
 * below that the identity suffers catastrophic cancellation, so the Taylor
 * series is summed directly instead. */
ddouble sinhw(ddouble a)
{
    if (iszerow(a))
        return Q_ZERO;

    if (absw(a).hi > 0.05) {
        ddouble ea = expw(a);
        if (isinfw(ea))
            return ea;                  /* sinh(+big) -> +inf */
        if (iszerow(ea))
            return negw(infw());        /* sinh(-big) -> -inf */
        return mul_pwr2(subww(ea, reciprocalw(ea)), 0.5);
    }

    // When a is small, using the above formula gives a lot of cancellation.
    // Use Taylor series: x + x^3/3! + x^5/5! + ...
    const ddouble asquared = sqrw(a);
    const double thresh = fabs(a.hi) * Q_EPS.hi;

    // First order:
    ddouble apower = a;
    ddouble sum = a;
    ddouble term = a;

    // From third order:
    for (int i = 3; i < _n_inv_fact; i += 2) {
        apower = mulww(apower, asquared);
        term = mulww(apower, _inv_fact[i]);
        sum = addww(sum, term);
        if (fabs(term.hi) <= thresh)
            break;
    }
    return sum;
}
480 |
/* Compute cosh(a) = (exp(a) + exp(-a)) / 2.  No small-argument branch is
 * needed: both terms are positive, so there is no cancellation. */
ddouble coshw(ddouble a)
{
    if (iszerow(a))
        return Q_ONE;

    ddouble ea = expw(a);
    if (isinfw(ea) || iszerow(ea))
        return infw();      /* cosh diverges for large |a| of either sign */
    return mul_pwr2(addww(ea, reciprocalw(ea)), 0.5);
}
491 |
/* Compute tanh(a) in double-double precision.
 *
 * For |a| > 0.05 the exponential form is used directly; for small a the
 * numerator exp(a) - exp(-a) cancels, so tanh is computed from sinh via
 * tanh = sinh / sqrt(1 + sinh^2) instead. */
ddouble tanhw(ddouble a)
{
    if (iszerow(a))
        return Q_ZERO;

    if (fabs(a.hi) > 0.05) {
        ddouble ea = expw(a);
        ddouble inv_ea = reciprocalw(ea);
        return divww(subww(ea, inv_ea), addww(ea, inv_ea));
    }

    ddouble s, c;
    s = sinhw(a);
    c = sqrtw(adddw(1.0, sqrw(s)));     /* cosh from sinh, no cancellation */
    return divww(s, c);
}
508 |
/* Compute tan(a) = sin(a) / cos(a).  Inherits NaN behavior from
 * sinw/cosw for huge or non-finite arguments. */
ddouble tanw(ddouble a)
{
    if (iszerow(a))
        return Q_ZERO;

    ddouble s, c;
    s = sinw(a);
    c = cosw(a);
    return divww(s, c);
}
519 |
/* Compute sin(a) and cos(a) simultaneously, sharing one argument
 * reduction and one Taylor evaluation.  On out-of-range reduction (huge
 * or non-finite a) both outputs are set to NaN. */
void sincosw(const ddouble a, ddouble *sin_a, ddouble *cos_a)
{
    if (iszerow(a)) {
        *sin_a = Q_ZERO;
        *cos_a = Q_ONE;
        return;
    }

    int j, k;
    ddouble t = mod_pi16(a, &j, &k);
    int abs_j = abs(j), abs_k = abs(k);

    if (abs_j > 2 || abs_k > 4) {
        *cos_a = *sin_a = nanw();
        return;
    }

    ddouble sin_t, cos_t;
    ddouble s, c;

    sincos_taylor(t, &sin_t, &cos_t);

    /* First undo the k * pi/16 shift ... */
    if (abs_k == 0) {
        s = sin_t;
        c = cos_t;
    } else {
        ddouble u = _cos_table[abs_k - 1];
        ddouble v = _sin_table[abs_k - 1];

        if (k > 0) {
            s = addww(mulww(u, sin_t), mulww(v, cos_t));
            c = subww(mulww(u, cos_t), mulww(v, sin_t));
        } else {
            s = subww(mulww(u, sin_t), mulww(v, cos_t));
            c = addww(mulww(u, cos_t), mulww(v, sin_t));
        }
    }
    /* ... then apply the j * pi/2 quadrant rotation. */
    if (abs_j == 0) {
        *sin_a = s;
        *cos_a = c;
    } else if (j == 1) {
        *sin_a = c;
        *cos_a = negw(s);
    } else if (j == -1) {
        *sin_a = negw(c);
        *cos_a = s;
    } else {
        *sin_a = negw(s);
        *cos_a = negw(c);
    }

}
572 |
/* Compute atan2(y, x) in double-double precision. */
ddouble atan2ww(ddouble y, ddouble x)
{
    /* Strategy: Instead of using Taylor series to compute
     * arctan, we instead use Newton's iteration to solve
     * the equation
     *
     *    sin(z) = y/r    or    cos(z) = x/r
     *
     * where r = sqrt(x^2 + y^2).
     * The iteration is given by
     *
     *    z' = z + (y - sin(z)) / cos(z)          (for equation 1)
     *    z' = z - (x - cos(z)) / sin(z)          (for equation 2)
     *
     * Here, x and y are normalized so that x^2 + y^2 = 1.
     * If |x| > |y|, then first iteration is used since the
     * denominator is larger. Otherwise, the second is used.
     */
    /* Axis and diagonal special cases are answered exactly from the
     * stored pi constants. */
    if (iszerow(x) && iszerow(y))
        return Q_ZERO;
    if (iszerow(x))
        return (ispositivew(y)) ? Q_PI_2 : negw(Q_PI_2);
    if (iszerow(y))
        return (ispositivew(x)) ? Q_ZERO : Q_PI;
    if (equalww(x, y))
        return (ispositivew(y)) ? Q_PI_4: negw(Q_3PI_4);
    if (equalww(x, negw(y)))
        return (ispositivew(y)) ? Q_3PI_4 : negw(Q_PI_4);

    /* Normalize so that x^2 + y^2 == 1. */
    ddouble r = hypotww(x, y);
    x = divww(x, r);
    y = divww(y, r);

    /* Compute double precision approximation to atan. */
    ddouble z = (ddouble){atan2(y.hi, x.hi), 0.};
    ddouble sin_z, cos_z;

    /* One Newton step doubles the number of correct digits. */
    sincosw(z, &sin_z, &cos_z);
    if (fabs(x.hi) > fabs(y.hi)) {
        /* Use Newton iteration 1.  z' = z + (y - sin(z)) / cos(z)  */
        z = addww(z, divww(subww(y, sin_z), cos_z));
    } else {
        /* Use Newton iteration 2.  z' = z - (x - cos(z)) / sin(z)  */
        z = subww(z, divww(subww(x, cos_z), sin_z));
    }
    return z;
}
620 |
/* atan2 with a double y argument: promote and delegate to atan2ww(). */
ddouble atan2dw(const double a, const ddouble b)
{
    return atan2ww((ddouble){a, 0.}, b);
}
625 |
/* atan2 with a double x argument: promote and delegate to atan2ww(). */
ddouble atan2wd(const ddouble a, const double b)
{
    return atan2ww(a, (ddouble){b, 0.});
}
630 |
/* Compute atan(a) as atan2(a, 1). */
ddouble atanw(const ddouble a)
{
    return atan2ww(a, Q_ONE);
}
635 |
/* Compute acos(a) via acos(a) = atan2(sqrt(1 - a^2), a).
 * Returns NaN outside [-1, 1]. */
ddouble acosw(const ddouble a)
{
    ddouble abs_a = absw(a);
    if (greaterww(abs_a, Q_ONE))
        return nanw();
    if (isonew(abs_a))
        return (ispositivew(a)) ? Q_ZERO : Q_PI;    /* exact endpoints */

    return atan2ww(sqrtw(subdw(1.0, sqrw(a))), a);
}
646 |
/* Compute asin(a) via asin(a) = atan2(a, sqrt(1 - a^2)).
 * Returns NaN outside [-1, 1]. */
ddouble asinw(const ddouble a)
{
    ddouble abs_a = absw(a);
    if (greaterwd(abs_a, 1.0))
        return nanw();
    if (isonew(abs_a))
        return (ispositivew(a)) ? Q_PI_2 : negw(Q_PI_2);    /* exact endpoints */

    return atan2ww(a, sqrtw(subdw(1.0, sqrw(a))));
}
657 |
/* Compute asinh(a) = log(a + sqrt(a^2 + 1)).
 *
 * NOTE(review): for large negative a the sum a + sqrt(a^2 + 1) cancels,
 * losing precision; the usual remedy is asinh(-x) = -asinh(x).  Confirm
 * whether the current accuracy is acceptable. */
ddouble asinhw(const ddouble a)
{
    return logw(addww(a,sqrtw(addwd(sqrw(a),1.0))));
}
662 |
/* Compute acosh(a) = log(a + sqrt(a^2 - 1)); NaN for a < 1. */
ddouble acoshw(const ddouble a)
{
    if (lesswd(a, 1.0))
        return nanw();

    return logw(addww(a, sqrtw(subwd(sqrw(a), 1.0))));
}
670 |
/* Compute atanh(a) = log((1 + a) / (1 - a)) / 2.
 * Signed infinities at a = +-1, NaN for |a| > 1. */
ddouble atanhw(const ddouble a)
{
    if (equalwd(a, -1.0))
        return negw(infw());
    if (isonew(a))
        return infw();
    if (greaterwd(absw(a), 1.0))
        return nanw();

    return mul_pwr2(logw(divww(adddw(1.0, a) , subdw(1.0, a))), 0.5);
}
682 |
/* Compute a**b as exp(b * log(a)).
 * 0**0 is defined as 1; negative bases yield NaN via logw(). */
ddouble powww(const ddouble a, const ddouble b)
{
    if (iszerow(a) && iszerow(b))
        return Q_ONE;
    if (iszerow(a) && !iszerow(b))
        return Q_ZERO;

    return expw(mulww(b, logw(a)));
}
692 |
/* Compute a**b for a ddouble base and double exponent; same conventions
 * as powww(). */
ddouble powwd(const ddouble a, const double b)
{
    if (iszerow(a) && b == 0)
        return Q_ONE;
    if (iszerow(a) && b != 0)
        return Q_ZERO;

    return expw(muldw(b, logw(a)));
}
702 |
703 | ddouble powdw(const double a, const ddouble b)
704 | {
705 | if (a == 0 && iszerow(b))
706 | return Q_ONE;
707 | if (a == 0 && !iszerow(b))
708 | return Q_ZERO;
709 |
710 | return expw(mulwd(b, log(a)));
711 | }
712 |
/* Split a into integral and fractional parts, like C modf(): the integral
 * part (stored in *b) is truncated toward zero -- hence ceil for negative
 * a -- and the returned fraction carries the sign of a. */
ddouble modfww(const ddouble a, ddouble *b)
{
    if (isnegativew(a)) {
        *b = ceilw(a);
    } else {
        *b = floorw(a);
    }
    return subww(a, *b);
}
722 |
--------------------------------------------------------------------------------
/csrc/_dd_ufunc.c:
--------------------------------------------------------------------------------
1 | /* Python extension module for the ddouble data type.
2 | *
3 | * Code is adapted from tensorflow's bfloat16 extension type, found here:
4 | * `tensorflow/python/lib/core/bfloat16.cc` and licensed Apache 2.0.
5 | *
6 | * Copyright (C) 2021 Markus Wallerberger and others
7 | * SPDX-License-Identifier: MIT
8 | */
9 | #include
10 | #include
11 |
12 | #include
13 | #include
14 | #include
15 |
16 | #include "dd_arith.h"
17 |
18 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
19 | #include "numpy/ndarraytypes.h"
20 | #include "numpy/ufuncobject.h"
21 | #include "numpy/npy_3kcompat.h"
22 |
23 | #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_TYPE)
24 | static inline void _Py_SET_TYPE(PyObject *ob, PyTypeObject *type)
25 | { ob->ob_type = type; }
26 | #define Py_SET_TYPE(ob, type) _Py_SET_TYPE((PyObject*)(ob), type)
27 | #endif
28 |
29 | /**
30 | * Allows parameter to be marked unused
31 | */
32 | #define MARK_UNUSED(x) do { (void)(x); } while(false)
33 |
34 | #ifdef _MSC_VER
35 | #define alignof __alignof
36 | #endif
37 |
/* ------------------------ DDouble object ----------------------- */

/* Module-level state, filled in during module initialization. */
static PyObject *module = NULL;         /* this extension module */
static PyObject *numpy_module = NULL;   /* cached reference to numpy */
static int type_num = -1; //FIXME       /* numpy type number of ddouble */

static PyTypeObject *pyddouble_type = NULL;     /* the ddouble scalar type */
static PyObject *pyddouble_finfo = NULL;        /* cached finfo singleton */

/* Python object wrapping a single ddouble scalar value. */
typedef struct {
    PyObject_HEAD
    ddouble x;      /* the wrapped double-double value */
} PyDDouble;
51 |
/* True if object is a ddouble scalar (or instance of a subclass).
 * NOTE(review): PyObject_IsInstance can return -1 on error, which this
 * bool conversion treats as true -- confirm that is acceptable here. */
static bool PyDDouble_Check(PyObject* object)
{
    return PyObject_IsInstance(object, (PyObject *)pyddouble_type);
}
56 |
/* Wrap a ddouble value in a new PyDDouble object (new reference).
 * Returns NULL with an exception set if allocation fails. */
static PyObject *PyDDouble_Wrap(ddouble x)
{
    PyDDouble *obj = (PyDDouble *) pyddouble_type->tp_alloc(pyddouble_type, 0);
    if (obj != NULL)
        obj->x = x;
    return (PyObject *)obj;
}
64 |
/* Extract the ddouble value; caller must have verified PyDDouble_Check. */
static ddouble PyDDouble_Unwrap(PyObject *arg)
{
    return ((PyDDouble *)arg)->x;
}
69 |
/* Convert an arbitrary Python object to a ddouble scalar.
 *
 * Accepts ddouble scalars, Python floats and ints, numpy float32/float64
 * scalars, and zero-dimensional arrays (cast through numpy if needed).
 * On failure *out is set to NaN and a TypeError (or the numpy cast error)
 * is left pending.  Returns true iff no exception is pending.
 *
 * NOTE(review): PyLong_AsLong raises OverflowError for ints outside the
 * range of long, and the long -> double conversion drops bits beyond 53;
 * confirm large-integer handling is acceptable. */
static bool PyDDouble_Cast(PyObject *arg, ddouble *out)
{
    if (PyDDouble_Check(arg)) {
        *out = PyDDouble_Unwrap(arg);
    } else if (PyFloat_Check(arg)) {
        double val = PyFloat_AsDouble(arg);
        *out = (ddouble) {val, 0.0};
    } else if (PyLong_Check(arg)) {
        long val = PyLong_AsLong(arg);
        *out = (ddouble) {val, 0.0};
    } else if (PyArray_IsScalar(arg, Float)) {
        float val;
        PyArray_ScalarAsCtype(arg, &val);
        *out = (ddouble) {val, 0.0};
    } else if (PyArray_IsScalar(arg, Double)) {
        double val;
        PyArray_ScalarAsCtype(arg, &val);
        *out = (ddouble) {val, 0.0};
    } else if (PyArray_IsZeroDim(arg)) {
        PyArrayObject* arr = (PyArrayObject *)arg;
        if (PyArray_TYPE(arr) == type_num) {
            *out = *(ddouble *)PyArray_DATA(arr);
        } else {
            /* Cast creates a temporary array we own and must release. */
            arr = (PyArrayObject *)PyArray_Cast(arr, type_num);
            if (!PyErr_Occurred())
                *out = *(ddouble *)PyArray_DATA(arr);
            else
                *out = nanw();
            Py_XDECREF(arr);
        }
    } else {
        *out = nanw();
        PyErr_Format(PyExc_TypeError,
                     "Cannot cast instance of %s to ddouble scalar",
                     arg->ob_type->tp_name);
    }
    return !PyErr_Occurred();
}
108 |
109 | static PyObject* PyDDouble_New(PyTypeObject *type, PyObject *args, PyObject *kwds)
110 | {
111 | PyObject *arg = NULL;
112 | if (PyArg_ParseTuple(args, "O", &arg) < 0)
113 | return NULL;
114 |
115 | ddouble val;
116 | if (PyDDouble_Check(arg)) {
117 | Py_INCREF(arg);
118 | return arg;
119 | } else if (PyDDouble_Cast(arg, &val)) {
120 | return PyDDouble_Wrap(val);
121 | } else {
122 | PyErr_Format(PyExc_TypeError, "expected ddouble, got %s",
123 | arg->ob_type->tp_name);
124 | return NULL;
125 | }
126 | MARK_UNUSED(type);
127 | MARK_UNUSED(kwds);
128 | }
129 |
/* nb_float slot: convert to Python float, keeping only the high word. */
static PyObject* PyDDouble_Float(PyObject* self)
{
    ddouble x = PyDDouble_Unwrap(self);
    return PyFloat_FromDouble(x.hi);
}
135 |
136 | static PyObject* PyDDouble_Int(PyObject* self)
137 | {
138 | ddouble x = PyDDouble_Unwrap(self);
139 | return PyFloat_FromDouble((long) x.hi);
140 | }
141 |
/* Generate a unary number-protocol slot delegating to a ddouble function. */
#define PYWRAP_UNARY(name, inner)                                  \
    static PyObject* name(PyObject* _x)                            \
    {                                                              \
        ddouble r, x;                                              \
        x = PyDDouble_Unwrap(_x);                                  \
        r = inner(x);                                              \
        return PyDDouble_Wrap(r);                                  \
    }

/* Generate a binary slot; ndarray operands are deferred to numpy so that
 * scalar op array broadcasts instead of failing the scalar cast. */
#define PYWRAP_BINARY(name, inner, tp_inner_op)                    \
    static PyObject* name(PyObject* _x, PyObject* _y)              \
    {                                                              \
        ddouble r, x, y;                                           \
        if (PyArray_Check(_y))                                     \
            return PyArray_Type.tp_as_number->tp_inner_op(_x, _y); \
        if (PyDDouble_Cast(_x, &x) && PyDDouble_Cast(_y, &y)) {    \
            r = inner(x, y);                                       \
            return PyDDouble_Wrap(r);                              \
        }                                                          \
        return NULL;                                               \
    }

/* Generate an in-place slot; mutates self->x and returns self
 * (new reference), per the nb_inplace_* convention. */
#define PYWRAP_INPLACE(name, inner)                                \
    static PyObject* name(PyObject* _self, PyObject* _y)           \
    {                                                              \
        PyDDouble *self = (PyDDouble *)_self;                      \
        ddouble y;                                                 \
        if (PyDDouble_Cast(_y, &y)) {                              \
            self->x = inner(self->x, y);                           \
            Py_XINCREF(_self);                                     \
            return _self;                                          \
        } else {                                                   \
            return NULL;                                           \
        }                                                          \
    }
177 |
/* Unary arithmetic slots: +x, -x, abs(x). */
PYWRAP_UNARY(PyDDouble_Positive, posw)
PYWRAP_UNARY(PyDDouble_Negative, negw)
PYWRAP_UNARY(PyDDouble_Absolute, absw)

/* Binary arithmetic slots; arrays are deferred to numpy (see macro). */
PYWRAP_BINARY(PyDDouble_Add, addww, nb_add)
PYWRAP_BINARY(PyDDouble_Subtract, subww, nb_subtract)
PYWRAP_BINARY(PyDDouble_Multiply, mulww, nb_multiply)
PYWRAP_BINARY(PyDDouble_Divide, divww, nb_true_divide)

/* In-place variants (+=, -=, *=, /=). */
PYWRAP_INPLACE(PyDDouble_InPlaceAdd, addww)
PYWRAP_INPLACE(PyDDouble_InPlaceSubtract, subww)
PYWRAP_INPLACE(PyDDouble_InPlaceMultiply, mulww)
PYWRAP_INPLACE(PyDDouble_InPlaceDivide, divww)
191 |
/* nb_bool slot: truthiness.  Only the high word is tested -- for a
 * normalized ddouble, hi == 0 implies the value is zero (presumably all
 * stored values are normalized; verify against dd_arith). */
static int PyDDouble_Nonzero(PyObject* _x)
{
    ddouble x = PyDDouble_Unwrap(_x);
    return !(x.hi == 0);
}
197 |
198 | static PyObject* PyDDouble_RichCompare(PyObject* _x, PyObject* _y, int op)
199 | {
200 | ddouble x, y;
201 | if (!PyDDouble_Cast(_x, &x) || !PyDDouble_Cast(_y, &y))
202 | return PyGenericArrType_Type.tp_richcompare(_x, _y, op);
203 |
204 | bool result;
205 | switch (op) {
206 | case Py_LT:
207 | result = lessww(x, y);
208 | break;
209 | case Py_LE:
210 | result = lessequalww(x, y);
211 | break;
212 | case Py_EQ:
213 | result = equalww(x, y);
214 | break;
215 | case Py_NE:
216 | result = notequalww(x, y);
217 | break;
218 | case Py_GT:
219 | result = greaterww(x, y);
220 | break;
221 | case Py_GE:
222 | result = greaterequalww(x, y);
223 | break;
224 | default:
225 | PyErr_SetString(PyExc_RuntimeError, "Invalid op type");
226 | return NULL;
227 | }
228 | return PyBool_FromLong(result);
229 | }
230 |
/* tp_hash slot: hash built from the high word's mantissa and exponent.
 *
 * NOTE(review): this is not consistent with Python's unified numeric
 * hashing, so hash(ddouble(v)) != hash(float(v)) in general even when the
 * values compare equal -- mixed-type dict/set use would break.  Also a
 * return value of -1 signals an error to CPython; confirm it cannot occur
 * for valid inputs. */
static Py_hash_t PyDDouble_Hash(PyObject *_x)
{
    ddouble x = PyDDouble_Unwrap(_x);

    int exp;
    double mantissa;
    mantissa = frexp(x.hi, &exp);   /* mantissa in [0.5, 1), plus exponent */
    return (Py_hash_t)(LONG_MAX * mantissa) + exp;
}
240 |
/* tp_str slot: display only the high word to 16 significant digits. */
static PyObject *PyDDouble_Str(PyObject *self)
{
    char out[200];
    ddouble x = PyDDouble_Unwrap(self);
    snprintf(out, 200, "%.16g", x.hi);
    return PyUnicode_FromString(out);
}
248 |
/* tp_repr slot: show both components.  (A negative low word renders as
 * e.g. "ddouble(1+-1e-17)" -- cosmetic only.) */
static PyObject *PyDDouble_Repr(PyObject *self)
{
    char out[200];
    ddouble x = PyDDouble_Unwrap(self);
    snprintf(out, 200, "ddouble(%.16g+%.16g)", x.hi, x.lo);
    return PyUnicode_FromString(out);
}
256 |
/* __finfo__ classmethod: return the cached finfo singleton (new
 * reference).  Presumably only called after make_finfo() has populated
 * pyddouble_finfo -- verify initialization order.  The MARK_UNUSED lines
 * after return are unreachable; they only silence unused-parameter
 * warnings. */
static PyObject *PyDDoubleGetFinfo(PyObject *self, PyObject *_dummy)
{
    Py_INCREF(pyddouble_finfo);
    return pyddouble_finfo;
    MARK_UNUSED(self);
    MARK_UNUSED(_dummy);
}
264 |
/* Create the ddouble scalar type (deriving from the numpy floating-point
 * scalar base) and expose it on the module.  Returns 0 on success, -1 on
 * failure.  The statics outlive the call, as required by the type system. */
static int make_ddouble_type()
{
    static PyNumberMethods ddouble_as_number = {
        .nb_add = PyDDouble_Add,
        .nb_subtract = PyDDouble_Subtract,
        .nb_multiply = PyDDouble_Multiply,
        .nb_true_divide = PyDDouble_Divide,
        .nb_inplace_add = PyDDouble_InPlaceAdd,
        .nb_inplace_subtract = PyDDouble_InPlaceSubtract,
        .nb_inplace_multiply = PyDDouble_InPlaceMultiply,
        .nb_inplace_true_divide = PyDDouble_InPlaceDivide,
        .nb_negative = PyDDouble_Negative,
        .nb_positive = PyDDouble_Positive,
        .nb_absolute = PyDDouble_Absolute,
        .nb_bool = PyDDouble_Nonzero,
        .nb_int = PyDDouble_Int,
        .nb_float = PyDDouble_Float,
        };
    static PyMethodDef ddouble_methods[] = {
        {"__finfo__", PyDDoubleGetFinfo, METH_NOARGS | METH_CLASS,
         "floating point information for type"},
        {NULL, NULL, 0, NULL}
        };
    static PyTypeObject ddouble_type = {
        PyVarObject_HEAD_INIT(NULL, 0)
        .tp_name = "ddouble",
        .tp_basicsize = sizeof(PyDDouble),
        .tp_repr = PyDDouble_Repr,
        .tp_as_number = &ddouble_as_number,
        .tp_hash = PyDDouble_Hash,
        .tp_str = PyDDouble_Str,
        .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
        .tp_doc = "double-double floating point type",
        .tp_richcompare = PyDDouble_RichCompare,
        .tp_new = PyDDouble_New,
        .tp_methods = ddouble_methods
        };

    /* Inherit from numpy's floating scalar so isinstance(x, np.floating)
     * holds and numpy applies its scalar conversion machinery. */
    ddouble_type.tp_base = &PyFloatingArrType_Type;
    if (PyType_Ready(&ddouble_type) < 0)
        return -1;

    /* PyModule_AddObject steals the reference on success. */
    pyddouble_type = &ddouble_type;
    return PyModule_AddObject(module, "ddouble", (PyObject *)pyddouble_type);
}
310 |
/* --------------------- Ddouble Finfo object -------------------- */

/* Mirror of numpy.finfo for the ddouble dtype; all fields are exposed
 * read-only via finfo_members below. */
typedef struct {
    PyObject_HEAD
    PyObject *dtype;     // which dtype
    int bits;            // number of bits
    PyObject *max;       // largest positive number
    PyObject *min;       // largest negative number
    PyObject *eps;       // machine epsilon (spacing)
    int nexp;            // number of exponent bits
    int nmant;           // number of mantissa bits
    PyObject *machar;    // machar object (unused)
} PyDDoubleFInfo;

/* Set by make_finfo(). */
static PyTypeObject *PyDDoubleFinfoType;
326 |
/* Allocate and populate the finfo singleton (new reference), or NULL on
 * allocation failure.  The early Py_INCREF(Py_None) pairs with storing
 * Py_None in self->machar below.
 * NOTE(review): the PyDDouble_Wrap / PyArray_DescrFromType results are not
 * NULL-checked -- confirm failure here is acceptable as a hard error. */
static PyObject *PPyDDoubleFInfo_Make()
{
    PyDDoubleFInfo *self =
        (PyDDoubleFInfo *) PyDDoubleFinfoType->tp_alloc(PyDDoubleFinfoType, 0);
    if (self == NULL)
        return NULL;

    Py_INCREF(Py_None);
    self->dtype = (PyObject *)PyArray_DescrFromType(type_num);
    self->bits = CHAR_BIT * sizeof(ddouble);
    self->max = PyDDouble_Wrap(Q_MAX);
    self->min = PyDDouble_Wrap(Q_MIN);
    self->eps = PyDDouble_Wrap(Q_EPS);
    self->nexp = 11;        /* exponent range of the underlying double */
    self->nmant = 104;      /* 2 x 52 mantissa bits */
    self->machar = Py_None;
    return (PyObject *)self;
}
345 |
/* Create the finfo type and its singleton instance.  Returns 0 on
 * success, -1 on failure. */
static int make_finfo()
{
    static PyMemberDef finfo_members[] = {
        {"dtype", T_OBJECT_EX, offsetof(PyDDoubleFInfo, dtype), READONLY,
         "underlying dtype object"},
        {"bits", T_INT, offsetof(PyDDoubleFInfo, bits), READONLY,
         "storage size of object in bits"},
        {"max", T_OBJECT_EX, offsetof(PyDDoubleFInfo, max), READONLY,
         "largest positive number"},
        {"min", T_OBJECT_EX, offsetof(PyDDoubleFInfo, min), READONLY,
         "largest negative number"},
        {"eps", T_OBJECT_EX, offsetof(PyDDoubleFInfo, eps), READONLY,
         "machine epsilon"},
        {"nexp", T_INT, offsetof(PyDDoubleFInfo, nexp), READONLY,
         "number of bits in exponent"},
        {"nmant", T_INT, offsetof(PyDDoubleFInfo, nmant), READONLY,
         "number of bits in mantissa"},
        {NULL, 0, 0, 0, NULL}
        };
    static PyTypeObject finfo_type = {
        PyVarObject_HEAD_INIT(NULL, 0)
        .tp_name = "ddouble_finfo",
        .tp_basicsize = sizeof(PyDDoubleFInfo),
        .tp_members = finfo_members,
        .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
        .tp_doc = "finfo type"
        };

    if (PyType_Ready(&finfo_type) < 0)
        return -1;

    PyDDoubleFinfoType = &finfo_type;
    pyddouble_finfo = PPyDDoubleFInfo_Make();
    if (pyddouble_finfo == NULL)
        return -1;

    return 0;
}
386 |
387 | /* ------------------------------ Descriptor ----------------------------- */
388 |
/* arrfuncs getitem: box one array element as a ddouble scalar object. */
static PyObject *NPyDDouble_GetItem(void *data, void *arr)
{
    ddouble x = *(ddouble *)data;
    return PyDDouble_Wrap(x);
    MARK_UNUSED(arr);
}
395 |
/* arrfuncs setitem: store a Python object into one array element.
 * Returns 0 on success, -1 (with the cast's exception pending) on error. */
static int NPyDDouble_SetItem(PyObject *item, void *data, void *arr)
{
    ddouble x;
    if (!PyDDouble_Cast(item, &x))
        return -1;
    *(ddouble *)data = x;
    return 0;
    MARK_UNUSED(arr);
}
405 |
/* arrfuncs compare (used by sort): ascending order, with NaNs pushed
 * toward the end when they appear as `b`.
 * NOTE(review): the handling is asymmetric -- a NaN `a` against a non-NaN
 * `b` falls through to "equal" (0) rather than 1; confirm the resulting
 * sort order is acceptable. */
static int NPyDDouble_Compare(const void *_a, const void *_b, void *arr)
{
    ddouble a = *(const ddouble *)_a;
    ddouble b = *(const ddouble *)_b;

    if (lessww(a, b))
        return -1;
    if (greaterww(a, b))
        return 1;
    if (isnanw(b))
        return 1;
    return 0;
    MARK_UNUSED(arr);
}
420 |
/* arrfuncs copyswapn: copy ii elements from _s to _d with strides ss/sd.
 *
 * NOTE(review): the `swap` branch *exchanges* source and destination
 * elements instead of byte-swapping during the copy, which is not the
 * documented copyswapn contract.  Since the dtype registers byteorder '='
 * (native only), a byte-swap should never be requested -- but confirm
 * this branch is effectively dead rather than relied upon. */
static void NPyDDouble_CopySwapN(void *_d, npy_intp sd, void *_s, npy_intp ss,
                                 npy_intp ii, int swap, void* arr)
{
    if (_s == NULL)
        return;
    char *_cd = (char *)_d, *_cs = (char *)_s;
    if (swap) {
        for (npy_intp i = 0; i != ii; ++i, _cd += sd, _cs += ss) {
            ddouble *s = (ddouble *)_cs, *d = (ddouble *)_cd, tmp;
            tmp = *d;
            *d = *s;
            *s = tmp;
        }
    } else {
        for (npy_intp i = 0; i != ii; ++i, _cd += sd, _cs += ss) {
            ddouble *s = (ddouble *)_cs, *d = (ddouble *)_cd;
            *d = *s;
        }
    }
    MARK_UNUSED(arr);
}
442 |
/* arrfuncs copyswap: single-element variant of copyswapn.
 * NOTE(review): same caveat as copyswapn -- the `swap` branch exchanges
 * the two elements rather than byte-swapping; should be unreachable for a
 * native-byteorder ('=') dtype, but verify. */
static void NPyDDouble_CopySwap(void *_d, void *_s, int swap, void* arr)
{
    ddouble *s = _s, *d = _d, tmp;
    if (_s == NULL)
        return;
    if (swap) {
        tmp = *d;
        *d = *s;
        *s = tmp;
    } else {
        *d = *s;
    }
    MARK_UNUSED(arr);
}
457 |
/* arrfuncs nonzero: truthiness of one element. */
static npy_bool NPyDDouble_NonZero(void *data, void *arr)
{
    ddouble x = *(ddouble *)data;
    return !iszerow(x);
    MARK_UNUSED(arr);
}
464 |
/* arrfuncs fill: extend buffer[0], buffer[1] into an arithmetic
 * progression of length ii (used by numpy.arange-style fills).
 * Requires at least two seed elements; returns -1 otherwise. */
static int NPyDDouble_Fill(void *_buffer, npy_intp ii, void *arr)
{
    // Fill with linear array
    ddouble *buffer = (ddouble *)_buffer;
    if (ii < 2)
        return -1;

    ddouble curr = buffer[1];
    ddouble step = subww(curr, buffer[0]);
    for (npy_intp i = 2; i != ii; ++i) {
        curr = addww(curr, step);
        buffer[i] = curr;
    }
    return 0;
    MARK_UNUSED(arr);
}
481 |
/* arrfuncs fillwithscalar: broadcast one value over a contiguous buffer. */
static int NPyDDouble_FillWithScalar(void *_buffer, npy_intp ii, void *_value,
                                     void *arr)
{
    ddouble *buffer = (ddouble *)_buffer;
    ddouble value = *(ddouble *)_value;
    for (npy_intp i = 0; i < ii; ++i)
        buffer[i] = value;
    return 0;
    MARK_UNUSED(arr);
}
492 |
/* arrfuncs dotfunc: strided inner product of two ddouble vectors
 * (plain sequential accumulation; strides is1/is2 are in bytes). */
static void NPyDDouble_DotFunc(void *_in1, npy_intp is1, void *_in2,
                               npy_intp is2, void *_out, npy_intp ii, void *arr)
{
    ddouble out = Q_ZERO;
    char *_cin1 = (char *)_in1, *_cin2 = (char *)_in2;
    for (npy_intp i = 0; i < ii; ++i, _cin1 += is1, _cin2 += is2) {
        ddouble in1 = *(ddouble *)_cin1, in2 = *(ddouble *)_cin2;
        out = addww(out, mulww(in1, in2));
    }
    *(ddouble *)_out = out;
    MARK_UNUSED(arr);
}
505 |
506 | static int NPyDDouble_ArgMax(void *_data, npy_intp n, npy_intp *max_ind,
507 | void *arr)
508 | {
509 | ddouble *data = (ddouble *)_data;
510 | ddouble max_val = negw(infw());
511 | for (npy_intp i = 0; i < n; ++i) {
512 | if (greaterww(data[i], max_val)) {
513 | max_val = data[i];
514 | *max_ind = i;
515 | }
516 | }
517 | return 0;
518 | MARK_UNUSED(arr);
519 | }
520 |
521 | static int NPyDDouble_ArgMin(void *_data, npy_intp n, npy_intp *min_ind,
522 | void *arr)
523 | {
524 | ddouble *data = (ddouble *)_data;
525 | ddouble min_val = infw();
526 | for (npy_intp i = 0; i < n; ++i) {
527 | if (lessww(data[i], min_val)) {
528 | min_val = data[i];
529 | *min_ind = i;
530 | }
531 | }
532 | return 0;
533 | MARK_UNUSED(arr);
534 | }
535 |
536 | /* This is necessary in order to ensure both 1.0 and 2.0 compatibility.
537 | * https://numpy.org/doc/stable/reference/c-api/array.html#c.PyArray_RegisterDataType
538 | */
539 | #if NPY_ABI_VERSION < 0x02000000
540 | #define PyArray_DescrProto PyArray_Descr
541 | #endif
542 |
/* Register ddouble as a numpy dtype.  Returns the assigned numpy type
 * number (also stored in the module-level type_num), or a negative value
 * on failure. */
static int make_dtype()
{
    /* Check if another module has registered a ddouble type.
     *
     * FIXME: this check is removed, let's see if it is missed ...
     */
    //type_num = PyArray_TypeNumFromName("ddouble");
    //if (type_num != NPY_NOTYPE) {
    //    return type_num;
    //}

    /* Must be static: numpy keeps pointers to both structs after
     * registration. */
    static PyArray_ArrFuncs ddouble_arrfuncs;

    static PyArray_DescrProto ddouble_dtype = {
        PyObject_HEAD_INIT(NULL)

        /* We must register ddouble with a kind other than "f", because numpy
         * considers two types with the same kind and size to be equal, but
         * float128 != ddouble.  The downside of this is that NumPy scalar
         * promotion does not work with ddoubles.
         */
        .kind = 'V',
        .type = 'E',
        .byteorder = '=',

        /* NPY_USE_GETITEM is not needed, since we inherit from numpy scalar,
         * which according to the docs means that "standard conversion" is
         * used.  However, we still need to define and register getitem()
         * below, otherwise PyArray_RegisterDataType complains.
         */
        .flags = 0,
        .elsize = sizeof(ddouble),
        .alignment = alignof(ddouble),
        .hash = -1
        };

    ddouble_dtype.typeobj = pyddouble_type;
    ddouble_dtype.f = &ddouble_arrfuncs;
    Py_SET_TYPE(&ddouble_dtype, &PyArrayDescr_Type);

    /* Start from numpy's defaults, then hook in our element operations. */
    PyArray_InitArrFuncs(&ddouble_arrfuncs);
    ddouble_arrfuncs.getitem = NPyDDouble_GetItem;
    ddouble_arrfuncs.setitem = NPyDDouble_SetItem;
    ddouble_arrfuncs.compare = NPyDDouble_Compare;
    ddouble_arrfuncs.copyswapn = NPyDDouble_CopySwapN;
    ddouble_arrfuncs.copyswap = NPyDDouble_CopySwap;
    ddouble_arrfuncs.nonzero = NPyDDouble_NonZero;
    ddouble_arrfuncs.fill = NPyDDouble_Fill;
    ddouble_arrfuncs.fillwithscalar = NPyDDouble_FillWithScalar;
    ddouble_arrfuncs.dotfunc = NPyDDouble_DotFunc;
    ddouble_arrfuncs.argmax = NPyDDouble_ArgMax;
    ddouble_arrfuncs.argmin = NPyDDouble_ArgMin;

    type_num = PyArray_RegisterDataType(&ddouble_dtype);
    return type_num;
}
599 |
600 | /* ------------------------------- Casts ------------------------------ */
601 |
/* Generate a NumPy cast loop `func` converting a contiguous buffer of
 * `from_type` into ddouble.  The source value goes into the hi part with a
 * zero lo part, which is exact whenever `from_type` fits into a double
 * (see the "loss-less" note at the instantiation site below).
 * `_arr_from`/`_arr_to` are part of the cast-function signature but unused.
 */
#define NPY_CAST_FROM(func, from_type)                                 \
    static void func(void *_from, void *_to, npy_intp n,               \
                     void *_arr_from, void *_arr_to)                   \
    {                                                                  \
        ddouble *to = (ddouble *)_to;                                  \
        const from_type *from = (const from_type *)_from;              \
        for (npy_intp i = 0; i < n; ++i)                               \
            to[i] = (ddouble) { from[i], 0.0 };                        \
        MARK_UNUSED(_arr_from);                                        \
        MARK_UNUSED(_arr_to);                                          \
    }
613 |
/* Generate a cast loop for 64-bit integers.  A single double carries only
 * 53 mantissa bits, so the conversion is split: hi takes the (rounded)
 * double value and lo picks up the integer remainder `from[i] - hi`,
 * making the conversion exact for every 64-bit integer.
 */
#define NPY_CAST_FROM_I64(func, from_type)                             \
    static void func(void *_from, void *_to, npy_intp n,               \
                     void *_arr_from, void *_arr_to)                   \
    {                                                                  \
        ddouble *to = (ddouble *)_to;                                  \
        const from_type *from = (const from_type *)_from;              \
        for (npy_intp i = 0; i < n; ++i) {                             \
            double hi = from[i];                                       \
            double lo = from[i] - (from_type) hi;                      \
            to[i] = (ddouble){hi, lo};                                 \
        }                                                              \
        MARK_UNUSED(_arr_from);                                        \
        MARK_UNUSED(_arr_to);                                          \
    }
628 |
/* Generate a cast loop from ddouble to `to_type` by converting only the
 * hi part; the lo correction is dropped, so this direction is lossy
 * (see the "lossy" note at the instantiation site below).
 */
#define NPY_CAST_TO(func, to_type)                                     \
    static void func(void *_from, void *_to, npy_intp n,               \
                     void *_arr_from, void *_arr_to)                   \
    {                                                                  \
        to_type *to = (to_type *)_to;                                  \
        const ddouble *from = (const ddouble *)_from;                  \
        for (npy_intp i = 0; i < n; ++i)                               \
            to[i] = (to_type) from[i].hi;                              \
        MARK_UNUSED(_arr_from);                                        \
        MARK_UNUSED(_arr_to);                                          \
    }
640 |
/* Generate a cast loop from ddouble to a 64-bit integer by converting hi
 * and lo separately and summing the integer results.
 *
 * NOTE(review): this is not exact truncation for every value -- e.g.
 * hi = 3.0, lo = -0.25 (value 2.75) yields 3 + 0 = 3, where truncation of
 * the exact sum would give 2.  This matches the existing "can be made
 * more accurate" remark at the instantiation site.
 */
#define NPY_CAST_TO_I64(func, to_type)                                 \
    static void func(void *_from, void *_to, npy_intp n,               \
                     void *_arr_from, void *_arr_to)                   \
    {                                                                  \
        to_type *to = (to_type *)_to;                                  \
        const ddouble *from = (const ddouble *)_from;                  \
        for (npy_intp i = 0; i < n; ++i)                               \
            to[i] = (to_type) from[i].hi + (to_type) from[i].lo;       \
        MARK_UNUSED(_arr_from);                                        \
        MARK_UNUSED(_arr_to);                                          \
    }
652 |
// These casts are all loss-less: each value fits exactly into the hi double
NPY_CAST_FROM(from_double, double)
NPY_CAST_FROM(from_float, float)
NPY_CAST_FROM(from_bool, bool)
NPY_CAST_FROM(from_int8, int8_t)
NPY_CAST_FROM(from_int16, int16_t)
NPY_CAST_FROM(from_int32, int32_t)
NPY_CAST_FROM(from_uint8, uint8_t)
NPY_CAST_FROM(from_uint16, uint16_t)
NPY_CAST_FROM(from_uint32, uint32_t)

// These casts are also lossless, because a ddouble carries 2*53 = 106 bits
// of mantissa, which is more than enough for any 64-bit integer
NPY_CAST_FROM_I64(from_int64, int64_t)
NPY_CAST_FROM_I64(from_uint64, uint64_t)

// These casts are all lossy: only the hi part is converted
NPY_CAST_TO(to_double, double)
NPY_CAST_TO(to_float, float)
NPY_CAST_TO(to_bool, bool)
NPY_CAST_TO(to_int8, int8_t)
NPY_CAST_TO(to_int16, int16_t)
NPY_CAST_TO(to_int32, int32_t)
NPY_CAST_TO(to_uint8, uint8_t)
NPY_CAST_TO(to_uint16, uint16_t)
NPY_CAST_TO(to_uint32, uint32_t)

// These casts can be made more accurate: hi and lo are truncated
// separately, which may differ from truncating their exact sum
NPY_CAST_TO_I64(to_int64, int64_t)
NPY_CAST_TO_I64(to_uint64, uint64_t)
682 |
683 |
684 | static bool register_cast(int other_type, PyArray_VectorUnaryFunc from_other,
685 | PyArray_VectorUnaryFunc to_other)
686 | {
687 | PyArray_Descr *other_descr = NULL, *ddouble_descr = NULL;
688 | int ret;
689 |
690 | other_descr = PyArray_DescrFromType(other_type);
691 | if (other_descr == NULL) goto error;
692 |
693 | ddouble_descr = PyArray_DescrFromType(type_num);
694 | if (ddouble_descr == NULL) goto error;
695 |
696 | ret = PyArray_RegisterCastFunc(other_descr, type_num, from_other);
697 | if (ret < 0) goto error;
698 |
699 | // NPY_NOSCALAR apparently implies that casting is safe?
700 | ret = PyArray_RegisterCanCast(other_descr, type_num, NPY_NOSCALAR);
701 | if (ret < 0) goto error;
702 |
703 | ret = PyArray_RegisterCastFunc(ddouble_descr, other_type, to_other);
704 | if (ret < 0) goto error;
705 | return true;
706 |
707 | error:
708 | return false;
709 | }
710 |
711 | static int register_casts()
712 | {
713 | bool ok = register_cast(NPY_DOUBLE, from_double, to_double)
714 | && register_cast(NPY_FLOAT, from_float, to_float)
715 | && register_cast(NPY_BOOL, from_bool, to_bool)
716 | && register_cast(NPY_INT8, from_int8, to_int8)
717 | && register_cast(NPY_INT16, from_int16, to_int16)
718 | && register_cast(NPY_INT32, from_int32, to_int32)
719 | && register_cast(NPY_INT64, from_int64, to_int64)
720 | && register_cast(NPY_UINT8, from_uint8, to_uint8)
721 | && register_cast(NPY_UINT16, from_uint16, to_uint16)
722 | && register_cast(NPY_UINT32, from_uint32, to_uint32)
723 | && register_cast(NPY_UINT64, from_uint64, to_uint64);
724 | return ok ? 0 : -1;
725 | }
726 |
727 | /* ------------------------------- Ufuncs ----------------------------- */
728 |
/* Generate a NumPy ufunc inner loop `func_name` applying the scalar
 * function `inner_func` elementwise: args[0] is the input, args[1] the
 * output.  steps[] holds byte strides; they are divided by the element
 * size to index typed pointers (assumes each stride is a multiple of the
 * element size -- TODO confirm NumPy guarantees this for registered
 * user-type loops).
 */
#define ULOOP_UNARY(func_name, inner_func, type_out, type_in)          \
    static void func_name(char **args, const npy_intp *dimensions,     \
                          const npy_intp *steps, void *data)           \
    {                                                                  \
        const npy_intp n = dimensions[0];                              \
        const npy_intp is = steps[0] / sizeof(type_in),                \
                       os = steps[1] / sizeof(type_out);               \
        const type_in *in = (const type_in *)args[0];                  \
        type_out *out = (type_out *)args[1];                           \
                                                                       \
        for (npy_intp i = 0; i < n; ++i)                               \
            out[i * os] = inner_func(in[i * is]);                      \
        MARK_UNUSED(data);                                             \
    }
743 |
/* Generate a NumPy ufunc inner loop for a binary scalar function:
 * args[0] and args[1] are the two inputs, args[2] the output.  As in
 * ULOOP_UNARY, byte strides from steps[] are converted to element
 * strides for the typed pointers.
 */
#define ULOOP_BINARY(func_name, inner_func, type_out, type_a, type_b)  \
    static void func_name(char **args, const npy_intp *dimensions,     \
                          const npy_intp* steps, void *data)           \
    {                                                                  \
        const npy_intp n = dimensions[0];                              \
        const npy_intp as = steps[0] / sizeof(type_a),                 \
                       bs = steps[1] / sizeof(type_b),                 \
                       os = steps[2] / sizeof(type_out);               \
        const type_a *a = (const type_a *)args[0];                     \
        const type_b *b = (const type_b *)args[1];                     \
        type_out *out = (type_out *)args[2];                           \
                                                                       \
        for (npy_intp i = 0; i < n; ++i) {                             \
            out[i * os] = inner_func(a[i * as], b[i * bs]);            \
        }                                                              \
    }
761 |
/* Generate a ufunc inner loop for modf-like functions with one input and
 * two outputs: args[0] is the input, args[1] the primary output, and
 * args[2] the secondary output which `inner_func` fills through a pointer.
 *
 * Fix: the output strides were swapped -- `out` points into args[1] but
 * used steps[2], while `b` points into args[2] but used steps[1].  This
 * was masked because both outputs have the same element size in the only
 * instantiation (u_modfww), but would corrupt results for outputs with
 * distinct strides or element sizes.
 */
#define ULOOP_MODF(func_name, inner_func, type_out, type_a, type_b)    \
    static void func_name(char **args, const npy_intp *dimensions,     \
                          const npy_intp* steps, void *data)           \
    {                                                                  \
        const npy_intp n = dimensions[0];                              \
        const npy_intp as = steps[0] / sizeof(type_a),                 \
                       os = steps[1] / sizeof(type_out),               \
                       bs = steps[2] / sizeof(type_b);                 \
        const type_a *a = (const type_a *)args[0];                     \
        type_out *out = (type_out *)args[1];                           \
        type_b *b = (type_b *)args[2];                                 \
                                                                       \
        for (npy_intp i = 0; i < n; ++i) {                             \
            out[i * os] = inner_func(a[i * as], &b[i * bs]);           \
        }                                                              \
        MARK_UNUSED(data);                                             \
    }
779 |
780 | ULOOP_BINARY(u_addwd, addwd, ddouble, ddouble, double)
781 | ULOOP_BINARY(u_subwd, subwd, ddouble, ddouble, double)
782 | ULOOP_BINARY(u_mulwd, mulwd, ddouble, ddouble, double)
783 | ULOOP_BINARY(u_divwd, divwd, ddouble, ddouble, double)
784 | ULOOP_BINARY(u_adddw, adddw, ddouble, double, ddouble)
785 | ULOOP_BINARY(u_subdw, subdw, ddouble, double, ddouble)
786 | ULOOP_BINARY(u_muldw, muldw, ddouble, double, ddouble)
787 | ULOOP_BINARY(u_divdw, divdw, ddouble, double, ddouble)
788 | ULOOP_BINARY(u_addww, addww, ddouble, ddouble, ddouble)
789 | ULOOP_BINARY(u_subww, subww, ddouble, ddouble, ddouble)
790 | ULOOP_BINARY(u_mulww, mulww, ddouble, ddouble, ddouble)
791 | ULOOP_BINARY(u_divww, divww, ddouble, ddouble, ddouble)
792 | ULOOP_BINARY(u_copysignww, copysignww, ddouble, ddouble, ddouble)
793 | ULOOP_BINARY(u_copysignwd, copysignwd, ddouble, ddouble, double)
794 | ULOOP_BINARY(u_copysigndw, copysigndw, ddouble, double, ddouble)
795 | ULOOP_BINARY(u_equalww, equalww, bool, ddouble, ddouble)
796 | ULOOP_BINARY(u_notequalww, notequalww, bool, ddouble, ddouble)
797 | ULOOP_BINARY(u_greaterww, greaterww, bool, ddouble, ddouble)
798 | ULOOP_BINARY(u_lessww, lessww, bool, ddouble, ddouble)
799 | ULOOP_BINARY(u_greaterequalww, greaterww, bool, ddouble, ddouble)
800 | ULOOP_BINARY(u_lessequalww, lessww, bool, ddouble, ddouble)
801 | ULOOP_BINARY(u_equalwd, equalwd, bool, ddouble, double)
802 | ULOOP_BINARY(u_notequalwd, notequalwd, bool, ddouble, double)
803 | ULOOP_BINARY(u_greaterwd, greaterwd, bool, ddouble, double)
804 | ULOOP_BINARY(u_lesswd, lesswd, bool, ddouble, double)
805 | ULOOP_BINARY(u_greaterequalwd, greaterequalwd, bool, ddouble, double)
806 | ULOOP_BINARY(u_lessequalwd, lessequalwd, bool, ddouble, double)
807 | ULOOP_BINARY(u_equaldw, equaldw, bool, double, ddouble)
808 | ULOOP_BINARY(u_notequaldw, notequaldw, bool, double, ddouble)
809 | ULOOP_BINARY(u_greaterdw, greaterdw, bool, double, ddouble)
810 | ULOOP_BINARY(u_lessdw, lessdw, bool, double, ddouble)
811 | ULOOP_BINARY(u_greaterequaldw, greaterequaldw, bool, double, ddouble)
812 | ULOOP_BINARY(u_lessequaldw, lessequaldw, bool, double, ddouble)
813 | ULOOP_BINARY(u_fminww, fminww, ddouble, ddouble, ddouble)
814 | ULOOP_BINARY(u_fmaxww, fmaxww, ddouble, ddouble, ddouble)
815 | ULOOP_BINARY(u_fminwd, fminwd, ddouble, ddouble, double)
816 | ULOOP_BINARY(u_fmaxwd, fmaxwd, ddouble, ddouble, double)
817 | ULOOP_BINARY(u_fmindw, fmindw, ddouble, double, ddouble)
818 | ULOOP_BINARY(u_fmaxdw, fmaxdw, ddouble, double, ddouble)
819 | ULOOP_BINARY(u_atan2wd, atan2wd, ddouble, ddouble, double)
820 | ULOOP_BINARY(u_atan2dw, atan2dw, ddouble, double, ddouble)
821 | ULOOP_BINARY(u_atan2ww, atan2ww, ddouble, ddouble, ddouble)
822 | ULOOP_BINARY(u_powwd, powwd, ddouble, ddouble, double)
823 | ULOOP_BINARY(u_powdw, powdw, ddouble, double, ddouble)
824 | ULOOP_BINARY(u_powww, powww, ddouble, ddouble, ddouble)
825 | ULOOP_BINARY(u_hypotww, hypotww, ddouble, ddouble, ddouble)
826 | ULOOP_BINARY(u_hypotdw, hypotdw, ddouble, double, ddouble)
827 | ULOOP_BINARY(u_hypotwd, hypotwd, ddouble, ddouble, double)
828 | ULOOP_BINARY(u_ldexpwi, ldexpwi, ddouble, ddouble, int)
829 | ULOOP_MODF(u_modfww, modfww, ddouble, ddouble, ddouble)
830 | ULOOP_UNARY(u_signbitw, signbitw, bool, ddouble)
831 | ULOOP_UNARY(u_signw, signw, ddouble, ddouble)
832 | ULOOP_UNARY(u_isfinitew, isfinitew, bool, ddouble)
833 | ULOOP_UNARY(u_isinfw, isinfw, bool, ddouble)
834 | ULOOP_UNARY(u_isnanw, isnanw, bool, ddouble)
835 | ULOOP_UNARY(u_negw, negw, ddouble, ddouble)
836 | ULOOP_UNARY(u_posw, posw, ddouble, ddouble)
837 | ULOOP_UNARY(u_absw, absw, ddouble, ddouble)
838 | ULOOP_UNARY(u_reciprocalw, reciprocalw, ddouble, ddouble)
839 | ULOOP_UNARY(u_sqrw, sqrw, ddouble, ddouble)
840 | ULOOP_UNARY(u_roundw, roundw, ddouble, ddouble)
841 | ULOOP_UNARY(u_floorw, floorw, ddouble, ddouble)
842 | ULOOP_UNARY(u_ceilw, ceilw, ddouble, ddouble)
843 | ULOOP_UNARY(u_sqrtw, sqrtw, ddouble, ddouble)
844 | ULOOP_UNARY(u_expw, expw, ddouble, ddouble)
845 | ULOOP_UNARY(u_expm1w, expm1w, ddouble, ddouble)
846 | ULOOP_UNARY(u_logw, logw, ddouble, ddouble)
847 | ULOOP_UNARY(u_sinw, sinw, ddouble, ddouble)
848 | ULOOP_UNARY(u_cosw, cosw, ddouble, ddouble)
849 | ULOOP_UNARY(u_tanw, tanw, ddouble, ddouble)
850 | ULOOP_UNARY(u_atanw, atanw, ddouble, ddouble)
851 | ULOOP_UNARY(u_acosw, acosw, ddouble, ddouble)
852 | ULOOP_UNARY(u_asinw, asinw, ddouble, ddouble)
853 | ULOOP_UNARY(u_atanhw, atanhw, ddouble, ddouble)
854 | ULOOP_UNARY(u_acoshw, acoshw, ddouble, ddouble)
855 | ULOOP_UNARY(u_asinhw, asinhw, ddouble, ddouble)
856 | ULOOP_UNARY(u_sinhw, sinhw, ddouble, ddouble)
857 | ULOOP_UNARY(u_coshw, coshw, ddouble, ddouble)
858 | ULOOP_UNARY(u_tanhw, tanhw, ddouble, ddouble)
859 |
860 | static bool register_binary(PyUFuncGenericFunction dq_func,
861 | PyUFuncGenericFunction qd_func, PyUFuncGenericFunction qq_func,
862 | int ret_dtype, const char *name)
863 | {
864 | PyUFuncObject *ufunc;
865 | int *arg_types = NULL, retcode = 0;
866 |
867 | ufunc = (PyUFuncObject *)PyObject_GetAttrString(numpy_module, name);
868 | if (ufunc == NULL) goto error;
869 |
870 | arg_types = PyMem_New(int, 3 * 3);
871 | if (arg_types == NULL) goto error;
872 |
873 | arg_types[0] = NPY_DOUBLE;
874 | arg_types[1] = type_num;
875 | arg_types[2] = ret_dtype;
876 | retcode = PyUFunc_RegisterLoopForType(ufunc, type_num,
877 | dq_func, arg_types, NULL);
878 | if (retcode < 0) goto error;
879 |
880 | arg_types[3] = type_num;
881 | arg_types[4] = NPY_DOUBLE;
882 | arg_types[5] = ret_dtype;
883 | retcode = PyUFunc_RegisterLoopForType(ufunc, type_num,
884 | qd_func, arg_types + 3, NULL);
885 | if (retcode < 0) goto error;
886 |
887 | arg_types[6] = type_num;
888 | arg_types[7] = type_num;
889 | arg_types[8] = ret_dtype;
890 | retcode = PyUFunc_RegisterLoopForType(ufunc, type_num,
891 | qq_func, arg_types + 6, NULL);
892 | if (retcode < 0) goto error;
893 | return true;
894 |
895 | error:
896 | return false;
897 | }
898 |
899 | static int register_unary(PyUFuncGenericFunction func, int ret_dtype,
900 | const char *name)
901 | {
902 | PyUFuncObject *ufunc;
903 | int *arg_types = NULL, retcode = 0;
904 |
905 | ufunc = (PyUFuncObject *)PyObject_GetAttrString(numpy_module, name);
906 | if (ufunc == NULL) goto error;
907 |
908 | arg_types = PyMem_New(int, 2);
909 | if (arg_types == NULL) goto error;
910 |
911 | arg_types[0] = type_num;
912 | arg_types[1] = ret_dtype;
913 | retcode = PyUFunc_RegisterLoopForType(ufunc, type_num,
914 | func, arg_types, NULL);
915 | if (retcode < 0) goto error;
916 | return true;
917 |
918 | error:
919 | return false;
920 | }
921 |
922 | static int register_ldexp(PyUFuncGenericFunction func, int ret_dtype,
923 | const char *name)
924 | {
925 | PyUFuncObject *ufunc;
926 | int *arg_types = NULL, retcode = 0;
927 |
928 | ufunc = (PyUFuncObject *)PyObject_GetAttrString(numpy_module, name);
929 | if (ufunc == NULL) goto error;
930 |
931 | arg_types = PyMem_New(int, 3);
932 | if (arg_types == NULL) goto error;
933 |
934 | arg_types[0] = type_num;
935 | arg_types[1] = NPY_INTP;
936 | arg_types[2] = ret_dtype;
937 | retcode = PyUFunc_RegisterLoopForType(ufunc, type_num,
938 | func, arg_types, NULL);
939 | if (retcode < 0) goto error;
940 | return true;
941 |
942 | error:
943 | return false;
944 | }
945 |
946 | static int register_modf(PyUFuncGenericFunction func, int ret_dtype,
947 | const char *name)
948 | {
949 | PyUFuncObject *ufunc;
950 | int *arg_types = NULL, retcode = 0;
951 |
952 | ufunc = (PyUFuncObject *)PyObject_GetAttrString(numpy_module, name);
953 | if (ufunc == NULL) goto error;
954 |
955 | arg_types = PyMem_New(int, 4);
956 | if (arg_types == NULL) goto error;
957 |
958 | arg_types[0] = type_num;
959 | arg_types[1] = type_num;
960 | arg_types[2] = ret_dtype;
961 | arg_types[3] = ret_dtype;
962 | retcode = PyUFunc_RegisterLoopForType(ufunc, type_num,
963 | func, arg_types, NULL);
964 | if (retcode < 0) goto error;
965 | return true;
966 |
967 | error:
968 | return false;
969 | }
970 |
971 | static int register_ufuncs()
972 | {
973 | bool ok = register_unary(u_negw, type_num, "negative")
974 | && register_unary(u_posw, type_num, "positive")
975 | && register_unary(u_absw, type_num, "absolute")
976 | && register_unary(u_reciprocalw, type_num, "reciprocal")
977 | && register_unary(u_sqrw, type_num, "square")
978 | && register_unary(u_sqrtw, type_num, "sqrt")
979 | && register_unary(u_signbitw, NPY_BOOL, "signbit")
980 | && register_unary(u_isfinitew, NPY_BOOL, "isfinite")
981 | && register_unary(u_isinfw, NPY_BOOL, "isinf")
982 | && register_unary(u_isnanw, NPY_BOOL, "isnan")
983 | && register_unary(u_roundw, type_num, "rint")
984 | && register_unary(u_floorw, type_num, "floor")
985 | && register_unary(u_ceilw, type_num, "ceil")
986 | && register_unary(u_expw, type_num, "exp")
987 | && register_unary(u_expm1w, type_num, "expm1")
988 | && register_unary(u_logw, type_num, "log")
989 | && register_unary(u_sinw, type_num, "sin")
990 | && register_unary(u_cosw, type_num, "cos")
991 | && register_unary(u_tanw, type_num, "tan")
992 | && register_unary(u_atanw, type_num, "arctan")
993 | && register_unary(u_acosw, type_num, "arccos")
994 | && register_unary(u_asinw, type_num, "arcsin")
995 | && register_unary(u_atanhw, type_num, "arctanh")
996 | && register_unary(u_acoshw, type_num, "arccosh")
997 | && register_unary(u_asinhw, type_num, "arcsinh")
998 | && register_unary(u_sinhw, type_num, "sinh")
999 | && register_unary(u_coshw, type_num, "cosh")
1000 | && register_unary(u_tanhw, type_num, "tanh")
1001 | && register_unary(u_signw, type_num, "sign")
1002 | && register_ldexp(u_ldexpwi, type_num, "ldexp")
1003 | && register_modf(u_modfww, type_num, "modf")
1004 | && register_binary(u_adddw, u_addwd, u_addww, type_num, "add")
1005 | && register_binary(u_subdw, u_subwd, u_subww, type_num, "subtract")
1006 | && register_binary(u_muldw, u_mulwd, u_mulww, type_num, "multiply")
1007 | && register_binary(u_divdw, u_divwd, u_divww, type_num, "true_divide")
1008 | && register_binary(u_powdw, u_powwd, u_powww, type_num, "power")
1009 | && register_binary(u_equaldw, u_equalwd, u_equalww, NPY_BOOL, "equal")
1010 | && register_binary(u_notequaldw, u_notequalwd, u_notequalww, NPY_BOOL,
1011 | "not_equal")
1012 | && register_binary(u_greaterdw, u_greaterwd, u_greaterww, NPY_BOOL, "greater")
1013 | && register_binary(u_lessdw, u_lesswd, u_lessww, NPY_BOOL, "less")
1014 | && register_binary(u_greaterequaldw, u_greaterequalwd, u_greaterequalww,
1015 | NPY_BOOL, "greater_equal")
1016 | && register_binary(u_lessequaldw, u_lessequalwd, u_lessequalww, NPY_BOOL,
1017 | "less_equal")
1018 | && register_binary(u_fmindw, u_fminwd, u_fminww, type_num, "fmin")
1019 | && register_binary(u_fmaxdw, u_fmaxwd, u_fmaxww, type_num, "fmax")
1020 | && register_binary(u_fmindw, u_fminwd, u_fminww, type_num, "minimum")
1021 | && register_binary(u_fmaxdw, u_fmaxwd, u_fmaxww, type_num, "maximum")
1022 | && register_binary(u_atan2dw, u_atan2wd, u_atan2ww, type_num, "arctan2")
1023 | && register_binary(u_copysigndw, u_copysignwd, u_copysignww, type_num,
1024 | "copysign")
1025 | && register_binary(u_hypotdw, u_hypotwd, u_hypotww, type_num, "hypot");
1026 | return ok ? 0 : -1;
1027 | }
1028 |
1029 | static int register_dtype_in_dicts()
1030 | {
1031 | PyObject *type_dict = NULL;
1032 |
1033 | type_dict = PyObject_GetAttrString(numpy_module, "sctypeDict");
1034 | if (type_dict == NULL) goto error;
1035 |
1036 | if (PyDict_SetItemString(type_dict, "ddouble",
1037 | (PyObject *)pyddouble_type) < 0)
1038 | goto error;
1039 | return 0;
1040 |
1041 | error:
1042 | Py_XDECREF(type_dict);
1043 | return -1;
1044 | }
1045 |
1046 | /* ----------------------- Python stuff -------------------------- */
1047 |
/* Create the bare `_dd_ufunc` extension module object.
 *
 * The method table and module definition are static because CPython keeps
 * referencing them for the lifetime of the module.  The new module is
 * stored in the file-level `module` variable and also returned (NULL on
 * failure, with the Python error state set by PyModule_Create).
 */
static PyObject *make_module()
{
    // Definitions
    static PyMethodDef no_methods[] = {
        {NULL, NULL, 0, NULL}    // No methods defined
    };
    static struct PyModuleDef module_def = {
        PyModuleDef_HEAD_INIT,
        "_dd_ufunc",    /* m_name */
        NULL,           /* m_doc */
        -1,             /* m_size: module keeps global state */
        no_methods,
        NULL,           /* m_slots */
        NULL,           /* m_traverse */
        NULL,           /* m_clear */
        NULL            /* m_free */
    };

    /* Module definition */
    module = PyModule_Create(&module_def);
    return module;
}
1070 |
1071 | static bool constant(ddouble value, const char *name)
1072 | {
1073 | // Note that data must be allocated using malloc, not python allocators!
1074 | ddouble *data = malloc(sizeof value);
1075 | *data = value;
1076 |
1077 | PyArrayObject *array = (PyArrayObject *)
1078 | PyArray_SimpleNewFromData(0, NULL, type_num, data);
1079 | if (array == NULL) return false;
1080 |
1081 | PyArray_ENABLEFLAGS(array, NPY_ARRAY_OWNDATA);
1082 | PyArray_CLEARFLAGS(array, NPY_ARRAY_WRITEABLE);
1083 |
1084 | PyModule_AddObject(module, name, (PyObject *)array);
1085 | return true;
1086 | }
1087 |
1088 | static int register_constants()
1089 | {
1090 | bool ok = constant(Q_MAX, "MAX")
1091 | && constant(Q_MIN, "MIN")
1092 | && constant(Q_EPS, "EPS")
1093 | && constant(Q_2PI, "TWOPI")
1094 | && constant(Q_PI, "PI")
1095 | && constant(Q_PI_2, "PI_2")
1096 | && constant(Q_PI_4, "PI_4")
1097 | && constant(Q_E, "E")
1098 | && constant(Q_LOG2, "LOG2")
1099 | && constant(Q_LOG10, "LOG10")
1100 | && constant(nanw(), "NAN")
1101 | && constant(infw(), "INF");
1102 | return ok ? 0 : -1;
1103 | }
1104 |
1105 | PyMODINIT_FUNC PyInit__dd_ufunc(void)
1106 | {
1107 | /* Initialize module */
1108 | if (!make_module())
1109 | return NULL;
1110 |
1111 | /* Initialize numpy things */
1112 | import_array();
1113 | import_umath();
1114 |
1115 | if (make_ddouble_type() < 0)
1116 | return NULL;
1117 | if (make_dtype() < 0)
1118 | return NULL;
1119 | if (make_finfo() < 0)
1120 | return NULL;
1121 |
1122 | numpy_module = PyImport_ImportModule("numpy");
1123 | if (numpy_module == NULL)
1124 | return NULL;
1125 |
1126 | PyArray_Descr *dtype = PyArray_DescrFromType(type_num);
1127 | PyModule_AddObject(module, "dtype", (PyObject *)dtype);
1128 |
1129 | /* Casts need to be defined before ufuncs, because numpy >= 1.21 caches
1130 | * casts/ufuncs in a way that is non-trivial... one should consider casts
1131 | * to be "more basic".
1132 | * See: https://github.com/numpy/numpy/issues/20009
1133 | */
1134 | if (register_casts() < 0)
1135 | return NULL;
1136 | if (register_ufuncs() < 0)
1137 | return NULL;
1138 | if (register_dtype_in_dicts() < 0)
1139 | return NULL;
1140 | if (register_constants() < 0)
1141 | return NULL;
1142 |
1143 | /* Module is ready */
1144 | return module;
1145 | }
1146 |
--------------------------------------------------------------------------------