├── .github
└── workflows
│ └── tests.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── bench
├── Makefile
├── bench.jl
├── bench_array1d.py
├── bench_cpy_vs_hpy.py
├── make_bench_piconumpy.py
├── profile_piconumpy.py
└── without_numpy
│ ├── julia_callback.jl
│ └── purepython_callback.py
├── piconumpy
├── __init__.py
├── _piconumpy_cpython_capi.c
├── _piconumpy_cython.pyx
├── _piconumpy_hpy.c
├── bench.py
├── purepy.py
├── purepy_array.py
├── test_cpython_capi.py
├── test_cython.py
├── test_hpy_cpy_abi.py
├── test_hpy_universal.py
├── test_purepy.py
└── test_purepy_array.py
├── pyproject.toml
└── setup.py
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
# CI workflow: builds PicoNumpy for both the CPython ABI and the HPy universal
# ABI, then runs the test suite and the benchmarks for each Python version of
# the matrix. Triggered on pull requests only.
name: Tests

on: [pull_request]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      max-parallel: 5
      matrix:
        python-version: ['3.8', '3.9', '3.10']

    steps:

    # The "Run bench" step calls `julia` through bench/Makefile.
    - name: Setup Julia
      uses: julia-actions/setup-julia@v1

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    # Build and test requirements are installed before the sources are checked out.
    - name: Install dependencies
      run: |
        # install HPy from source if depending on a dev version
        # git clone -b master --single-branch https://github.com/hpyproject/hpy
        # git checkout 1234abcd
        # cd hpy
        # pip install .
        pip install numpy cython pytest transonic pythran 'setuptools>=60.2' 'hpy>=0.9.0rc1'

    - name: Checkout
      uses: actions/checkout@v3
      with:
        fetch-depth: 0

    # First invocation builds without --hpy-abi, second one with the universal ABI.
    - name: build
      run: |
        python setup.py develop
        python setup.py --hpy-abi=universal develop

    - name: Run tests
      run: |
        pytest -s

    # `make bench_hpy` runs bench_cpy_vs_hpy.py; plain `make` runs the full comparison.
    - name: Run bench
      run: |
        cd bench
        make bench_hpy
        make
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.egg-info/*
3 | */__pycache__/*
4 | build
5 | .eggs
6 |
7 | *.so
8 | .vscode
9 |
10 | **/tmp*.*
11 | **/tmp*.*
12 |
13 | *_cython.c
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2020, Pierre Augier
4 | Copyright (c) 2021, 2023, Oracle and/or it's affiliates
5 | All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are met:
9 |
10 | 1. Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | 2. Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 |
17 | 3. Neither the name of the copyright holder nor the names of its
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
# Build helpers for PicoNumpy.
# The interpreter is chosen with the PYTHON variable (default: python), e.g.
#   make PYTHON=pypy3

ifeq ($(PYTHON),)
PYTHON := python
endif

# None of these targets produces a file of the same name: declare them phony
# so that a stray file or directory called e.g. "tests" or "clean" cannot
# prevent the recipe from running.
.PHONY: all develop develop_universal pip build_ext_universal build_ext \
	full format tests clean black

# Default target: HPy universal build, plus the in-place extensions build when
# the interpreter is the default `python`.
# $(MAKE) (instead of a literal `make`) propagates flags such as -n and -j and
# reuses the same make program for the sub-invocation.
all:
	$(MAKE) develop_universal
ifeq ($(PYTHON),python)
	$(MAKE) build_ext
endif

develop:
	$(PYTHON) setup.py develop

# The generated piconumpy/_piconumpy_hpy.py file is removed after the
# universal build.
develop_universal:
	$(PYTHON) setup.py --hpy-abi=universal develop
	rm -f piconumpy/_piconumpy_hpy.py

pip:
	$(PYTHON) -m pip install -e .[dev]

build_ext_universal:
	$(PYTHON) setup.py --hpy-abi=universal build_ext -if

build_ext:
	$(PYTHON) setup.py build_ext -if

full:
	$(PYTHON) -m pip install -e .[full]

format:
	black -l 82 setup.py piconumpy/*.py
	clang-format-7 -i piconumpy/*cpython_capi.c

tests:
	$(PYTHON) -m pytest piconumpy -s

clean:
	rm -f piconumpy/*.so
	rm -rf build dist piconumpy.egg-info

black:
	black -l 82 .
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PicoNumpy
2 |
3 | [![Build Status](https://travis-ci.org/paugier/piconumpy.svg?branch=master)](https://travis-ci.org/paugier/piconumpy)
4 |
5 | **An experiment about Numpy and HPy**
6 |
7 | The C API of CPython is one of the causes of the success of Python in scientific
8 | computing. In particular, Numpy (and all the Python scientific stack) is built
9 | on top of this API. However, some characteristics of this API start to be an
10 | issue for the future of scientific Python (see [1], [2], [HPy]).
11 |
12 | [1]: https://faster-cpython.readthedocs.io/
13 | [2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html
14 | [HPy]: https://github.com/hpyproject/hpy
15 |
16 | [HPy] is a very ambitious and promising project to design a new and better C
17 | API for interacting with Python interpreters. It should allow people to write
18 | Python extensions efficient on different interpreters (CPython, PyPy, Jython,
19 | IronPython, GraalPython, RustPython, etc.).
20 |
21 | PyPy would be especially useful for some scientific applications. For example
22 | for Integration and ODEs
23 | ([scipy.integrate](https://docs.scipy.org/doc/scipy/reference/integrate.html)),
24 | for which there are a lot of callbacks of very small functions. This repository
25 | contains [a tiny benchmark](bench/without_numpy) showing that as long as Numpy
26 | is not used, PyPy is very efficient for such task. Unfortunately, as soon as
27 | Numpy is used, PyPy becomes very slow!
28 |
29 | [bench/without_numpy]: https://github.com/paugier/piconumpy/blob/master/bench/without_numpy/
30 |
31 | With PicoNumpy, I'd like to study if [HPy] could help for codes using Numpy and
32 | callbacks of small Python functions.
33 |
34 | We start by a [simple but realistic benchmark](bench/bench_array1d.py) (the
35 | slow loops only involve pure-Python and very simple Numpy). We then wrote a
36 | tiny ("pico") implementation of a Numpy like object (just sufficient to run the
37 | benchmark).
38 |
39 | The next task is to reimplement PicoNumpy using [HPy] and to check if PyPy
40 | could efficiently accelerate [our main benchmark](bench/bench_array1d.py).
41 |
42 | PicoNumpy is really tiny. It just provides an `array` class (one-dimensional)
43 | supporting:
44 |
45 | - Instantiation from a list of floats
46 | - Elementwise multiplication and division by a float
47 | - Elementwise addition (of 2 arrays)
48 | - Indexing
49 | - `len`
50 |
51 | A good acceleration by PyPy of our example would be a great proof that the
52 | scientific Python community has to invest time and energy on [HPy].
53 |
54 | In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for
55 | the benchmark and comparison. With Transonic-Pythran, we typically get a ~50x
56 | speedup compared to CPython (and ~400x versus PyPy, which is still very slow for
57 | such codes using Numpy).
58 |
59 | [bench/bench_array1d.py]: https://github.com/paugier/piconumpy/blob/master/bench/bench_array1d.py
60 |
61 | ## Install and run the benchmarks
62 |
63 | **Warning:** PicoNumpy now depends on HPy, which still has to be installed from
64 | the [Git repository](https://github.com/hpyproject/hpy). For now, the
65 | installation is a bit more complex than what is described here (more about this
66 | [here](#more-precise-notes-on-how-to-install-and-run-the-benchmarks-with-pypy)).
67 |
68 | `make` should install the package in editable mode. `cd bench; make` should run
69 | the benchmarks. For the benchmarks, Julia is used for a good comparison point
70 | so the command `julia` has to be available.
71 |
72 | For PyPy, the Makefiles are sensitive to the environment variable `PYTHON`, so
73 | you could do:
74 |
75 | ```bash
76 | export PYTHON=pypy3
77 | make
78 | cd bench
79 | make
80 | ```
81 |
82 | The benchmark code can be profiled for the different implementations with the
83 | commands (you need gprof2dot and graphviz):
84 |
85 | ```bash
86 | cd bench
87 | make profile METHOD="cpython-c-api"
88 | make profile METHOD="purepy_array"
89 | make profile METHOD="purepy"
90 | make profile METHOD="cython"
91 | ```
92 |
93 | ### More precise notes on how to install and run the benchmarks with PyPy
94 |
95 | Download and extract a nightly PyPy build.
96 | Add to the `PATH` environment variable
97 | the path of the directory containing the `pypy` executable (something like
98 | `~/opt/pypy-c-jit-101190-b661dc329618-linux64/bin`). Then, you should be able
99 | to run:
100 |
101 | ```bash
102 | pypy -m ensurepip
103 | pypy -m pip install pip -U
104 | pypy -m pip install numpy cython pytest transonic pythran
105 | ```
106 |
107 | We need to install the correct version of HPy for the version of PyPy we are using:
108 |
109 | ```bash
110 | pypy -c "import hpy.universal as u; print(u.get_version())"
111 | ```
112 |
113 | gives `('0.0.2rc2.dev12+gc9660c2', 'c9660c2')`.
114 |
115 | ```bash
116 | cd ~/Dev/hpy
117 | # update to the correct commit
118 | pypy setup.py develop
119 | ```
120 |
121 | Now we can build-install PicoNumpy:
122 |
123 | ```bash
124 | cd ~/Dev/piconumpy
125 | pypy setup.py --hpy-abi=universal develop
126 | ```
127 |
128 | And run the benchmarks with:
129 |
130 | ```bash
131 | export PYTHON="pypy"
132 | make clean
133 | make bench_hpy
134 | make
135 | ```
136 |
137 | ## A few results
138 |
139 | As of today (6 July 2021), HPy is not yet ready for high performance, but at
140 | least (with HPy 0.0.2) it runs!
141 |
142 | ### At home (Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz)
143 |
144 | - With CPython
145 |
146 | ```
147 | Julia : 1 * norm = 0.00196 s
148 | PicoNumpy (CPython C-API) : 9.42 * norm
149 | PicoNumpy (HPy CPy ABI) : 9.95 * norm
150 | PicoNumpy (HPy Universal) : 10.4 * norm
151 | Transonic-Pythran : 0.497 * norm
152 | Numpy : 27.5 * norm
153 | PicoNumpy (purepy) : 37.3 * norm
154 | PicoNumpy (purepy_array) : 37.7 * norm
155 | PicoNumpy (Cython) : 28.9 * norm
156 | ```
157 |
158 | - With PyPy3
159 |
160 | ```
161 | Julia : 1 * norm = 0.00196 s
162 | PicoNumpy (CPython C-API) : 34.1 * norm
163 | PicoNumpy (HPy Universal) : 12.8 * norm
164 | Transonic-Pythran : 0.539 * norm
165 | Numpy : 232 * norm
166 | PicoNumpy (purepy) : 4.39 * norm
167 | PicoNumpy (purepy_array) : 6.33 * norm
168 | PicoNumpy (Cython) : 274 * norm
169 | ```
170 |
171 | #### Simpler benchmarks (bench/bench_cpy_vs_hpy.py)
172 |
173 | - With CPython
174 |
175 | ```
176 | CPython C-API: 1.92 seconds
177 | HPy [Universal]: 2.08 seconds
178 | HPy [CPy ABI]: 2.02 seconds
179 | ```
180 |
181 | - With PyPy3
182 |
183 | ```
184 | CPython C-API: 5.75 seconds
185 | HPy [Universal]: 2.11 seconds
186 | ```
187 |
--------------------------------------------------------------------------------
/bench/Makefile:
--------------------------------------------------------------------------------
1 |
# Benchmark driver.
#   PYTHON: interpreter used to run the benchmarks (default: python)
#   METHOD: implementation profiled by `make profile` (default: cpython-c-api)

ifeq ($(PYTHON),)
PYTHON := python
endif

ifeq ($(METHOD),)
METHOD := cpython-c-api
endif

# These targets do not create a file named after themselves; tmp.py and
# tmp_result_julia.txt are real files and therefore stay out of .PHONY.
.PHONY: all clean profile bench_hpy

# Run the full comparison; needs the generated script and the Julia timing.
all: tmp.py tmp_result_julia.txt
	$(PYTHON) tmp.py

# Regenerated whenever the reference benchmark or the generator changes.
tmp.py: bench_array1d.py make_bench_piconumpy.py
	$(PYTHON) make_bench_piconumpy.py

clean:
	transonic -cc bench_array1d.py -f
	rm -f tmp*.py tmp*.txt

# The Julia result is used as the normalisation time by tmp.py.
tmp_result_julia.txt:
	julia bench.jl > tmp_result_julia.txt

profile: tmp.py
	$(PYTHON) profile_piconumpy.py $(METHOD)
	# with gprof2dot and graphviz (command dot)
	gprof2dot -f pstats tmp.pstats | dot -Tpng -o tmp_$(METHOD).png
	eog tmp_$(METHOD).png

bench_hpy:
	$(PYTHON) bench_cpy_vs_hpy.py
31 |
--------------------------------------------------------------------------------
/bench/bench.jl:
--------------------------------------------------------------------------------
1 | using Statistics
2 |
# Evaluate the derivative vector `[u0, v0, dU, dV]` of the state
# `X_0 = [x0, y0, u0, v0]`. `b` scales every `H_*` term; all other
# coefficients are fixed inside the function.
function board(X_0::Array, b::Float64)
    x0, y0 = copy(X_0[1]), copy(X_0[2])
    u0, v0 = copy(X_0[3]), copy(X_0[4])

    g, c, a = 9.81, 0.5, 0.25
    p = (2*π)/10.0
    q = (2*π)/4.0

    # First and second order H terms, evaluated at (x0, y0).
    H_x = -a + b*p*sin(p*x0)*cos(q*y0)
    H_xx = b*p^2 * cos(p*x0)*cos(q*y0)
    H_y = b*q*cos(p*x0)*sin(q*y0)
    H_yy = b*q^2 * cos(p*x0)*cos(q*y0)
    H_xy = -b*q*p*sin(p*x0)*sin(q*y0)

    numerator = g + H_xx*u0^2 + 2*H_xy*u0*v0 + H_yy*v0^2
    denominator = 1 + H_x^2 + H_y^2
    F = numerator/denominator

    dU = -F*H_x - c*u0
    dV = -F*H_y - c*v0

    return [u0, v0, dU, dV]
end
30 |
31 |
# One fourth-order Runge-Kutta step of size `dt` starting from `x0`.
# `f` is called as `f(x, t)` and the same `t` is passed to all four stages.
function runge_kutta_step(f::Function, x0, dt::Float64, t=nothing)
    stage1 = f(x0, t) * dt
    stage2 = f(x0 + stage1/2, t) * dt
    stage3 = f(x0 + stage2/2, t) * dt
    stage4 = f(x0 + stage3, t) * dt

    return x0 + (stage1 + 2*stage2 + 2*stage3 + stage4)/6
end
42 |
43 |
# Integrate every initial condition `(x0[k], y0[k], u0[k], v0[k])` over `N_t`
# Runge-Kutta steps of size `dt` and return all final states concatenated in
# one flat vector. `b` is forwarded to `f` as its second argument.
function solver(f::Function, x0::Array, y0::Array, u0::Array, v0::Array, dt::Float64, N_t::Int, b = 0.5)
    solutions = []
    for k in eachindex(x0)
        state = [x0[k], y0[k], u0[k], v0[k]]
        for _ in 1:N_t
            state = runge_kutta_step(f, state, dt, b)
        end
        solutions = vcat(solutions, state)
    end

    return solutions
end
56 |
57 |
# Build the initial conditions for `n_sleds` trajectories (random `y`,
# `v` set to 3.5, `x` and `u` set to zero) and integrate them over `n_time`
# steps of size 0.01.
function bench(n_sleds, n_time)
    dt = 0.01

    x_start = zeros(n_sleds)
    y_start = rand(n_sleds)
    u_start = zeros(n_sleds)
    v_start = zeros(n_sleds) .+ 3.5

    return solver(board, x_start, y_start, u_start, v_start, dt, n_time)
end
66 |
67 |
# Problem size; bench_array1d.py uses the same n_sleds / n_time values.
n_sleds = 10
n_time = 200

# Number of timed repetitions.
nb_runs = 200

times = zeros(nb_runs)

# Each call is timed separately so that a robust statistic can be taken below.
for irun in 1:nb_runs
    times[irun] = @elapsed bench(n_sleds, n_time)
end

# Only the median is printed: bench/Makefile redirects this output into
# tmp_result_julia.txt, which the generated tmp.py parses as a single float.
println(median(times))
80 |
--------------------------------------------------------------------------------
/bench/bench_array1d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from numpy import array
4 | from math import pi, cos, sin
5 |
6 | from transonic import jit
7 |
8 | # begin code functions (don't remove this line)
9 |
10 |
def runge_kutta_step(f, x0, dt, t=None):
    """Return ``x0`` advanced by one fourth-order Runge-Kutta step of size ``dt``.

    ``f`` is called as ``f(t, x)``; the same ``t`` is passed to all four stages.
    """
    stage_1 = f(t, x0) * dt
    stage_2 = f(t, x0 + stage_1 / 2) * dt
    stage_3 = f(t, x0 + stage_2 / 2) * dt
    stage_4 = f(t, x0 + stage_3) * dt
    # The factor 2 is deliberately written on the right-hand side
    # (``k * 2`` and not ``2 * k``): workaround for a pypy bug,
    # see https://foss.heptapod.net/pypy/pypy/-/issues/3509
    weighted_sum = stage_1 + stage_2 * 2 + stage_3 * 2 + stage_4
    return x0 + weighted_sum / 6
21 |
22 |
def board(t, X_0):
    """Return ``array([u0, v0, dU, dV])`` for the state ``X_0 = [x0, y0, u0, v0]``.

    ``t`` is accepted for signature compatibility with the stepper and is not
    used in the computation.
    """
    x0, y0 = X_0[0], X_0[1]
    u0, v0 = X_0[2], X_0[3]

    # Fixed coefficients of the model.
    g = 9.81
    c = 0.5
    a = 0.25
    b = 0.5
    p = (2 * pi) / 10.0
    q = (2 * pi) / 4.0

    # First and second order H terms, evaluated at (x0, y0).
    H_x = -a + b * p * sin(p * x0) * cos(q * y0)
    H_xx = b * p ** 2 * cos(p * x0) * cos(q * y0)
    H_y = b * q * cos(p * x0) * sin(q * y0)
    H_yy = b * q ** 2 * cos(p * x0) * cos(q * y0)
    H_xy = -b * q * p * sin(p * x0) * sin(q * y0)

    numerator = g + H_xx * u0 ** 2 + 2 * H_xy * u0 * v0 + H_yy * v0 ** 2
    denominator = 1 + H_x ** 2 + H_y ** 2
    F = numerator / denominator

    dU = -F * H_x - c * u0
    dV = -F * H_y - c * v0

    return array([u0, v0, dU, dV])
50 |
51 |
def solver(f, x0, y0, u0, v0, dt, N_t, b=0.5):
    """Integrate each initial condition over ``N_t`` Runge-Kutta steps.

    The four inputs are first rebuilt through ``array(<input>.tolist())`` so
    that the whole computation runs on whatever ``array`` implementation is
    in scope. Returns the list of final states, one per initial condition.
    ``b`` is forwarded to ``f`` as its first argument.
    """
    x0, y0 = array(x0.tolist()), array(y0.tolist())
    u0, v0 = array(u0.tolist()), array(v0.tolist())

    n_trajectories = len(x0)
    solutions = []
    for k in range(n_trajectories):
        state = array([x0[k], y0[k], u0[k], v0[k]])
        for _ in range(N_t):
            state = runge_kutta_step(f, state, dt, b)
        solutions.append(state)
    return solutions
65 |
66 |
def bench(n_sleds, n_time):
    """Run the solver on ``n_sleds`` initial conditions for ``n_time`` steps.

    ``y`` starts from random values, ``u`` from 3.5, ``x`` and ``v`` from
    zero. The time step is 0.01 and the result of the solver is discarded.
    """
    dt = 0.01

    x_start = np.zeros(n_sleds)
    y_start = np.random.rand(n_sleds)
    v_start = np.zeros(n_sleds)
    u_start = np.zeros(n_sleds) + 3.5

    solver(board, x_start, y_start, u_start, v_start, dt, n_time)
74 |
75 |
76 | # end code functions (don't remove this line)
77 |
78 |
# Transonic JIT version of the benchmark.
bench_pythran = jit(bench)
# Numba does not support this code...
# bench_numba = jit(backend="numba")(bench)
from transonic import wait_for_all_extensions

# warmup (compilation of the Pythran extension)
# NOTE: this runs at import time, so every module importing bench_array1d
# triggers it and blocks here until the extensions are ready.
bench_pythran(1, 1)
wait_for_all_extensions()

if __name__ == "__main__":

    from transonic.util import timeit_verbose as timeit

    n_sleds = 10
    n_time = 200

    # At module level locals() is the module namespace, so the timed
    # statements can see bench, bench_pythran, n_sleds and n_time.
    g = locals()
    # The pure NumPy timing is reused as the norm for the Pythran timing.
    norm = timeit("bench(n_sleds, n_time)", globals=g)
    timeit("bench_pythran(n_sleds, n_time)", globals=g, norm=norm)
    # timeit("bench_numba(n_sleds, n_time)", globals=g, norm=norm)
99 |
--------------------------------------------------------------------------------
/bench/bench_cpy_vs_hpy.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import random
4 | from math import pi, cos, sin
5 | from pathlib import Path
6 |
7 | here = Path(__file__).absolute().parent
8 |
9 |
def my_randn(mod, n):
    """Return ``mod.empty(n)`` filled with standard normal samples.

    The samples are drawn one by one with ``random.normalvariate(0, 1)`` and
    stored by item assignment.
    """
    samples = mod.empty(n)
    position = 0
    while position < n:
        samples[position] = random.normalvariate(0, 1)
        position += 1
    return samples
15 |
16 |
17 | IS_PYPY = hasattr(sys, "pypy_version_info")
18 |
19 |
def runge_kutta_step(mod, f, x0, dt, t=None):
    """Return ``x0`` advanced by one fourth-order Runge-Kutta step of size ``dt``.

    ``f`` is called as ``f(mod, t, x)``; ``mod`` and ``t`` are passed unchanged
    to all four stages.
    """
    stage_1 = f(mod, t, x0) * dt
    stage_2 = f(mod, t, x0 + stage_1 / 2) * dt
    stage_3 = f(mod, t, x0 + stage_2 / 2) * dt
    stage_4 = f(mod, t, x0 + stage_3) * dt
    # The factor 2 is deliberately written on the right-hand side
    # (``k * 2`` and not ``2 * k``): workaround for a pypy bug,
    # see https://foss.heptapod.net/pypy/pypy/-/issues/3509
    weighted_sum = stage_1 + stage_2 * 2 + stage_3 * 2 + stage_4
    return x0 + weighted_sum / 6
30 |
31 |
def board(mod, t, X_0):
    """Return ``mod.array([u0, v0, dU, dV])`` for ``X_0 = [x0, y0, u0, v0]``.

    ``t`` is accepted for signature compatibility with the stepper and is not
    used in the computation.
    """
    x0, y0 = X_0[0], X_0[1]
    u0, v0 = X_0[2], X_0[3]

    # Fixed coefficients of the model.
    g = 9.81
    c = 0.5
    a = 0.25
    b = 0.5
    p = (2 * pi) / 10.0
    q = (2 * pi) / 4.0

    # First and second order H terms, evaluated at (x0, y0).
    H_x = -a + b * p * sin(p * x0) * cos(q * y0)
    H_xx = b * p ** 2 * cos(p * x0) * cos(q * y0)
    H_y = b * q * cos(p * x0) * sin(q * y0)
    H_yy = b * q ** 2 * cos(p * x0) * cos(q * y0)
    H_xy = -b * q * p * sin(p * x0) * sin(q * y0)

    numerator = g + H_xx * u0 ** 2 + 2 * H_xy * u0 * v0 + H_yy * v0 ** 2
    denominator = 1 + H_x ** 2 + H_y ** 2
    F = numerator / denominator

    dU = -F * H_x - c * u0
    dV = -F * H_y - c * v0

    return mod.array([u0, v0, dU, dV])
59 |
60 |
def solver(mod, f, x0, y0, u0, v0, dt, N_t, b=0.5):
    """Integrate each initial condition over ``N_t`` Runge-Kutta steps.

    States are built with ``mod.array``. Returns the list of final states, one
    per initial condition. ``b`` is forwarded to ``f`` in the ``t`` position.
    """
    n_trajectories = len(x0)
    solutions = []
    for k in range(n_trajectories):
        state = mod.array([x0[k], y0[k], u0[k], v0[k]])
        for _ in range(N_t):
            state = runge_kutta_step(mod, f, state, dt, b)
        solutions.append(state)
    return solutions
69 |
70 |
def bench(mod, n_sleds, n_time):
    """Time one run of the solver with the array implementation ``mod``.

    Parameters
    ----------
    mod : module-like object providing ``zeros``, ``empty`` and ``array``.
    n_sleds : number of initial conditions.
    n_time : number of time steps (of size 0.01) per initial condition.

    Returns
    -------
    float
        Elapsed time in seconds for the ``solver`` call only; building the
        initial conditions is not included.
    """
    x_init = mod.zeros(n_sleds)
    y_init = my_randn(mod, n_sleds)
    v_init = mod.zeros(n_sleds)
    u_init = mod.zeros(n_sleds)
    for i in range(n_sleds):
        u_init[i] += 3.5
    # perf_counter() is monotonic and has the best available resolution;
    # time.time() can jump when the system clock is adjusted.
    start = time.perf_counter()
    solver(mod, board, x_init, y_init, u_init, v_init, 0.01, n_time)
    return time.perf_counter() - start
82 |
83 |
84 | N_SLEDS = 100
85 | N_TIME = 2000
86 |
87 |
def import_piconumpy_hpy_universal():
    """Load the HPy universal build of ``_piconumpy_hpy`` from its ``.so`` file.

    The shared object is looked up in the ``piconumpy`` directory next to this
    ``bench`` directory and loaded with ``hpy.universal.load``.
    """
    import hpy.universal
    from importlib.util import spec_from_file_location

    module_name = "_piconumpy_hpy"
    so_path = str(here.parent / "piconumpy/_piconumpy_hpy.hpy0.so")

    spec = spec_from_file_location(module_name, so_path)
    return hpy.universal.load(module_name, so_path, spec)
98 |
99 |
def main():
    """Time the benchmark with each available implementation and print the results.

    Order: CPython C-API, HPy universal, then HPy CPython ABI (skipped on PyPy).
    Each extension is imported just before it is benchmarked.
    """
    import piconumpy._piconumpy_cpython_capi as pnp_capi

    t = bench(pnp_capi, N_SLEDS, N_TIME)
    print(f"CPython C-API: {t:.2f} seconds")

    pnp_hpy_universal = import_piconumpy_hpy_universal()
    t = bench(pnp_hpy_universal, N_SLEDS, N_TIME)
    print(f"HPy [Universal]: {t:.2f} seconds")

    if IS_PYPY:
        # The CPython-ABI build of the HPy extension is not benchmarked on PyPy.
        return

    import piconumpy._piconumpy_hpy as pnp_hpy

    t = bench(pnp_hpy, N_SLEDS, N_TIME)
    print(f"HPy [CPy ABI]: {t:.2f} seconds")
116 |
117 |
118 | if __name__ == "__main__":
119 | main()
120 |
--------------------------------------------------------------------------------
/bench/make_bench_piconumpy.py:
--------------------------------------------------------------------------------
# Read the reference benchmark (path relative to the current working
# directory) and keep only the text located between its two marker comments.
with open("bench_array1d.py") as file:
    code = file.read()

# Everything after the "begin" marker, then everything before the "end"
# marker: these function definitions are pasted into the generated modules.
code = code.split("# begin code functions (don't remove this line)")[1]
code_functions = code.split("# end code functions (don't remove this line)")[0]
6 |
7 |
def create_tmp_file(name_module):
    """Write ``tmp_<name>.py``: the benchmark functions bound to one ``array``.

    ``<name>`` is ``name_module`` without its ``_piconumpy_`` prefix (if any).
    The generated module imports ``array`` from ``piconumpy.<name_module>``,
    except for ``_piconumpy_hpy_universal`` which takes it from
    ``piconumpy._piconumpy_hpy``, and then contains ``code_functions``.
    """
    prefix = "_piconumpy_"
    if name_module.startswith(prefix):
        name = name_module[len(prefix) :]
    else:
        name = name_module

    if name_module != "_piconumpy_hpy_universal":
        code_import = f"from piconumpy.{name_module} import array"
    else:
        code_import = """
from piconumpy import _piconumpy_hpy
array = _piconumpy_hpy.array
"""

    header = f"""
from math import pi, cos, sin
import numpy as np
{code_import}
"""

    with open(f"tmp_{name}.py", "w") as file:
        file.write(header + code_functions)
34 |
35 |
# One generated module per implementation; they are imported by the tmp.py
# script assembled below (and some of them by profile_piconumpy.py).
create_tmp_file("_piconumpy_hpy_universal")
create_tmp_file("purepy")
create_tmp_file("purepy_array")
create_tmp_file("_piconumpy_cython")
create_tmp_file("_piconumpy_hpy")


# Source of tmp.py: a header binding `array` to the default piconumpy
# implementation, the benchmark functions, then the code that times every
# variant and prints the timings normalised by the Julia result.
code = (
    """
import sys
import numpy as np
from piconumpy import array
from math import pi, cos, sin

IS_PYPY = hasattr(sys, 'pypy_version_info')
    """
    + code_functions
    + """

from piconumpy.bench import timeit_verbose

from bench_array1d import bench as bench_numpy, bench_pythran

from tmp_hpy_universal import bench as bench_hpy_universal
from tmp_purepy import bench as bench_piconumpy_purepy
from tmp_purepy_array import bench as bench_piconumpy_purepy_array
from tmp_cython import bench as bench_cython

if not IS_PYPY:
    from tmp_hpy import bench as bench_hpy

# get norm from Julia benchmark
with open("tmp_result_julia.txt") as file:
    norm = float(file.read())

max_length_name = len("piconumpy (CPython C-API)") + 2

fmt_name = f"{{:{max_length_name}s}}"
name = fmt_name.format("Julia")
print(f"{name}: 1 * norm = {norm:4.3g} s")

n_sleds = 10
n_time = 200

g = locals()

def timeit(name_func, name):
    return timeit_verbose(
        name_func + "(n_sleds, n_time)",
        globals=g,
        name=name,
        print_time=False,
        norm=norm,
        max_length_name=max_length_name,
    )

timeit("bench", name="PicoNumpy (CPython C-API)")
if not IS_PYPY:
    timeit("bench_hpy", name="PicoNumpy (HPy CPy ABI)")
timeit("bench_hpy_universal", name="PicoNumpy (HPy Universal)")
timeit("bench_pythran", name="Transonic-Pythran")
timeit("bench_numpy", name="Numpy")
timeit(
    "bench_piconumpy_purepy", name="PicoNumpy (purepy)",
)
timeit(
    "bench_piconumpy_purepy_array", name="PicoNumpy (purepy_array)",
)
timeit("bench_cython", name="PicoNumpy (Cython)")
"""
)

# Written to the current working directory; bench/Makefile then runs it.
with open("tmp.py", "w") as file:
    file.write(code)
110 |
--------------------------------------------------------------------------------
/bench/profile_piconumpy.py:
--------------------------------------------------------------------------------
# Profile one implementation of the benchmark with cProfile.
# Usage: python profile_piconumpy.py METHOD   (see the keys of `methods`).
import sys

import cProfile
import pstats

import bench_array1d
import tmp_purepy
import tmp_purepy_array
import tmp_cython

# Maps the METHOD command line value to the module providing `bench`.
# NOTE(review): "cpython-c-api" points to bench_array1d, whose `array` comes
# from NumPy, not from the PicoNumpy C-API extension -- confirm this is intended.
methods = {
    "cpython-c-api": bench_array1d,
    "purepy": tmp_purepy,
    "purepy_array": tmp_purepy_array,
    "cython": tmp_cython,
}

# The last command line argument selects the method; an unknown value (or no
# argument at all) silently falls back to bench_array1d.
module = methods.get(sys.argv[-1], bench_array1d)

n_sleds = 10
n_time = 200

# Raw statistics are saved in tmp.pstats (bench/Makefile feeds it to gprof2dot).
cProfile.runctx(
    "module.bench(n_sleds, n_time)", globals(), locals(), "tmp.pstats"
)

# Print the 10 entries with the largest internal time.
s = pstats.Stats("tmp.pstats")
s.strip_dirs().sort_stats("time").print_stats(10)
29 |
--------------------------------------------------------------------------------
/bench/without_numpy/julia_callback.jl:
--------------------------------------------------------------------------------
1 | #=
2 |
3 | julia julia_callback.jl
4 | 2.457 ms (100002 allocations: 10.68 MiB)
5 |
6 | =#
7 | using BenchmarkTools
8 |
# Map the 3-component state `u = (y1, y2, y3)` to the vector
# `[dy1, dy2, dy3]`, using the fixed rate constants k1, k2 and k3.
function rober(u)
    k1, k2, k3 = 0.04, 3e7, 1e4
    y1, y2, y3 = u
    return [
        -k1 * y1 + k3 * y2 * y3,
        k1 * y1 - k2 * y2 * y2 - k3 * y2 * y3,
        k2 * y2 * y2,
    ]
end
19 |
20 | rober([1.0, 0.0, 0.0])
21 |
# Apply `func` repeatedly, feeding each result back as the next input,
# starting from [1.0, 0.0, 0.0]. The range 0:100_000 is inclusive, so `func`
# is called 100001 times.
function call_function(func)
    state = [1.0, 0.0, 0.0]
    for _ in 0:100_000
        state = func(state)
    end
    return state
end
29 |
30 | @btime call_function(rober)
--------------------------------------------------------------------------------
/bench/without_numpy/purepython_callback.py:
--------------------------------------------------------------------------------
1 | """
2 | pypy3 purepython_callback.py
3 | bench : 1.000 * norm
4 | norm = 4.24e-04 s
5 | bench_pythran : 1.061 * norm
6 | time pythran: 0.450 ms
7 |
8 | python purepython_callback.py
9 | bench : 1.000 * norm
10 | norm = 2.57e-02 s
11 | bench_pythran : 0.017 * norm
12 | time pythran: 0.449 ms
13 |
14 | - PyPy is 60 times faster than CPython.
15 | - PyPy is a bit faster than Pythran and Julia!
16 |
17 | """
18 |
19 | from transonic.util import timeit_verbose as tiv
20 | from transonic import jit, wait_for_all_extensions
21 |
22 |
def rober(u):
    """Map the state ``u = (y1, y2, y3)`` to the tuple ``(dy1, dy2, dy3)``.

    The rate constants k1, k2 and k3 are fixed inside the function.
    """
    k1, k2, k3 = 0.04, 3e7, 1e4
    y1, y2, y3 = u
    return (
        -k1 * y1 + k3 * y2 * y3,
        k1 * y1 - k2 * y2 * y2 - k3 * y2 * y3,
        k2 * y2 * y2,
    )
32 |
33 |
def call_function(func):
    """Apply ``func`` 100000 times, feeding each result back as the next input.

    The iteration starts from ``(1.0, 0.0, 0.0)``; the last result is returned.
    """
    n_calls = int(1e5)
    state = (1.0, 0.0, 0.0)
    for _ in range(n_calls):
        state = func(state)
    return state
39 |
40 |
def bench():
    """Benchmark entry point: iterate ``rober`` through ``call_function``."""
    final_state = call_function(rober)
    return final_state
43 |
44 |
# Transonic JIT version of the same benchmark.
bench_pythran = jit(bench)

# Warm-up calls: both variants run once before timing, then we wait until the
# compiled extension is ready.
bench()
bench_pythran()

wait_for_all_extensions()

g = globals()
# Value passed as total_duration to each timing below.
duration = 10
# The pure-Python timing is used as the norm for the Pythran timing.
norm = tiv("bench()", globals=g, total_duration=duration)
time_pythran = tiv(
    "bench_pythran()", globals=g, total_duration=duration, norm=norm
)
# Printed in milliseconds (the value is multiplied by 1000).
print(f"time pythran: {time_pythran*1000:.3f} ms")
59 |
--------------------------------------------------------------------------------
/piconumpy/__init__.py:
--------------------------------------------------------------------------------
1 | from ._piconumpy_cpython_capi import array
2 |
--------------------------------------------------------------------------------
/piconumpy/_piconumpy_cpython_capi.c:
--------------------------------------------------------------------------------
#define PY_SSIZE_T_CLEAN
#include <Python.h>

#include "structmember.h"
5 |
/* Instance layout of the `array` type: a buffer of doubles and its length. */
typedef struct {
  PyObject_HEAD
  /* Type-specific fields go here. */
  double *data; /* allocated with malloc(), released in Array_dealloc */
  int size;     /* number of elements in `data`; exposed as the `size` member */
} ArrayObject;
12 |
13 | static void Array_dealloc(ArrayObject *self) {
14 | free(self->data);
15 | Py_TYPE(self)->tp_free((PyObject *)self);
16 | }
17 |
18 | static int Array_init(ArrayObject *self, PyObject *args, PyObject *kwds) {
19 | static char *kwlist[] = {"data", NULL};
20 | int index;
21 | PyObject *data = NULL, *item;
22 |
23 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, &data))
24 | return -1;
25 |
26 | if (!PyList_Check(data)) {
27 | PyErr_SetString(PyExc_TypeError, "parameter must be a list");
28 | return -1;
29 | }
30 |
31 | self->size = (int)PyList_Size(data);
32 |
33 | self->data = (double *)malloc(self->size * sizeof(double));
34 | if (self->data == NULL) {
35 | PyErr_NoMemory();
36 | return -1;
37 | }
38 |
39 | for (index = 0; index < self->size; index++) {
40 | item = PyList_GET_ITEM(data, index);
41 | self->data[index] = PyFloat_AsDouble(item);
42 | }
43 |
44 | return 0;
45 | }
46 |
47 | static PyMemberDef Array_members[] = {
48 | {"size", T_INT, offsetof(ArrayObject, size), 0, "size of the array"},
49 | {NULL} /* Sentinel */
50 | };
51 |
52 | static PyObject *Array_tolist(ArrayObject *self, PyObject *Py_UNUSED(ignored)) {
53 | int index;
54 | PyObject *result, *item;
55 | result = PyList_New(self->size);
56 | for (index = 0; index < self->size; index++) {
57 | item = PyFloat_FromDouble(self->data[index]);
58 | PyList_SetItem(result, index, item);
59 | }
60 | return result;
61 | };
62 |
63 | static ArrayObject *Array_empty(int size);
64 |
65 | static ArrayObject *Array_multiply(PyObject *o1, PyObject *o2) {
66 | int index;
67 | double number;
68 | PyObject *obj_number = NULL;
69 | ArrayObject *result = NULL, *arr = NULL;
70 |
71 | if (PyNumber_Check(o2)) {
72 | obj_number = o2;
73 | arr = (ArrayObject *)o1;
74 | } else if (PyNumber_Check(o1)) {
75 | obj_number = o1;
76 | arr = (ArrayObject *)o2;
77 | }
78 |
79 | if (PyNumber_Check(o1) | PyNumber_Check(o2)) {
80 | number = PyFloat_AsDouble(obj_number);
81 | result = Array_empty(arr->size);
82 | for (index = 0; index < arr->size; index++) {
83 | result->data[index] = arr->data[index] * number;
84 | }
85 | }
86 |
87 | return result;
88 | };
89 |
90 | static ArrayObject *Array_add(PyObject *o1, PyObject *o2) {
91 | int index;
92 | ArrayObject *result = NULL, *a1, *a2;
93 | a1 = (ArrayObject *)o1;
94 | a2 = (ArrayObject *)o2;
95 |
96 | if (a1->size != a2->size)
97 | return result;
98 |
99 | result = Array_empty(a1->size);
100 | for (index = 0; index < a1->size; index++) {
101 | result->data[index] = a1->data[index] + a2->data[index];
102 | }
103 |
104 | return result;
105 | };
106 |
107 | static ArrayObject *Array_divide(PyObject *o1, PyObject *o2) {
108 | int index;
109 | double number;
110 | ArrayObject *result = NULL, *a1;
111 |
112 | if (!PyNumber_Check(o2)) {
113 | return result;
114 | }
115 | a1 = (ArrayObject *)o1;
116 | number = PyFloat_AsDouble(o2);
117 | result = Array_empty(a1->size);
118 | for (index = 0; index < a1->size; index++) {
119 | result->data[index] = a1->data[index] / number;
120 | }
121 |
122 | return result;
123 | };
124 |
125 | Py_ssize_t Array_length(ArrayObject *arr) {
126 | Py_ssize_t result = (Py_ssize_t)arr->size;
127 | return result;
128 | };
129 |
130 | PyObject *Array_item(ArrayObject *arr, Py_ssize_t index) {
131 | if (index < 0 || index >= arr->size) {
132 | PyErr_SetString(PyExc_IndexError, "index out of range");
133 | return NULL;
134 | }
135 | return PyFloat_FromDouble(arr->data[index]);
136 | };
137 |
138 | int Array_setitem(ArrayObject *arr, Py_ssize_t index, PyObject *item) {
139 | if (index < 0 || index >= arr->size) {
140 | PyErr_SetString(PyExc_IndexError, "index out of range");
141 | return -1;
142 | }
143 | double value = PyFloat_AsDouble(item);
144 | if (PyErr_Occurred())
145 | return -1;
146 | arr->data[index] = value;
147 | return 0;
148 | }
149 |
150 |
/* Methods of the `array` type; `tolist` takes no argument (METH_NOARGS). */
static PyMethodDef Array_methods[] = {
    {"tolist", (PyCFunction)Array_tolist, METH_NOARGS,
     "Return the data as a list"},
    {NULL} /* Sentinel */
};
156 |
/* Slot table referenced by Array_type_spec: binds each type slot to its
 * implementation in this file. The casts adapt the ArrayObject-typed
 * signatures to the generic slot function types. */
static PyType_Slot Array_type_slots[] = {
    {Py_tp_new, PyType_GenericNew},
    {Py_tp_init, (initproc)Array_init},
    {Py_tp_dealloc, (destructor)Array_dealloc},
    {Py_tp_members, Array_members},
    {Py_tp_methods, Array_methods},
    /* number protocol: array * number, array + array, array / number */
    {Py_nb_multiply, (binaryfunc)Array_multiply},
    {Py_nb_add, (binaryfunc)Array_add},
    {Py_nb_true_divide, (binaryfunc)Array_divide},
    /* sequence protocol: len(), indexing and item assignment */
    {Py_sq_length, (lenfunc)Array_length},
    {Py_sq_item, (ssizeargfunc)Array_item},
    {Py_sq_ass_item, (ssizeobjargproc)Array_setitem},
    {0, NULL},
};
171 |
172 | static PyType_Spec Array_type_spec = {
173 | .name = "_piconumpy_cpython_capi.array",
174 | .basicsize = sizeof(ArrayObject),
175 | .itemsize = 0,
176 | .flags = Py_TPFLAGS_DEFAULT,
177 | .slots = Array_type_slots,
178 | };
179 |
180 | PyTypeObject *ptr_ArrayType;
181 |
182 | static ArrayObject *Array_empty(int size) {
183 | ArrayObject *new_array = NULL;
184 | new_array = PyObject_New(ArrayObject, ptr_ArrayType);
185 | new_array->size = size;
186 | new_array->data = (double *)malloc(size * sizeof(double));
187 | if (new_array->data == NULL) {
188 | PyErr_NoMemory();
189 | return NULL;
190 | }
191 | return new_array;
192 | };
193 |
194 | static ArrayObject *empty(PyObject *module, PyObject *arg) {
195 | int size;
196 | size = (int)PyLong_AsLong(arg);
197 | return Array_empty(size);
198 | };
199 |
200 | static ArrayObject *zeros(PyObject *module, PyObject *arg) {
201 | int size;
202 | size = (int)PyLong_AsLong(arg);
203 | ArrayObject *result = Array_empty(size);
204 | for(int i=0; idata[i] = 0;
206 | return result;
207 | };
208 |
209 |
210 | static PyMethodDef module_methods[] = {
211 | {"empty", (PyCFunction)empty, METH_O, "Create an empty array."},
212 | {"zeros", (PyCFunction)zeros, METH_O, "Createa zero-filled array."},
213 | {NULL, NULL, 0, NULL} /* Sentinel */
214 | };
215 |
216 | static PyModuleDef piconumpymodule = {
217 | PyModuleDef_HEAD_INIT, .m_name = "_piconumpy_cpython_capi",
218 | .m_doc = "piconumpy implemented with the CPython C-API.", .m_size = -1,
219 | module_methods};
220 |
221 | PyMODINIT_FUNC PyInit__piconumpy_cpython_capi(void) {
222 | PyObject *m;
223 |
224 | m = PyModule_Create(&piconumpymodule);
225 | if (m == NULL)
226 | return NULL;
227 |
228 | ptr_ArrayType = (PyTypeObject *)PyType_FromSpec(&Array_type_spec);
229 | if (PyModule_AddObject(m, "array", (PyObject *)ptr_ArrayType) < 0) {
230 | Py_DECREF(ptr_ArrayType);
231 | Py_DECREF(m);
232 | return NULL;
233 | }
234 |
235 | return m;
236 | }
237 |
--------------------------------------------------------------------------------
/piconumpy/_piconumpy_cython.pyx:
--------------------------------------------------------------------------------
1 |
2 |
3 | __all__ = ["array"]
4 |
5 | import array as _array
6 |
7 |
8 | # class array(_array.array):
9 |
10 | # def __new__(cls, *args):
11 | # return super().__new__(cls, "f", *args)
12 |
13 | # def __init__(self, data):
14 | # self.size = len(self)
15 |
16 | # def __add__(self, other):
17 | # return self.__class__(number + other[index] for index, number in enumerate(self))
18 |
19 | # def __mul__(self, other):
20 | # return self.__class__(other * number for number in self)
21 |
22 | # __rmul__ = __mul__
23 |
24 | # def __truediv__(self, other):
25 | # return self.__class__(number / other for number in self)
26 |
27 |
class array:
    """One-dimensional array of floats backed by a standard-library ``array``.

    Attributes:
        data: the underlying ``array.array`` holding the values.
        size: number of elements, fixed when the object is created.
    """

    __slots__ = ["data", "size"]

    def __init__(self, data):
        # Typecode "d" (C double) keeps full Python-float precision; "f"
        # would round every value to single precision.
        self.data = _array.array("d", data)
        self.size = len(self.data)

    def __add__(self, other):
        """Element-wise sum; raises ValueError if the sizes differ."""
        if not isinstance(other, array):
            return NotImplemented
        if other.size != self.size:
            # Do not silently truncate to the shorter operand.
            raise ValueError("arrays must have the same size")
        return array(left + right for left, right in zip(self.data, other.data))

    def __mul__(self, other):
        """Multiply every element by the scalar ``other``."""
        return array(other * number for number in self.data)

    __rmul__ = __mul__

    def __truediv__(self, other):
        """Divide every element by the scalar ``other``."""
        return array(number / other for number in self.data)

    def tolist(self):
        """Return the values as a new list of floats."""
        return list(self.data)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, value):
        self.data[index] = value
59 |
cpdef empty(size):
    """Return a new array of ``size`` elements (zero-filled here)."""
    placeholder = [0] * size
    return array(placeholder)

cpdef zeros(size):
    """Return a new array of ``size`` elements, all equal to zero."""
    zero_values = [0] * size
    return array(zero_values)
65 |
--------------------------------------------------------------------------------
/piconumpy/_piconumpy_hpy.c:
--------------------------------------------------------------------------------
1 | #include "hpy.h"
2 |
3 | typedef struct {
4 | /* Type-specific fields go here. */
5 | double *data;
6 | int size;
7 | } ArrayObject;
8 |
9 | HPyType_HELPERS(ArrayObject)
10 |
11 | HPyDef_SLOT(Array_destroy, HPy_tp_destroy)
12 | static void Array_destroy_impl(void *obj) {
13 | ArrayObject *self = (ArrayObject *)obj;
14 | free(self->data);
15 | }
16 |
17 | HPyDef_SLOT(Array_init, HPy_tp_init)
18 | static int Array_init_impl(HPyContext *ctx, HPy h_self, const HPy *args,
19 | HPy_ssize_t nargs, HPy kw) {
20 | static const char *kwlist[] = {"data", NULL};
21 | ArrayObject *self = ArrayObject_AsStruct(ctx, h_self);
22 | int index;
23 | HPy h_data = HPy_NULL;
24 | HPyTracker ht;
25 |
26 | if (!HPyArg_ParseKeywordsDict(ctx, &ht, args, nargs, kw, "|O", kwlist, &h_data)) {
27 | return -1;
28 | }
29 |
30 | if (!HPyList_Check(ctx, h_data)) {
31 | HPyTracker_Close(ctx, ht);
32 | HPyErr_SetString(ctx, ctx->h_TypeError, "parameter must be a list");
33 | return -1;
34 | }
35 |
36 | self->size = (int)HPy_Length(ctx, h_data);
37 |
38 | self->data = (double *)malloc(self->size * sizeof(double));
39 | if (self->data == NULL) {
40 | HPyTracker_Close(ctx, ht); // done with h_data
41 | HPyErr_NoMemory(ctx);
42 | return -1;
43 | }
44 |
45 | // XXX: this is not doing any error check (but the original C-API version
46 | // doesn't either :shrug:
47 | for (index = 0; index < self->size; index++) {
48 | HPy h_item = HPy_GetItem_i(ctx, h_data, index);
49 | self->data[index] = HPyFloat_AsDouble(ctx, h_item);
50 | HPy_Close(ctx, h_item);
51 | }
52 |
53 | HPyTracker_Close(ctx, ht); // done with h_data
54 | return 0;
55 | }
56 |
57 | HPyDef_MEMBER(Array_size, "size", HPyMember_INT, offsetof(ArrayObject, size),
58 | .doc = "size of the array")
59 |
60 | HPyDef_METH(Array_tolist, "tolist", HPyFunc_NOARGS,
61 | .doc = "Return the data as a list")
62 | static HPy Array_tolist_impl(HPyContext *ctx, HPy h_self) {
63 | ArrayObject *self = ArrayObject_AsStruct(ctx, h_self);
64 | int index;
65 | HPyListBuilder builder = HPyListBuilder_New(ctx, self->size);
66 | for (index = 0; index < self->size; index++) {
67 | HPy h_item = HPyFloat_FromDouble(ctx, self->data[index]);
68 | HPyListBuilder_Set(ctx, builder, index, h_item);
69 | HPy_Close(ctx, h_item);
70 | }
71 | return HPyListBuilder_Build(ctx, builder);
72 | };
73 |
74 | static HPy Array_empty(HPyContext *ctx, int size, ArrayObject **result);
75 |
76 | HPyDef_SLOT(Array_multiply, HPy_nb_multiply)
77 | static HPy Array_multiply_impl(HPyContext *ctx, HPy h1, HPy h2) {
78 | int index;
79 | double number;
80 | HPy h_number = HPy_NULL;
81 | ArrayObject *result = NULL, *arr = NULL;
82 | HPy h_result = HPy_NULL;
83 |
84 | if (HPyNumber_Check(ctx, h2)) {
85 | h_number = h2;
86 | arr = ArrayObject_AsStruct(ctx, h1);
87 | } else if (HPyNumber_Check(ctx, h1)) {
88 | h_number = h1;
89 | arr = ArrayObject_AsStruct(ctx, h2);
90 | }
91 |
92 | if (HPyNumber_Check(ctx, h1) || HPyNumber_Check(ctx, h2)) {
93 | number = HPyFloat_AsDouble(ctx, h_number);
94 | h_result = Array_empty(ctx, arr->size, &result);
95 | for (index = 0; index < arr->size; index++) {
96 | result->data[index] = arr->data[index] * number;
97 | }
98 | }
99 | /* XXX exception if result is still NULL here */
100 | return h_result;
101 | };
102 |
103 | HPyDef_SLOT(Array_add, HPy_nb_add)
104 | static HPy Array_add_impl(HPyContext *ctx, HPy h1, HPy h2) {
105 | int index;
106 | ArrayObject *result = NULL, *a1, *a2;
107 | HPy h_result = HPy_NULL;
108 | a1 = ArrayObject_AsStruct(ctx, h1);
109 | a2 = ArrayObject_AsStruct(ctx, h2);
110 |
111 | if (a1->size != a2->size)
112 | return HPy_NULL; /* XXX should raise an exception */
113 |
114 | h_result = Array_empty(ctx, a1->size, &result);
115 | for (index = 0; index < a1->size; index++) {
116 | result->data[index] = a1->data[index] + a2->data[index];
117 | }
118 | return h_result;
119 | };
120 |
121 | HPyDef_SLOT(Array_divide, HPy_nb_true_divide)
122 | static HPy Array_divide_impl(HPyContext *ctx, HPy h1, HPy h2) {
123 | int index;
124 | double number;
125 | ArrayObject *result = NULL, *a1;
126 | HPy h_result = HPy_NULL;
127 |
128 | if (!HPyNumber_Check(ctx, h2)) {
129 | return HPy_NULL;
130 | }
131 | a1 = ArrayObject_AsStruct(ctx, h1);
132 | number = HPyFloat_AsDouble(ctx, h2);
133 | h_result = Array_empty(ctx, a1->size, &result);
134 | for (index = 0; index < a1->size; index++) {
135 | result->data[index] = a1->data[index] / number;
136 | }
137 | return h_result;
138 | };
139 |
140 |
141 | HPyDef_SLOT(Array_length, HPy_sq_length)
142 | HPy_ssize_t Array_length_impl(HPyContext *ctx, HPy h_arr) {
143 | ArrayObject *arr = ArrayObject_AsStruct(ctx, h_arr);
144 | HPy_ssize_t result = (HPy_ssize_t)arr->size;
145 | return result;
146 | };
147 |
148 |
149 | HPyDef_SLOT(Array_item, HPy_sq_item)
150 | HPy Array_item_impl(HPyContext *ctx, HPy h_arr, HPy_ssize_t index) {
151 | ArrayObject *arr = ArrayObject_AsStruct(ctx, h_arr);
152 | if (index < 0 || index >= arr->size) {
153 | HPyErr_SetString(ctx, ctx->h_IndexError, "index out of range");
154 | return HPy_NULL;
155 | }
156 | HPy item = HPyFloat_FromDouble(ctx, arr->data[index]);
157 | return item;
158 | };
159 |
160 | HPyDef_SLOT(Array_setitem, HPy_sq_ass_item)
161 | int Array_setitem_impl(HPyContext *ctx, HPy h_arr, HPy_ssize_t index, HPy h_item) {
162 | ArrayObject *arr = ArrayObject_AsStruct(ctx, h_arr);
163 | if (index < 0 || index >= arr->size) {
164 | HPyErr_SetString(ctx, ctx->h_IndexError, "index out of range");
165 | return -1;
166 | }
167 | double value = HPyFloat_AsDouble(ctx, h_item);
168 | if (HPyErr_Occurred(ctx))
169 | return -1;
170 | arr->data[index] = value;
171 | return 0;
172 | };
173 |
174 |
175 | HPyDef_SLOT_IMPL(Array_new, HPyType_GenericNew, HPy_tp_new)
176 |
177 | static HPyDef *Array_defines[] = {
178 | // slots
179 | &Array_new,
180 | &Array_init,
181 | &Array_destroy,
182 | &Array_add,
183 | &Array_multiply,
184 | &Array_divide,
185 | &Array_item,
186 | &Array_setitem,
187 | &Array_length,
188 | // members
189 | &Array_size,
190 | // methods
191 | &Array_tolist,
192 | NULL
193 | };
194 |
195 | static HPyType_Spec Array_type_spec = {
196 | .name = "_piconumpy_hpy.array",
197 | .basicsize = sizeof(ArrayObject),
198 | .itemsize = 0,
199 | .flags = HPy_TPFLAGS_DEFAULT,
200 | .defines = Array_defines,
201 | };
202 |
203 | static HPyGlobal ArrayType;
204 |
205 | static HPy Array_empty(HPyContext *ctx, int size, ArrayObject **result) {
206 | ArrayObject *new_array;
207 | HPy h_ArrayType = HPyGlobal_Load(ctx, ArrayType);
208 | HPy h_new_array = HPy_New(ctx, h_ArrayType, &new_array);
209 | HPy_Close(ctx, h_ArrayType);
210 | new_array->size = size;
211 | new_array->data = (double *)malloc(size * sizeof(double));
212 | if (new_array->data == NULL) {
213 | return HPyErr_NoMemory(ctx);
214 | }
215 | *result = new_array;
216 | return h_new_array;
217 | };
218 |
219 | HPyDef_METH(empty, "empty", HPyFunc_O, .doc = "Create an empty array")
220 | static HPy empty_impl(HPyContext *ctx, HPy module, HPy arg) {
221 | int size;
222 | ArrayObject *result;
223 | size = (int)HPyLong_AsLong(ctx, arg);
224 | return Array_empty(ctx, size, &result);
225 | };
226 |
227 | HPyDef_METH(zeros, "zeros", HPyFunc_O, .doc = "Create a zero-filled array")
228 | static HPy zeros_impl(HPyContext *ctx, HPy module, HPy arg) {
229 | int size;
230 | ArrayObject *result = NULL;
231 | size = (int)HPyLong_AsLong(ctx, arg);
232 | HPy h_result = Array_empty(ctx, size, &result);
233 | if (HPy_IsNull(h_result))
234 | return HPy_NULL;
235 | for(int i=0; idata[i] = 0;
237 | return h_result;
238 | };
239 |
240 | HPyDef_SLOT(_piconumpy_hpy_exec, HPy_mod_exec)
241 | static int _piconumpy_hpy_exec_impl(HPyContext *ctx, HPy hm) {
242 | HPy h_ArrayType = HPyType_FromSpec(ctx, &Array_type_spec, NULL);
243 | if (HPy_IsNull(h_ArrayType)) {
244 | return 1;
245 | }
246 | if (HPy_SetAttr_s(ctx, hm, "array", h_ArrayType) != 0) {
247 | HPy_Close(ctx, h_ArrayType);
248 | return 1;
249 | }
250 | HPyGlobal_Store(ctx, &ArrayType, h_ArrayType);
251 | return 0;
252 | }
253 |
254 | static HPyDef *module_defines[] = {
255 | &_piconumpy_hpy_exec,
256 | &empty,
257 | &zeros,
258 | NULL
259 | };
260 |
261 | static HPyGlobal *module_globals[] = {
262 | &ArrayType,
263 | NULL
264 | };
265 |
266 | static HPyModuleDef piconumpymodule = {
267 | .doc = "piconumpy implemented with the HPy API.",
268 | .defines = module_defines,
269 | .globals = module_globals,
270 | };
271 |
272 | HPy_MODINIT(_piconumpy_hpy, piconumpymodule)
273 |
--------------------------------------------------------------------------------
/piconumpy/bench.py:
--------------------------------------------------------------------------------
1 | from transonic.util import timeit
2 |
3 |
def timeit_verbose(
    stmt,
    setup="pass",
    total_duration=2,
    globals=None,
    norm=None,
    name=None,
    print_time=False,
    max_length_name=33,
):
    """Time ``stmt`` with ``timeit`` and print the result relative to ``norm``.

    Args:
        stmt: statement to time, forwarded to ``timeit``.
        setup: setup statement forwarded to ``timeit``.
        total_duration: forwarded to ``timeit``.
        globals: namespace forwarded to ``timeit``.
        norm: reference duration; when ``None`` the measured result is used.
        name: label to print; defaults to the part of ``stmt`` before the
            first ``"("``.
        print_time: also print the raw duration in seconds.
        max_length_name: minimum width used to pad the printed label.

    Returns:
        The value returned by ``timeit``.
    """
    result = timeit(
        stmt, setup=setup, total_duration=total_duration, globals=globals
    )

    # Without an explicit reference the measurement is its own norm.
    norm_given = norm is not None
    if not norm_given:
        norm = result

    label = stmt.split("(")[0] if name is None else name
    label = f"{{:{max_length_name}s}}".format(label)

    raw_time = f" = {result:7.3g} s" if print_time else ""

    print(f"{label}: {result/norm:5.3g} * norm{raw_time}")
    if not (norm_given or print_time):
        print(f"norm = {norm:5.3g} s")

    return result
39 |
--------------------------------------------------------------------------------
/piconumpy/purepy.py:
--------------------------------------------------------------------------------
class array:
    """One-dimensional array of floats backed by a plain Python list.

    Attributes:
        data: list of ``float`` values.
        size: number of elements.
    """

    __slots__ = ["data", "size"]

    def __init__(self, data):
        self.data = [float(number) for number in data]
        self.size = len(self.data)

    def __add__(self, other):
        """Element-wise sum; raises ValueError if the sizes differ."""
        if not isinstance(other, array):
            return NotImplemented
        if other.size != self.size:
            # Do not silently truncate to the shorter operand.
            raise ValueError("arrays must have the same size")
        return array(left + right for left, right in zip(self.data, other.data))

    def __mul__(self, other):
        """Multiply every element by the scalar ``other``."""
        return array(other * number for number in self.data)

    __rmul__ = __mul__

    def __truediv__(self, other):
        """Divide every element by the scalar ``other``."""
        return array(number / other for number in self.data)

    def tolist(self):
        """Return a copy of the values as a list of floats."""
        return list(self.data)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, value):
        # Keep the "every element is a float" invariant set up by __init__.
        if isinstance(index, slice):
            self.data[index] = [float(number) for number in value]
            # Slice assignment may change the length of the list.
            self.size = len(self.data)
        else:
            self.data[index] = float(value)
32 |
def empty(size):
    """Return a new array of ``size`` elements (zero-filled here)."""
    placeholder = [0] * size
    return array(placeholder)

def zeros(size):
    """Return a new array of ``size`` elements, all equal to zero."""
    zero_values = [0] * size
    return array(zero_values)
38 |
39 |
--------------------------------------------------------------------------------
/piconumpy/purepy_array.py:
--------------------------------------------------------------------------------
1 | import array as _array
2 |
3 |
class array(_array.array):
    """``array.array`` subclass with element-wise arithmetic.

    ``+`` adds element by element (instead of concatenating); ``*`` and ``/``
    apply a scalar to every element.

    Attributes:
        size: number of elements at construction time.
    """

    __slots__ = ["size"]

    def __new__(cls, *args):
        # Typecode "d" (C double) keeps full Python-float precision; "f"
        # would round every value to single precision.
        return super().__new__(cls, "d", *args)

    def __init__(self, data):
        self.size = len(self)

    def __add__(self, other):
        """Element-wise sum; raises ValueError if the lengths differ."""
        if len(other) != len(self):
            # Do not silently truncate to the shorter operand.
            raise ValueError("arrays must have the same size")
        return self.__class__(left + right for left, right in zip(self, other))

    def __mul__(self, other):
        """Multiply every element by the scalar ``other``."""
        return self.__class__(other * number for number in self)

    __rmul__ = __mul__

    def __truediv__(self, other):
        """Divide every element by the scalar ``other``."""
        return self.__class__(number / other for number in self)
25 |
def empty(size):
    """Return a new array of ``size`` elements (zero-filled here)."""
    placeholder = [0] * size
    return array(placeholder)

def zeros(size):
    """Return a new array of ``size`` elements, all equal to zero."""
    zero_values = [0] * size
    return array(zero_values)
31 |
--------------------------------------------------------------------------------
/piconumpy/test_cpython_capi.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from . import _piconumpy_cpython_capi
5 |
6 |
class Tests:
    """Behavioural checks shared by every piconumpy implementation.

    Subclasses override the ``piconumpy`` class attribute with the module
    under test; that module must provide ``array``, ``empty`` and ``zeros``.
    """

    piconumpy = _piconumpy_cpython_capi

    def _array(self, *args):
        """Build an array with the implementation under test."""
        return self.piconumpy.array(*args)

    def test_empty(self):
        arr = self.piconumpy.empty(12)
        assert isinstance(arr, self.piconumpy.array)
        assert arr.size == 12

    def test_zeros(self):
        arr = self.piconumpy.zeros(5)
        assert isinstance(arr, self.piconumpy.array)
        assert arr.size == 5
        assert arr.tolist() == [0, 0, 0, 0, 0]

    def test_init_array(self):
        arr = self._array([1.0, 2.0])
        assert arr.size == 2

    def test_getitem_setitem(self):
        arr = self._array([12.0, 34.0])
        assert arr[0] == 12.0
        assert arr[1] == 34.0
        arr[0] = 56
        arr[1] = 78
        assert arr[0] == 56.0
        assert arr[1] == 78.0
        # Reading or writing past the end must raise IndexError.
        with pytest.raises(IndexError):
            arr[2]
        with pytest.raises(IndexError):
            arr[2] = 3

    def test_init_array_numpy(self):
        reference = np.array([1.0, 2.0, 0.0, 0.0])
        arr = self._array(reference.tolist())
        assert arr.size == reference.size
        assert arr.tolist() == reference.tolist()

    def test_multiply(self):
        arr = self._array([1.0, 2.0])
        # works with PyPy 7.3.6-alpha0
        assert (arr * 3).tolist() == [3.0, 6.0]
        # error with PyPy 7.3.6-alpha0
        assert (2 * arr).tolist() == [2.0, 4.0]

    def test_add(self):
        arr = self._array([1.0, 2.0])
        # works with PyPy 7.3.6-alpha0
        assert (arr + arr * 2).tolist() == [3.0, 6.0]
        # error with PyPy 7.3.6-alpha0
        assert (arr + 2 * arr).tolist() == [3.0, 6.0]

    def test_divide(self):
        arr = self._array([1.0, 2.0])
        assert (arr / 2).tolist() == [0.5, 1.0]

    def test_sequence(self):
        arr = self._array([1.0, 2.0])
        assert len(arr) == 2
        assert arr[1] == 2.0
69 |
--------------------------------------------------------------------------------
/piconumpy/test_cython.py:
--------------------------------------------------------------------------------
1 | from .test_cpython_capi import Tests as _Tests
2 |
class Tests(_Tests):
    """Run the shared test-suite against the Cython implementation."""

    # The import binds the module under test as the class attribute.
    from . import _piconumpy_cython as piconumpy
5 |
--------------------------------------------------------------------------------
/piconumpy/test_hpy_cpy_abi.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from .test_cpython_capi import Tests as _Tests
4 |
# The HPy extension is optional: fall back to a falsy marker when it cannot
# be imported so that the whole class is skipped rather than erroring.
try:
    from . import _piconumpy_hpy as piconumpy_cpy_abi
except ImportError:
    piconumpy_cpy_abi = False


@pytest.mark.skipif(not piconumpy_cpy_abi, reason="ImportError _piconumpy_hpy")
class TestsCPyABI(_Tests):
    """Run the shared test-suite against the HPy extension module."""

    piconumpy = piconumpy_cpy_abi
14 |
--------------------------------------------------------------------------------
/piconumpy/test_hpy_universal.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | from .test_cpython_capi import Tests as _Tests
5 |
# The HPy extension is optional: fall back to a falsy marker when it cannot
# be imported so that the whole class is skipped rather than erroring.
try:
    from . import _piconumpy_hpy
except ImportError:
    _piconumpy_hpy = False


# NOTE(review): this class shares its name (TestsCPyABI) and its imported
# module with test_hpy_cpy_abi.py - confirm that the universal build is what
# actually gets exercised here.
@pytest.mark.skipif(
    not _piconumpy_hpy, reason="ImportError piconumpy HPy Universal"
)
class TestsCPyABI(_Tests):
    """Run the shared test-suite against the HPy extension module."""

    piconumpy = _piconumpy_hpy
17 |
--------------------------------------------------------------------------------
/piconumpy/test_purepy.py:
--------------------------------------------------------------------------------
1 | from .test_cpython_capi import Tests as _Tests
2 |
class Tests(_Tests):
    """Run the shared test-suite against the pure-Python (list) version."""

    # The import binds the module under test as the class attribute.
    from . import purepy as piconumpy
5 |
--------------------------------------------------------------------------------
/piconumpy/test_purepy_array.py:
--------------------------------------------------------------------------------
1 | from .test_cpython_capi import Tests as _Tests
2 |
class Tests(_Tests):
    """Run the shared test-suite against the ``array.array`` based version."""

    # The import binds the module under test as the class attribute.
    from . import purepy_array as piconumpy
5 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "piconumpy"
3 | version = "0.0.0"
4 | description = "An experiment about Numpy and pyhandle/hpy."
5 | authors = [
6 | {name = "Pierre Augier", email = "pierre.augier@univ-grenoble-alpes.fr"},
7 | ]
8 | license = {text = "BSD 3-Clause"}
9 | readme = "README.md"
10 | keywords = ["numpy", "hpy", "PyPy"]
11 | requires-python = ">=3.8"
12 |
13 | [project.urls]
14 | homepage = "https://github.com/paugier/piconumpy"
15 | repository = "https://github.com/paugier/piconumpy"
16 | documentation = "https://github.com/paugier/piconumpy"
17 |
18 | [project.optional-dependencies]
19 | dev = ['transonic', 'numpy', 'pytest', 'pythran']
20 | full = ['black']
21 |
22 | [build-system]
23 | requires = [
24 | "setuptools >= 35.0.2",
25 | "wheel",
26 | "cython",
27 | "hpy >= 0.9.0"
28 | ]
29 |
30 | [tool.black]
31 | line-length = 82
32 | target_version = ['py310']
33 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages, Extension
2 | from Cython.Build import cythonize
3 |
# Extension built directly against the CPython C-API.
_capi_extension = Extension(
    "piconumpy._piconumpy_cpython_capi",
    ["piconumpy/_piconumpy_cpython_capi.c"],
    extra_compile_args=[
        "-Wfatal-errors",  # stop after one error (unrelated to warnings)
        "-Werror",  # turn warnings into errors (all, for now)
    ],
)

# Extension built through HPy; handed to the ``hpy_ext_modules`` keyword.
_hpy_extension = Extension(
    "piconumpy._piconumpy_hpy",
    ["piconumpy/_piconumpy_hpy.c"],
    extra_compile_args=[
        "-Wfatal-errors",
        "-Werror",
    ],
)

setup(
    name="piconumpy",
    packages=find_packages(exclude=["bench"]),
    # Workaround: HPy adds files to the sources list and uses absolute paths.
    # Newer setuptools complain about that if package data should be included.
    # Therefore, we explicitly disable this here.
    include_package_data=False,
    ext_modules=[
        _capi_extension,
        *cythonize("piconumpy/_piconumpy_cython.pyx"),
    ],
    hpy_ext_modules=[_hpy_extension],
)
33 |
--------------------------------------------------------------------------------