├── .github └── workflows │ └── tests.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── bench ├── Makefile ├── bench.jl ├── bench_array1d.py ├── bench_cpy_vs_hpy.py ├── make_bench_piconumpy.py ├── profile_piconumpy.py └── without_numpy │ ├── julia_callback.jl │ └── purepython_callback.py ├── piconumpy ├── __init__.py ├── _piconumpy_cpython_capi.c ├── _piconumpy_cython.pyx ├── _piconumpy_hpy.c ├── bench.py ├── purepy.py ├── purepy_array.py ├── test_cpython_capi.py ├── test_cython.py ├── test_hpy_cpy_abi.py ├── test_hpy_universal.py ├── test_purepy.py └── test_purepy_array.py ├── pyproject.toml └── setup.py /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 5 10 | matrix: 11 | python-version: ['3.8', '3.9', '3.10'] 12 | 13 | steps: 14 | 15 | - name: Setup Julia 16 | uses: julia-actions/setup-julia@v1 17 | 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | 23 | - name: Install dependencies 24 | run: | 25 | # install HPy from source if depending on a dev version 26 | # git clone -b master --single-branch https://github.com/hpyproject/hpy 27 | # git checkout 1234abcd 28 | # cd hpy 29 | # pip install . 
30 | pip install numpy cython pytest transonic pythran 'setuptools>=60.2' 'hpy>=0.9.0rc1' 31 | 32 | - name: Checkout 33 | uses: actions/checkout@v3 34 | with: 35 | fetch-depth: 0 36 | 37 | - name: build 38 | run: | 39 | python setup.py develop 40 | python setup.py --hpy-abi=universal develop 41 | 42 | - name: Run tests 43 | run: | 44 | pytest -s 45 | 46 | - name: Run bench 47 | run: | 48 | cd bench 49 | make bench_hpy 50 | make 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.egg-info/* 3 | */__pycache__/* 4 | build 5 | .eggs 6 | 7 | *.so 8 | .vscode 9 | 10 | **/tmp*.* 11 | **/tmp*.* 12 | 13 | *_cython.c -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Pierre Augier 4 | Copyright (c) 2021, 2023, Oracle and/or it's affiliates 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | ifeq ($(PYTHON),) 3 | PYTHON := python 4 | endif 5 | 6 | all: 7 | make develop_universal 8 | ifeq ($(PYTHON),python) 9 | make build_ext 10 | endif 11 | 12 | develop: 13 | $(PYTHON) setup.py develop 14 | 15 | develop_universal: 16 | $(PYTHON) setup.py --hpy-abi=universal develop 17 | rm -f piconumpy/_piconumpy_hpy.py 18 | 19 | pip: 20 | $(PYTHON) -m pip install -e .[dev] 21 | 22 | build_ext_universal: 23 | $(PYTHON) setup.py --hpy-abi=universal build_ext -if 24 | 25 | build_ext: 26 | $(PYTHON) setup.py build_ext -if 27 | 28 | full: 29 | $(PYTHON) -m pip install -e .[full] 30 | 31 | format: 32 | black -l 82 setup.py piconumpy/*.py 33 | clang-format-7 -i piconumpy/*cpython_capi.c 34 | 35 | tests: 36 | $(PYTHON) -m pytest piconumpy -s 37 | 38 | clean: 39 | rm -f piconumpy/*.so 40 | rm -rf build dist piconumpy.egg-info 41 | 42 | black: 43 | black -l 82 . 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PicoNumpy 2 | 3 | [![Build Status](https://travis-ci.org/paugier/piconumpy.svg?branch=master)](https://travis-ci.org/paugier/piconumpy) 4 | 5 | **An experiment about Numpy and HPy** 6 | 7 | The C API of CPython is one of the causes of the success of Python in scientific 8 | computing. In particular, Numpy (and all the Python scientific stack) is built 9 | on top of this API. However, some characteristics of this API start to be an 10 | issue for the future of scientific Python (see [1], [2], [HPy]). 11 | 12 | [1]: https://faster-cpython.readthedocs.io/ 13 | [2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html 14 | [HPy]: https://github.com/hpyproject/hpy 15 | 16 | [HPy] is a very ambitious and promising project to design a new and better C 17 | API for interacting with Python interpreters. It should allow people to write 18 | Python extensions efficient on different interpreters (CPython, PyPy, Jython, 19 | IronPython, GraalPython, RustPython, etc.). 20 | 21 | PyPy would be especially useful for some scientific applications. For example 22 | for Integration and ODEs 23 | ([scipy.integrate](https://docs.scipy.org/doc/scipy/reference/integrate.html)), 24 | for which there are a lot of callbacks of very small functions. This repository 25 | contains [a tiny benchmark](bench/without_numpy) showing that as long as Numpy 26 | is not used, PyPy is very efficient for such tasks. Unfortunately, as soon as 27 | Numpy is used, PyPy becomes very slow! 28 | 29 | [bench/without_numpy]: https://github.com/paugier/piconumpy/blob/master/bench/without_numpy/ 30 | 31 | With PicoNumpy, I'd like to study if [HPy] could help for codes using Numpy and 32 | callbacks of small Python functions. 
33 | 34 | We start with a [simple but realistic benchmark](bench/bench_array1d.py) (the 35 | slow loops only involve pure-Python and very simple Numpy). We then wrote a 36 | tiny ("pico") implementation of a Numpy-like object (just sufficient to run the 37 | benchmark). 38 | 39 | The next task is to reimplement PicoNumpy using [HPy] and to check if PyPy 40 | could efficiently accelerate [our main benchmark](bench/bench_array1d.py). 41 | 42 | PicoNumpy is really tiny. It just provides an `array` class (one-dimensional) 43 | supporting: 44 | 45 | - Instantiation from a list of floats 46 | - Elementwise multiplication and division by a float 47 | - Elementwise addition (of 2 arrays) 48 | - Indexing 49 | - `len` 50 | 51 | A good acceleration by PyPy of our example would be a great proof that the 52 | scientific Python community has to invest time and energy in [HPy]. 53 | 54 | In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for 55 | the benchmark and comparison. With Transonic-Pythran, we typically get a 50x 56 | speedup compared to CPython (and ~400x versus PyPy, which is still very slow for 57 | such codes using Numpy). 58 | 59 | [bench/bench_array1d.py]: https://github.com/paugier/piconumpy/blob/master/bench/bench_array1d.py 60 | 61 | ## Install and run the benchmarks 62 | 63 | **Warning:** PicoNumpy now depends on HPy, which still has to be installed from 64 | the [Git repository](https://github.com/hpyproject/hpy). For now, the 65 | installation is a bit more complex than what is described here (more about this 66 | [here](#more-precise-notes-on-how-to-install-and-run-the-benchmarks-with-PyPy)). 67 | 68 | `make` should install the package in editable mode. `cd bench; make` should run 69 | the benchmarks. For the benchmarks, Julia is used for a good comparison point 70 | so the command `julia` has to be available. 
71 | 72 | For PyPy, the Makefiles are sensitive to the environment variable `PYTHON`, so 73 | you could do: 74 | 75 | ```bash 76 | export PYTHON=pypy3 77 | make 78 | cd bench 79 | make 80 | ``` 81 | 82 | The benchmark code can be profiled for the different implementations with the 83 | commands (you need gprof2dot and graphviz): 84 | 85 | ```bash 86 | cd bench 87 | make profile METHOD="cpython-c-api" 88 | make profile METHOD="purepy_array" 89 | make profile METHOD="purepy" 90 | make profile METHOD="cython" 91 | ``` 92 | 93 | ### More precise notes on how to install and run the benchmarks with PyPy 94 | 95 | Download and extract a nightly PyPy build 96 | <https://buildbot.pypy.org/nightly/>. Add to the `PATH` environment variable 97 | the path of the directory containing the `pypy` executable (something like 98 | `~/opt/pypy-c-jit-101190-b661dc329618-linux64/bin`). Then, you should be able 99 | to run: 100 | 101 | ```bash 102 | pypy -m ensurepip 103 | pypy -m pip install pip -U 104 | pypy -m pip install numpy cython pytest transonic pythran 105 | ``` 106 | 107 | We need to install the correct version of HPy for the version of PyPy we are using: 108 | 109 | ```bash 110 | pypy -c "import hpy.universal as u; print(u.get_version())" 111 | ``` 112 | 113 | gives `('0.0.2rc2.dev12+gc9660c2', 'c9660c2')`. 114 | 115 | ```bash 116 | cd ~/Dev/hpy 117 | # update to the correct commit 118 | pypy setup.py develop 119 | ``` 120 | 121 | Now we can build-install PicoNumpy: 122 | 123 | ```bash 124 | cd ~/Dev/piconumpy 125 | pypy setup.py --hpy-abi=universal develop 126 | ``` 127 | 128 | And run the benchmarks with: 129 | 130 | ```bash 131 | export PYTHON="pypy" 132 | make clean 133 | make bench_hpy 134 | make 135 | ``` 136 | 137 | ## A few results 138 | 139 | As of today (6 July 2021), HPy is not yet ready for high performance, but at 140 | least (with HPy 0.0.2) it runs! 
141 | 142 | ### At home (Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz) 143 | 144 | - With CPython 145 | 146 | ``` 147 | Julia : 1 * norm = 0.00196 s 148 | PicoNumpy (CPython C-API) : 9.42 * norm 149 | PicoNumpy (HPy CPy ABI) : 9.95 * norm 150 | PicoNumpy (HPy Universal) : 10.4 * norm 151 | Transonic-Pythran : 0.497 * norm 152 | Numpy : 27.5 * norm 153 | PicoNumpy (purepy) : 37.3 * norm 154 | PicoNumpy (purepy_array) : 37.7 * norm 155 | PicoNumpy (Cython) : 28.9 * norm 156 | ``` 157 | 158 | - With PyPy3 159 | 160 | ``` 161 | Julia : 1 * norm = 0.00196 s 162 | PicoNumpy (CPython C-API) : 34.1 * norm 163 | PicoNumpy (HPy Universal) : 12.8 * norm 164 | Transonic-Pythran : 0.539 * norm 165 | Numpy : 232 * norm 166 | PicoNumpy (purepy) : 4.39 * norm 167 | PicoNumpy (purepy_array) : 6.33 * norm 168 | PicoNumpy (Cython) : 274 * norm 169 | ``` 170 | 171 | #### Simpler benchmarks (bench/bench_cpy_vs_hpy.py) 172 | 173 | - With CPython 174 | 175 | ``` 176 | CPython C-API: 1.92 seconds 177 | HPy [Universal]: 2.08 seconds 178 | HPy [CPy ABI]: 2.02 seconds 179 | ``` 180 | 181 | - With PyPy3 182 | 183 | ``` 184 | CPython C-API: 5.75 seconds 185 | HPy [Universal]: 2.11 seconds 186 | ``` 187 | -------------------------------------------------------------------------------- /bench/Makefile: -------------------------------------------------------------------------------- 1 | 2 | ifeq ($(PYTHON),) 3 | PYTHON := python 4 | endif 5 | 6 | ifeq ($(METHOD),) 7 | METHOD := cpython-c-api 8 | endif 9 | 10 | all: tmp.py tmp_result_julia.txt 11 | $(PYTHON) tmp.py 12 | 13 | tmp.py: bench_array1d.py make_bench_piconumpy.py 14 | $(PYTHON) make_bench_piconumpy.py 15 | 16 | clean: 17 | transonic -cc bench_array1d.py -f 18 | rm -f tmp*.py tmp*.txt 19 | 20 | tmp_result_julia.txt: 21 | julia bench.jl > tmp_result_julia.txt 22 | 23 | profile: tmp.py 24 | $(PYTHON) profile_piconumpy.py $(METHOD) 25 | # with gprof2dot and graphviz (command dot) 26 | gprof2dot -f pstats tmp.pstats | dot -Tpng -o 
# Integrate every sled independently with `N_t` Runge-Kutta steps of size `dt`.
#
# Arguments
#   f               right-hand-side function, forwarded to `runge_kutta_step`
#   x0, y0, u0, v0  per-sled initial values; only `length(x0)` is used to decide
#                   how many sleds are integrated
#   dt              time step
#   N_t             number of steps per sled
#   b               forwarded as the last argument of `runge_kutta_step`, which
#                   passes it on as the second argument of `f`
#
# Returns a single flat vector: the 4 final values of each sled are concatenated
# one after the other (not a vector of 4-element vectors).
function solver(f::Function, x0::Array, y0::Array, u0::Array, v0::Array, dt::Float64, N_t::Int, b = 0.5)
    # Untyped (Vector{Any}) accumulator, grown by `vcat` once per sled.
    solutions = []
    for k in 1:length(x0)
        values_one_step = [x0[k], y0[k], u0[k], v0[k]]
        # `2:N_t + 1` parses as `2:(N_t + 1)`, i.e. exactly N_t iterations.
        for i in 2:N_t + 1
            values_one_step = runge_kutta_step(f, values_one_step, dt, b)
        end
        # NOTE(review): this code is the timing reference ("norm") for all other
        # benchmarks, so its allocation pattern is deliberately left untouched.
        solutions = vcat(solutions, values_one_step)
    end

    return solutions
end
def runge_kutta_step(f, x0, dt, t=None):
    """Advance ``x0`` by one classical 4-stage Runge-Kutta step of size ``dt``.

    ``f`` is called as ``f(t, x)`` with the same ``t`` for every stage; the
    state only needs to support ``+``, ``* number`` and ``/ number``.
    """
    stage_1 = f(t, x0) * dt
    stage_2 = f(t, x0 + stage_1 / 2) * dt
    stage_3 = f(t, x0 + stage_2 / 2) * dt
    stage_4 = f(t, x0 + stage_3) * dt
    # The stages are written ``stage * 2`` (array on the left) on purpose:
    # ``2 * stage`` triggers a PyPy bug, see
    # https://foss.heptapod.net/pypy/pypy/-/issues/3509
    weighted = stage_1 + stage_2 * 2 + stage_3 * 2 + stage_4
    return x0 + weighted / 6
def my_randn(mod, n):
    """Return ``mod.empty(n)`` filled with ``n`` standard-normal samples.

    The samples are drawn one by one with ``random.normalvariate(0, 1)`` and
    stored through item assignment, so ``mod`` only has to provide ``empty``.
    """
    samples = mod.empty(n)
    position = 0
    while position < n:
        samples[position] = random.normalvariate(0, 1)
        position += 1
    return samples
def bench(mod, n_sleds, n_time):
    """Time one run of ``solver`` using the array implementation ``mod``.

    Parameters
    ----------
    mod : module
        Array implementation; must provide ``zeros``, ``empty`` and ``array``.
    n_sleds : int
        Number of independent trajectories to integrate.
    n_time : int
        Number of Runge-Kutta steps per trajectory.

    Returns
    -------
    float
        Elapsed time of the ``solver`` call in seconds. Building the initial
        conditions is not included in the measurement.
    """
    x_init = mod.zeros(n_sleds)
    y_init = my_randn(mod, n_sleds)
    v_init = mod.zeros(n_sleds)
    u_init = mod.zeros(n_sleds)
    for i in range(n_sleds):
        u_init[i] += 3.5
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available; time.time() can jump when the system clock is adjusted.
    start = time.perf_counter()
    solver(mod, board, x_init, y_init, u_init, v_init, 0.01, n_time)
    end = time.perf_counter()
    return end - start
def create_tmp_file(name_module):
    """Write ``tmp_<name>.py``: the benchmark functions bound to one ``array``.

    The generated file contains an import header that brings ``array`` from
    the requested implementation into scope, followed by ``code_functions``
    (the section of ``bench_array1d.py`` between the begin/end markers).

    ``name_module`` is the module name inside the ``piconumpy`` package. A
    leading ``_piconumpy_`` is stripped to build the file name, e.g.
    ``_piconumpy_cython`` -> ``tmp_cython.py`` and
    ``_piconumpy_hpy_universal`` -> ``tmp_hpy_universal.py``.
    """

    if name_module == "_piconumpy_hpy_universal":
        # There is no module called ``_piconumpy_hpy_universal``; this variant
        # imports ``_piconumpy_hpy`` instead.
        # NOTE(review): presumably this resolves to the universal-ABI build
        # when one is installed -- confirm against the build setup.
        code_import = """
from piconumpy import _piconumpy_hpy
array = _piconumpy_hpy.array
"""
    else:
        code_import = f"from piconumpy.{name_module} import array"

    code = (
        f"""
from math import pi, cos, sin
import numpy as np
{code_import}
"""
        + code_functions
    )

    # Derive the short name used in the output file name.
    if name_module.startswith("_piconumpy_"):
        name = name_module[len("_piconumpy_") :]
    else:
        name = name_module

    # Written to the current working directory.
    with open(f"tmp_{name}.py", "w") as file:
        file.write(code)
# Profile one implementation of the benchmark with cProfile.
#
# Usage: python profile_piconumpy.py METHOD
# The profile is written to "tmp.pstats" and the 10 most expensive entries
# (sorted by internal time) are printed.
import sys

import cProfile
import pstats

# Importing bench_array1d also runs its module-level Pythran warm-up.
import bench_array1d
# The tmp_* modules are generated by make_bench_piconumpy.py.
import tmp_purepy
import tmp_purepy_array
import tmp_cython

# Maps the METHOD command-line value to the module whose bench() is profiled.
# NOTE(review): "cpython-c-api" points at bench_array1d, whose `array` comes
# from numpy, so this entry appears to profile the NumPy version rather than
# the C-API PicoNumpy one -- confirm whether that is intended.
methods = {
    "cpython-c-api": bench_array1d,
    "purepy": tmp_purepy,
    "purepy_array": tmp_purepy_array,
    "cython": tmp_cython,
}

# Last command-line argument selects the method; unknown values (including
# the script name itself when no argument is given) fall back to bench_array1d.
module = methods.get(sys.argv[-1], bench_array1d)

n_sleds = 10
n_time = 200

cProfile.runctx(
    "module.bench(n_sleds, n_time)", globals(), locals(), "tmp.pstats"
)

s = pstats.Stats("tmp.pstats")
s.strip_dirs().sort_stats("time").print_stats(10)
def rober(u):
    """Evaluate the three-component rate function used by the callback benchmark.

    ``u`` is any iterable of three numbers; a 3-tuple of derivatives is
    returned. (The constants look like those of the Robertson kinetics
    problem -- not confirmed by anything in this file.)
    """
    first, second, third = u
    rate_a = 0.04
    rate_b = 3e7
    rate_c = 1e4
    d_first = -rate_a * first + rate_c * second * third
    d_second = rate_a * first - rate_b * second * second - rate_c * second * third
    d_third = rate_b * second * second
    return d_first, d_second, d_third
*/ 9 | double *data; 10 | int size; 11 | } ArrayObject; 12 | 13 | static void Array_dealloc(ArrayObject *self) { 14 | free(self->data); 15 | Py_TYPE(self)->tp_free((PyObject *)self); 16 | } 17 | 18 | static int Array_init(ArrayObject *self, PyObject *args, PyObject *kwds) { 19 | static char *kwlist[] = {"data", NULL}; 20 | int index; 21 | PyObject *data = NULL, *item; 22 | 23 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, &data)) 24 | return -1; 25 | 26 | if (!PyList_Check(data)) { 27 | PyErr_SetString(PyExc_TypeError, "parameter must be a list"); 28 | return -1; 29 | } 30 | 31 | self->size = (int)PyList_Size(data); 32 | 33 | self->data = (double *)malloc(self->size * sizeof(double)); 34 | if (self->data == NULL) { 35 | PyErr_NoMemory(); 36 | return -1; 37 | } 38 | 39 | for (index = 0; index < self->size; index++) { 40 | item = PyList_GET_ITEM(data, index); 41 | self->data[index] = PyFloat_AsDouble(item); 42 | } 43 | 44 | return 0; 45 | } 46 | 47 | static PyMemberDef Array_members[] = { 48 | {"size", T_INT, offsetof(ArrayObject, size), 0, "size of the array"}, 49 | {NULL} /* Sentinel */ 50 | }; 51 | 52 | static PyObject *Array_tolist(ArrayObject *self, PyObject *Py_UNUSED(ignored)) { 53 | int index; 54 | PyObject *result, *item; 55 | result = PyList_New(self->size); 56 | for (index = 0; index < self->size; index++) { 57 | item = PyFloat_FromDouble(self->data[index]); 58 | PyList_SetItem(result, index, item); 59 | } 60 | return result; 61 | }; 62 | 63 | static ArrayObject *Array_empty(int size); 64 | 65 | static ArrayObject *Array_multiply(PyObject *o1, PyObject *o2) { 66 | int index; 67 | double number; 68 | PyObject *obj_number = NULL; 69 | ArrayObject *result = NULL, *arr = NULL; 70 | 71 | if (PyNumber_Check(o2)) { 72 | obj_number = o2; 73 | arr = (ArrayObject *)o1; 74 | } else if (PyNumber_Check(o1)) { 75 | obj_number = o1; 76 | arr = (ArrayObject *)o2; 77 | } 78 | 79 | if (PyNumber_Check(o1) | PyNumber_Check(o2)) { 80 | number = 
PyFloat_AsDouble(obj_number); 81 | result = Array_empty(arr->size); 82 | for (index = 0; index < arr->size; index++) { 83 | result->data[index] = arr->data[index] * number; 84 | } 85 | } 86 | 87 | return result; 88 | }; 89 | 90 | static ArrayObject *Array_add(PyObject *o1, PyObject *o2) { 91 | int index; 92 | ArrayObject *result = NULL, *a1, *a2; 93 | a1 = (ArrayObject *)o1; 94 | a2 = (ArrayObject *)o2; 95 | 96 | if (a1->size != a2->size) 97 | return result; 98 | 99 | result = Array_empty(a1->size); 100 | for (index = 0; index < a1->size; index++) { 101 | result->data[index] = a1->data[index] + a2->data[index]; 102 | } 103 | 104 | return result; 105 | }; 106 | 107 | static ArrayObject *Array_divide(PyObject *o1, PyObject *o2) { 108 | int index; 109 | double number; 110 | ArrayObject *result = NULL, *a1; 111 | 112 | if (!PyNumber_Check(o2)) { 113 | return result; 114 | } 115 | a1 = (ArrayObject *)o1; 116 | number = PyFloat_AsDouble(o2); 117 | result = Array_empty(a1->size); 118 | for (index = 0; index < a1->size; index++) { 119 | result->data[index] = a1->data[index] / number; 120 | } 121 | 122 | return result; 123 | }; 124 | 125 | Py_ssize_t Array_length(ArrayObject *arr) { 126 | Py_ssize_t result = (Py_ssize_t)arr->size; 127 | return result; 128 | }; 129 | 130 | PyObject *Array_item(ArrayObject *arr, Py_ssize_t index) { 131 | if (index < 0 || index >= arr->size) { 132 | PyErr_SetString(PyExc_IndexError, "index out of range"); 133 | return NULL; 134 | } 135 | return PyFloat_FromDouble(arr->data[index]); 136 | }; 137 | 138 | int Array_setitem(ArrayObject *arr, Py_ssize_t index, PyObject *item) { 139 | if (index < 0 || index >= arr->size) { 140 | PyErr_SetString(PyExc_IndexError, "index out of range"); 141 | return -1; 142 | } 143 | double value = PyFloat_AsDouble(item); 144 | if (PyErr_Occurred()) 145 | return -1; 146 | arr->data[index] = value; 147 | return 0; 148 | } 149 | 150 | 151 | static PyMethodDef Array_methods[] = { 152 | {"tolist", 
(PyCFunction)Array_tolist, METH_NOARGS, 153 | "Return the data as a list"}, 154 | {NULL} /* Sentinel */ 155 | }; 156 | 157 | static PyType_Slot Array_type_slots[] = { 158 | {Py_tp_new, PyType_GenericNew}, 159 | {Py_tp_init, (initproc)Array_init}, 160 | {Py_tp_dealloc, (destructor)Array_dealloc}, 161 | {Py_tp_members, Array_members}, 162 | {Py_tp_methods, Array_methods}, 163 | {Py_nb_multiply, (binaryfunc)Array_multiply}, 164 | {Py_nb_add, (binaryfunc)Array_add}, 165 | {Py_nb_true_divide, (binaryfunc)Array_divide}, 166 | {Py_sq_length, (lenfunc)Array_length}, 167 | {Py_sq_item, (ssizeargfunc)Array_item}, 168 | {Py_sq_ass_item, (ssizeobjargproc)Array_setitem}, 169 | {0, NULL}, 170 | }; 171 | 172 | static PyType_Spec Array_type_spec = { 173 | .name = "_piconumpy_cpython_capi.array", 174 | .basicsize = sizeof(ArrayObject), 175 | .itemsize = 0, 176 | .flags = Py_TPFLAGS_DEFAULT, 177 | .slots = Array_type_slots, 178 | }; 179 | 180 | PyTypeObject *ptr_ArrayType; 181 | 182 | static ArrayObject *Array_empty(int size) { 183 | ArrayObject *new_array = NULL; 184 | new_array = PyObject_New(ArrayObject, ptr_ArrayType); 185 | new_array->size = size; 186 | new_array->data = (double *)malloc(size * sizeof(double)); 187 | if (new_array->data == NULL) { 188 | PyErr_NoMemory(); 189 | return NULL; 190 | } 191 | return new_array; 192 | }; 193 | 194 | static ArrayObject *empty(PyObject *module, PyObject *arg) { 195 | int size; 196 | size = (int)PyLong_AsLong(arg); 197 | return Array_empty(size); 198 | }; 199 | 200 | static ArrayObject *zeros(PyObject *module, PyObject *arg) { 201 | int size; 202 | size = (int)PyLong_AsLong(arg); 203 | ArrayObject *result = Array_empty(size); 204 | for(int i=0; idata[i] = 0; 206 | return result; 207 | }; 208 | 209 | 210 | static PyMethodDef module_methods[] = { 211 | {"empty", (PyCFunction)empty, METH_O, "Create an empty array."}, 212 | {"zeros", (PyCFunction)zeros, METH_O, "Createa zero-filled array."}, 213 | {NULL, NULL, 0, NULL} /* Sentinel */ 214 | }; 
215 | 216 | static PyModuleDef piconumpymodule = { 217 | PyModuleDef_HEAD_INIT, .m_name = "_piconumpy_cpython_capi", 218 | .m_doc = "piconumpy implemented with the CPython C-API.", .m_size = -1, 219 | module_methods}; 220 | 221 | PyMODINIT_FUNC PyInit__piconumpy_cpython_capi(void) { 222 | PyObject *m; 223 | 224 | m = PyModule_Create(&piconumpymodule); 225 | if (m == NULL) 226 | return NULL; 227 | 228 | ptr_ArrayType = (PyTypeObject *)PyType_FromSpec(&Array_type_spec); 229 | if (PyModule_AddObject(m, "array", (PyObject *)ptr_ArrayType) < 0) { 230 | Py_DECREF(ptr_ArrayType); 231 | Py_DECREF(m); 232 | return NULL; 233 | } 234 | 235 | return m; 236 | } 237 | -------------------------------------------------------------------------------- /piconumpy/_piconumpy_cython.pyx: -------------------------------------------------------------------------------- 1 | 2 | 3 | __all__ = ["array"] 4 | 5 | import array as _array 6 | 7 | 8 | # class array(_array.array): 9 | 10 | # def __new__(cls, *args): 11 | # return super().__new__(cls, "f", *args) 12 | 13 | # def __init__(self, data): 14 | # self.size = len(self) 15 | 16 | # def __add__(self, other): 17 | # return self.__class__(number + other[index] for index, number in enumerate(self)) 18 | 19 | # def __mul__(self, other): 20 | # return self.__class__(other * number for number in self) 21 | 22 | # __rmul__ = __mul__ 23 | 24 | # def __truediv__(self, other): 25 | # return self.__class__(number / other for number in self) 26 | 27 | 28 | class array: 29 | __slots__ = ["data", "size"] 30 | 31 | def __init__(self, data): 32 | self.data = _array.array("f", data) 33 | self.size = len(self.data) 34 | 35 | def __add__(self, other): 36 | return array( 37 | number + other.data[index] for index, number in enumerate(self.data) 38 | ) 39 | 40 | def __mul__(self, other): 41 | return array(other * number for number in self.data) 42 | 43 | __rmul__ = __mul__ 44 | 45 | def __truediv__(self, other): 46 | return array(number / other for number in 
self.data) 47 | 48 | def tolist(self): 49 | return list(self.data) 50 | 51 | def __len__(self): 52 | return len(self.data) 53 | 54 | def __getitem__(self, index): 55 | return self.data[index] 56 | 57 | def __setitem__(self, index, value): 58 | self.data[index] = value 59 | 60 | cpdef empty(size): 61 | return array([0]*size) 62 | 63 | cpdef zeros(size): 64 | return array([0]*size) 65 | -------------------------------------------------------------------------------- /piconumpy/_piconumpy_hpy.c: -------------------------------------------------------------------------------- 1 | #include "hpy.h" 2 | 3 | typedef struct { 4 | /* Type-specific fields go here. */ 5 | double *data; 6 | int size; 7 | } ArrayObject; 8 | 9 | HPyType_HELPERS(ArrayObject) 10 | 11 | HPyDef_SLOT(Array_destroy, HPy_tp_destroy) 12 | static void Array_destroy_impl(void *obj) { 13 | ArrayObject *self = (ArrayObject *)obj; 14 | free(self->data); 15 | } 16 | 17 | HPyDef_SLOT(Array_init, HPy_tp_init) 18 | static int Array_init_impl(HPyContext *ctx, HPy h_self, const HPy *args, 19 | HPy_ssize_t nargs, HPy kw) { 20 | static const char *kwlist[] = {"data", NULL}; 21 | ArrayObject *self = ArrayObject_AsStruct(ctx, h_self); 22 | int index; 23 | HPy h_data = HPy_NULL; 24 | HPyTracker ht; 25 | 26 | if (!HPyArg_ParseKeywordsDict(ctx, &ht, args, nargs, kw, "|O", kwlist, &h_data)) { 27 | return -1; 28 | } 29 | 30 | if (!HPyList_Check(ctx, h_data)) { 31 | HPyTracker_Close(ctx, ht); 32 | HPyErr_SetString(ctx, ctx->h_TypeError, "parameter must be a list"); 33 | return -1; 34 | } 35 | 36 | self->size = (int)HPy_Length(ctx, h_data); 37 | 38 | self->data = (double *)malloc(self->size * sizeof(double)); 39 | if (self->data == NULL) { 40 | HPyTracker_Close(ctx, ht); // done with h_data 41 | HPyErr_NoMemory(ctx); 42 | return -1; 43 | } 44 | 45 | // XXX: this is not doing any error check (but the original C-API version 46 | // doesn't either :shrug: 47 | for (index = 0; index < self->size; index++) { 48 | HPy h_item = 
HPy_GetItem_i(ctx, h_data, index); 49 | self->data[index] = HPyFloat_AsDouble(ctx, h_item); 50 | HPy_Close(ctx, h_item); 51 | } 52 | 53 | HPyTracker_Close(ctx, ht); // done with h_data 54 | return 0; 55 | } 56 | 57 | HPyDef_MEMBER(Array_size, "size", HPyMember_INT, offsetof(ArrayObject, size), 58 | .doc = "size of the array") 59 | 60 | HPyDef_METH(Array_tolist, "tolist", HPyFunc_NOARGS, 61 | .doc = "Return the data as a list") 62 | static HPy Array_tolist_impl(HPyContext *ctx, HPy h_self) { 63 | ArrayObject *self = ArrayObject_AsStruct(ctx, h_self); 64 | int index; 65 | HPyListBuilder builder = HPyListBuilder_New(ctx, self->size); 66 | for (index = 0; index < self->size; index++) { 67 | HPy h_item = HPyFloat_FromDouble(ctx, self->data[index]); 68 | HPyListBuilder_Set(ctx, builder, index, h_item); 69 | HPy_Close(ctx, h_item); 70 | } 71 | return HPyListBuilder_Build(ctx, builder); 72 | }; 73 | 74 | static HPy Array_empty(HPyContext *ctx, int size, ArrayObject **result); 75 | 76 | HPyDef_SLOT(Array_multiply, HPy_nb_multiply) 77 | static HPy Array_multiply_impl(HPyContext *ctx, HPy h1, HPy h2) { 78 | int index; 79 | double number; 80 | HPy h_number = HPy_NULL; 81 | ArrayObject *result = NULL, *arr = NULL; 82 | HPy h_result = HPy_NULL; 83 | 84 | if (HPyNumber_Check(ctx, h2)) { 85 | h_number = h2; 86 | arr = ArrayObject_AsStruct(ctx, h1); 87 | } else if (HPyNumber_Check(ctx, h1)) { 88 | h_number = h1; 89 | arr = ArrayObject_AsStruct(ctx, h2); 90 | } 91 | 92 | if (HPyNumber_Check(ctx, h1) || HPyNumber_Check(ctx, h2)) { 93 | number = HPyFloat_AsDouble(ctx, h_number); 94 | h_result = Array_empty(ctx, arr->size, &result); 95 | for (index = 0; index < arr->size; index++) { 96 | result->data[index] = arr->data[index] * number; 97 | } 98 | } 99 | /* XXX exception if result is still NULL here */ 100 | return h_result; 101 | }; 102 | 103 | HPyDef_SLOT(Array_add, HPy_nb_add) 104 | static HPy Array_add_impl(HPyContext *ctx, HPy h1, HPy h2) { 105 | int index; 106 | ArrayObject 
*result = NULL, *a1, *a2; 107 | HPy h_result = HPy_NULL; 108 | a1 = ArrayObject_AsStruct(ctx, h1); 109 | a2 = ArrayObject_AsStruct(ctx, h2); 110 | 111 | if (a1->size != a2->size) 112 | return HPy_NULL; /* XXX should raise an exception */ 113 | 114 | h_result = Array_empty(ctx, a1->size, &result); 115 | for (index = 0; index < a1->size; index++) { 116 | result->data[index] = a1->data[index] + a2->data[index]; 117 | } 118 | return h_result; 119 | }; 120 | 121 | HPyDef_SLOT(Array_divide, HPy_nb_true_divide) 122 | static HPy Array_divide_impl(HPyContext *ctx, HPy h1, HPy h2) { 123 | int index; 124 | double number; 125 | ArrayObject *result = NULL, *a1; 126 | HPy h_result = HPy_NULL; 127 | 128 | if (!HPyNumber_Check(ctx, h2)) { 129 | return HPy_NULL; 130 | } 131 | a1 = ArrayObject_AsStruct(ctx, h1); 132 | number = HPyFloat_AsDouble(ctx, h2); 133 | h_result = Array_empty(ctx, a1->size, &result); 134 | for (index = 0; index < a1->size; index++) { 135 | result->data[index] = a1->data[index] / number; 136 | } 137 | return h_result; 138 | }; 139 | 140 | 141 | HPyDef_SLOT(Array_length, HPy_sq_length) 142 | HPy_ssize_t Array_length_impl(HPyContext *ctx, HPy h_arr) { 143 | ArrayObject *arr = ArrayObject_AsStruct(ctx, h_arr); 144 | HPy_ssize_t result = (HPy_ssize_t)arr->size; 145 | return result; 146 | }; 147 | 148 | 149 | HPyDef_SLOT(Array_item, HPy_sq_item) 150 | HPy Array_item_impl(HPyContext *ctx, HPy h_arr, HPy_ssize_t index) { 151 | ArrayObject *arr = ArrayObject_AsStruct(ctx, h_arr); 152 | if (index < 0 || index >= arr->size) { 153 | HPyErr_SetString(ctx, ctx->h_IndexError, "index out of range"); 154 | return HPy_NULL; 155 | } 156 | HPy item = HPyFloat_FromDouble(ctx, arr->data[index]); 157 | return item; 158 | }; 159 | 160 | HPyDef_SLOT(Array_setitem, HPy_sq_ass_item) 161 | int Array_setitem_impl(HPyContext *ctx, HPy h_arr, HPy_ssize_t index, HPy h_item) { 162 | ArrayObject *arr = ArrayObject_AsStruct(ctx, h_arr); 163 | if (index < 0 || index >= arr->size) { 164 | 
HPyErr_SetString(ctx, ctx->h_IndexError, "index out of range"); 165 | return -1; 166 | } 167 | double value = HPyFloat_AsDouble(ctx, h_item); 168 | if (HPyErr_Occurred(ctx)) 169 | return -1; 170 | arr->data[index] = value; 171 | return 0; 172 | }; 173 | 174 | 175 | HPyDef_SLOT_IMPL(Array_new, HPyType_GenericNew, HPy_tp_new) 176 | 177 | static HPyDef *Array_defines[] = { 178 | // slots 179 | &Array_new, 180 | &Array_init, 181 | &Array_destroy, 182 | &Array_add, 183 | &Array_multiply, 184 | &Array_divide, 185 | &Array_item, 186 | &Array_setitem, 187 | &Array_length, 188 | // members 189 | &Array_size, 190 | // methods 191 | &Array_tolist, 192 | NULL 193 | }; 194 | 195 | static HPyType_Spec Array_type_spec = { 196 | .name = "_piconumpy_hpy.array", 197 | .basicsize = sizeof(ArrayObject), 198 | .itemsize = 0, 199 | .flags = HPy_TPFLAGS_DEFAULT, 200 | .defines = Array_defines, 201 | }; 202 | 203 | static HPyGlobal ArrayType; 204 | 205 | static HPy Array_empty(HPyContext *ctx, int size, ArrayObject **result) { 206 | ArrayObject *new_array; 207 | HPy h_ArrayType = HPyGlobal_Load(ctx, ArrayType); 208 | HPy h_new_array = HPy_New(ctx, h_ArrayType, &new_array); 209 | HPy_Close(ctx, h_ArrayType); 210 | new_array->size = size; 211 | new_array->data = (double *)malloc(size * sizeof(double)); 212 | if (new_array->data == NULL) { 213 | return HPyErr_NoMemory(ctx); 214 | } 215 | *result = new_array; 216 | return h_new_array; 217 | }; 218 | 219 | HPyDef_METH(empty, "empty", HPyFunc_O, .doc = "Create an empty array") 220 | static HPy empty_impl(HPyContext *ctx, HPy module, HPy arg) { 221 | int size; 222 | ArrayObject *result; 223 | size = (int)HPyLong_AsLong(ctx, arg); 224 | return Array_empty(ctx, size, &result); 225 | }; 226 | 227 | HPyDef_METH(zeros, "zeros", HPyFunc_O, .doc = "Create a zero-filled array") 228 | static HPy zeros_impl(HPyContext *ctx, HPy module, HPy arg) { 229 | int size; 230 | ArrayObject *result = NULL; 231 | size = (int)HPyLong_AsLong(ctx, arg); 232 | HPy 
h_result = Array_empty(ctx, size, &result); 233 | if (HPy_IsNull(h_result)) 234 | return HPy_NULL; 235 | for(int i=0; i<size; i++) 236 | result->data[i] = 0; 237 | return h_result; 238 | }; 239 | 240 | HPyDef_SLOT(_piconumpy_hpy_exec, HPy_mod_exec) 241 | static int _piconumpy_hpy_exec_impl(HPyContext *ctx, HPy hm) { 242 | HPy h_ArrayType = HPyType_FromSpec(ctx, &Array_type_spec, NULL); 243 | if (HPy_IsNull(h_ArrayType)) { 244 | return 1; 245 | } 246 | if (HPy_SetAttr_s(ctx, hm, "array", h_ArrayType) != 0) { 247 | HPy_Close(ctx, h_ArrayType); 248 | return 1; 249 | } 250 | HPyGlobal_Store(ctx, &ArrayType, h_ArrayType); 251 | return 0; 252 | } 253 | 254 | static HPyDef *module_defines[] = { 255 | &_piconumpy_hpy_exec, 256 | &empty, 257 | &zeros, 258 | NULL 259 | }; 260 | 261 | static HPyGlobal *module_globals[] = { 262 | &ArrayType, 263 | NULL 264 | }; 265 | 266 | static HPyModuleDef piconumpymodule = { 267 | .doc = "piconumpy implemented with the HPy API.", 268 | .defines = module_defines, 269 | .globals = module_globals, 270 | }; 271 | 272 | HPy_MODINIT(_piconumpy_hpy, piconumpymodule) 273 | -------------------------------------------------------------------------------- /piconumpy/bench.py: -------------------------------------------------------------------------------- 1 | from transonic.util import timeit 2 | 3 | 4 | def timeit_verbose( 5 | stmt, 6 | setup="pass", 7 | total_duration=2, 8 | globals=None, 9 | norm=None, 10 | name=None, 11 | print_time=False, 12 | max_length_name=33, 13 | ): 14 | result = timeit( 15 | stmt, setup=setup, total_duration=total_duration, globals=globals 16 | ) 17 | if norm is None: 18 | norm = result 19 | norm_given = False 20 | else: 21 | norm_given = True 22 | 23 | if name is None: 24 | name = stmt.split("(")[0] 25 | 26 | fmt_name = f"{{:{max_length_name}s}}" 27 | name = fmt_name.format(name) 28 | 29 | if print_time: 30 | raw_time = f" = {result:7.3g} s" 31 | else: 32 | raw_time = "" 33 | 34 | print(f"{name}: {result/norm:5.3g} * norm{raw_time}") 35 | if not 
norm_given and not print_time: 36 | print(f"norm = {norm:5.3g} s") 37 | 38 | return result 39 | -------------------------------------------------------------------------------- /piconumpy/purepy.py: -------------------------------------------------------------------------------- 1 | class array: 2 | __slots__ = ["data", "size"] 3 | 4 | def __init__(self, data): 5 | self.data = list(float(number) for number in data) 6 | self.size = len(self.data) 7 | 8 | def __add__(self, other): 9 | return array( 10 | number + other.data[index] for index, number in enumerate(self.data) 11 | ) 12 | 13 | def __mul__(self, other): 14 | return array(other * number for number in self.data) 15 | 16 | __rmul__ = __mul__ 17 | 18 | def __truediv__(self, other): 19 | return array(number / other for number in self.data) 20 | 21 | def tolist(self): 22 | return list(self.data) 23 | 24 | def __len__(self): 25 | return len(self.data) 26 | 27 | def __getitem__(self, index): 28 | return self.data[index] 29 | 30 | def __setitem__(self, index, value): 31 | self.data[index] = value 32 | 33 | def empty(size): 34 | return array([0]*size) 35 | 36 | def zeros(size): 37 | return array([0]*size) 38 | 39 | -------------------------------------------------------------------------------- /piconumpy/purepy_array.py: -------------------------------------------------------------------------------- 1 | import array as _array 2 | 3 | 4 | class array(_array.array): 5 | __slots__ = ["size"] 6 | 7 | def __new__(cls, *args): 8 | return super().__new__(cls, "f", *args) 9 | 10 | def __init__(self, data): 11 | self.size = len(self) 12 | 13 | def __add__(self, other): 14 | return self.__class__( 15 | number + other[index] for index, number in enumerate(self) 16 | ) 17 | 18 | def __mul__(self, other): 19 | return self.__class__(other * number for number in self) 20 | 21 | __rmul__ = __mul__ 22 | 23 | def __truediv__(self, other): 24 | return self.__class__(number / other for number in self) 25 | 26 | def empty(size): 27 | 
return array([0]*size) 28 | 29 | def zeros(size): 30 | return array([0]*size) 31 | -------------------------------------------------------------------------------- /piconumpy/test_cpython_capi.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from . import _piconumpy_cpython_capi 5 | 6 | 7 | class Tests: 8 | piconumpy = _piconumpy_cpython_capi 9 | def _array(self, *args): 10 | return self.piconumpy.array(*args) 11 | 12 | def test_empty(self): 13 | a = self.piconumpy.empty(12) 14 | assert isinstance(a, self.piconumpy.array) 15 | assert a.size == 12 16 | 17 | def test_zeros(self): 18 | a = self.piconumpy.zeros(5) 19 | assert isinstance(a, self.piconumpy.array) 20 | assert a.size == 5 21 | assert a.tolist() == [0, 0, 0, 0, 0] 22 | 23 | def test_init_array(self): 24 | a = self._array([1.0, 2.0]) 25 | assert a.size == 2 26 | 27 | def test_getitem_setitem(self): 28 | a = self._array([12.0, 34.0]) 29 | assert a[0] == 12.0 30 | assert a[1] == 34.0 31 | a[0] = 56 32 | a[1] = 78 33 | assert a[0] == 56.0 34 | assert a[1] == 78.0 35 | # 36 | with pytest.raises(IndexError): 37 | a[2] 38 | with pytest.raises(IndexError): 39 | a[2] = 3 40 | 41 | def test_init_array_numpy(self): 42 | np_a = np.array([1.0, 2.0, 0.0, 0.0]) 43 | a = self._array(np_a.tolist()) 44 | assert a.size == np_a.size 45 | assert a.tolist() == np_a.tolist() 46 | 47 | def test_multiply(self): 48 | a = self._array([1.0, 2.0]) 49 | # works with PyPy 7.3.6-alpha0 50 | assert (a * 3).tolist() == [3.0, 6.0] 51 | # error with PyPy 7.3.6-alpha0 52 | assert (2 * a).tolist() == [2.0, 4.0] 53 | 54 | def test_add(self): 55 | a = self._array([1.0, 2.0]) 56 | # works with PyPy 7.3.6-alpha0 57 | assert (a + a * 2).tolist() == [3.0, 6.0] 58 | # error with PyPy 7.3.6-alpha0 59 | assert (a + 2 * a).tolist() == [3.0, 6.0] 60 | 61 | def test_divide(self): 62 | a = self._array([1.0, 2.0]) 63 | assert (a / 2).tolist() == [0.5, 1.0] 64 | 65 | def 
test_sequence(self): 66 | a = self._array([1.0, 2.0]) 67 | assert len(a) == 2 68 | assert a[1] == 2.0 69 | -------------------------------------------------------------------------------- /piconumpy/test_cython.py: -------------------------------------------------------------------------------- 1 | from .test_cpython_capi import Tests as _Tests 2 | 3 | class Tests(_Tests): 4 | from . import _piconumpy_cython as piconumpy 5 | -------------------------------------------------------------------------------- /piconumpy/test_hpy_cpy_abi.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from .test_cpython_capi import Tests as _Tests 4 | 5 | try: 6 | from . import _piconumpy_hpy as piconumpy_cpy_abi 7 | except ImportError: 8 | piconumpy_cpy_abi = False 9 | 10 | 11 | @pytest.mark.skipif(not piconumpy_cpy_abi, reason="ImportError _piconumpy_hpy") 12 | class TestsCPyABI(_Tests): 13 | piconumpy = piconumpy_cpy_abi 14 | -------------------------------------------------------------------------------- /piconumpy/test_hpy_universal.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | from .test_cpython_capi import Tests as _Tests 5 | 6 | try: 7 | from . import _piconumpy_hpy 8 | except ImportError: 9 | _piconumpy_hpy = False 10 | 11 | 12 | @pytest.mark.skipif( 13 | not _piconumpy_hpy, reason="ImportError piconumpy HPy Universal" 14 | ) 15 | class TestsCPyABI(_Tests): 16 | piconumpy = _piconumpy_hpy 17 | -------------------------------------------------------------------------------- /piconumpy/test_purepy.py: -------------------------------------------------------------------------------- 1 | from .test_cpython_capi import Tests as _Tests 2 | 3 | class Tests(_Tests): 4 | from . 
import purepy as piconumpy 5 | -------------------------------------------------------------------------------- /piconumpy/test_purepy_array.py: -------------------------------------------------------------------------------- 1 | from .test_cpython_capi import Tests as _Tests 2 | 3 | class Tests(_Tests): 4 | from . import purepy_array as piconumpy 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "piconumpy" 3 | version = "0.0.0" 4 | description = "An experiment about Numpy and pyhandle/hpy." 5 | authors = [ 6 | {name = "Pierre Augier", email = "pierre.augier@univ-grenoble-alpes.fr"}, 7 | ] 8 | license = {text = "BSD 3-Clause"} 9 | readme = "README.md" 10 | keywords = ["numpy", "hpy", "PyPy"] 11 | requires-python = ">=3.8" 12 | 13 | [project.urls] 14 | homepage = "https://github.com/paugier/piconumpy" 15 | repository = "https://github.com/paugier/piconumpy" 16 | documentation = "https://github.com/paugier/piconumpy" 17 | 18 | [project.optional-dependencies] 19 | dev = ['transonic', 'numpy', 'pytest', 'pythran'] 20 | full = ['black'] 21 | 22 | [build-system] 23 | requires = [ 24 | "setuptools >= 35.0.2", 25 | "wheel", 26 | "cython", 27 | "hpy >= 0.9.0" 28 | ] 29 | 30 | [tool.black] 31 | line-length = 82 32 | target_version = ['py310'] 33 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages, Extension 2 | from Cython.Build import cythonize 3 | 4 | setup( 5 | name="piconumpy", 6 | packages=find_packages(exclude=["bench"]), 7 | # Workaround: HPy adds files to the sources list and uses absolute paths. 8 | # Newer setuptools complain about that if package data should be included. 9 | # Therefore, we explicitly disable this here. 
10 | include_package_data=False, 11 | ext_modules=[ 12 | Extension( 13 | "piconumpy._piconumpy_cpython_capi", 14 | ["piconumpy/_piconumpy_cpython_capi.c"], 15 | extra_compile_args=[ 16 | "-Wfatal-errors", # stop after one error (unrelated to warnings) 17 | "-Werror", # turn warnings into errors (all, for now) 18 | ], 19 | ), 20 | *cythonize("piconumpy/_piconumpy_cython.pyx"), 21 | ], 22 | hpy_ext_modules=[ 23 | Extension( 24 | "piconumpy._piconumpy_hpy", 25 | ["piconumpy/_piconumpy_hpy.c"], 26 | extra_compile_args=[ 27 | "-Wfatal-errors", 28 | "-Werror", 29 | ], 30 | ), 31 | ], 32 | ) 33 | --------------------------------------------------------------------------------