├── .gitignore ├── .travis.yml ├── ChangeLog.rst ├── HACKING.rst ├── LICENSE ├── MANIFEST.in ├── README ├── README.rst ├── ceygen ├── __init__.py ├── core.pxd ├── core.pyx ├── dispatch.h ├── dispatch.pxd ├── dtype.pxd ├── dtype.pyx ├── eigen_cpp.h ├── eigen_cython.pxd ├── elemwise.pxd ├── elemwise.pyx ├── llt.pxd ├── llt.pyx ├── lu.pxd ├── lu.pyx ├── reductions.pxd ├── reductions.pyx └── tests │ ├── __init__.py │ ├── __main__.py │ ├── bench.pyx │ ├── support.py │ ├── test_core.pyx │ ├── test_dispatch.pyx │ ├── test_dtype.pyx │ ├── test_elemwise.pyx │ ├── test_llt.pyx │ ├── test_lu.pyx │ └── test_reductions.pyx ├── doc ├── .gitignore ├── ChangeLog.rst ├── HACKING.rst ├── Makefile ├── README.rst ├── conf.py ├── core.rst ├── definitions.rst ├── elemwise.rst ├── index.rst ├── llt.rst ├── lu.rst ├── make.bat └── reductions.rst ├── setup.py └── support ├── __init__.py ├── compare_ceygen_numpy.py ├── dist.py ├── dist_cmd_build_ext.py ├── dist_cmd_test.py └── visualize_stats.py /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | *.cpp 3 | *.pyc 4 | *.html 5 | /setup.cfg 6 | /MANIFEST 7 | /dist/ 8 | *.pickle 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - 2.6 5 | - 2.7 6 | - 3.2 7 | - 3.3 8 | 9 | install: 10 | - sudo apt-get install -qq libeigen3-dev 11 | - pip install cython==0.19.2 --use-mirrors 12 | - cython --version 13 | 14 | script: 15 | - BENCHMARK=1 python setup.py -v test 16 | -------------------------------------------------------------------------------- /ChangeLog.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Ceygen Change Log 3 | ================= 4 | 5 | This file mentions changes between Ceygen versions that are important for its users. Most 6 | recent versions and changes are mentioned on top. 7 | 8 | .. currentmodule:: ceygen 9 | 10 | Changes in 0.4 since 0.3 11 | ======================== 12 | 13 | Changes in 0.3 since 0.2 14 | ======================== 15 | 16 | * :func:`~core.eigen_version` function introduced to get Eigen version. 17 | * :mod:`~ceygen.llt` module introduced with Cholesky matrix decomposition. 18 | * :obj:`~dtype.dtype` enhanced to provide C char, short, int, long and float types in 19 | addition to C double type. :obj:`~dtype.nonint_dtype` introduced for non-integer 20 | numeric types. If you get *no suitable method found* or *Invalid use of fused types, 21 | type cannot be specialized* Cython errors, specify the specialization explicitly: 22 | ``ceygen.elemwise.add_vv[double](np.array(...), np.array(...))``. This unfortunately 23 | slows down compilation and makes resulting modules bigger, but doesn't affect 24 | performance and makes Ceygen more generic. 25 | * :func:`~elemwise.power_vs` and :func:`~elemwise.power_ms` functions were added to the 26 | :mod:`~ceygen.reductions` module. 27 | 28 | Changes in 0.2 since 0.1 29 | ======================== 30 | 31 | * :mod:`~ceygen.reductions` module was added with vector, matrix, row-wise and column-wise 32 | sums. 33 | * Simple benchmarks for many functions have been added, define ``BENCHMARK`` or 34 | ``BENCHMARK_NUMPY`` environment variable during test execution to run them; define 35 | ``SAVE`` environment variable to save timings into ``.pickle`` files that can be 36 | visualized by ``support/visualize_stats.py``. 
37 | * Added code paths optimized for C-contiguous and F-contiguous matrices and vectors using 38 | fancy C++ dispatching code. Roughly 40% speed gains in :func:`core.dot_mm` (for common 39 | matrix sizes), 300% gains for :func:`core.dot_mv` and :func:`core.dot_vm` starting with 40 | 16\*16, 30% gains for vector-vector operations and slight gains at other places. 41 | * Internal Ceygen .pxd files (e.g. ``eigen_cython.pxd``) are no longer installed. 42 | * ``-fopenmp`` is now added by default to `build_ext` ``cflags`` and ``ldflags`` to 43 | enable parallelising :func:`core.dot_mm` in Eigen; speedups are noticeable for matrices 44 | 64\*64 and bigger. Can be easily disabled. 45 | * :func:`dtype.vector` and :func:`dtype.matrix` convenience functions added; their usage 46 | in other modules leads to speedups because it circumvents a Cython shortcoming. 47 | * :func:`core.set_is_malloc_allowed` added to aid in debugging and tests. 48 | -------------------------------------------------------------------------------- /HACKING.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | Ceygen Development 3 | ================== 4 | 5 | This document should serve as a reminder to me and other possible Ceygen 6 | hackers about Ceygen coding style and conventions. 7 | 8 | Development Guidelines 9 | ====================== 10 | 11 | Some special and important files: 12 | 13 | * ``eigen_cpp.h`` - low-level implementation of a tiny C++ Eigen subclass that 14 | is used to create wrappers around Cython arrays. 15 | * ``eigen_cython.pxd`` - exports the BaseMap C++ class defined in `eigen_cpp.h` 16 | to Cython along with other Eigen methods. 17 | * ``dtype.{pxd,pyx}`` - defines the base scalar fused (template-like) type 18 | that all other functions use, along with functions to create vectors and 19 | matrices. 20 | * ``dispatch.{h,pxd}`` - contains fancy code and Cython declarations for 21 | so-called dispatchers: tiny helpers that call more optimized Eigen functions 22 | (in fact, the same functions with different template parameters) for 23 | column-contiguous, row-contiguous matrices and contiguous vectors. 24 | 25 | All other \*.{pxd,pyx} are public Ceygen modules. 26 | 27 | Please always use the appropriate \*Dispatcher from `dispatch.pxd` instead of 28 | calling methods from `eigen_cython.pxd` directly, because declarations from 29 | `eigen_cython.pxd` don't contain the ``except +`` keyword for performance reasons 30 | (i.e. you would leak C++ exceptions raised by Eigen code without converting 31 | them to Python exceptions). 32 | 33 | Tests and Stress Tests 34 | ====================== 35 | 36 | All public functions should have a unit test. Suppose you have a module 37 | ``ceygen/modname.pyx``; then unit tests for all functions in ``modname.pyx`` 38 | should go into ``ceygen/tests/test_modname.pyx``. There are a couple of 39 | "standard" environment variables recognized in tests: 40 | 41 | * ``BENCHMARK`` - run potentially time-consuming benchmarks of Ceygen code 42 | * ``BENCHMARK_NUMPY`` - also run some benchmarks with the NumPy backend to see 43 | the difference 44 | * ``SAVE`` - save timings into ``.pickle`` files that can be visualized by 45 | ``support/visualize_stats.py``. 46 | 47 | Releasing Ceygen 48 | ================ 49 | 50 | Things to do when releasing a new version (let it be **X.Y**) of Ceygen: 51 | 52 | Before Tagging 53 | -------------- 54 | 55 | 1. Set version to **X.Y** in `setup.py` (around line 37) 56 | #. 
Ensure `ChangeLog.rst` mentions all important changes 57 | #. Ensure that `README.rst` is up-to-date 58 | #. (Optional) update **short description** in `setup.py` 59 | #. (Optional) update **long description** `README.rst` 60 | 61 | Tagging & Publishing 62 | -------------------- 63 | 64 | 1. Do ``./setup.py sdist`` and check contents, unpack somewhere, run tests incl. 65 | benchmarks 66 | #. git tag -s **vX.Y** 67 | #. ./setup.py register sdist upload --sign 68 | #. Build and upload docs: ``cd ../ceygen-doc && ./synchronise.sh`` 69 | #. If **short description** changed, update it manually at following places: 70 | 71 | * https://github.com/strohel/Ceygen 72 | #. If **long description** changed, update it manually at following places: 73 | 74 | * http://scipy.org/Topical_Software 75 | * http://www.ohloh.net/p/ceygen 76 | 77 | After 78 | ----- 79 | 80 | 1. Set version to **$NEXT_VERSION-pre** in `setup.py` 81 | #. Add header for the next version into `ChangeLog.rst` 82 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Matěj Laitl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include ceygen *.pxd *.h 2 | recursive-include doc *.py *.rst *.bat Makefile 3 | recursive-include support *.py 4 | include *.rst 5 | exclude setup.cfg 6 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | README.rst -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | Ceygen 3 | ====== 4 | 5 | *Note: this software is not maintained since 2015.* 6 | 7 | About 8 | ===== 9 | 10 | Ceygen is a binary Python extension module for linear algebra with Cython_ `typed 11 | memoryviews`_. Ceygen is built atop the `Eigen C++ library`_. Ceygen is **not** a Cython 12 | wrapper or an interface to Eigen! 13 | 14 | The name Ceygen is a rather poor wordplay on Cython + Eigen; it has nothing to do 15 | with software piracy. Ceygen is distributed under the `MIT license`_. 
16 | 17 | Ceygen was developed by Matěj Laitl with support from the `Institute of Information 18 | Theory and Automation, Academy of Sciences of the Czech Republic`_. Feel free to send me 19 | an e-mail to matej at laitl dot cz. 20 | 21 | .. _Cython: http://cython.org/ 22 | .. _`typed memoryviews`: http://docs.cython.org/src/userguide/memoryviews.html 23 | .. _`Eigen C++ library`: http://eigen.tuxfamily.org/ 24 | .. _`MIT license`: https://choosealicense.com/licenses/mit/ 25 | .. _`Institute of Information Theory and Automation, Academy of Sciences of the Czech Republic`: 26 | http://www.utia.cas.cz/ 27 | 28 | Features 29 | ======== 30 | 31 | Ceygen... 32 | 33 | * **is fast** - Ceygen's primary raison d'être is to provide overhead-free algebraic 34 | operations for Cython projects that work with `typed memoryviews`_ (especially 35 | small-sized). For every function there is a code-path where no Python function is 36 | called, no memory is allocated on the heap and no data is copied. 37 | `Eigen itself performs rather well`_, too. 38 | * **is documented** - see `Documentation`_ or hop directly to `on-line docs`_. 39 | * **supports various data types** - Ceygen uses Cython `fused types`_ (a.k.a. wannabe 40 | templates) along with Eigen's template nature to support various data types without 41 | duplicating code. While just a few types are pre-defined (float, double, ...), adding 42 | a new type is a matter of adding 3 lines and rebuilding Ceygen. 43 | * **is extensively tested** - Ceygen's test suite validates every one of its public methods, 44 | including errors raised on invalid input. Thanks to Travis CI, `every push is 45 | automatically tested`_ against **Python 2.6**, **2.7**, **3.2** and **3.3**. 46 | * **is multithreading-friendly** - No Ceygen function acquires the GIL_ 47 | unless it needs to create a Python object (always avoidable); all functions are 48 | declared nogil_ so that you can call them in prange_ blocks without losing parallelism. 49 | * **provides descriptive error messages** - Care is taken to propagate all errors 50 | properly (down from Eigen) so that you are not stuck debugging your program. Ceygen 51 | functions don't crash on invalid input but rather raise reasonable errors. 52 | * works well with NumPy_, but doesn't depend on it. You don't need NumPy to build or run 53 | Ceygen, but thanks to Cython, `Cython memoryviews and NumPy arrays`_ are fully 54 | interchangeable without copying the data (where it is possible). The test suite 55 | currently makes use of NumPy because of our laziness. :-) 56 | 57 | .. _`Eigen itself performs rather well`: http://eigen.tuxfamily.org/index.php?title=Benchmark 58 | .. _`on-line docs`: http://strohel.github.com/Ceygen-doc/ 59 | .. _`fused types`: http://docs.cython.org/src/userguide/fusedtypes.html 60 | .. _`every push is automatically tested`: https://travis-ci.org/strohel/Ceygen 61 | .. _GIL: http://docs.python.org/glossary.html#term-global-interpreter-lock 62 | .. _nogil: http://docs.cython.org/src/userguide/external_C_code.html#declaring-a-function-as-callable-without-the-gil 63 | .. _prange: http://docs.cython.org/src/userguide/parallelism.html 64 | .. _NumPy: http://www.numpy.org/ 65 | .. _`Cython memoryviews and NumPy arrays`: http://docs.cython.org/src/userguide/memoryviews.html#coercion-to-numpy 66 | 67 | On the other hand, Ceygen... 68 | 69 | * **depends on Eigen at build time**. Ceygen expects *Eigen 3* headers to be installed under 70 | ``/usr/include/eigen3`` when it is being built. 
Installing Eigen is a matter of unpacking 71 | it, because it is a pure template library defined solely in the headers. Ceygen doesn't 72 | reference Eigen at all at runtime because all code is compiled in. 73 | * **still provides a very small subset of Eigen functionality**. We add new functions 74 | only as we need them in other projects, but we believe that the hard part is the 75 | infrastructure - implementing a new function should be rather straightforward (with 76 | decent Cython and C++ knowledge). We're very open to pull requests! 77 | (do include unit tests in them) 78 | * **needs a recent Cython** (currently at least 0.19.1) to compile. If this is a problem, 79 | you can distribute .cpp files or the final Python extension module instead. 80 | * **doesn't bring Eigen's elegance to Cython** - if you think of lazy evaluation and 81 | advanced expressions, stop dreaming. Ceygen will make your code faster, not nicer. 82 | `Array expressions`_ will help here. 83 | 84 | .. _`Array expressions`: https://github.com/cython/cython/pull/144 85 | 86 | A simple example to compute a matrix product within a big matrix may look like 87 | 88 | >>> cdef double[:, :] big = np.array([[1., 2., 2., 0., 0., 0.], 89 | >>> [3., 4., 0., -2., 0., 0.]]) 90 | >>> ceygen.core.dot_mm(big[:, 0:2], big[:, 2:4], big[:, 4:6]) 91 | [[ 2. -4.] 92 | [ 6. -8.]] 93 | >>> big 94 | [[ 1. 2. 2. 0. 2. -4.] 95 | [ 3. 4. 0. -2. 6. -8.]], 96 | 97 | where the `dot_mm`_ call above doesn't copy any data, allocates no memory on the heap, doesn't 98 | need the GIL_ and uses vectorization (SSE, AltiVec...) to get the best out of your 99 | processor. 100 | 101 | .. _`dot_mm`: http://strohel.github.com/Ceygen-doc/core.html#ceygen.core.dot_mm 102 | 103 | Obtaining 104 | ========= 105 | 106 | Ceygen development happens in `its github repository`_; ``git clone 107 | git@github.com:strohel/Ceygen.git``-ing is the preferred way to get it as you'll have 108 | the latest & greatest version (which shouldn't break thanks to continuous integration). 109 | Released versions are available from `Ceygen's PyPI page`_. 110 | 111 | .. _`its github repository`: https://github.com/strohel/Ceygen 112 | .. _`Ceygen's PyPI page`: http://pypi.python.org/pypi/Ceygen 113 | 114 | Building 115 | ======== 116 | 117 | Ceygen uses standard Distutils to build, test and install itself; simply run: 118 | 119 | * ``python setup.py build`` to build Ceygen 120 | * ``python setup.py test`` to test it (inside the build directory) 121 | * ``python setup.py install`` to install it 122 | * ``python setup.py clean`` to clean generated object, .cpp and .html files (perhaps to 123 | force recompilation) 124 | 125 | Commands can be combined, automatically call dependent commands and can take options; 126 | the recommended combo to safely install Ceygen is therefore ``python setup.py -v test install``. 127 | 128 | Building Options 129 | ---------------- 130 | 131 | You can set various build options as is usual with distutils, see 132 | ``python setup.py --help``. Notable is the `build_ext` command and its `--include-dirs` 133 | (standard) and the following additional options (which are Ceygen extensions): 134 | 135 | --include-dirs 136 | defaults to `/usr/include/eigen3` and must be specified if you've installed Eigen 3 137 | to a non-standard directory, 138 | 139 | --cflags 140 | defaults to `-O2 -march=native -fopenmp`. 
Please note that it is important to enable 141 | optimizations and generation of appropriate MMX/SSE/altivec-enabled code as the actual 142 | computation code from Eigen is built along with the boilerplate Ceygen code, 143 | 144 | --ldflags 145 | additional flags to pass to linker, defaults to `-fopenmp`. Use standard `--libraries` 146 | for specifying extra libraries to link against, 147 | 148 | --annotate 149 | pass `--annotate` to Cython to produce annotated HTML files during compiling. Only 150 | useful during Ceygen development. 151 | 152 | You may want to remove `-fopenmp` from `cflags` and `ldflags` if you are already 153 | parallelising above Ceygen. The resulting command could look like ``python setup.py -v 154 | build_ext --include-dirs=/usr/local/include/eigen3 --cflags="-O3 -march=core2" --ldflags= 155 | test``. The same could be achieved by putting the options to a `setup.cfg` file:: 156 | 157 | [build_ext] 158 | include_dirs = /usr/local/include/eigen3 159 | cflags = -O3 -march=core2 160 | ldflags = 161 | 162 | Documentation 163 | ============= 164 | 165 | Ceygen documentation is maintained in reStructuredText_ format under ``doc/`` directory 166 | and can be exported into a variety of formats using Sphinx_ (version at least 1.0 needed). 167 | Just type ``make`` in that directory to see a list of supported formats and for example 168 | ``make html`` to build HTML pages with the documentation. 169 | 170 | See ``ChangeLog.rst`` file for changes between versions or `view it online`_. 171 | 172 | **On-line documentation** is available at http://strohel.github.com/Ceygen-doc/ 173 | 174 | .. _reStructuredText: http://sphinx-doc.org/rest.html 175 | .. _Sphinx: http://sphinx-doc.org/ 176 | .. _`view it online`: http://strohel.github.com/Ceygen-doc/ChangeLog.html 177 | 178 | Bugs 179 | ==== 180 | 181 | Please report any bugs you find and suggestions you may have to `Ceygen's github Issue 182 | Tracker`_. 183 | 184 | .. 
_`Ceygen's github Issue Tracker`: https://github.com/strohel/Ceygen/issues 185 | -------------------------------------------------------------------------------- /ceygen/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """TODO: docs""" 4 | -------------------------------------------------------------------------------- /ceygen/core.pxd: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from dtype cimport dtype 4 | 5 | 6 | cpdef bint set_is_malloc_allowed(bint allowed) nogil 7 | cpdef tuple eigen_version() 8 | 9 | cdef dtype dot_vv(dtype[:] x, dtype[:] y) nogil except * 10 | cdef dtype[:] dot_mv(dtype[:, :] x, dtype[:] y, dtype[:] out = *) nogil 11 | cdef dtype[:] dot_vm(dtype[:] x, dtype[:, :] y, dtype[:] out = *) nogil 12 | cdef dtype[:, :] dot_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = *) nogil 13 | -------------------------------------------------------------------------------- /ceygen/core.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | cimport cython 4 | 5 | from eigen_cython cimport * 6 | from dispatch cimport * 7 | from dtype cimport vector, matrix 8 | 9 | 10 | cpdef bint set_is_malloc_allowed(bint allowed) nogil: 11 | c_set_is_malloc_allowed(allowed) 12 | 13 | cpdef tuple eigen_version(): 14 | return (EIGEN_WORLD_VERSION, EIGEN_MAJOR_VERSION, EIGEN_MINOR_VERSION) 15 | 16 | 17 | cdef void dot_vv_worker( 18 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 19 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YVectorContiguity y_dummy, 20 | dtype *o) nogil: 21 | cdef VectorMap[dtype, XVectorContiguity] x 22 | cdef VectorMap[dtype, YVectorContiguity] y 23 | x.init(x_data, x_shape, x_strides) 24 | y.init(y_data, y_shape, y_strides) 25 | o[0] = x.dot(y) 26 | 27 | @cython.boundscheck(False) 28 | @cython.wraparound(False) 29 | cdef dtype dot_vv(dtype[:] x, dtype[:] y) nogil except *: 30 | cdef VVSDispatcher[dtype] dispatcher 31 | cdef dtype out 32 | dispatcher.run(&x[0], x.shape, x.strides, &y[0], y.shape, y.strides, &out, 33 | dot_vv_worker, dot_vv_worker, dot_vv_worker, dot_vv_worker) 34 | return out 35 | 36 | 37 | cdef void dot_mv_worker( 38 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 39 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YVectorContiguity y_dummy, 40 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy) nogil: 41 | cdef MatrixMap[dtype, XMatrixContiguity] x 42 | cdef VectorMap[dtype, YVectorContiguity] y 43 | cdef VectorMap[dtype, OVectorContiguity] o 44 | x.init(x_data, x_shape, x_strides) 45 | y.init(y_data, y_shape, y_strides) 46 | o.init(o_data, o_shape, o_strides) 47 | o.noalias_assign(x * y) 48 | 49 | @cython.boundscheck(False) 50 | @cython.wraparound(False) 51 | cdef dtype[:] dot_mv(dtype[:, :] x, dtype[:] y, dtype[:] out = None) nogil: 52 | cdef MVVDispatcher[dtype] dispatcher 53 | if out is None: 54 | out = vector(x.shape[0], &y[0]) 55 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0], y.shape, y.strides, 56 | &out[0], out.shape, out.strides, 57 | dot_mv_worker, dot_mv_worker, dot_mv_worker, dot_mv_worker, dot_mv_worker, dot_mv_worker, 58 | dot_mv_worker, dot_mv_worker, dot_mv_worker, dot_mv_worker, dot_mv_worker, dot_mv_worker) 59 | return out 60 | 61 | 62 | cdef void dot_vm_worker( 63 | dtype 
*y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy, 64 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 65 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy) nogil: 66 | cdef RowVectorMap[dtype, XVectorContiguity] x 67 | cdef MatrixMap[dtype, YMatrixContiguity] y 68 | cdef RowVectorMap[dtype, OVectorContiguity] o 69 | x.init(x_data, x_shape, x_strides) 70 | y.init(y_data, y_shape, y_strides) 71 | o.init(o_data, o_shape, o_strides) 72 | o.noalias_assign(x * y) 73 | 74 | @cython.boundscheck(False) 75 | @cython.wraparound(False) 76 | cdef dtype[:] dot_vm(dtype[:] x, dtype[:, :] y, dtype[:] out = None) nogil: 77 | # we coul've just called dotmv(y.T, x, out), but y.T segfaults if y is uninitialized 78 | # memoryview, also we fear overhead in memoryview.transpose() and another function call 79 | cdef MVVDispatcher[dtype] dispatcher 80 | if out is None: 81 | out = vector(y.shape[1], &x[0]) 82 | # warning: we swap x and y here so that we can share dispather with dot_mv! 83 | dispatcher.run(&y[0, 0], y.shape, y.strides, &x[0], x.shape, x.strides, 84 | &out[0], out.shape, out.strides, 85 | dot_vm_worker, dot_vm_worker, dot_vm_worker, dot_vm_worker, dot_vm_worker, dot_vm_worker, 86 | dot_vm_worker, dot_vm_worker, dot_vm_worker, dot_vm_worker, dot_vm_worker, dot_vm_worker) 87 | return out 88 | 89 | 90 | cdef void dot_mm_worker( 91 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 92 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy, 93 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy) nogil: 94 | cdef MatrixMap[dtype, XMatrixContiguity] x 95 | cdef MatrixMap[dtype, YMatrixContiguity] y 96 | cdef MatrixMap[dtype, OMatrixContiguity] o 97 | x.init(x_data, x_shape, x_strides) 98 | y.init(y_data, y_shape, y_strides) 99 | o.init(o_data, o_shape, o_strides) 100 | o.noalias_assign(x * y) 101 | 102 | @cython.boundscheck(False) 103 | @cython.wraparound(False) 104 | cdef dtype[:, :] dot_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = None) nogil: 105 | cdef MMMDispatcher[dtype] dispatcher 106 | if out is None: 107 | out = matrix(x.shape[0], y.shape[1], &x[0, 0]) 108 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0, 0], y.shape, y.strides, 109 | &out[0, 0], out.shape, out.strides, dot_mm_worker, dot_mm_worker, dot_mm_worker, 110 | dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, 111 | dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, 112 | dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, 113 | dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker, dot_mm_worker) 114 | return out 115 | -------------------------------------------------------------------------------- /ceygen/dispatch.h: -------------------------------------------------------------------------------- 1 | #ifndef DISPATCH_H 2 | #define DISPATCH_H 3 | #include "eigen_cpp.h" 4 | 5 | 6 | // dummy compile time-only classes to differentiate between various contiguity types 7 | struct CContig { enum { 8 | Layout = RowMajor, 9 | InnerStride = 1 10 | };}; 11 | struct FContig { enum { 12 | Layout = ColMajor, 13 | InnerStride = 1 14 | };}; 15 | struct NContig { enum { 16 | Layout = RowMajor, 17 | InnerStride = Dynamic 18 | };}; 19 | 20 | // function taking memview, scalar arguments 21 | #define 
as_func(name, XContigType) \ 22 | void (*name)(dtype *, Py_ssize_t *, Py_ssize_t *, XContigType, dtype *) 23 | 24 | // function taking memview, memoryview arguments 25 | #define aa_func(name, XContigType, OContigType) \ 26 | void (*name)(dtype *, Py_ssize_t *, Py_ssize_t *, XContigType, dtype *, Py_ssize_t *, Py_ssize_t *, OContigType) 27 | 28 | // function taking memview, memview, scalar pointer arguments 29 | #define aas_func(name, XContigType, YContigType) \ 30 | void (*name)(dtype *, Py_ssize_t *, Py_ssize_t *, XContigType, dtype *, Py_ssize_t *, Py_ssize_t *, YContigType, dtype *) 31 | 32 | // function taking three memview arguments 33 | #define aaa_func(name, XContigType, YContigType, OContigType) \ 34 | void (*name)(dtype *, Py_ssize_t *, Py_ssize_t *, XContigType, dtype *, Py_ssize_t *, Py_ssize_t *, YContigType, dtype *, Py_ssize_t *, Py_ssize_t *, OContigType) 35 | 36 | template 37 | struct VSDispatcher 38 | { 39 | union VSFuncs { 40 | as_func(c, CContig); as_func(n, NContig); 41 | }; 42 | 43 | inline void run(dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 44 | dtype *o, as_func(c, CContig), as_func(n, NContig)) 45 | { 46 | /* it is better to just store function pointer inside the if/else branches, 47 | * because calling a function with a lot of parameters generates a lot of 48 | * (conditional) code which then causes cache misses */ 49 | VSFuncs tocall; 50 | if(x_strides[0] == sizeof(dtype)) 51 | tocall.c = c; 52 | else 53 | tocall.n = n; 54 | /* Following is a vile hack! We pretend to call the nns variant, alhough the 55 | * pointer may point to any of the variants stored in the union. This works because 56 | * the functions only differ in CContig/FContig/NContig arguments, and all these 57 | * are empty structures, which must have same (no) memory representation, thus 58 | * all the functions must have the same call signature. Another alternative would 59 | * be to cast function pointer, but abusing union was deemed slightly less ugly. 
*/ 60 | tocall.n(x_data, x_shape, x_strides, NContig(), o); 61 | } 62 | }; 63 | 64 | template 65 | struct VVSDispatcher 66 | { 67 | union VVSFuncs { 68 | aas_func(ccs, CContig, CContig); aas_func(cns, CContig, NContig); 69 | aas_func(ncs, NContig, CContig); aas_func(nns, NContig, NContig); 70 | }; 71 | 72 | inline void run(dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 73 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 74 | dtype *o, 75 | aas_func(ccs, CContig, CContig), aas_func(cns, CContig, NContig), 76 | aas_func(ncs, NContig, CContig), aas_func(nns, NContig, NContig)) 77 | { 78 | VVSFuncs tocall; 79 | if(x_strides[0] == sizeof(dtype)) { 80 | if(y_strides[0] == sizeof(dtype)) 81 | tocall.ccs = ccs; 82 | else 83 | tocall.cns = cns; 84 | } else { 85 | if(y_strides[0] == sizeof(dtype)) 86 | tocall.ncs = ncs; 87 | else 88 | tocall.nns = nns; 89 | } 90 | tocall.nns(x_data, x_shape, x_strides, NContig(), y_data, y_shape, y_strides, NContig(), o); 91 | } 92 | }; 93 | 94 | template 95 | struct VVVDispatcher 96 | { 97 | union VVVFuncs { 98 | aaa_func(ccc, CContig, CContig, CContig); aaa_func(ccn, CContig, CContig, NContig); 99 | aaa_func(cnc, CContig, NContig, CContig); aaa_func(cnn, CContig, NContig, NContig); 100 | aaa_func(ncc, NContig, CContig, CContig); aaa_func(ncn, NContig, CContig, NContig); 101 | aaa_func(nnc, NContig, NContig, CContig); aaa_func(nnn, NContig, NContig, NContig); 102 | }; 103 | 104 | inline void run(dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 105 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 106 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, 107 | aaa_func(ccc, CContig, CContig, CContig), aaa_func(ccn, CContig, CContig, NContig), 108 | aaa_func(cnc, CContig, NContig, CContig), aaa_func(cnn, CContig, NContig, NContig), 109 | aaa_func(ncc, NContig, CContig, CContig), aaa_func(ncn, NContig, CContig, NContig), 110 | aaa_func(nnc, NContig, NContig, CContig), aaa_func(nnn, NContig, NContig, NContig)) 111 | { 112 | VVVFuncs tocall; 113 | if(x_strides[0] == sizeof(dtype)) { 114 | if(y_strides[0] == sizeof(dtype)) { 115 | if(o_strides[0] == sizeof(dtype)) 116 | tocall.ccc = ccc; 117 | else 118 | tocall.ccn = ccn; 119 | } else { 120 | if(o_strides[0] == sizeof(dtype)) 121 | tocall.cnc = cnc; 122 | else 123 | tocall.cnn = cnn; 124 | } 125 | } else { 126 | if(y_strides[0] == sizeof(dtype)) { 127 | if(o_strides[0] == sizeof(dtype)) 128 | tocall.ncc = ncc; 129 | else 130 | tocall.ncn = ncn; 131 | } else { 132 | if(o_strides[0] == sizeof(dtype)) 133 | tocall.nnc = nnc; 134 | else 135 | tocall.nnn = nnn; 136 | } 137 | } 138 | tocall.nnn(x_data, x_shape, x_strides, NContig(), y_data, y_shape, y_strides, NContig(), o_data, o_shape, o_strides, NContig()); 139 | } 140 | }; 141 | 142 | template 143 | struct MSDispatcher 144 | { 145 | union MSFuncs { 146 | as_func(c, CContig); as_func(f, FContig); as_func(n, NContig); 147 | }; 148 | 149 | inline void run(dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 150 | dtype *o, 151 | as_func(c, CContig), as_func(f, FContig), as_func(n, NContig)) 152 | { 153 | MSFuncs tocall; 154 | if(x_strides[1] == sizeof(dtype)) 155 | tocall.c = c; 156 | else if(x_strides[0] == sizeof(dtype)) 157 | tocall.f = f; 158 | else 159 | tocall.n = n; 160 | tocall.n(x_data, x_shape, x_strides, NContig(), o); 161 | } 162 | }; 163 | 164 | template 165 | struct MVDispatcher 166 | { 167 | union MVFuncs { 168 | aa_func(cc, CContig, CContig); aa_func(cn, CContig, NContig); 169 | aa_func(fc, FContig, 
CContig); aa_func(fn, FContig, NContig); 170 | aa_func(nc, NContig, CContig); aa_func(nn, NContig, NContig); 171 | }; 172 | 173 | inline void run(dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 174 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, 175 | aa_func(cc, CContig, CContig), aa_func(cn, CContig, NContig), 176 | aa_func(fc, FContig, CContig), aa_func(fn, FContig, NContig), 177 | aa_func(nc, NContig, CContig), aa_func(nn, NContig, NContig)) 178 | { 179 | MVFuncs tocall; 180 | if(x_strides[1] == sizeof(dtype)) { 181 | if(o_strides[0] == sizeof(dtype)) 182 | tocall.cc = cc; 183 | else 184 | tocall.cn = cn; 185 | } else if(x_strides[0] == sizeof(dtype)) { 186 | if(o_strides[0] == sizeof(dtype)) 187 | tocall.fc = fc; 188 | else 189 | tocall.fn = fn; 190 | } else { 191 | if(o_strides[0] == sizeof(dtype)) 192 | tocall.nc = nc; 193 | else 194 | tocall.nn = nn; 195 | } 196 | tocall.nn(x_data, x_shape, x_strides, NContig(), o_data, o_shape, o_strides, NContig()); 197 | } 198 | }; 199 | 200 | template 201 | struct MMSDispatcher 202 | { 203 | union MMSFuncs { 204 | aas_func(ccs, CContig, CContig); aas_func(cfs, CContig, FContig); aas_func(cns, CContig, NContig); 205 | aas_func(fcs, FContig, CContig); aas_func(ffs, FContig, FContig); aas_func(fns, FContig, NContig); 206 | aas_func(ncs, NContig, CContig); aas_func(nfs, NContig, FContig); aas_func(nns, NContig, NContig); 207 | }; 208 | 209 | inline void run(dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 210 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 211 | dtype *o, 212 | aas_func(ccs, CContig, CContig), aas_func(cfs, CContig, FContig), aas_func(cns, CContig, NContig), 213 | aas_func(fcs, FContig, CContig), aas_func(ffs, FContig, FContig), aas_func(fns, FContig, NContig), 214 | aas_func(ncs, NContig, CContig), aas_func(nfs, NContig, FContig), aas_func(nns, NContig, NContig)) 215 | { 216 | MMSFuncs tocall; 217 | if(x_strides[1] == sizeof(dtype)) { 218 | if(y_strides[1] == sizeof(dtype)) 219 | tocall.ccs = ccs; 220 | else if(y_strides[0] == sizeof(dtype)) 221 | tocall.cfs = cfs; 222 | else 223 | tocall.cns = cns; 224 | } else if(x_strides[0] == sizeof(dtype)) { 225 | if(y_strides[1] == sizeof(dtype)) 226 | tocall.fcs = fcs; 227 | else if(y_strides[0] == sizeof(dtype)) 228 | tocall.ffs = ffs; 229 | else 230 | tocall.fns = fns; 231 | } else { 232 | if(y_strides[1] == sizeof(dtype)) 233 | tocall.ncs = ncs; 234 | else if(y_strides[0] == sizeof(dtype)) 235 | tocall.nfs = nfs; 236 | else 237 | tocall.nns = nns; 238 | } 239 | tocall.nns(x_data, x_shape, x_strides, NContig(), y_data, y_shape, y_strides, NContig(), o); 240 | } 241 | }; 242 | 243 | template 244 | struct MVVDispatcher 245 | { 246 | union MVVFuncs { 247 | aaa_func(ccc, CContig, CContig, CContig); aaa_func(ccn, CContig, CContig, NContig); 248 | aaa_func(cnc, CContig, NContig, CContig); aaa_func(cnn, CContig, NContig, NContig); 249 | aaa_func(fcc, FContig, CContig, CContig); aaa_func(fcn, FContig, CContig, NContig); 250 | aaa_func(fnc, FContig, NContig, CContig); aaa_func(fnn, FContig, NContig, NContig); 251 | aaa_func(ncc, NContig, CContig, CContig); aaa_func(ncn, NContig, CContig, NContig); 252 | aaa_func(nnc, NContig, NContig, CContig); aaa_func(nnn, NContig, NContig, NContig); 253 | }; 254 | 255 | inline void run(dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 256 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 257 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, 258 | aaa_func(ccc, CContig, CContig, 
CContig), aaa_func(ccn, CContig, CContig, NContig), 259 | aaa_func(cnc, CContig, NContig, CContig), aaa_func(cnn, CContig, NContig, NContig), 260 | aaa_func(fcc, FContig, CContig, CContig), aaa_func(fcn, FContig, CContig, NContig), 261 | aaa_func(fnc, FContig, NContig, CContig), aaa_func(fnn, FContig, NContig, NContig), 262 | aaa_func(ncc, NContig, CContig, CContig), aaa_func(ncn, NContig, CContig, NContig), 263 | aaa_func(nnc, NContig, NContig, CContig), aaa_func(nnn, NContig, NContig, NContig)) 264 | { 265 | MVVFuncs tocall; 266 | if(x_strides[1] == sizeof(dtype)) { 267 | if(y_strides[0] == sizeof(dtype)) { 268 | if(o_strides[0] == sizeof(dtype)) 269 | tocall.ccc = ccc; 270 | else 271 | tocall.ccn = ccn; 272 | } else { 273 | if(o_strides[0] == sizeof(dtype)) 274 | tocall.cnc = cnc; 275 | else 276 | tocall.cnn = cnn; 277 | } 278 | } else if(x_strides[0] == sizeof(dtype)) { 279 | if(y_strides[0] == sizeof(dtype)) { 280 | if(o_strides[0] == sizeof(dtype)) 281 | tocall.fcc = fcc; 282 | else 283 | tocall.fcn = fcn; 284 | } else { 285 | if(o_strides[0] == sizeof(dtype)) 286 | tocall.fnc = fnc; 287 | else 288 | tocall.fnn = fnn; 289 | } 290 | } else { 291 | if(y_strides[0] == sizeof(dtype)) { 292 | if(o_strides[0] == sizeof(dtype)) 293 | tocall.ncc = ncc; 294 | else 295 | tocall.ncn = ncn; 296 | } else { 297 | if(o_strides[0] == sizeof(dtype)) 298 | tocall.nnc = nnc; 299 | else 300 | tocall.nnn = nnn; 301 | } 302 | } 303 | tocall.nnn(x_data, x_shape, x_strides, NContig(), y_data, y_shape, y_strides, NContig(), o_data, o_shape, o_strides, NContig()); 304 | } 305 | }; 306 | 307 | template 308 | struct MMMDispatcher 309 | { 310 | union MMMFuncs { 311 | aaa_func(ccc, CContig, CContig, CContig); aaa_func(ccf, CContig, CContig, FContig); aaa_func(ccn, CContig, CContig, NContig); 312 | aaa_func(cfc, CContig, FContig, CContig); aaa_func(cff, CContig, FContig, FContig); aaa_func(cfn, CContig, FContig, NContig); 313 | aaa_func(cnc, CContig, NContig, CContig); aaa_func(cnf, CContig, NContig, FContig); aaa_func(cnn, CContig, NContig, NContig); 314 | aaa_func(fcc, FContig, CContig, CContig); aaa_func(fcf, FContig, CContig, FContig); aaa_func(fcn, FContig, CContig, NContig); 315 | aaa_func(ffc, FContig, FContig, CContig); aaa_func(fff, FContig, FContig, FContig); aaa_func(ffn, FContig, FContig, NContig); 316 | aaa_func(fnc, FContig, NContig, CContig); aaa_func(fnf, FContig, NContig, FContig); aaa_func(fnn, FContig, NContig, NContig); 317 | aaa_func(ncc, NContig, CContig, CContig); aaa_func(ncf, NContig, CContig, FContig); aaa_func(ncn, NContig, CContig, NContig); 318 | aaa_func(nfc, NContig, FContig, CContig); aaa_func(nff, NContig, FContig, FContig); aaa_func(nfn, NContig, FContig, NContig); 319 | aaa_func(nnc, NContig, NContig, CContig); aaa_func(nnf, NContig, NContig, FContig); aaa_func(nnn, NContig, NContig, NContig); 320 | }; 321 | 322 | inline void run(dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 323 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 324 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, 325 | aaa_func(ccc, CContig, CContig, CContig), aaa_func(ccf, CContig, CContig, FContig), aaa_func(ccn, CContig, CContig, NContig), 326 | aaa_func(cfc, CContig, FContig, CContig), aaa_func(cff, CContig, FContig, FContig), aaa_func(cfn, CContig, FContig, NContig), 327 | aaa_func(cnc, CContig, NContig, CContig), aaa_func(cnf, CContig, NContig, FContig), aaa_func(cnn, CContig, NContig, NContig), 328 | aaa_func(fcc, FContig, CContig, CContig), aaa_func(fcf, FContig, 
CContig, FContig), aaa_func(fcn, FContig, CContig, NContig), 329 | aaa_func(ffc, FContig, FContig, CContig), aaa_func(fff, FContig, FContig, FContig), aaa_func(ffn, FContig, FContig, NContig), 330 | aaa_func(fnc, FContig, NContig, CContig), aaa_func(fnf, FContig, NContig, FContig), aaa_func(fnn, FContig, NContig, NContig), 331 | aaa_func(ncc, NContig, CContig, CContig), aaa_func(ncf, NContig, CContig, FContig), aaa_func(ncn, NContig, CContig, NContig), 332 | aaa_func(nfc, NContig, FContig, CContig), aaa_func(nff, NContig, FContig, FContig), aaa_func(nfn, NContig, FContig, NContig), 333 | aaa_func(nnc, NContig, NContig, CContig), aaa_func(nnf, NContig, NContig, FContig), aaa_func(nnn, NContig, NContig, NContig)) 334 | { 335 | MMMFuncs tocall; 336 | if(x_strides[1] == sizeof(dtype)) { 337 | if(y_strides[1] == sizeof(dtype)) { 338 | if(o_strides[1] == sizeof(dtype)) 339 | tocall.ccc = ccc; 340 | else if(o_strides[0] == sizeof(dtype)) 341 | tocall.ccf = ccf; 342 | else 343 | tocall.ccn = ccn; 344 | } else if(y_strides[0] == sizeof(dtype)) { 345 | if(o_strides[1] == sizeof(dtype)) 346 | tocall.cfc = cfc; 347 | else if(o_strides[0] == sizeof(dtype)) 348 | tocall.cff = cff; 349 | else 350 | tocall.cfn = cfn; 351 | } else { 352 | if(o_strides[1] == sizeof(dtype)) 353 | tocall.cnc = cnc; 354 | else if(o_strides[0] == sizeof(dtype)) 355 | tocall.cnf = cnf; 356 | else 357 | tocall.cnn = cnn; 358 | } 359 | } else if(x_strides[0] == sizeof(dtype)) { 360 | if(y_strides[1] == sizeof(dtype)) { 361 | if(o_strides[1] == sizeof(dtype)) 362 | tocall.fcc = fcc; 363 | else if(o_strides[0] == sizeof(dtype)) 364 | tocall.fcf = fcf; 365 | else 366 | tocall.fcn = fcn; 367 | } else if(y_strides[0] == sizeof(dtype)) { 368 | if(o_strides[1] == sizeof(dtype)) 369 | tocall.ffc = ffc; 370 | else if(o_strides[0] == sizeof(dtype)) 371 | tocall.fff = fff; 372 | else 373 | tocall.ffn = ffn; 374 | } else { 375 | if(o_strides[1] == sizeof(dtype)) 376 | tocall.fnc = fnc; 377 | else if(o_strides[0] == sizeof(dtype)) 378 | tocall.fnf = fnf; 379 | else 380 | tocall.fnn = fnn; 381 | } 382 | } else { 383 | if(y_strides[1] == sizeof(dtype)) { 384 | if(o_strides[1] == sizeof(dtype)) 385 | tocall.ncc = ncc; 386 | else if(o_strides[0] == sizeof(dtype)) 387 | tocall.ncf = ncf; 388 | else 389 | tocall.ncn = ncn; 390 | } else if(y_strides[0] == sizeof(dtype)) { 391 | if(o_strides[1] == sizeof(dtype)) 392 | tocall.nfc = nfc; 393 | else if(o_strides[0] == sizeof(dtype)) 394 | tocall.nff = nff; 395 | else 396 | tocall.nfn = nfn; 397 | } else { 398 | if(o_strides[1] == sizeof(dtype)) 399 | tocall.nnc = nnc; 400 | else if(o_strides[0] == sizeof(dtype)) 401 | tocall.nnf = nnf; 402 | else 403 | tocall.nnn = nnn; 404 | } 405 | } 406 | tocall.nnn(x_data, x_shape, x_strides, NContig(), y_data, y_shape, y_strides, NContig(), o_data, o_shape, o_strides, NContig()); 407 | } 408 | }; 409 | 410 | #endif // DISPATCH_H 411 | -------------------------------------------------------------------------------- /ceygen/dispatch.pxd: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | cdef extern from "dispatch.h": 4 | # dummy classes to differentiate between various contiguity types 5 | cdef cppclass CContig: 6 | pass 7 | cdef cppclass FContig: 8 | pass 9 | cdef cppclass NContig: 10 | pass 11 | 12 | cdef cppclass VSDispatcher[Scalar]: 13 | void run(Scalar *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 14 | Scalar *o, 15 | void (*c)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *) 
nogil, 16 | void (*n)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *) nogil, 17 | ) nogil except + 18 | 19 | cdef cppclass VVSDispatcher[Scalar]: 20 | void run(Scalar *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 21 | Scalar *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 22 | Scalar *o, 23 | void (*ccs)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *) nogil, 24 | void (*cns)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *) nogil, 25 | void (*ncs)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *) nogil, 26 | void (*nns)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *) nogil, 27 | ) nogil except + 28 | 29 | cdef cppclass VVVDispatcher[Scalar]: 30 | void run(Scalar *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 31 | Scalar *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 32 | Scalar *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, 33 | void (*ccc)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 34 | void (*ccn)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 35 | void (*cnc)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 36 | void (*cnn)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 37 | void (*ncc)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 38 | void (*ncn)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 39 | void (*nnc)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 40 | void (*nnn)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 41 | ) nogil except + 42 | 43 | cdef cppclass MSDispatcher[Scalar]: 44 | void run(Scalar *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 45 | Scalar *o, 46 | void (*c)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *) nogil, 47 | void (*f)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *) nogil, 48 | void (*n)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *) nogil, 49 | ) nogil except + 50 | 51 | cdef cppclass MVDispatcher[Scalar]: 52 | void run(Scalar *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 53 | Scalar *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, 54 | void (*cc)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 55 | void (*cn)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 56 | void (*fc)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 57 | void (*fn)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 58 | void (*nc)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar 
*, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 59 | void (*nn)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 60 | ) nogil except + 61 | 62 | cdef cppclass MMSDispatcher[Scalar]: 63 | void run(Scalar *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 64 | Scalar *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 65 | Scalar *o, 66 | void (*ccs)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *) nogil, 67 | void (*cfs)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *) nogil, 68 | void (*cns)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *) nogil, 69 | void (*fcs)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *) nogil, 70 | void (*ffs)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *) nogil, 71 | void (*fns)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *) nogil, 72 | void (*ncs)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *) nogil, 73 | void (*nfs)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *) nogil, 74 | void (*nns)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *) nogil, 75 | ) nogil except + 76 | 77 | cdef cppclass MVVDispatcher[Scalar]: 78 | void run(Scalar *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 79 | Scalar *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 80 | Scalar *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, 81 | void (*ccc)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 82 | void (*ccn)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 83 | void (*cnc)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 84 | void (*cnn)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 85 | void (*fcc)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 86 | void (*fcn)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 87 | void (*fnc)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 88 | void (*fnn)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 89 | void (*ncc)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 90 | void (*ncn)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 91 | void (*nnc)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, 
Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 92 | void (*nnn)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 93 | ) nogil except + 94 | 95 | cdef cppclass MMMDispatcher[Scalar]: 96 | # okay, this is ridiculous, but this function is inlined and written only once... 97 | void run(Scalar *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, 98 | Scalar *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, 99 | Scalar *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, 100 | void (*ccc)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 101 | void (*ccf)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 102 | void (*ccn)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 103 | void (*cfc)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 104 | void (*cff)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 105 | void (*cfn)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 106 | void (*cnc)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 107 | void (*cnf)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 108 | void (*cnn)(Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 109 | void (*fcc)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 110 | void (*fcf)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 111 | void (*fcn)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 112 | void (*ffc)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 113 | void (*fff)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 114 | void (*ffn)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 115 | void (*fnc)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 116 | void (*fnf)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 117 | void (*fnn)(Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, 
Py_ssize_t *, Py_ssize_t *, NContig) nogil, 118 | void (*ncc)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 119 | void (*ncf)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 120 | void (*ncn)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 121 | void (*nfc)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 122 | void (*nff)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 123 | void (*nfn)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 124 | void (*nnc)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, CContig) nogil, 125 | void (*nnf)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, FContig) nogil, 126 | void (*nnn)(Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig, Scalar *, Py_ssize_t *, Py_ssize_t *, NContig) nogil, 127 | ) nogil except + 128 | 129 | 130 | ctypedef fused XVectorContiguity: 131 | CContig 132 | NContig 133 | 134 | ctypedef fused YVectorContiguity: 135 | CContig 136 | NContig 137 | 138 | ctypedef fused OVectorContiguity: 139 | CContig 140 | NContig 141 | 142 | ctypedef fused XMatrixContiguity: 143 | CContig 144 | FContig 145 | NContig 146 | 147 | ctypedef fused YMatrixContiguity: 148 | CContig 149 | FContig 150 | NContig 151 | 152 | ctypedef fused OMatrixContiguity: 153 | CContig 154 | FContig 155 | NContig 156 | -------------------------------------------------------------------------------- /ceygen/dtype.pxd: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # When adding new type: 4 | # * add it to ctypedef fused dtype 5 | # * add it to get_format() in dtype.pyx 6 | # * update documentation in doc/core.rst 7 | # * rebuild Ceygen! 
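# For illustration only, a hedged sketch of the steps listed above (the
# ``uint`` name below is hypothetical, not part of Ceygen's sources):
#
#   ctypedef unsigned int uint   # give the new scalar a single-token name
#
#   ctypedef fused dtype:
#       # ...existing types...
#       uint
#
# plus a matching branch in get_format() in dtype.pyx returning 'I' (the
# array-module type code for unsigned int), and then rebuilding Ceygen.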
8 | 9 | ctypedef fused dtype: 10 | char 11 | short 12 | int 13 | long 14 | float 15 | double 16 | 17 | # some methods such as inv() cannot really work with non-integer types 18 | ctypedef fused nonint_dtype: 19 | float 20 | double 21 | 22 | cdef dtype[:] vector(int size, dtype *like) with gil 23 | cdef dtype[:, :] matrix(int rows, int cols, dtype *like) with gil 24 | -------------------------------------------------------------------------------- /ceygen/dtype.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from cython cimport view 4 | 5 | 6 | cdef inline str get_format(dtype *dummy): 7 | """ 8 | This function must return Type code for all data types in `dtype` as described in 9 | table at http://docs.python.org/library/array.html 10 | """ 11 | if dtype is char: 12 | return 'c' 13 | if dtype is short: 14 | return 'h' 15 | if dtype is int: 16 | return 'i' 17 | if dtype is long: 18 | return 'l' 19 | if dtype is float: 20 | return 'f' 21 | if dtype is double: 22 | return 'd' 23 | 24 | cdef dtype[:] vector(int size, dtype *like) with gil: 25 | return view.array(shape=(size,), itemsize=sizeof(dtype), format=get_format(like)) 26 | 27 | cdef dtype[:, :] matrix(int rows, int cols, dtype *like) with gil: 28 | return view.array(shape=(rows, cols), itemsize=sizeof(dtype), format=get_format(like)) 29 | -------------------------------------------------------------------------------- /ceygen/eigen_cpp.h: -------------------------------------------------------------------------------- 1 | #ifndef EIGEN_CPP_H 2 | #define EIGEN_CPP_H 3 | // two macros ensures any macro passed will be expanded before being stringified 4 | #define STRINGIZE_DETAIL(x) #x 5 | #define STRINGIZE(x) STRINGIZE_DETAIL(x) 6 | 7 | #include 8 | // make Eigen raise an exception instead of aborting on assert failure. Cython converts 9 | // std::runtime_error to Python RuntimeError 10 | #define eigen_assert(statement) do { if(!(statement)) throw std::invalid_argument(#statement " does not hold in " __FILE__ ":" STRINGIZE(__LINE__)); } while(0) 11 | #define EIGEN_NO_AUTOMATIC_RESIZING // affects operator=, Ceygen doesn't want resizing 12 | #define EIGEN_RUNTIME_NO_MALLOC // enables use of set_is_malloc_allowed() in tests 13 | 14 | #include 15 | #include // for Matrix.inverse() 16 | #include // for Matrix.llt() 17 | 18 | #include // for Py_ssize_t 19 | 20 | #ifdef DEBUG 21 | #include 22 | #endif 23 | 24 | 25 | using namespace Eigen; 26 | 27 | /** 28 | * Very simple Eigen::Map<> subclass that provides default constructor and lets 29 | * Cython late-initialize the map using init() method 30 | */ 31 | template 32 | class BaseMap : public Map 33 | { 34 | public: 35 | typedef Map Base; 36 | typedef typename Base::Scalar Scalar; 37 | 38 | BaseMap() : Base(0, 39 | BaseType::RowsAtCompileTime == Dynamic ? 0 : BaseType::RowsAtCompileTime, 40 | BaseType::ColsAtCompileTime == Dynamic ? 0 : BaseType::ColsAtCompileTime, 41 | StrideType( 42 | StrideType::OuterStrideAtCompileTime == Dynamic ? 0 : StrideType::OuterStrideAtCompileTime, 43 | StrideType::InnerStrideAtCompileTime == Dynamic ? 0 : StrideType::InnerStrideAtCompileTime 44 | ) 45 | ) {} 46 | 47 | inline void init(Scalar *data, const Py_ssize_t *shape, const Py_ssize_t *strides) { 48 | // enum is used just to ensure that this is a compile-time constant 49 | enum { 50 | RowsShapeIndex = 0, // for both vectors and matrices; entry exists just for symmetry 51 | ColsShapeIndex = Base::IsVectorAtCompileTime ? 
0 : 1, 52 | OuterStrideIndex = (BaseType::Options & RowMajor) ? 0 : 1, // only used for matrices 53 | InnerStrideIndex = Base::IsVectorAtCompileTime ? 0 : ((BaseType::Options & RowMajor) ? 1 : 0), 54 | }; 55 | 56 | # ifdef DEBUG 57 | std::cerr << __PRETTY_FUNCTION__ << std::endl; 58 | std::cerr << "got: shape: " << shape[0] << ", " << shape[1] << " strides: " << strides[0] << ", " << strides[1] << std::endl; 59 | std::cerr << "got: RowsAtCompileTime: " << BaseType::RowsAtCompileTime << " ColsAtCompileTime: " << BaseType::ColsAtCompileTime << " Options: " << BaseType::Options << std::endl; 60 | std::cerr << "got: OuterStrideAtCompileTime: " << StrideType::OuterStrideAtCompileTime << " InnerStrideAtCompileTime: " << StrideType::InnerStrideAtCompileTime << std::endl; 61 | std::cerr << "got: RowsShapeIndex: " << RowsShapeIndex << " ColsShapeIndex: " << ColsShapeIndex << " OuterStrideIndex: " << OuterStrideIndex << " InnerStrideIndex: " << InnerStrideIndex << std::endl; 62 | # endif 63 | 64 | /* see http://eigen.tuxfamily.org/dox/TutorialMapClass.html - this is NOT a heap allocation 65 | * Note: Cython (and Python) has strides in bytes, Eigen in sizeof(Scalar) units */ 66 | new (this) Base(data, 67 | BaseType::RowsAtCompileTime == Dynamic ? shape[RowsShapeIndex] : BaseType::RowsAtCompileTime, 68 | BaseType::ColsAtCompileTime == Dynamic ? shape[ColsShapeIndex] : BaseType::ColsAtCompileTime, 69 | StrideType( 70 | Base::IsVectorAtCompileTime ? 0 : strides[OuterStrideIndex]/sizeof(Scalar), 71 | strides[InnerStrideIndex]/sizeof(Scalar) 72 | ) 73 | ); 74 | # ifdef DEBUG 75 | std::cerr << "rows=" << this->rows() << ", cols=" << this->cols() 76 | << " outerStride=" << this->outerStride() << ", innerStride=" << this->innerStride() << std::endl; 77 | bool malloc_allowed = internal::is_malloc_allowed(); 78 | internal::set_is_malloc_allowed(true); 79 | std::cerr << *this << std::endl; 80 | internal::set_is_malloc_allowed(malloc_allowed); 81 | # endif 82 | }; 83 | 84 | // if we write "x = y" in a .pyx file, Cython creates a temporary, which breaks 85 | // Matrix's operator= and needlessly copies memory 86 | template 87 | inline void assign(const T &rhs) { 88 | *this = rhs; 89 | } 90 | 91 | // see above 92 | template 93 | inline void assign_inverse(const T &rhs) { 94 | assign(rhs.inverse()); 95 | } 96 | 97 | template 98 | inline void noalias_assign(const T &rhs) { 99 | this->noalias() = rhs; 100 | } 101 | 102 | EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BaseMap) 103 | }; 104 | 105 | template 106 | class VectorMap : public BaseMap, Stride<0, ContiguityType::InnerStride> > 107 | { 108 | }; 109 | 110 | template 111 | class RowVectorMap : public BaseMap, Stride<0, ContiguityType::InnerStride> > 112 | { 113 | }; 114 | 115 | template 116 | class Array1DMap : public BaseMap, Stride<0, ContiguityType::InnerStride> > 117 | { 118 | }; 119 | 120 | template 121 | class MatrixMap : public BaseMap, Stride > 122 | { 123 | }; 124 | 125 | template 126 | class Array2DMap : public BaseMap, Stride > 127 | { 128 | }; 129 | 130 | #endif // EIGEN_CPP_H 131 | -------------------------------------------------------------------------------- /ceygen/eigen_cython.pxd: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # note: Cython doesn't like VectorMap[Scalar] anywhere except in cdef cppclass ... 4 | # declaration. 
Using just the class without template param everywhere else works 5 | # well 6 | 7 | from libcpp cimport bool 8 | 9 | from dtype cimport dtype 10 | 11 | 12 | cdef extern from "eigen_cpp.h": 13 | void c_set_is_malloc_allowed "internal::set_is_malloc_allowed"(bool) nogil 14 | int EIGEN_WORLD_VERSION, EIGEN_MAJOR_VERSION, EIGEN_MINOR_VERSION 15 | 16 | cdef cppclass BaseMap[Scalar]: 17 | # "constructor": 18 | void init(Scalar *, const Py_ssize_t *, const Py_ssize_t *) nogil 19 | 20 | # our own methods: 21 | void assign(BaseMap) nogil 22 | void assign_inverse(BaseMap) nogil 23 | void noalias_assign(BaseMap) nogil 24 | 25 | # exported Eigen methods 26 | Scalar determinant() nogil 27 | Scalar dot(BaseMap) nogil 28 | # a little hack so that we don't have to introduce VectorwiseOp cppclass: 29 | BaseMap colwise_sum "colwise().sum"() nogil 30 | BaseMap rowwise_sum "rowwise().sum"() nogil 31 | # a little hack so that we don't have to introduce LLT class: 32 | BaseMap llt_matrixL "llt().matrixL"() nogil 33 | Scalar sum() nogil 34 | BaseMap pow(Scalar) nogil 35 | 36 | # this is a huge cheat, these operators don't map 1:1 to actual C++ operators at 37 | # all; but the declarations here are just to tell that the operators are possible.. 38 | BaseMap operator+(BaseMap) nogil 39 | BaseMap operator-(BaseMap) nogil 40 | BaseMap operator*(BaseMap) nogil 41 | BaseMap operator/(BaseMap) nogil 42 | 43 | cdef cppclass VectorMap[Scalar, ContiguityType](BaseMap): 44 | pass 45 | 46 | cdef cppclass RowVectorMap[Scalar, ContiguityType](BaseMap): 47 | pass 48 | 49 | cdef cppclass Array1DMap[Scalar, ContiguityType](BaseMap): 50 | # must be here, Cython has problems inheriting overloads, http://trac.cython.org/cython_trac/ticket/800 51 | BaseMap operator+(Scalar) nogil 52 | BaseMap operator*(Scalar) nogil 53 | 54 | cdef cppclass MatrixMap[Scalar, ContiguityType](BaseMap): 55 | pass 56 | 57 | cdef cppclass Array2DMap[Scalar, ContiguityType](BaseMap): 58 | # http://trac.cython.org/cython_trac/ticket/800 59 | BaseMap operator+(Scalar) nogil 60 | BaseMap operator*(Scalar) nogil 61 | -------------------------------------------------------------------------------- /ceygen/elemwise.pxd: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from dtype cimport dtype 4 | 5 | 6 | cdef dtype[:] add_vs(dtype[:] x, dtype y, dtype[:] out = *) nogil 7 | cdef dtype[:] multiply_vs(dtype[:] x, dtype y, dtype[:] out = *) nogil 8 | cdef dtype[:] power_vs(dtype[:] x, dtype y, dtype[:] out = *) nogil 9 | 10 | cdef dtype[:] add_vv(dtype[:] x, dtype[:] y, dtype[:] out = *) nogil 11 | cdef dtype[:] subtract_vv(dtype[:] x, dtype[:] y, dtype[:] out = *) nogil 12 | cdef dtype[:] multiply_vv(dtype[:] x, dtype[:] y, dtype[:] out = *) nogil 13 | cdef dtype[:] divide_vv(dtype[:] x, dtype[:] y, dtype[:] out = *) nogil 14 | 15 | cdef dtype[:, :] add_ms(dtype[:, :] x, dtype y, dtype[:, :] out = *) nogil 16 | cdef dtype[:, :] multiply_ms(dtype[:, :] x, dtype y, dtype[:, :] out = *) nogil 17 | cdef dtype[:, :] power_ms(dtype[:, :] x, dtype y, dtype[:, :] out = *) nogil 18 | 19 | cdef dtype[:, :] add_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = *) nogil 20 | cdef dtype[:, :] subtract_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = *) nogil 21 | cdef dtype[:, :] multiply_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = *) nogil 22 | cdef dtype[:, :] divide_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = *) nogil 23 | 
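A hedged usage sketch (hypothetical file name, not part of the repository): the cdef declarations above are meant to be cimported from other Cython code, and when the fused scalar type cannot be inferred the specialization is selected explicitly with [double], as the test modules further below also do.

    # user_code.pyx -- assumes Ceygen is built and importable
    import numpy as np
    cimport ceygen.elemwise as e

    def demo():
        x = np.array([1., 2., 3.])
        y = np.array([4., 5., 6.])
        out = np.empty(3)
        e.add_vv[double](x, y, out)        # out becomes [5., 7., 9.]
        e.multiply_vs[double](x, 2., out)  # out becomes [2., 4., 6.]
        return out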
-------------------------------------------------------------------------------- /ceygen/elemwise.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | cimport cython 4 | 5 | from eigen_cython cimport * 6 | from dispatch cimport * 7 | from dtype cimport vector, matrix 8 | 9 | 10 | cdef void add_vs_worker( 11 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 12 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy, 13 | dtype *y) nogil: 14 | cdef Array1DMap[dtype, XVectorContiguity] x 15 | cdef Array1DMap[dtype, OVectorContiguity] o 16 | x.init(x_data, x_shape, x_strides) 17 | o.init(o_data, o_shape, o_strides) 18 | o.assign(x + y[0]) 19 | 20 | @cython.boundscheck(False) 21 | @cython.wraparound(False) 22 | cdef dtype[:] add_vs(dtype[:] x, dtype y, dtype[:] out = None) nogil: 23 | cdef VVSDispatcher[dtype] dispatcher 24 | if out is None: 25 | out = vector(x.shape[0], &y) 26 | # y, out is swapped here so that we can share VVSDispatcher 27 | dispatcher.run(&x[0], x.shape, x.strides, &out[0], out.shape, out.strides, &y, 28 | add_vs_worker, add_vs_worker, add_vs_worker, add_vs_worker) 29 | return out 30 | 31 | 32 | cdef void multiply_vs_worker( 33 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 34 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy, 35 | dtype *y) nogil: 36 | cdef Array1DMap[dtype, XVectorContiguity] x 37 | cdef Array1DMap[dtype, OVectorContiguity] o 38 | x.init(x_data, x_shape, x_strides) 39 | o.init(o_data, o_shape, o_strides) 40 | o.assign(x * y[0]) 41 | 42 | @cython.boundscheck(False) 43 | @cython.wraparound(False) 44 | cdef dtype[:] multiply_vs(dtype[:] x, dtype y, dtype[:] out = None) nogil: 45 | cdef VVSDispatcher[dtype] dispatcher 46 | if out is None: 47 | out = vector(x.shape[0], &y) 48 | # y, out is swapped here so that we can share VVSDispatcher 49 | dispatcher.run(&x[0], x.shape, x.strides, &out[0], out.shape, out.strides, &y, 50 | multiply_vs_worker, multiply_vs_worker, multiply_vs_worker, multiply_vs_worker) 51 | return out 52 | 53 | 54 | cdef void power_vs_worker( 55 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 56 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy, 57 | dtype *y) nogil: 58 | cdef Array1DMap[dtype, XVectorContiguity] x 59 | cdef Array1DMap[dtype, OVectorContiguity] o 60 | x.init(x_data, x_shape, x_strides) 61 | o.init(o_data, o_shape, o_strides) 62 | o.assign(x.pow(y[0])) 63 | 64 | @cython.boundscheck(False) 65 | @cython.wraparound(False) 66 | cdef dtype[:] power_vs(dtype[:] x, dtype y, dtype[:] out = None) nogil: 67 | cdef VVSDispatcher[dtype] dispatcher 68 | if out is None: 69 | out = vector(x.shape[0], &y) 70 | # y, out is swapped here so that we can share VVSDispatcher 71 | dispatcher.run(&x[0], x.shape, x.strides, &out[0], out.shape, out.strides, &y, 72 | power_vs_worker, power_vs_worker, power_vs_worker, power_vs_worker) 73 | return out 74 | 75 | 76 | cdef void add_vv_worker( 77 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 78 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YVectorContiguity y_dummy, 79 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy) nogil: 80 | cdef Array1DMap[dtype, XVectorContiguity] x 81 | cdef Array1DMap[dtype, YVectorContiguity] y 82 | 
cdef Array1DMap[dtype, OVectorContiguity] o 83 | x.init(x_data, x_shape, x_strides) 84 | y.init(y_data, y_shape, y_strides) 85 | o.init(o_data, o_shape, o_strides) 86 | o.assign(x + y) 87 | 88 | @cython.boundscheck(False) 89 | @cython.wraparound(False) 90 | cdef dtype[:] add_vv(dtype[:] x, dtype[:] y, dtype[:] out = None) nogil: 91 | cdef VVVDispatcher[dtype] dispatcher 92 | if out is None: 93 | out = vector(x.shape[0], &x[0]) 94 | dispatcher.run(&x[0], x.shape, x.strides, &y[0], y.shape, y.strides, &out[0], out.shape, out.strides, 95 | add_vv_worker, add_vv_worker, add_vv_worker, add_vv_worker, 96 | add_vv_worker, add_vv_worker, add_vv_worker, add_vv_worker) 97 | return out 98 | 99 | 100 | cdef void subtract_vv_worker( 101 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 102 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YVectorContiguity y_dummy, 103 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy) nogil: 104 | cdef Array1DMap[dtype, XVectorContiguity] x 105 | cdef Array1DMap[dtype, YVectorContiguity] y 106 | cdef Array1DMap[dtype, OVectorContiguity] o 107 | x.init(x_data, x_shape, x_strides) 108 | y.init(y_data, y_shape, y_strides) 109 | o.init(o_data, o_shape, o_strides) 110 | o.assign(x - y) 111 | 112 | @cython.boundscheck(False) 113 | @cython.wraparound(False) 114 | cdef dtype[:] subtract_vv(dtype[:] x, dtype[:] y, dtype[:] out = None) nogil: 115 | cdef VVVDispatcher[dtype] dispatcher 116 | if out is None: 117 | out = vector(x.shape[0], &x[0]) 118 | dispatcher.run(&x[0], x.shape, x.strides, &y[0], y.shape, y.strides, &out[0], out.shape, out.strides, 119 | subtract_vv_worker, subtract_vv_worker, subtract_vv_worker, subtract_vv_worker, 120 | subtract_vv_worker, subtract_vv_worker, subtract_vv_worker, subtract_vv_worker) 121 | return out 122 | 123 | 124 | cdef void multiply_vv_worker( 125 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 126 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YVectorContiguity y_dummy, 127 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy) nogil: 128 | cdef Array1DMap[dtype, XVectorContiguity] x 129 | cdef Array1DMap[dtype, YVectorContiguity] y 130 | cdef Array1DMap[dtype, OVectorContiguity] o 131 | x.init(x_data, x_shape, x_strides) 132 | y.init(y_data, y_shape, y_strides) 133 | o.init(o_data, o_shape, o_strides) 134 | o.assign(x * y) 135 | 136 | @cython.boundscheck(False) 137 | @cython.wraparound(False) 138 | cdef dtype[:] multiply_vv(dtype[:] x, dtype[:] y, dtype[:] out = None) nogil: 139 | cdef VVVDispatcher[dtype] dispatcher 140 | if out is None: 141 | out = vector(x.shape[0], &x[0]) 142 | dispatcher.run(&x[0], x.shape, x.strides, &y[0], y.shape, y.strides, &out[0], out.shape, out.strides, 143 | multiply_vv_worker, multiply_vv_worker, multiply_vv_worker, multiply_vv_worker, 144 | multiply_vv_worker, multiply_vv_worker, multiply_vv_worker, multiply_vv_worker) 145 | return out 146 | 147 | 148 | cdef void divide_vv_worker( 149 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 150 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YVectorContiguity y_dummy, 151 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy) nogil: 152 | cdef Array1DMap[dtype, XVectorContiguity] x 153 | cdef Array1DMap[dtype, YVectorContiguity] y 154 | cdef Array1DMap[dtype, OVectorContiguity] o 155 | x.init(x_data, x_shape, 
x_strides) 156 | y.init(y_data, y_shape, y_strides) 157 | o.init(o_data, o_shape, o_strides) 158 | o.assign(x / y) 159 | 160 | @cython.boundscheck(False) 161 | @cython.wraparound(False) 162 | cdef dtype[:] divide_vv(dtype[:] x, dtype[:] y, dtype[:] out = None) nogil: 163 | cdef VVVDispatcher[dtype] dispatcher 164 | if out is None: 165 | out = vector(x.shape[0], &x[0]) 166 | dispatcher.run(&x[0], x.shape, x.strides, &y[0], y.shape, y.strides, &out[0], out.shape, out.strides, 167 | divide_vv_worker, divide_vv_worker, divide_vv_worker, divide_vv_worker, 168 | divide_vv_worker, divide_vv_worker, divide_vv_worker, divide_vv_worker) 169 | return out 170 | 171 | 172 | cdef void add_ms_worker( 173 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 174 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy, 175 | dtype *y) nogil: 176 | cdef Array2DMap[dtype, XMatrixContiguity] x 177 | cdef Array2DMap[dtype, OMatrixContiguity] o 178 | x.init(x_data, x_shape, x_strides) 179 | o.init(o_data, o_shape, o_strides) 180 | o.assign(x + y[0]) 181 | 182 | @cython.boundscheck(False) 183 | @cython.wraparound(False) 184 | cdef dtype[:, :] add_ms(dtype[:, :] x, dtype y, dtype[:, :] out = None) nogil: 185 | cdef MMSDispatcher[dtype] dispatcher 186 | if out is None: 187 | out = matrix(x.shape[0], x.shape[1], &y) 188 | # we swap out and y so tat we can reuse MMSDispatcher 189 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out[0, 0], out.shape, out.strides, &y, 190 | add_ms_worker, add_ms_worker, add_ms_worker, 191 | add_ms_worker, add_ms_worker, add_ms_worker, 192 | add_ms_worker, add_ms_worker, add_ms_worker) 193 | return out 194 | 195 | 196 | cdef void multiply_ms_worker( 197 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 198 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy, 199 | dtype *y) nogil: 200 | cdef Array2DMap[dtype, XMatrixContiguity] x 201 | cdef Array2DMap[dtype, OMatrixContiguity] o 202 | x.init(x_data, x_shape, x_strides) 203 | o.init(o_data, o_shape, o_strides) 204 | o.assign(x * y[0]) 205 | 206 | @cython.boundscheck(False) 207 | @cython.wraparound(False) 208 | cdef dtype[:, :] multiply_ms(dtype[:, :] x, dtype y, dtype[:, :] out = None) nogil: 209 | cdef MMSDispatcher[dtype] dispatcher 210 | if out is None: 211 | out = matrix(x.shape[0], x.shape[1], &y) 212 | # we swap out and y so tat we can reuse MMSDispatcher 213 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out[0, 0], out.shape, out.strides, &y, 214 | multiply_ms_worker, multiply_ms_worker, multiply_ms_worker, 215 | multiply_ms_worker, multiply_ms_worker, multiply_ms_worker, 216 | multiply_ms_worker, multiply_ms_worker, multiply_ms_worker) 217 | return out 218 | 219 | 220 | cdef void power_ms_worker( 221 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 222 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy, 223 | dtype *y) nogil: 224 | cdef Array2DMap[dtype, XMatrixContiguity] x 225 | cdef Array2DMap[dtype, OMatrixContiguity] o 226 | x.init(x_data, x_shape, x_strides) 227 | o.init(o_data, o_shape, o_strides) 228 | o.assign(x.pow(y[0])) 229 | 230 | @cython.boundscheck(False) 231 | @cython.wraparound(False) 232 | cdef dtype[:, :] power_ms(dtype[:, :] x, dtype y, dtype[:, :] out = None) nogil: 233 | cdef MMSDispatcher[dtype] dispatcher 234 | if out is None: 235 | out = matrix(x.shape[0], x.shape[1], &y) 236 | # we swap out and 
y so tat we can reuse MMSDispatcher 237 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out[0, 0], out.shape, out.strides, &y, 238 | power_ms_worker, power_ms_worker, power_ms_worker, 239 | power_ms_worker, power_ms_worker, power_ms_worker, 240 | power_ms_worker, power_ms_worker, power_ms_worker) 241 | return out 242 | 243 | 244 | cdef void add_mm_worker( 245 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 246 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy, 247 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy) nogil: 248 | cdef Array2DMap[dtype, XMatrixContiguity] x 249 | cdef Array2DMap[dtype, YMatrixContiguity] y 250 | cdef Array2DMap[dtype, OMatrixContiguity] o 251 | x.init(x_data, x_shape, x_strides) 252 | y.init(y_data, y_shape, y_strides) 253 | o.init(o_data, o_shape, o_strides) 254 | o.assign(x + y) 255 | 256 | @cython.boundscheck(False) 257 | @cython.wraparound(False) 258 | cdef dtype[:, :] add_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = None) nogil: 259 | cdef MMMDispatcher[dtype] dispatcher 260 | if out is None: 261 | out = matrix(x.shape[0], x.shape[1], &x[0, 0]) 262 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0, 0], y.shape, y.strides, 263 | &out[0, 0], out.shape, out.strides, add_mm_worker, add_mm_worker, add_mm_worker, 264 | add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, 265 | add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, 266 | add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, 267 | add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker, add_mm_worker) 268 | return out 269 | 270 | 271 | cdef void subtract_mm_worker( 272 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 273 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy, 274 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy) nogil: 275 | cdef Array2DMap[dtype, XMatrixContiguity] x 276 | cdef Array2DMap[dtype, YMatrixContiguity] y 277 | cdef Array2DMap[dtype, OMatrixContiguity] o 278 | x.init(x_data, x_shape, x_strides) 279 | y.init(y_data, y_shape, y_strides) 280 | o.init(o_data, o_shape, o_strides) 281 | o.assign(x - y) 282 | 283 | @cython.boundscheck(False) 284 | @cython.wraparound(False) 285 | cdef dtype[:, :] subtract_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = None) nogil: 286 | cdef MMMDispatcher[dtype] dispatcher 287 | if out is None: 288 | out = matrix(x.shape[0], x.shape[1], &x[0, 0]) 289 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0, 0], y.shape, y.strides, 290 | &out[0, 0], out.shape, out.strides, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, 291 | subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, 292 | subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, 293 | subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, 294 | subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker, subtract_mm_worker) 295 | return out 296 | 297 | 298 | cdef void multiply_mm_worker( 299 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 300 | dtype 
*y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy, 301 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy) nogil: 302 | cdef Array2DMap[dtype, XMatrixContiguity] x 303 | cdef Array2DMap[dtype, YMatrixContiguity] y 304 | cdef Array2DMap[dtype, OMatrixContiguity] o 305 | x.init(x_data, x_shape, x_strides) 306 | y.init(y_data, y_shape, y_strides) 307 | o.init(o_data, o_shape, o_strides) 308 | o.assign(x * y) 309 | 310 | @cython.boundscheck(False) 311 | @cython.wraparound(False) 312 | cdef dtype[:, :] multiply_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = None) nogil: 313 | cdef MMMDispatcher[dtype] dispatcher 314 | if out is None: 315 | out = matrix(x.shape[0], x.shape[1], &x[0, 0]) 316 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0, 0], y.shape, y.strides, 317 | &out[0, 0], out.shape, out.strides, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, 318 | multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, 319 | multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, 320 | multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, 321 | multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker, multiply_mm_worker) 322 | return out 323 | 324 | 325 | cdef void divide_mm_worker( 326 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 327 | dtype *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy, 328 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy) nogil: 329 | cdef Array2DMap[dtype, XMatrixContiguity] x 330 | cdef Array2DMap[dtype, YMatrixContiguity] y 331 | cdef Array2DMap[dtype, OMatrixContiguity] o 332 | x.init(x_data, x_shape, x_strides) 333 | y.init(y_data, y_shape, y_strides) 334 | o.init(o_data, o_shape, o_strides) 335 | o.assign(x / y) 336 | 337 | @cython.boundscheck(False) 338 | @cython.wraparound(False) 339 | cdef dtype[:, :] divide_mm(dtype[:, :] x, dtype[:, :] y, dtype[:, :] out = None) nogil: 340 | cdef MMMDispatcher[dtype] dispatcher 341 | if out is None: 342 | out = matrix(x.shape[0], x.shape[1], &x[0, 0]) 343 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0, 0], y.shape, y.strides, 344 | &out[0, 0], out.shape, out.strides, divide_mm_worker, divide_mm_worker, divide_mm_worker, 345 | divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, 346 | divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, 347 | divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, 348 | divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker, divide_mm_worker) 349 | return out 350 | -------------------------------------------------------------------------------- /ceygen/llt.pxd: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from dtype cimport nonint_dtype 4 | 5 | 6 | cdef nonint_dtype[:, :] cholesky(nonint_dtype[:, :] x, nonint_dtype[:, :] out = *) nogil 7 | -------------------------------------------------------------------------------- /ceygen/llt.pyx: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | cimport cython 4 | 5 | from eigen_cython cimport * 6 | from dispatch cimport * 7 | from dtype cimport matrix 8 | 9 | 10 | cdef void cholesky_worker( 11 | nonint_dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 12 | nonint_dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy, 13 | nonint_dtype *y) nogil: 14 | cdef MatrixMap[nonint_dtype, XMatrixContiguity] x 15 | cdef MatrixMap[nonint_dtype, OMatrixContiguity] o 16 | x.init(x_data, x_shape, x_strides) 17 | o.init(o_data, o_shape, o_strides) 18 | o.assign(x.llt_matrixL()) 19 | 20 | @cython.boundscheck(False) 21 | @cython.wraparound(False) 22 | cdef nonint_dtype[:, :] cholesky(nonint_dtype[:, :] x, nonint_dtype[:, :] out = None) nogil: 23 | cdef MMSDispatcher[nonint_dtype] dispatcher 24 | if out is None: 25 | out = matrix(x.shape[0], x.shape[1], &x[0, 0]) 26 | # we pass dummy scalar so that we can reuse MMSDispatcher 27 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out[0, 0], out.shape, out.strides, 0, 28 | cholesky_worker, cholesky_worker, cholesky_worker, 29 | cholesky_worker, cholesky_worker, cholesky_worker, 30 | cholesky_worker, cholesky_worker, cholesky_worker) 31 | return out 32 | -------------------------------------------------------------------------------- /ceygen/lu.pxd: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from dtype cimport dtype, nonint_dtype 4 | 5 | 6 | cdef nonint_dtype[:, :] inv(nonint_dtype[:, :] x, nonint_dtype[:, :] out = *) nogil 7 | cdef bint iinv(nonint_dtype[:, :] x) nogil except False 8 | 9 | cdef nonint_dtype det(nonint_dtype[:, :] x) nogil except * 10 | -------------------------------------------------------------------------------- /ceygen/lu.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | cimport cython 4 | 5 | from eigen_cython cimport * 6 | from dispatch cimport * 7 | from dtype cimport matrix 8 | 9 | 10 | cdef void inv_worker( 11 | nonint_dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 12 | nonint_dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy, 13 | nonint_dtype *y) nogil: 14 | cdef MatrixMap[nonint_dtype, XMatrixContiguity] x 15 | cdef MatrixMap[nonint_dtype, OMatrixContiguity] o 16 | x.init(x_data, x_shape, x_strides) 17 | o.init(o_data, o_shape, o_strides) 18 | o.assign_inverse(x) 19 | 20 | @cython.boundscheck(False) 21 | @cython.wraparound(False) 22 | cdef nonint_dtype[:, :] inv(nonint_dtype[:, :] x, nonint_dtype[:, :] out = None) nogil: 23 | cdef MMSDispatcher[nonint_dtype] dispatcher 24 | if out is None: 25 | out = matrix(x.shape[0], x.shape[1], &x[0, 0]) 26 | # we pass dummy scalar so that we can reuse MMSDispatcher 27 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out[0, 0], out.shape, out.strides, 0, 28 | inv_worker, inv_worker, inv_worker, 29 | inv_worker, inv_worker, inv_worker, 30 | inv_worker, inv_worker, inv_worker) 31 | return out 32 | 33 | 34 | cdef void iinv_worker( 35 | nonint_dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 36 | nonint_dtype *o) nogil: 37 | cdef MatrixMap[nonint_dtype, XMatrixContiguity] x 38 | x.init(x_data, x_shape, x_strides) 39 | # why this doesn't exhibit aliasing problems? 
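    # presumably because, for dynamic-size matrices, Eigen evaluates inverse()
    # through a PartialPivLU decomposition that copies the operand into its own
    # storage before the destination is written, so x is read out completely
    # before being overwritten (an unverified assumption, not checked against
    # every Eigen version)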
40 | x.assign_inverse(x) 41 | 42 | @cython.boundscheck(False) 43 | @cython.wraparound(False) 44 | cdef bint iinv(nonint_dtype[:, :] x) nogil except False: 45 | cdef MSDispatcher[nonint_dtype] dispatcher 46 | # we pass dummy scalar so that we can reuse MSDispatcher 47 | dispatcher.run(&x[0, 0], x.shape, x.strides, 0, 48 | iinv_worker, iinv_worker, iinv_worker) 49 | return True 50 | 51 | 52 | cdef void det_worker( 53 | nonint_dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 54 | nonint_dtype *o) nogil: 55 | cdef MatrixMap[nonint_dtype, XMatrixContiguity] x 56 | x.init(x_data, x_shape, x_strides) 57 | o[0] = x.determinant() 58 | 59 | @cython.boundscheck(False) 60 | @cython.wraparound(False) 61 | cdef nonint_dtype det(nonint_dtype[:, :] x) nogil except *: 62 | cdef MSDispatcher[nonint_dtype] dispatcher 63 | cdef nonint_dtype out 64 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out, det_worker, det_worker, det_worker) 65 | return out 66 | -------------------------------------------------------------------------------- /ceygen/reductions.pxd: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from dtype cimport dtype 4 | 5 | 6 | cdef dtype sum_v(dtype[:] x) nogil except * 7 | cdef dtype sum_m(dtype[:, :] x) nogil except * 8 | cdef dtype[:] rowwise_sum(dtype[:, :] x, dtype[:] out = *) nogil 9 | cdef dtype[:] colwise_sum(dtype[:, :] x, dtype[:] out = *) nogil 10 | -------------------------------------------------------------------------------- /ceygen/reductions.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | cimport cython 4 | 5 | from eigen_cython cimport * 6 | from dispatch cimport * 7 | from dtype cimport vector 8 | 9 | 10 | cdef void sum_v_worker( 11 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XVectorContiguity x_dummy, 12 | dtype *o) nogil: 13 | cdef Array1DMap[dtype, XVectorContiguity] x 14 | x.init(x_data, x_shape, x_strides) 15 | o[0] = x.sum() 16 | 17 | @cython.boundscheck(False) 18 | @cython.wraparound(False) 19 | cdef dtype sum_v(dtype[:] x) nogil except *: 20 | cdef VSDispatcher[dtype] dispatcher 21 | cdef dtype out 22 | dispatcher.run(&x[0], x.shape, x.strides, &out, sum_v_worker, sum_v_worker) 23 | return out 24 | 25 | 26 | cdef void sum_m_worker( 27 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 28 | dtype *o) nogil: 29 | cdef Array2DMap[dtype, XMatrixContiguity] x 30 | x.init(x_data, x_shape, x_strides) 31 | o[0] = x.sum() 32 | 33 | @cython.boundscheck(False) 34 | @cython.wraparound(False) 35 | cdef dtype sum_m(dtype[:, :] x) nogil except *: 36 | cdef MSDispatcher[dtype] dispatcher 37 | cdef dtype out 38 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out, sum_m_worker, sum_m_worker, sum_m_worker) 39 | return out 40 | 41 | 42 | cdef void rowwise_sum_worker( 43 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 44 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy) nogil: 45 | cdef Array2DMap[dtype, XMatrixContiguity] x 46 | cdef Array1DMap[dtype, OVectorContiguity] o 47 | x.init(x_data, x_shape, x_strides) 48 | o.init(o_data, o_shape, o_strides) 49 | o.assign(x.rowwise_sum()) 50 | 51 | @cython.boundscheck(False) 52 | @cython.wraparound(False) 53 | cdef dtype[:] rowwise_sum(dtype[:, :] x, dtype[:] out = None) nogil: 54 | cdef MVDispatcher[dtype] dispatcher 55 | if out is None: 
56 | out = vector(x.shape[0], &x[0, 0]) 57 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out[0], out.shape, out.strides, 58 | rowwise_sum_worker, rowwise_sum_worker, rowwise_sum_worker, 59 | rowwise_sum_worker, rowwise_sum_worker, rowwise_sum_worker) 60 | return out 61 | 62 | 63 | cdef void colwise_sum_worker( 64 | dtype *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 65 | dtype *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OVectorContiguity o_dummy) nogil: 66 | cdef Array2DMap[dtype, XMatrixContiguity] x 67 | cdef Array1DMap[dtype, OVectorContiguity] o 68 | x.init(x_data, x_shape, x_strides) 69 | o.init(o_data, o_shape, o_strides) 70 | o.assign(x.colwise_sum()) 71 | 72 | @cython.boundscheck(False) 73 | @cython.wraparound(False) 74 | cdef dtype[:] colwise_sum(dtype[:, :] x, dtype[:] out = None) nogil: 75 | cdef MVDispatcher[dtype] dispatcher 76 | if out is None: 77 | out = vector(x.shape[1], &x[0, 0]) 78 | dispatcher.run(&x[0, 0], x.shape, x.strides, &out[0], out.shape, out.strides, 79 | colwise_sum_worker, colwise_sum_worker, colwise_sum_worker, 80 | colwise_sum_worker, colwise_sum_worker, colwise_sum_worker) 81 | return out 82 | -------------------------------------------------------------------------------- /ceygen/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Ceygen's tests""" 4 | 5 | from ceygen.tests.bench import * 6 | from ceygen.tests.test_core import * 7 | from ceygen.tests.test_dispatch import * 8 | from ceygen.tests.test_dtype import * 9 | from ceygen.tests.test_elemwise import * 10 | from ceygen.tests.test_lu import * 11 | from ceygen.tests.test_llt import * 12 | from ceygen.tests.test_reductions import * 13 | -------------------------------------------------------------------------------- /ceygen/tests/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Ceygen test-suite runner. 
Used when user calls `python -m ceygen.tests""" 4 | 5 | import unittest as ut 6 | 7 | from ceygen.tests import * 8 | 9 | 10 | if __name__ == '__main__': 11 | ut.main() 12 | -------------------------------------------------------------------------------- /ceygen/tests/bench.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from cython.parallel cimport prange 4 | 5 | import numpy as np 6 | cdef object np_dot = np.dot 7 | cdef object np_add = np.add 8 | cdef object np_multiply = np.multiply 9 | cdef object np_det = np.linalg.det 10 | 11 | import os 12 | import pickle 13 | import subprocess 14 | from time import time 15 | 16 | from support import CeygenTestCase, benchmark 17 | cimport ceygen.core as c 18 | cimport ceygen.elemwise as e 19 | cimport ceygen.lu as lu 20 | 21 | 22 | class timeit: 23 | """Simple context manager to time interations of the block inside""" 24 | 25 | def __init__(self, func, implementation, args): 26 | self.func, self.iterations, self.cost = func, args['iterations'], args['cost'] 27 | self.stats = args['self'].stats 28 | self.percall = args['self'].percall 29 | self.execute = True 30 | if implementation.startswith('numpy') and 'BENCHMARK_NUMPY' not in os.environ: 31 | self.execute = False 32 | self.implementation = implementation.rjust(args['self'].align) 33 | 34 | def __enter__(self): 35 | self.elapsed = time() 36 | return self 37 | 38 | def __exit__(self, exc_type, exc_value, tb): 39 | self.elapsed = time() - self.elapsed 40 | if self.execute: 41 | percall = self.elapsed/self.iterations 42 | gflops = self.cost/percall/10.**9 43 | print "{0}: {1:.2e}s per call, {2:.3f}s total, {3:5.2f} GFLOPS".format( 44 | self.implementation, percall, self.elapsed, gflops) 45 | if isinstance(self.stats, dict): 46 | key = self.func + '.' + self.implementation.strip() 47 | if key not in self.stats: 48 | self.stats[key] = [] 49 | self.percall[key] = [] 50 | self.stats[key].append(gflops) 51 | self.percall[key].append(percall) 52 | else: 53 | assert self.elapsed < 0.01 54 | return False # let the exceptions fall through 55 | 56 | 57 | class Bench(CeygenTestCase): 58 | 59 | align = 8 60 | 61 | def setUp(self): 62 | self.sizes = (2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024) 63 | if 'SAVE' in os.environ: 64 | self.stats = {} 65 | self.percall = {} 66 | else: 67 | self.stats = None 68 | self.percall = None 69 | 70 | def tearDown(self): 71 | if self.stats: 72 | for (func, stats) in self.stats.iteritems(): 73 | percall = self.percall[func] 74 | filename = func 75 | filename += b'-' + subprocess.check_output(['git', 'describe', '--dirty']).strip() 76 | filename += b'.pickle' 77 | with open(filename, 'wb') as f: 78 | pickle.dump({'sizes': self.sizes, 'stats': stats, 'percall': percall}, f) 79 | print "Saved stats to {0}".format(filename) 80 | 81 | # core module 82 | 83 | @benchmark 84 | def test_bench_dot_vv(self): 85 | print 86 | cdef int iterations 87 | cdef double[:] x 88 | 89 | for size in self.sizes: 90 | x_np = np.random.rand(size) 91 | x = x_np 92 | 93 | cost = 2. 
* size 94 | iterations = min(1.0 * 10.**9 / cost, 1000000) 95 | print "size: {0}, iterations: {1}".format(size, iterations) 96 | 97 | with timeit(b"dot_vv", "ceygen", locals()) as context: 98 | if context.execute: 99 | for i in range(iterations): 100 | c.dot_vv(x, x) 101 | 102 | @benchmark 103 | def test_bench_dot_mv(self): 104 | print 105 | cdef int iterations 106 | cdef double[:, :] x 107 | cdef double[:] y, out 108 | 109 | for size in self.sizes: 110 | x_np = np.random.rand(size, size) 111 | y_np = np.random.rand(size) 112 | out_np = np.empty(size) 113 | x, y, out = x_np, y_np, out_np 114 | 115 | cost = 2. * size**2. 116 | iterations = min(0.5 * 10.**9. / cost, 1000000) 117 | print "size: {0}, iterations: {1}".format(size, iterations) 118 | 119 | with timeit(b"dot_mv", "numpy", locals()) as context: 120 | if context.execute: 121 | for i in range(iterations): 122 | np_dot(x_np, y_np, out_np) 123 | with timeit(b"dot_mv", "ceygen", locals()) as context: 124 | if context.execute: 125 | for i in range(iterations): 126 | c.dot_mv(x, y, out) 127 | 128 | @benchmark 129 | def test_bench_dot_mv_noout(self): 130 | print 131 | cdef int iterations 132 | cdef double[:, :] x 133 | cdef double[:] y 134 | 135 | for size in self.sizes: 136 | x_np = np.random.rand(size, size) 137 | y_np = np.random.rand(size) 138 | x, y = x_np, y_np 139 | 140 | cost = 2. * size**2. 141 | iterations = min(0.5 * 10.**9. / cost, 1000000) 142 | print "size: {0}, iterations: {1}".format(size, iterations) 143 | 144 | with timeit(b"dot_mv_noout", "numpy", locals()) as context: 145 | if context.execute: 146 | for i in range(iterations): 147 | np_dot(x_np, y_np) 148 | with timeit(b"dot_mv_noout", "ceygen", locals()) as context: 149 | if context.execute: 150 | for i in range(iterations): 151 | c.dot_mv(x, y) 152 | 153 | @benchmark 154 | def test_bench_dot_vm(self): 155 | print 156 | cdef int iterations 157 | cdef double[:] x, out 158 | cdef double[:, :] y 159 | 160 | for size in self.sizes: 161 | x_np = np.random.rand(size) 162 | y_np = np.random.rand(size, size) 163 | out_np = np.empty(size) 164 | x, y, out = x_np, y_np, out_np 165 | 166 | cost = 2. * size**2. 167 | iterations = min(0.5 * 10.**9. / cost, 1000000) 168 | print "size: {0}, iterations: {1}".format(size, iterations) 169 | 170 | with timeit(b"dot_vm", "numpy", locals()) as context: 171 | if context.execute: 172 | for i in range(iterations): 173 | np_dot(x_np, y_np, out_np) 174 | with timeit(b"dot_vm", "ceygen", locals()) as context: 175 | if context.execute: 176 | for i in range(iterations): 177 | c.dot_vm(x, y, out) 178 | 179 | @benchmark 180 | def test_bench_dot_mm(self): 181 | print 182 | cdef int iterations 183 | cdef double[:, :] x, y, out 184 | 185 | for size in self.sizes: 186 | x_np = np.random.rand(size, size) 187 | y_np = np.random.rand(size, size) 188 | out_np = np.empty((size, size)) 189 | x, y, out = x_np, y_np, out_np 190 | 191 | cost = 2. * size**3. 192 | iterations = min(max(2. * 10.**9. 
/ cost, 1), 1000000) 193 | print "size: {0}*{0}, iterations: {1}".format(size, iterations) 194 | 195 | with timeit(b"dot_mm", "numpy", locals()) as context: 196 | if context.execute: 197 | for i in range(iterations): 198 | np_dot(x_np, y_np, out_np) 199 | with timeit(b"dot_mm", "ceygen", locals()) as context: 200 | if context.execute: 201 | for i in range(iterations): 202 | c.dot_mm(x, y, out) 203 | 204 | # elemwise module 205 | 206 | @benchmark 207 | def test_bench_multiply_vs(self): 208 | print 209 | cdef int iterations 210 | cdef double[:] x, out 211 | 212 | for size in self.sizes: 213 | x_np = np.random.rand(size) 214 | out_np = np.empty(size) 215 | x, out = x_np, out_np 216 | 217 | cost = size 218 | iterations = min(0.25 * 10.**9 / cost, 1000000) 219 | print "size: {0}, iterations: {1}".format(size, iterations) 220 | 221 | with timeit(b"multiply_vs", "ceygen", locals()) as context: 222 | if context.execute: 223 | for i in range(iterations): 224 | e.multiply_vs(x, 12., out) 225 | 226 | @benchmark 227 | def test_bench_add_vv(self): 228 | print 229 | cdef int iterations 230 | cdef double[:] x, out 231 | 232 | for size in self.sizes: 233 | x_np = np.random.rand(size) 234 | out_np = np.empty(size) 235 | x, out = x_np, out_np 236 | 237 | cost = size 238 | iterations = min(0.25 * 10.**9 / cost, 1000000) 239 | print "size: {0}, iterations: {1}".format(size, iterations) 240 | 241 | with timeit(b"add_vv", "numpy", locals()) as context: 242 | if context.execute: 243 | for i in range(iterations): 244 | np_add(x_np, x_np, out_np) 245 | with timeit(b"add_vv", "ceygen", locals()) as context: 246 | if context.execute: 247 | for i in range(iterations): 248 | e.add_vv(x, x, out) 249 | 250 | @benchmark 251 | def test_bench_add_ms(self): 252 | print 253 | cdef int iterations 254 | cdef double[:, :] x, out 255 | 256 | for size in self.sizes: 257 | x_np = np.random.rand(size, size) 258 | out_np = np.empty((size, size)) 259 | x, out = x_np, out_np 260 | 261 | cost = size**2. 262 | iterations = min(0.25 * 10.**9 / cost, 1000000) 263 | print "size: {0}*{0}, iterations: {1}".format(size, iterations) 264 | 265 | with timeit(b"add_ms", "ceygen", locals()) as context: 266 | if context.execute: 267 | for i in range(iterations): 268 | e.add_ms(x, -11., out) 269 | 270 | @benchmark 271 | def test_bench_multiply_mm(self): 272 | print 273 | cdef int iterations 274 | cdef double[:, :] x, out 275 | 276 | for size in self.sizes: 277 | x_np = np.random.rand(size, size) 278 | out_np = np.empty((size, size)) 279 | x, out = x_np, out_np 280 | 281 | cost = size**2. 282 | iterations = min(0.25 * 10.**9 / cost, 1000000) 283 | print "size: {0}*{0}, iterations: {1}".format(size, iterations) 284 | 285 | with timeit(b"multiply_mm", "numpy", locals()) as context: 286 | if context.execute: 287 | for i in range(iterations): 288 | np_multiply(x_np, x_np, out_np) 289 | with timeit(b"multiply_mm", "ceygen", locals()) as context: 290 | if context.execute: 291 | for i in range(iterations): 292 | e.multiply_mm(x, x, out) 293 | 294 | # lu module 295 | 296 | @benchmark 297 | def test_bench_inv(self): 298 | print 299 | cdef int iterations 300 | cdef double[:, :] x, out 301 | 302 | for size in self.sizes: 303 | x_np = np.random.rand(size, size) 304 | out_np = np.empty((size, size)) 305 | x, out = x_np, out_np 306 | 307 | cost = size**3. # 2/3 * n^3 floating point operations, but additional logic operations 308 | iterations = min(max(0.25 * 10.**9. 
/ cost, 1), 1000000) 309 | print "size: {0}*{0}, iterations: {1}".format(size, iterations) 310 | 311 | with timeit(b"inv", "ceygen", locals()) as context: 312 | if context.execute: 313 | for i in range(iterations): 314 | lu.inv(x, out) 315 | 316 | @benchmark 317 | def test_bench_iinv(self): 318 | print 319 | cdef int iterations 320 | cdef double[:, :] x 321 | 322 | for size in self.sizes: 323 | x_np = np.random.rand(size, size) 324 | x = x_np 325 | 326 | cost = size**3. # 2/3 * n^3 floating point operations, but additional logic operations 327 | iterations = min(max(0.25 * 10.**9. / cost, 1), 1000000) 328 | print "size: {0}*{0}, iterations: {1}".format(size, iterations) 329 | 330 | with timeit(b"iinv", "ceygen", locals()) as context: 331 | if context.execute: 332 | for i in range(iterations): 333 | lu.iinv(x) 334 | 335 | @benchmark 336 | def test_bench_det(self): 337 | print 338 | cdef int i, iterations 339 | cdef double[:, :] x 340 | from multiprocessing import cpu_count, Pool 341 | 342 | for size in self.sizes: 343 | x_np = np.random.rand(size, size) 344 | x = x_np 345 | 346 | cost = size**3. 347 | iterations = 4 * min(max(int(0.25 * 10.**9. / cost), 1), 250000) 348 | print "size: {0}*{0}, iterations: {1}".format(size, iterations) 349 | origalign = self.align 350 | self.align = 17 351 | 352 | with timeit(b"det", "numpy", locals()) as context: 353 | if context.execute: 354 | for i in range(iterations): 355 | np_det(x_np) 356 | 357 | with timeit(b"det", "ceygen", locals()) as context: 358 | if context.execute: 359 | for i in range(iterations): 360 | lu.det(x) 361 | 362 | with timeit(b"det", "numpy parallel", locals()) as context: 363 | if context.execute: 364 | pool = Pool(processes=cpu_count()) # TODO: actual number of cores 365 | pool.map(np_det, (x_np for i in range(iterations))) 366 | 367 | with timeit(b"det", "ceygen parallel", locals()) as context: 368 | if context.execute: 369 | for i in prange(iterations, nogil=True): 370 | lu.det(x) 371 | 372 | self.align = origalign 373 | -------------------------------------------------------------------------------- /ceygen/tests/support.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Various support methods for tests""" 4 | 5 | import numpy as np 6 | 7 | import functools 8 | import os 9 | import unittest as ut 10 | # Python 2.6 compatibility 11 | try: 12 | from unittest import skip, skipIf, skipUnless 13 | except ImportError: 14 | skip = None 15 | 16 | from ceygen.core import set_is_malloc_allowed, eigen_version 17 | 18 | 19 | class _AssertRaisesContext(object): 20 | """A context manager used to implement TestCase.assertRaises method, stolen from Python 2.7""" 21 | 22 | def __init__(self, expected, test_case): 23 | self.expected = expected 24 | self.failureException = test_case.failureException 25 | 26 | def __enter__(self): 27 | return self 28 | 29 | def __exit__(self, exc_type, exc_value, tb): 30 | if exc_type is None: 31 | try: 32 | exc_name = self.expected.__name__ 33 | except AttributeError: 34 | exc_name = str(self.expected) 35 | raise self.failureException( 36 | "{0} not raised".format(exc_name)) 37 | if not issubclass(exc_type, self.expected): 38 | # let unexpected exceptions pass through 39 | return False 40 | self.exception = exc_value # store for later retrieval 41 | return True 42 | 43 | 44 | class CeygenTestCase(ut.TestCase): 45 | """Test case that adds some numeric assert functions""" 46 | 47 | def assertApproxEqual(self, X, Y): 48 | """Return true if X = 
Y to within machine precision 49 | 50 | Function for checking that different matrices from different 51 | computations are in some sense "equal" in the verification tests. 52 | """ 53 | X = np.asarray(X) 54 | Y = np.asarray(Y) 55 | fuzz = 1.0e-8 56 | self.assertEqual(X.ndim, Y.ndim) 57 | self.assertEqual(X.shape, Y.shape) 58 | 59 | if np.all(abs(X - Y) < fuzz): 60 | return 61 | else: 62 | self.fail("NumPy arrays {0} and {1} are not fuzzy equal (+- {2})".format(X, Y, fuzz)) 63 | 64 | def assertRaises(self, excClass, callableObj=None, *args, **kwargs): 65 | """Python 2.6 doesn't support with assertRaises(Exception): syntax, steal it from 2.7""" 66 | context = _AssertRaisesContext(excClass, self) 67 | if callableObj is None: 68 | return context 69 | with context: 70 | callableObj(*args, **kwargs) 71 | 72 | 73 | class NoMallocTestCase(CeygenTestCase): 74 | """ 75 | CeygenTestCase sublass that by default runs with Eigen memory allocation disallowed. 76 | 77 | Use "with malloc_allowed:" context manager to suppress it temporarily 78 | """ 79 | 80 | def setUp(self): 81 | # assure that no heap memory allocation in Eigen happens during this test class 82 | set_is_malloc_allowed(False) 83 | 84 | def tearDown(self): 85 | set_is_malloc_allowed(True) 86 | 87 | class malloc_allowed: 88 | """Context manager to write with malloc_allowed: ... and be sure that after execution 89 | the state is reset to diwallowed no matter whether exception occured""" 90 | 91 | def __enter__(self): 92 | set_is_malloc_allowed(True) 93 | return self 94 | 95 | def __exit__(self, exc_type, exc_value, tb): 96 | set_is_malloc_allowed(False) 97 | return False # let the exceptions fall through 98 | 99 | 100 | def _id(obj): 101 | return obj 102 | 103 | if skip is None: 104 | def skip(reason): 105 | """Implementation of the @skip decorator from Python 2.7 for Python 2.6""" 106 | def decorator(func): 107 | @functools.wraps(func) 108 | def wrapper(*args, **kwargs): 109 | pass 110 | origdoc = wrapper.__doc__ or wrapper.__name__ 111 | wrapper.__doc__ = wrapper.__name__ + " [skipped '{0}']".format(reason) 112 | return wrapper 113 | return decorator 114 | 115 | def skipIf(condition, reason): 116 | """Implementation of the @skipIf decorator from Python 2.7 for Python 2.6""" 117 | if condition: 118 | return skip(reason) 119 | return _id 120 | 121 | def skipUnless(condition, reason): 122 | """Implementation of the @skipUnless decorator from Python 2.7 for Python 2.6""" 123 | if not condition: 124 | return skip(reason) 125 | return _id 126 | 127 | def skipIfEigenOlderThan(world, major, minor): 128 | ev = eigen_version() 129 | reason = 'because this test only passes with Eigen >= {0}.{1}.{2}'.format(world, major, minor) 130 | reason += ', but tested Ceygen was compiled against {0}.{1}.{2}'.format(ev[0], ev[1], ev[2]) 131 | for (expected, actual) in zip((world, major, minor), ev): 132 | if actual < expected: 133 | return skip(reason) 134 | if actual > expected: # strictly greater, don't check more minor versions 135 | return _id 136 | # else check more minor version 137 | return _id # actual == expected 138 | 139 | def benchmark(func): 140 | """Decorator to mark functions as benchmarks so that they aren't run by default""" 141 | reason = 'because neither BENCHMARK or BENCHMARK_NUMPY environment variable is set' 142 | return skipUnless('BENCHMARK' in os.environ or 'BENCHMARK_NUMPY' in os.environ, reason)(func) 143 | -------------------------------------------------------------------------------- /ceygen/tests/test_core.pyx: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from support import NoMallocTestCase, malloc_allowed 6 | cimport ceygen.core as c 7 | 8 | 9 | class TestCore(NoMallocTestCase): 10 | 11 | def test_from_readme(self): 12 | cdef double[:, :] big = np.array([[1., 2., 2., 0., 0., 0.], 13 | [3., 4., 0., -2., 0., 0.]]) 14 | self.assertApproxEqual(c.dot_mm(big[:, 0:2], big[:, 2:4], big[:, 4:6]), [[2., -4.], [6., -8.]]) 15 | self.assertApproxEqual(big, [[1., 2., 2., 0., 2., -4.], 16 | [3., 4., 0., -2., 6., -8.]]) 17 | # TODO: the following line makes Python crash - bug in cython? 18 | #self.assertApproxEqual(c.dot_mm(big[:, 0:2].T, big[:, 2:4], big[:, 4:6]), [[2., -6.], [4., -8.]]) 19 | 20 | def test_eigen_version(self): 21 | vers = c.eigen_version() 22 | self.assertTrue(isinstance(vers, tuple)) 23 | self.assertEqual(len(vers), 3) 24 | self.assertTrue(isinstance(vers[0], int)) 25 | self.assertTrue(isinstance(vers[1], int)) 26 | self.assertTrue(isinstance(vers[2], int)) 27 | 28 | def test_dot_vv(self): 29 | x_np = np.array([1., 2., 3.]) 30 | y_np = np.array([4., 5., 6.]) 31 | self.assertAlmostEqual(c.dot_vv[double](x_np, y_np), 32.) 32 | cdef double[:] x = x_np 33 | cdef double[:] y = y_np 34 | self.assertAlmostEqual(c.dot_vv(x, y), 32.) 35 | 36 | def test_dot_vv_strides(self): 37 | x = np.array([[1., 2.], [3., 4.]]) 38 | e1 = np.array([1., 0.]) 39 | e2 = np.array([0., 1.]) 40 | 41 | self.assertAlmostEqual(c.dot_vv[double](x[0, :], e1), 1.) 42 | self.assertAlmostEqual(c.dot_vv[double](x[0, :], e2), 2.) 43 | self.assertAlmostEqual(c.dot_vv[double](x[1, :], e1), 3.) 44 | self.assertAlmostEqual(c.dot_vv[double](x[1, :], e2), 4.) 45 | 46 | self.assertAlmostEqual(c.dot_vv[double](x[:, 0], e1), 1.) 47 | self.assertAlmostEqual(c.dot_vv[double](x[:, 0], e2), 3.) 48 | self.assertAlmostEqual(c.dot_vv[double](x[:, 1], e1), 2.) 49 | self.assertAlmostEqual(c.dot_vv[double](x[:, 1], e2), 4.) 
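The strided cases above matter because a row slice of a C-ordered array stays contiguous while a column slice does not, which is roughly the distinction the CContig/NContig vector dispatch in dispatch.pxd is built around. A small illustration, not part of the test suite:

    # illustration only: contiguity of the slices exercised above
    import numpy as np
    x = np.array([[1., 2.], [3., 4.]])
    print(x[0, :].flags['C_CONTIGUOUS'], x[0, :].strides)   # True  (8,)  -> contiguous path
    print(x[:, 0].flags['C_CONTIGUOUS'], x[:, 0].strides)   # False (16,) -> non-contiguous path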
50 | 51 | def test_dot_vv_baddims(self): 52 | x = np.array([1., 2., 3.]) 53 | y = np.array([4., 5.]) 54 | z = np.array([[1., 2.], [3., 4.]]) 55 | def dot_vv(x, y): 56 | # wrap up because c.dot_vv is cython-only (not callable from Python) 57 | return c.dot_vv[double](x, y) 58 | 59 | self.assertRaises(ValueError, dot_vv, x, y) 60 | self.assertRaises(ValueError, dot_vv, x, z) 61 | 62 | def test_dot_vv_none(self): 63 | x = np.array([1., 2., 3.]) 64 | def dot_vv(x, y): 65 | return c.dot_vv[double](x, y) 66 | self.assertRaises(ValueError, dot_vv, x, None) 67 | self.assertRaises(TypeError, dot_vv, x, [1., 2., 3.]) 68 | self.assertRaises(ValueError, dot_vv, None, x) 69 | self.assertRaises(TypeError, dot_vv, [1., 2., 3.], x) 70 | 71 | 72 | def test_dot_mv(self): 73 | x_np = np.array([[1., 2., 3.], [3., 2., 1.]]) 74 | y_np = np.array([4., 5., 6.]) 75 | self.assertApproxEqual(c.dot_mv[double](x_np, y_np), np.array([32., 28.])) 76 | self.assertApproxEqual(c.dot_mv[double](x_np, y_np, None), np.array([32., 28.])) 77 | out_np = np.zeros(2) 78 | out2_np = c.dot_mv[double](x_np, y_np, out_np) 79 | self.assertApproxEqual(out_np, np.array([32., 28.])) # test that it actually uses out 80 | self.assertApproxEqual(out2_np, np.array([32., 28.])) 81 | 82 | cdef double[:, :] x = x_np 83 | cdef double[:] y = y_np 84 | self.assertApproxEqual(c.dot_mv(x, y), np.array([32., 28.])) 85 | cdef double[:] out = out_np 86 | cdef double[:] out2 = c.dot_mv(x, y, out) 87 | self.assertApproxEqual(out, np.array([32., 28.])) # test that it actually uses out 88 | self.assertApproxEqual(out2, np.array([32., 28.])) 89 | 90 | def test_dot_mv_transposed(self): 91 | x_np = np.array([[1., 2., 3.], [3., 2., 1.]]) 92 | y_np = np.array([4., 5.]) 93 | self.assertApproxEqual(c.dot_mv[double](x_np.T, y_np), np.array([19., 18., 17.])) 94 | 95 | def test_dot_mv_strides(self): 96 | big = np.array([[[1., 2.], [3., 4.]], 97 | [[5., 6.], [7., 8.]]]) 98 | e1 = np.array([1., 0.]) 99 | e2 = np.array([0., 1.]) 100 | self.assertApproxEqual(c.dot_mv[double](big[0, :, :], e1), np.array([1., 3.])) 101 | self.assertApproxEqual(c.dot_mv[double](big[0, :, :], e2), np.array([2., 4.])) 102 | self.assertApproxEqual(c.dot_mv[double](big[1, :, :], e1), np.array([5., 7.])) 103 | self.assertApproxEqual(c.dot_mv[double](big[1, :, :], e2), np.array([6., 8.])) 104 | 105 | self.assertApproxEqual(c.dot_mv[double](big[:, 0, :], e1), np.array([1., 5.])) 106 | self.assertApproxEqual(c.dot_mv[double](big[:, 0, :], e2), np.array([2., 6.])) 107 | self.assertApproxEqual(c.dot_mv[double](big[:, 1, :], e1), np.array([3., 7.])) 108 | self.assertApproxEqual(c.dot_mv[double](big[:, 1, :], e2), np.array([4., 8.])) 109 | 110 | self.assertApproxEqual(c.dot_mv[double](big[:, :, 0], e1), np.array([1., 5.])) 111 | self.assertApproxEqual(c.dot_mv[double](big[:, :, 0], e2), np.array([3., 7.])) 112 | self.assertApproxEqual(c.dot_mv[double](big[:, :, 1], e1), np.array([2., 6.])) 113 | self.assertApproxEqual(c.dot_mv[double](big[:, :, 1], e2), np.array([4., 8.])) 114 | 115 | def test_dot_mv_baddims(self): 116 | def dot_mv(x, y, out=None): 117 | return c.dot_mv[double](x, y, out) 118 | X = np.array([[1., 2., 3.],[2., 3., 4.]]) 119 | y = np.array([1., 2., 3.]) 120 | self.assertRaises(ValueError, dot_mv, np.array([1., 2.]), np.array([1., 2.])) 121 | self.assertRaises(ValueError, dot_mv, X, np.array([1., 2.])) 122 | self.assertRaises(ValueError, dot_mv, X, np.array([1.])) 123 | self.assertRaises(ValueError, dot_mv, X.T, y) 124 | 125 | # good x, y dims, but bad out dims 126 | 
self.assertRaises(ValueError, dot_mv, X, y, np.zeros(1)) 127 | self.assertRaises(ValueError, dot_mv, X, y, np.zeros(3)) 128 | 129 | def test_dot_mv_none(self): 130 | x, y, out = np.array([[3.]]), np.array([2.]), np.zeros(1) 131 | for X in (x, None): 132 | for Y in (y, None): 133 | for out in (None, out): 134 | if X is x and Y is y: 135 | continue # this case would be valid 136 | try: 137 | c.dot_mv[double](X, Y, out) 138 | except ValueError: 139 | pass 140 | else: 141 | self.fail("ValueError was not raised (X={0}, Y={1}, out={2}".format(X, Y, out)) 142 | 143 | 144 | def test_dot_vm(self): 145 | x_np = np.array([4., 5.]) 146 | y_np = np.array([[1., 2., 3.], [3., 2., 1.]]) 147 | expected = np.array([19., 18., 17.]) 148 | self.assertApproxEqual(c.dot_vm[double](x_np, y_np), expected) 149 | self.assertApproxEqual(c.dot_vm[double](x_np, y_np, None), expected) 150 | out_np = np.zeros(3) 151 | out2_np = c.dot_vm[double](x_np, y_np, out_np) 152 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 153 | self.assertApproxEqual(out2_np, expected) 154 | 155 | cdef double[:] x = x_np 156 | cdef double[:, :] y = y_np 157 | self.assertApproxEqual(c.dot_vm(x, y), expected) 158 | cdef double[:] out = out_np 159 | cdef double[:] out2 = c.dot_vm(x, y, out) 160 | self.assertApproxEqual(out, expected) # test that it actually uses out 161 | self.assertApproxEqual(out2, expected) 162 | 163 | def test_dot_vm_transposed(self): 164 | x_np = np.array([4., 5., 6.]) 165 | y_np = np.array([[1., 2., 3.], [3., 2., 1.]]) 166 | self.assertApproxEqual(c.dot_vm[double](x_np, y_np.T), np.array([32., 28.])) 167 | 168 | def test_dot_vm_baddims(self): 169 | def dot_vm(x, y, out=None): 170 | return c.dot_vm[double](x, y, out) 171 | x = np.array([1., 2.]) 172 | y = np.array([[1., 2., 3.],[2., 3., 4.]]) 173 | self.assertRaises(ValueError, dot_vm, np.array([1., 2.]), np.array([1., 2.])) 174 | self.assertRaises(ValueError, dot_vm, x, np.array([[1., 2.], [2., 3.], [3., 4.]])) 175 | self.assertRaises(ValueError, dot_vm, np.array([1.]), y) 176 | self.assertRaises(ValueError, dot_vm, x, y.T) 177 | 178 | # good x, y dims, but bad out dims 179 | self.assertRaises(ValueError, dot_vm, x, y, np.zeros(1)) 180 | self.assertRaises(ValueError, dot_vm, x, y, np.zeros(2)) 181 | self.assertRaises(ValueError, dot_vm, x, y, np.zeros(4)) 182 | 183 | def test_dot_vm_none(self): 184 | x, y, out = np.array([3.]), np.array([[2.]]), np.zeros(1) 185 | for X in (x, None): 186 | for Y in (y, None): 187 | for out in (None, out): 188 | if X is x and Y is y: 189 | continue # this case would be valid 190 | try: 191 | c.dot_vm[double](X, Y, out) 192 | except ValueError: 193 | pass 194 | else: 195 | self.fail("ValueError was not raised (X={0}, Y={1}, out={2}".format(X, Y, out)) 196 | 197 | 198 | def test_dot_mm(self): 199 | x_np = np.array([[1., 2.], 200 | [3., 4.]]) 201 | y_np = np.array([[5., 6.], 202 | [7., 8.]]) 203 | expected = [ 204 | np.array([[19., 22.], [43., 50.]]), 205 | np.array([[26., 30.], [38., 44.]]), 206 | np.array([[17., 23.], [39., 53.]]), 207 | np.array([[23., 31.], [34., 46.]]) 208 | ] 209 | 210 | self.assertApproxEqual(c.dot_mm[double](x_np, y_np), expected[0]) 211 | self.assertApproxEqual(c.dot_mm[double](x_np.T, y_np), expected[1]) 212 | self.assertApproxEqual(c.dot_mm[double](x_np, y_np.T), expected[2]) 213 | self.assertApproxEqual(c.dot_mm[double](x_np.T, y_np.T), expected[3]) 214 | 215 | cdef double[:, :] x = x_np 216 | cdef double[:, :] y = y_np 217 | self.assertApproxEqual(c.dot_mm(x, y), expected[0]) 218 | 
self.assertApproxEqual(c.dot_mm(x.T, y), expected[1]) 219 | self.assertApproxEqual(c.dot_mm(x, y.T), expected[2]) 220 | self.assertApproxEqual(c.dot_mm(x.T, y.T), expected[3]) 221 | 222 | # test that it actually uses out 223 | out_np = np.empty((2, 2)) 224 | cdef double[:, :] out = out_np 225 | out2 = c.dot_mm(x, y, out) 226 | self.assertApproxEqual(out2, expected[0]) 227 | self.assertApproxEqual(out, expected[0]) 228 | self.assertApproxEqual(out_np, expected[0]) 229 | 230 | a_np = np.array([[1., 2., 3.], [4., 5., 6.]]) 231 | b_np = np.array([[1.], [2.], [3.]]) 232 | self.assertApproxEqual(c.dot_mm[double](a_np, b_np), np.array([[14.], [32.]])) 233 | self.assertApproxEqual(c.dot_mm[double](b_np.T, a_np.T), np.array([[14., 32.]])) 234 | cdef double[:, :] a = a_np 235 | cdef double[:, :] b = b_np 236 | self.assertApproxEqual(c.dot_mm(a, b), np.array([[14.], [32.]])) 237 | self.assertApproxEqual(c.dot_mm(b.T, a.T), np.array([[14., 32.]])) 238 | 239 | def test_dot_mm_strides(self): 240 | big = np.array([[[1., 2.], [3., 4.]], 241 | [[5., 6.], [7., 8.]]]) 242 | eye = np.eye(2) 243 | 244 | # following are still C-contiguous: 245 | self.assertApproxEqual(c.dot_mm[double](big[0, :, :], eye), big[0, :, :]) 246 | self.assertApproxEqual(c.dot_mm[double](big[1, :, :], eye), big[1, :, :]) 247 | self.assertApproxEqual(c.dot_mm[double](big[:, 0, :], eye), big[:, 0, :]) 248 | self.assertApproxEqual(c.dot_mm[double](big[:, 1, :], eye), big[:, 1, :]) 249 | 250 | # following are Fortran-contiguous: 251 | self.assertApproxEqual(c.dot_mm[double](big[0, :, :].T, eye), big[0, :, :].T) 252 | self.assertApproxEqual(c.dot_mm[double](big[1, :, :].T, eye), big[1, :, :].T) 253 | self.assertApproxEqual(c.dot_mm[double](big[:, 0, :].T, eye), big[:, 0, :].T) 254 | self.assertApproxEqual(c.dot_mm[double](big[:, 1, :].T, eye), big[:, 1, :].T) 255 | 256 | # actually test that our infrastructure is capable of detecting memory allocations 257 | for myslice in (big[:, :, 0], big[:, :, 1], big[:, :, 0].T, big[:, :, 1].T): 258 | with self.assertRaises(ValueError): 259 | c.dot_mm[double](myslice, eye) 260 | 261 | # non-contiguous slices in dot_mm cause memory allocations in Eigen, expect it: 262 | with malloc_allowed(): 263 | self.assertApproxEqual(c.dot_mm[double](big[:, :, 0], eye), big[:, :, 0]) 264 | self.assertApproxEqual(c.dot_mm[double](big[:, :, 1], eye), big[:, :, 1]) 265 | self.assertApproxEqual(c.dot_mm[double](big[:, :, 0].T, eye), big[:, :, 0].T) 266 | self.assertApproxEqual(c.dot_mm[double](big[:, :, 1].T, eye), big[:, :, 1].T) 267 | 268 | # assert that we've re-enabled assertions on memory allocations 269 | with self.assertRaises(ValueError): 270 | c.dot_mm[double](big[:, :, 0], eye) 271 | 272 | def test_dot_mm_baddims(self): 273 | x = np.array([[1., 2.], 274 | [3., 4.]]) 275 | y = np.array([[5., 6.], 276 | [7., 8.]]) 277 | out = np.empty((2, 2)) 278 | for X in (x, np.array([1., 2.]), np.array([[1.], [2.]]), np.array([[[1.]]])): 279 | for Y in (y, np.array([1., 2.]), np.array([[1.], [2.], [3.]]), np.array([[[1.]]])): 280 | for OUT in (out, None, np.empty((2,)), np.empty((2, 3)), np.empty((3, 2)), np.empty((2, 2, 1))): 281 | if X is x and Y is y and (OUT is out or OUT is None): 282 | continue # these would be valid 283 | try: 284 | c.dot_mm[double](X, Y, OUT) 285 | except ValueError: 286 | pass 287 | else: 288 | self.fail("ValueError was not raised (X={0}, Y={1}, OUT={2})".format(X, Y, OUT)) 289 | 290 | def test_dot_mm_none(self): 291 | x = np.array([[1., 2.], 292 | [3., 4.]]) 293 | y = np.array([[5., 6.], 294 | [7., 
8.]]) 295 | out = np.empty((2, 2)) 296 | for X in (x, None): 297 | for Y in (y, None): 298 | for OUT in (out, None): 299 | if X is x and Y in y: 300 | continue # this would be valid 301 | try: 302 | c.dot_mm[double](X, Y, OUT) 303 | except ValueError: 304 | pass 305 | else: 306 | self.fail("ValueError was not raised (X={0}, Y={1}, OUT={2}".format(X, Y, OUT)) 307 | -------------------------------------------------------------------------------- /ceygen/tests/test_dispatch.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from support import CeygenTestCase 6 | from ceygen.dispatch cimport * 7 | 8 | 9 | globalstatus = '' 10 | 11 | cdef void as_func( 12 | double *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 13 | double *o) with gil: 14 | global globalstatus 15 | globalstatus = '' 16 | if XMatrixContiguity is CContig: 17 | globalstatus += 'C' 18 | if XMatrixContiguity is FContig: 19 | globalstatus += 'F' 20 | if XMatrixContiguity is NContig: 21 | globalstatus += 'N' 22 | 23 | cdef void aa_func( 24 | double *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 25 | double *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy) with gil: 26 | global globalstatus 27 | globalstatus = '' 28 | if XMatrixContiguity is CContig: 29 | globalstatus += 'C' 30 | if XMatrixContiguity is FContig: 31 | globalstatus += 'F' 32 | if XMatrixContiguity is NContig: 33 | globalstatus += 'N' 34 | 35 | if YMatrixContiguity is CContig: 36 | globalstatus += 'C' 37 | if YMatrixContiguity is FContig: 38 | globalstatus += 'F' 39 | if YMatrixContiguity is NContig: 40 | globalstatus += 'N' 41 | 42 | cdef void aas_func( 43 | double *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 44 | double *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy, 45 | double *o) with gil: 46 | global globalstatus 47 | globalstatus = '' 48 | if XMatrixContiguity is CContig: 49 | globalstatus += 'C' 50 | if XMatrixContiguity is FContig: 51 | globalstatus += 'F' 52 | if XMatrixContiguity is NContig: 53 | globalstatus += 'N' 54 | 55 | if YMatrixContiguity is CContig: 56 | globalstatus += 'C' 57 | if YMatrixContiguity is FContig: 58 | globalstatus += 'F' 59 | if YMatrixContiguity is NContig: 60 | globalstatus += 'N' 61 | 62 | cdef void aaa_func( 63 | double *x_data, Py_ssize_t *x_shape, Py_ssize_t *x_strides, XMatrixContiguity x_dummy, 64 | double *y_data, Py_ssize_t *y_shape, Py_ssize_t *y_strides, YMatrixContiguity y_dummy, 65 | double *o_data, Py_ssize_t *o_shape, Py_ssize_t *o_strides, OMatrixContiguity o_dummy) with gil: 66 | global globalstatus 67 | globalstatus = '' 68 | if XMatrixContiguity is CContig: 69 | globalstatus += 'C' 70 | if XMatrixContiguity is FContig: 71 | globalstatus += 'F' 72 | if XMatrixContiguity is NContig: 73 | globalstatus += 'N' 74 | 75 | if YMatrixContiguity is CContig: 76 | globalstatus += 'C' 77 | if YMatrixContiguity is FContig: 78 | globalstatus += 'F' 79 | if YMatrixContiguity is NContig: 80 | globalstatus += 'N' 81 | 82 | if OMatrixContiguity is CContig: 83 | globalstatus += 'C' 84 | if OMatrixContiguity is FContig: 85 | globalstatus += 'F' 86 | if OMatrixContiguity is NContig: 87 | globalstatus += 'N' 88 | 89 | v_ccontig = (np.array([1., 2.]), 'C') 90 | v_ncontig = (np.array([[1., 2.], [3., 4.]])[:, 1], 'N') 91 | 92 | m_ccontig = (np.array([[1., 2.], [3., 4.]]), 'C') 93 | 
m_fcontig = (np.array([[1., 2.], [3., 4.]], order='F'), 'F') 94 | m_ncontig = (np.array([[[1., 2.], [3., 4.]], 95 | [[5., 6.], [7., 8.]]])[:, :, 1], 'N') 96 | 97 | class TestDispatch(CeygenTestCase): 98 | 99 | def test_vs(self): 100 | cdef VSDispatcher[double] dispatcher 101 | cdef double[:] x 102 | for X in (v_ccontig, v_ncontig): 103 | x = X[0] 104 | dispatcher.run(&x[0], x.shape, x.strides, 0, 105 | as_func, as_func) 106 | self.assertEquals(globalstatus, X[1]) 107 | 108 | def test_vvs(self): 109 | cdef VVSDispatcher[double] dispatcher 110 | cdef double[:] x, y 111 | for X in (v_ccontig, v_ncontig): 112 | for Y in (v_ccontig, v_ncontig): 113 | x, y = X[0], Y[0] 114 | dispatcher.run(&x[0], x.shape, x.strides, &y[0], y.shape, y.strides, 0, 115 | aas_func, aas_func, aas_func, aas_func) 116 | self.assertEquals(globalstatus, X[1] + Y[1]) 117 | 118 | def test_vvv(self): 119 | cdef VVVDispatcher[double] dispatcher 120 | cdef double[:] x, y, z 121 | for X in (v_ccontig, v_ncontig): 122 | for Y in (v_ccontig, v_ncontig): 123 | for Z in (v_ccontig, v_ncontig): 124 | x, y, z = X[0], Y[0], Z[0] 125 | dispatcher.run(&x[0], x.shape, x.strides, &y[0], y.shape, y.strides, 126 | &z[0], z.shape, z.strides, aaa_func, aaa_func, aaa_func, aaa_func, 127 | aaa_func, aaa_func, aaa_func, aaa_func) 128 | self.assertEquals(globalstatus, X[1] + Y[1] + Z[1]) 129 | 130 | def test_ms(self): 131 | cdef MSDispatcher[double] dispatcher 132 | cdef double[:, :] x 133 | for X in (m_ccontig, m_fcontig, m_ncontig): 134 | x = X[0] 135 | dispatcher.run(&x[0, 0], x.shape, x.strides, 0, 136 | as_func, as_func, as_func) 137 | self.assertEquals(globalstatus, X[1]) 138 | 139 | def test_mv(self): 140 | cdef MVDispatcher[double] dispatcher 141 | cdef double[:, :] x 142 | cdef double[:] y 143 | for X in (m_ccontig, m_fcontig, m_ncontig): 144 | for Y in (v_ccontig, v_ncontig): 145 | x, y = X[0], Y[0] 146 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0], y.shape, y.strides, 147 | aa_func, aa_func, aa_func, aa_func, aa_func, aa_func) 148 | self.assertEquals(globalstatus, X[1] + Y[1]) 149 | 150 | def test_mms(self): 151 | cdef MMSDispatcher[double] dispatcher 152 | cdef double[:, :] x, y 153 | for X in (m_ccontig, m_fcontig, m_ncontig): 154 | for Y in (m_ccontig, m_fcontig, m_ncontig): 155 | x, y = X[0], Y[0] 156 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0, 0], y.shape, y.strides, 0, 157 | aas_func, aas_func, aas_func, aas_func, aas_func, aas_func, 158 | aas_func, aas_func, aas_func) 159 | self.assertEquals(globalstatus, X[1] + Y[1]) 160 | 161 | def test_mvv(self): 162 | cdef MVVDispatcher[double] dispatcher 163 | cdef double[:, :] x 164 | cdef double[:] y, z 165 | for X in (m_ccontig, m_fcontig, m_ncontig): 166 | for Y in (v_ccontig, v_ncontig): 167 | for Z in (v_ccontig, v_ncontig): 168 | x, y, z = X[0], Y[0], Z[0] 169 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0], y.shape, y.strides, 170 | &z[0], z.shape, z.strides, aaa_func, aaa_func, aaa_func, 171 | aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, 172 | aaa_func, aaa_func, aaa_func) 173 | self.assertEquals(globalstatus, X[1] + Y[1] + Z[1]) 174 | 175 | def test_mmm(self): 176 | cdef MMMDispatcher[double] dispatcher 177 | cdef double[:, :] x, y, z 178 | for X in (m_ccontig, m_fcontig, m_ncontig): 179 | for Y in (m_ccontig, m_fcontig, m_ncontig): 180 | for Z in (m_ccontig, m_fcontig, m_ncontig): 181 | x, y, z = X[0], Y[0], Z[0] 182 | dispatcher.run(&x[0, 0], x.shape, x.strides, &y[0, 0], y.shape, y.strides, 183 | &z[0, 0], z.shape, z.strides, aaa_func, 
aaa_func, aaa_func, 184 | aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, 185 | aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, 186 | aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, 187 | aaa_func, aaa_func, aaa_func, aaa_func, aaa_func, aaa_func) 188 | self.assertEquals(globalstatus, X[1] + Y[1] + Z[1]) 189 | -------------------------------------------------------------------------------- /ceygen/tests/test_dtype.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | cimport cython 4 | 5 | from support import CeygenTestCase 6 | cimport ceygen.dtype as dtype 7 | 8 | 9 | class TestDtype(CeygenTestCase): 10 | 11 | def test_vector(self): 12 | cdef char[:] c 13 | cdef short[:] h 14 | cdef int[:] i 15 | cdef long[:] l 16 | cdef float[:] f 17 | cdef double[:] d 18 | 19 | cdef int length 20 | for length in (1, 3, 16, 134): # TODO: 0 should be valid, but Cython doesn't like it 21 | c = dtype.vector(length, 0) 22 | h = dtype.vector(length, 0) 23 | i = dtype.vector(length, 0) 24 | l = dtype.vector(length, 0) 25 | f = dtype.vector(length, 0) 26 | d = dtype.vector(length, 0) 27 | self.assertEqual(c.shape[0], length) 28 | self.assertEqual(h.shape[0], length) 29 | self.assertEqual(i.shape[0], length) 30 | self.assertEqual(l.shape[0], length) 31 | self.assertEqual(f.shape[0], length) 32 | self.assertEqual(d.shape[0], length) 33 | 34 | def test_matrix(self): 35 | cdef char[:, :] c 36 | cdef short[:, :] h 37 | cdef int[:, :] i 38 | cdef long[:, :] l 39 | cdef float[:, :] f 40 | cdef double[:, :] d 41 | 42 | cdef int rows, cols 43 | for rows in (1, 3, 16, 134): 44 | for cols in (1, 2, 16, 209): 45 | c = dtype.matrix(rows, cols, 0) 46 | h = dtype.matrix(rows, cols, 0) 47 | i = dtype.matrix(rows, cols, 0) 48 | l = dtype.matrix(rows, cols, 0) 49 | f = dtype.matrix(rows, cols, 0) 50 | d = dtype.matrix(rows, cols, 0) 51 | self.assertEqual((c.shape[0], c.shape[1]), (rows, cols)) 52 | self.assertEqual((h.shape[0], h.shape[1]), (rows, cols)) 53 | self.assertEqual((i.shape[0], i.shape[1]), (rows, cols)) 54 | self.assertEqual((l.shape[0], l.shape[1]), (rows, cols)) 55 | self.assertEqual((f.shape[0], f.shape[1]), (rows, cols)) 56 | self.assertEqual((d.shape[0], d.shape[1]), (rows, cols)) 57 | -------------------------------------------------------------------------------- /ceygen/tests/test_elemwise.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from support import NoMallocTestCase 6 | cimport ceygen.elemwise as e 7 | 8 | 9 | class TestElemwise(NoMallocTestCase): 10 | 11 | def test_add_vs(self): 12 | x_np = np.array([1., 3., 5.]) 13 | y_np = -4. 14 | expected = np.array([-3., -1., 1.]) 15 | 16 | self.assertApproxEqual(e.add_vs[double](x_np, y_np), expected) 17 | out_np = np.empty(3) 18 | out2 = e.add_vs[double](x_np, y_np, out_np) 19 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 20 | self.assertApproxEqual(out2, expected) 21 | 22 | cdef double[:] x = x_np 23 | cdef double y = y_np 24 | self.assertApproxEqual(e.add_vs(x, y), expected) 25 | out_np[:] = -123. # reset so that we would catch errors 26 | cdef double[:] out = out_np 27 | out2 = e.add_vs(x, y, out) 28 | self.assertApproxEqual(out, expected) 29 | self.assertApproxEqual(out2, expected) 30 | 31 | def test_add_vs_baddims(self): 32 | x = np.array([1., 2., 3.]) 33 | y = 3. 
34 | out = np.empty(3) 35 | 36 | for X in (x, np.array([1., 2.])): 37 | for OUT in (out, np.empty(1), np.empty(4)): 38 | if X is x and OUT is out: 39 | e.add_vs[double](X, y, OUT) # this should be valid 40 | continue 41 | with self.assertRaises(ValueError): 42 | e.add_vs[double](X, y, OUT) 43 | 44 | def test_add_vs_none(self): 45 | with self.assertRaises(ValueError): 46 | e.add_vs[double](None, 3.) 47 | 48 | 49 | def test_multiply_vs(self): 50 | x_np = np.array([1., 3., -5.]) 51 | y_np = -4. 52 | expected = np.array([-4., -12., 20.]) 53 | 54 | self.assertApproxEqual(e.multiply_vs[double](x_np, y_np), expected) 55 | out_np = np.empty(3) 56 | out2 = e.multiply_vs[double](x_np, y_np, out_np) 57 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 58 | self.assertApproxEqual(out2, expected) 59 | 60 | cdef double[:] x = x_np 61 | cdef double y = y_np 62 | self.assertApproxEqual(e.multiply_vs(x, y), expected) 63 | out_np[:] = -123. # reset so that we would catch errors 64 | cdef double[:] out = out_np 65 | out2 = e.multiply_vs(x, y, out) 66 | self.assertApproxEqual(out, expected) 67 | self.assertApproxEqual(out2, expected) 68 | 69 | def test_multiply_vs_baddims(self): 70 | x = np.array([1., 2., 3.]) 71 | y = 3. 72 | out = np.empty(3) 73 | 74 | for X in (x, np.array([1., 2.])): 75 | for OUT in (out, np.empty(1), np.empty(4)): 76 | if X is x and OUT is out: 77 | e.multiply_vs[double](X, y, OUT) # this should be valid 78 | continue 79 | with self.assertRaises(ValueError): 80 | e.multiply_vs[double](X, y, OUT) 81 | 82 | def test_multiply_vs_none(self): 83 | with self.assertRaises(ValueError): 84 | e.multiply_vs[double](None, 3.) 85 | 86 | 87 | def test_power_vs(self): 88 | x_np = np.array([1., 3., -5.]) 89 | y_np = 2. 90 | expected = np.array([1., 9., 25.]) 91 | 92 | self.assertApproxEqual(e.power_vs[double](x_np, y_np), expected) 93 | out_np = np.empty(3) 94 | out2 = e.power_vs[double](x_np, y_np, out_np) 95 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 96 | self.assertApproxEqual(out2, expected) 97 | 98 | cdef double[:] x = x_np 99 | cdef double y = y_np 100 | self.assertApproxEqual(e.power_vs(x, y), expected) 101 | out_np[:] = -123. # reset so that we would catch errors 102 | cdef double[:] out = out_np 103 | out2 = e.power_vs(x, y, out) 104 | self.assertApproxEqual(out, expected) 105 | self.assertApproxEqual(out2, expected) 106 | 107 | def test_power_vs_baddims(self): 108 | x = np.array([1., 2., 3.]) 109 | y = 3. 110 | out = np.empty(3) 111 | 112 | for X in (x, np.array([1., 2.])): 113 | for OUT in (out, np.empty(1), np.empty(4)): 114 | if X is x and OUT is out: 115 | e.power_vs[double](X, y, OUT) # this should be valid 116 | continue 117 | with self.assertRaises(ValueError): 118 | e.power_vs[double](X, y, OUT) 119 | 120 | def test_power_vs_none(self): 121 | with self.assertRaises(ValueError): 122 | e.power_vs[double](None, 3.) 
123 | 124 | 125 | def test_add_vv(self): 126 | x_np = np.array([1., 3., 5.]) 127 | y_np = np.array([3., 2., 1.]) 128 | expected_xy, expected_yx = np.array([4., 5., 6.]), np.array([4., 5., 6.]) 129 | 130 | self.assertApproxEqual(e.add_vv[double](x_np, y_np), expected_xy) 131 | self.assertApproxEqual(e.add_vv[double](y_np, x_np), expected_yx) 132 | out_np = np.empty(3) 133 | out2 = e.add_vv[double](x_np, y_np, out_np) 134 | self.assertApproxEqual(out_np, expected_xy) # test that it actually uses out 135 | self.assertApproxEqual(out2, expected_xy) 136 | 137 | cdef double[:] x = x_np 138 | cdef double[:] y = y_np 139 | self.assertApproxEqual(e.add_vv(x, y), expected_xy) 140 | self.assertApproxEqual(e.add_vv(y, x), expected_yx) 141 | cdef double[:] out = out_np 142 | out2 = e.add_vv(y, x, out) 143 | self.assertApproxEqual(out, expected_yx) 144 | self.assertApproxEqual(out2, expected_yx) 145 | 146 | def test_add_vv_baddims(self): 147 | x = np.array([1., 2., 3.]) 148 | y = np.array([3., 2., 1.]) 149 | out = np.empty(3) 150 | 151 | for X in (x, np.array([1., 2.])): 152 | for Y in (y, np.array([1., 2., 3., 4.])): 153 | for OUT in (out, np.empty(2), np.empty(4)): 154 | if X is x and Y is y and OUT is out: 155 | e.add_vv[double](X, Y, OUT) # this should be valid 156 | continue 157 | with self.assertRaises(ValueError): 158 | e.add_vv[double](X, Y, OUT) 159 | 160 | def test_add_vv_none(self): 161 | x = np.array([1., 2., 3.]) 162 | y = np.array([3., 2., 1.]) 163 | 164 | for X in (x, None): 165 | for Y in (y, None): 166 | if X is x and Y is y: 167 | e.add_vv[double](X, Y) # this should be valid 168 | continue 169 | with self.assertRaises(ValueError): 170 | e.add_vv[double](X, Y) 171 | 172 | 173 | def test_subtract_vv(self): 174 | x_np = np.array([1., 2., 3.]) 175 | y_np = np.array([3., 2., 1.]) 176 | expected_xy, expected_yx = np.array([-2., 0., 2.]), np.array([2., 0., -2.]) 177 | 178 | self.assertApproxEqual(e.subtract_vv[double](x_np, y_np), expected_xy) 179 | self.assertApproxEqual(e.subtract_vv[double](y_np, x_np), expected_yx) 180 | out_np = np.empty(3) 181 | out2 = e.subtract_vv[double](x_np, y_np, out_np) 182 | self.assertApproxEqual(out_np, expected_xy) # test that it actually uses out 183 | self.assertApproxEqual(out2, expected_xy) 184 | 185 | cdef double[:] x = x_np 186 | cdef double[:] y = y_np 187 | self.assertApproxEqual(e.subtract_vv(x, y), expected_xy) 188 | self.assertApproxEqual(e.subtract_vv(y, x), expected_yx) 189 | cdef double[:] out = out_np 190 | out2 = e.subtract_vv(y, x, out) 191 | self.assertApproxEqual(out, expected_yx) 192 | self.assertApproxEqual(out2, expected_yx) 193 | 194 | def test_subtract_vv_baddims(self): 195 | x = np.array([1., 2., 3.]) 196 | y = np.array([3., 2., 1.]) 197 | out = np.empty(3) 198 | 199 | for X in (x, np.array([1., 2.])): 200 | for Y in (y, np.array([1., 2., 3., 4.])): 201 | for OUT in (out, np.empty(2), np.empty(4)): 202 | if X is x and Y is y and OUT is out: 203 | e.subtract_vv[double](X, Y, OUT) # this should be valid 204 | continue 205 | with self.assertRaises(ValueError): 206 | e.subtract_vv[double](X, Y, OUT) 207 | 208 | def test_subtract_vv_none(self): 209 | x = np.array([1., 2., 3.]) 210 | y = np.array([3., 2., 1.]) 211 | 212 | for X in (x, None): 213 | for Y in (y, None): 214 | if X is x and Y is y: 215 | e.subtract_vv[double](X, Y) # this should be valid 216 | continue 217 | with self.assertRaises(ValueError): 218 | e.subtract_vv[double](X, Y) 219 | 220 | 221 | def test_multiply_vv(self): 222 | x_np = np.array([1., 2., 3.]) 223 | y_np = 
np.array([3., 2., 1.]) 224 | expected_xy, expected_yx = np.array([3., 4., 3.]), np.array([3., 4., 3.]) 225 | 226 | self.assertApproxEqual(e.multiply_vv[double](x_np, y_np), expected_xy) 227 | self.assertApproxEqual(e.multiply_vv[double](y_np, x_np), expected_yx) 228 | out_np = np.empty(3) 229 | out2 = e.multiply_vv[double](x_np, y_np, out_np) 230 | self.assertApproxEqual(out_np, expected_xy) # test that it actually uses out 231 | self.assertApproxEqual(out2, expected_xy) 232 | 233 | cdef double[:] x = x_np 234 | cdef double[:] y = y_np 235 | self.assertApproxEqual(e.multiply_vv(x, y), expected_xy) 236 | self.assertApproxEqual(e.multiply_vv(y, x), expected_yx) 237 | cdef double[:] out = out_np 238 | out2 = e.multiply_vv(y, x, out) 239 | self.assertApproxEqual(out, expected_yx) 240 | self.assertApproxEqual(out2, expected_yx) 241 | 242 | def test_multiply_vv_baddims(self): 243 | x = np.array([1., 2., 3.]) 244 | y = np.array([3., 2., 1.]) 245 | out = np.empty(3) 246 | 247 | for X in (x, np.array([1., 2.])): 248 | for Y in (y, np.array([1., 2., 3., 4.])): 249 | for OUT in (out, np.empty(2), np.empty(4)): 250 | if X is x and Y is y and OUT is out: 251 | e.multiply_vv[double](X, Y, OUT) # this should be valid 252 | continue 253 | with self.assertRaises(ValueError): 254 | e.multiply_vv[double](X, Y, OUT) 255 | 256 | def test_multiply_vv_none(self): 257 | x = np.array([1., 2., 3.]) 258 | y = np.array([3., 2., 1.]) 259 | 260 | for X in (x, None): 261 | for Y in (y, None): 262 | if X is x and Y is y: 263 | e.multiply_vv[double](X, Y) # this should be valid 264 | continue 265 | with self.assertRaises(ValueError): 266 | e.multiply_vv[double](X, Y) 267 | 268 | 269 | def test_divide_vv(self): 270 | x_np = np.array([1., 2., 3.]) 271 | y_np = np.array([3., 2., 1.]) 272 | expected_xy, expected_yx = np.array([1./3., 1., 3.]), np.array([3., 1., 1./3.]) 273 | 274 | self.assertApproxEqual(e.divide_vv[double](x_np, y_np), expected_xy) 275 | self.assertApproxEqual(e.divide_vv[double](y_np, x_np), expected_yx) 276 | out_np = np.empty(3) 277 | out2 = e.divide_vv[double](x_np, y_np, out_np) 278 | self.assertApproxEqual(out_np, expected_xy) # test that it actually uses out 279 | self.assertApproxEqual(out2, expected_xy) 280 | 281 | cdef double[:] x = x_np 282 | cdef double[:] y = y_np 283 | self.assertApproxEqual(e.divide_vv(x, y), expected_xy) 284 | self.assertApproxEqual(e.divide_vv(y, x), expected_yx) 285 | cdef double[:] out = out_np 286 | out2 = e.divide_vv(y, x, out) 287 | self.assertApproxEqual(out, expected_yx) 288 | self.assertApproxEqual(out2, expected_yx) 289 | 290 | def test_divide_vv_baddims(self): 291 | x = np.array([1., 2., 3.]) 292 | y = np.array([3., 2., 1.]) 293 | out = np.empty(3) 294 | 295 | for X in (x, np.array([1., 2.])): 296 | for Y in (y, np.array([1., 2., 3., 4.])): 297 | for OUT in (out, np.empty(2), np.empty(4)): 298 | if X is x and Y is y and OUT is out: 299 | e.divide_vv[double](X, Y, OUT) # this should be valid 300 | continue 301 | with self.assertRaises(ValueError): 302 | e.divide_vv[double](X, Y, OUT) 303 | 304 | def test_divide_vv_none(self): 305 | x = np.array([1., 2., 3.]) 306 | y = np.array([3., 2., 1.]) 307 | 308 | for X in (x, None): 309 | for Y in (y, None): 310 | if X is x and Y is y: 311 | e.divide_vv[double](X, Y) # this should be valid 312 | continue 313 | with self.assertRaises(ValueError): 314 | e.divide_vv[double](X, Y) 315 | 316 | 317 | def test_add_ms(self): 318 | x_np = np.array([[1., 3., 5.]]) 319 | y_np = -4. 
320 | expected = np.array([[-3., -1., 1.]]) 321 | 322 | self.assertApproxEqual(e.add_ms[double](x_np, y_np), expected) 323 | self.assertApproxEqual(e.add_ms[double](x_np.T, y_np), expected.T) 324 | out_np = np.empty((1, 3)) 325 | out2 = e.add_ms[double](x_np, y_np, out_np) 326 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 327 | self.assertApproxEqual(out2, expected) 328 | 329 | cdef double[:, :] x = x_np 330 | cdef double y = y_np 331 | self.assertApproxEqual(e.add_ms(x, y), expected) 332 | out_np[:, :] = -123. # reset so that we would catch errors 333 | cdef double[:, :] out = out_np 334 | out2 = e.add_ms(x, y, out) 335 | self.assertApproxEqual(out, expected) 336 | self.assertApproxEqual(out2, expected) 337 | 338 | def test_add_ms_baddims(self): 339 | x = np.array([[1., 2., 3.]]) 340 | y = 3. 341 | out = np.empty((1, 3)) 342 | 343 | for X in (x, np.array([[1., 2.]]), np.array([1., 2.])): 344 | for OUT in (out, np.empty((1, 1)), np.empty((1, 4)), np.empty(3)): 345 | if X is x and OUT is out: 346 | e.add_ms[double](X, y, OUT) # this should be valid 347 | continue 348 | with self.assertRaises(ValueError): 349 | e.add_ms[double](X, y, OUT) 350 | 351 | def test_add_ms_none(self): 352 | with self.assertRaises(ValueError): 353 | e.add_ms[double](None, 3.) 354 | 355 | 356 | def test_multiply_ms(self): 357 | x_np = np.array([[1., 3., -5.]]) 358 | y_np = -4. 359 | expected = np.array([[-4., -12., 20.]]) 360 | 361 | self.assertApproxEqual(e.multiply_ms[double](x_np, y_np), expected) 362 | self.assertApproxEqual(e.multiply_ms[double](x_np.T, y_np), expected.T) 363 | out_np = np.empty((1, 3)) 364 | out2 = e.multiply_ms[double](x_np, y_np, out_np) 365 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 366 | self.assertApproxEqual(out2, expected) 367 | 368 | cdef double[:, :] x = x_np 369 | cdef double y = y_np 370 | self.assertApproxEqual(e.multiply_ms(x, y), expected) 371 | out_np[:, :] = -123. # reset so that we would catch errors 372 | cdef double[:, :] out = out_np 373 | out2 = e.multiply_ms(x, y, out) 374 | self.assertApproxEqual(out, expected) 375 | self.assertApproxEqual(out2, expected) 376 | 377 | def test_multiply_ms_baddims(self): 378 | x = np.array([[1., 2., 3.]]) 379 | y = 3. 380 | out = np.empty((1, 3)) 381 | 382 | for X in (x, np.array([[1., 2.]]), np.array([1., 2.])): 383 | for OUT in (out, np.empty((1, 1)), np.empty((1, 4)), np.empty(3)): 384 | if X is x and OUT is out: 385 | e.multiply_ms[double](X, y, OUT) # this should be valid 386 | continue 387 | with self.assertRaises(ValueError): 388 | e.multiply_ms[double](X, y, OUT) 389 | 390 | def test_multiply_ms_none(self): 391 | with self.assertRaises(ValueError): 392 | e.multiply_ms[double](None, 3.) 393 | 394 | 395 | def test_power_ms(self): 396 | x_np = np.array([[1., 3., -5.]]) 397 | y_np = 2. 398 | expected = np.array([[1., 9., 25.]]) 399 | 400 | self.assertApproxEqual(e.power_ms[double](x_np, y_np), expected) 401 | self.assertApproxEqual(e.power_ms[double](x_np.T, y_np), expected.T) 402 | out_np = np.empty((1, 3)) 403 | out2 = e.power_ms[double](x_np, y_np, out_np) 404 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 405 | self.assertApproxEqual(out2, expected) 406 | 407 | cdef double[:, :] x = x_np 408 | cdef double y = y_np 409 | self.assertApproxEqual(e.power_ms(x, y), expected) 410 | out_np[:, :] = -123. 
# reset so that we would catch errors 411 | cdef double[:, :] out = out_np 412 | out2 = e.power_ms(x, y, out) 413 | self.assertApproxEqual(out, expected) 414 | self.assertApproxEqual(out2, expected) 415 | 416 | def test_power_ms_baddims(self): 417 | x = np.array([[1., 2., 3.]]) 418 | y = 3. 419 | out = np.empty((1, 3)) 420 | 421 | for X in (x, np.array([[1., 2.]]), np.array([1., 2.])): 422 | for OUT in (out, np.empty((1, 1)), np.empty((1, 4)), np.empty(3)): 423 | if X is x and OUT is out: 424 | e.power_ms[double](X, y, OUT) # this should be valid 425 | continue 426 | with self.assertRaises(ValueError): 427 | e.power_ms[double](X, y, OUT) 428 | 429 | def test_power_ms_none(self): 430 | with self.assertRaises(ValueError): 431 | e.power_ms[double](None, 3.) 432 | 433 | 434 | def test_add_mm(self): 435 | x_np = np.array([[1., 3., 5.]]) 436 | y_np = np.array([[3., 2., 1.]]) 437 | expected_xy, expected_yx = np.array([[4., 5., 6.]]), np.array([[4., 5., 6.]]) 438 | 439 | self.assertApproxEqual(e.add_mm[double](x_np, y_np), expected_xy) 440 | self.assertApproxEqual(e.add_mm[double](y_np, x_np), expected_yx) 441 | self.assertApproxEqual(e.add_mm[double](x_np.T, y_np.T), expected_xy.T) 442 | self.assertApproxEqual(e.add_mm[double](y_np.T, x_np.T), expected_yx.T) 443 | out_np = np.empty((1, 3)) 444 | out2 = e.add_mm[double](x_np, y_np, out_np) 445 | self.assertApproxEqual(out_np, expected_xy) # test that it actually uses out 446 | self.assertApproxEqual(out2, expected_xy) 447 | 448 | cdef double[:, :] x = x_np 449 | cdef double[:, :] y = y_np 450 | self.assertApproxEqual(e.add_mm(x, y), expected_xy) 451 | self.assertApproxEqual(e.add_mm(y, x), expected_yx) 452 | self.assertApproxEqual(e.add_mm(x.T, y.T), expected_xy.T) 453 | self.assertApproxEqual(e.add_mm(y.T, x.T), expected_yx.T) 454 | cdef double[:, :] out = out_np 455 | out2 = e.add_mm(y, x, out) 456 | self.assertApproxEqual(out, expected_yx) 457 | self.assertApproxEqual(out2, expected_yx) 458 | 459 | def test_add_mm_baddims(self): 460 | x = np.array([[1., 2., 3.]]) 461 | y = np.array([[3., 2., 1.]]) 462 | out = np.empty((1, 3)) 463 | 464 | for X in (x, np.array([[1., 2.]]), np.array([[1.], [2.], [3.]])): 465 | for Y in (y, np.array([[1., 2., 3., 4.]]), np.array([[1.], [2.], [3.], [4.]])): 466 | for OUT in (out, np.empty((1, 2)), np.empty((3, 1)), np.empty((1, 4))): 467 | if X is x and Y is y and OUT is out: 468 | e.add_mm[double](X, Y, OUT) # this should be valid 469 | continue 470 | with self.assertRaises(ValueError): 471 | e.add_mm[double](X, Y, OUT) 472 | 473 | def test_add_mm_none(self): 474 | x = np.array([[1., 2., 3.]]) 475 | y = np.array([[3., 2., 1.]]) 476 | 477 | for X in (x, None): 478 | for Y in (y, None): 479 | if X is x and Y is y: 480 | e.add_mm[double](X, Y) # this should be valid 481 | continue 482 | with self.assertRaises(ValueError): 483 | e.add_mm[double](X, Y) 484 | 485 | 486 | def test_subtract_mm(self): 487 | x_np = np.array([[1., 2., 3.]]) 488 | y_np = np.array([[3., 2., 1.]]) 489 | expected_xy, expected_yx = np.array([[-2., 0., 2.]]), np.array([[2., 0., -2.]]) 490 | 491 | self.assertApproxEqual(e.subtract_mm[double](x_np, y_np), expected_xy) 492 | self.assertApproxEqual(e.subtract_mm[double](y_np, x_np), expected_yx) 493 | self.assertApproxEqual(e.subtract_mm[double](x_np.T, y_np.T), expected_xy.T) 494 | self.assertApproxEqual(e.subtract_mm[double](y_np.T, x_np.T), expected_yx.T) 495 | out_np = np.empty((1, 3)) 496 | out2 = e.subtract_mm[double](x_np, y_np, out_np) 497 | self.assertApproxEqual(out_np, expected_xy) # test 
that it actually uses out 498 | self.assertApproxEqual(out2, expected_xy) 499 | 500 | cdef double[:, :] x = x_np 501 | cdef double[:, :] y = y_np 502 | self.assertApproxEqual(e.subtract_mm(x, y), expected_xy) 503 | self.assertApproxEqual(e.subtract_mm(y, x), expected_yx) 504 | self.assertApproxEqual(e.subtract_mm(x.T, y.T), expected_xy.T) 505 | self.assertApproxEqual(e.subtract_mm(y.T, x.T), expected_yx.T) 506 | cdef double[:, :] out = out_np 507 | out2 = e.subtract_mm(y, x, out) 508 | self.assertApproxEqual(out, expected_yx) 509 | self.assertApproxEqual(out2, expected_yx) 510 | 511 | def test_subtract_mm_baddims(self): 512 | x = np.array([[1., 2., 3.]]) 513 | y = np.array([[3., 2., 1.]]) 514 | out = np.empty((1, 3)) 515 | 516 | for X in (x, np.array([[1., 2.]]), np.array([[1.], [2.], [3.]])): 517 | for Y in (y, np.array([[1., 2., 3., 4.]]), np.array([[1.], [2.], [3.], [4.]])): 518 | for OUT in (out, np.empty((1, 2)), np.empty((3, 1)), np.empty((1, 4))): 519 | if X is x and Y is y and OUT is out: 520 | e.subtract_mm[double](X, Y, OUT) # this should be valid 521 | continue 522 | with self.assertRaises(ValueError): 523 | e.subtract_mm[double](X, Y, OUT) 524 | 525 | def test_subtract_mm_none(self): 526 | x = np.array([[1., 2., 3.]]) 527 | y = np.array([[3., 2., 1.]]) 528 | 529 | for X in (x, None): 530 | for Y in (y, None): 531 | if X is x and Y is y: 532 | e.subtract_mm[double](X, Y) # this should be valid 533 | continue 534 | with self.assertRaises(ValueError): 535 | e.subtract_mm[double](X, Y) 536 | 537 | 538 | def test_multiply_mm(self): 539 | x_np = np.array([[1., 2., 3.]]) 540 | y_np = np.array([[3., 2., 1.]]) 541 | expected_xy, expected_yx = np.array([[3., 4., 3.]]), np.array([[3., 4., 3.]]) 542 | 543 | self.assertApproxEqual(e.multiply_mm[double](x_np, y_np), expected_xy) 544 | self.assertApproxEqual(e.multiply_mm[double](y_np, x_np), expected_yx) 545 | self.assertApproxEqual(e.multiply_mm[double](x_np.T, y_np.T), expected_xy.T) 546 | self.assertApproxEqual(e.multiply_mm[double](y_np.T, x_np.T), expected_yx.T) 547 | out_np = np.empty((1, 3)) 548 | out2 = e.multiply_mm[double](x_np, y_np, out_np) 549 | self.assertApproxEqual(out_np, expected_xy) # test that it actually uses out 550 | self.assertApproxEqual(out2, expected_xy) 551 | 552 | cdef double[:, :] x = x_np 553 | cdef double[:, :] y = y_np 554 | self.assertApproxEqual(e.multiply_mm(x, y), expected_xy) 555 | self.assertApproxEqual(e.multiply_mm(y, x), expected_yx) 556 | self.assertApproxEqual(e.multiply_mm(x.T, y.T), expected_xy.T) 557 | self.assertApproxEqual(e.multiply_mm(y.T, x.T), expected_yx.T) 558 | cdef double[:, :] out = out_np 559 | out2 = e.multiply_mm(y, x, out) 560 | self.assertApproxEqual(out, expected_yx) 561 | self.assertApproxEqual(out2, expected_yx) 562 | 563 | def test_multiply_mm_baddims(self): 564 | x = np.array([[1., 2., 3.]]) 565 | y = np.array([[3., 2., 1.]]) 566 | out = np.empty((1, 3)) 567 | 568 | for X in (x, np.array([[1., 2.]]), np.array([[1.], [2.], [3.]])): 569 | for Y in (y, np.array([[1., 2., 3., 4.]]), np.array([[1.], [2.], [3.], [4.]])): 570 | for OUT in (out, np.empty((1, 2)), np.empty((3, 1)), np.empty((1, 4))): 571 | if X is x and Y is y and OUT is out: 572 | e.multiply_mm[double](X, Y, OUT) # this should be valid 573 | continue 574 | with self.assertRaises(ValueError): 575 | e.multiply_mm[double](X, Y, OUT) 576 | 577 | def test_multiply_mm_none(self): 578 | x = np.array([[1., 2., 3.]]) 579 | y = np.array([[3., 2., 1.]]) 580 | 581 | for X in (x, None): 582 | for Y in (y, None): 583 | if X is x 
and Y is y: 584 | e.multiply_mm[double](X, Y) # this should be valid 585 | continue 586 | with self.assertRaises(ValueError): 587 | e.multiply_mm[double](X, Y) 588 | 589 | 590 | def test_divide_mm(self): 591 | x_np = np.array([[1., 2., 3.]]) 592 | y_np = np.array([[3., 2., 1.]]) 593 | expected_xy, expected_yx = np.array([[1./3., 1., 3.]]), np.array([[3., 1., 1./3.]]) 594 | 595 | self.assertApproxEqual(e.divide_mm[double](x_np, y_np), expected_xy) 596 | self.assertApproxEqual(e.divide_mm[double](y_np, x_np), expected_yx) 597 | self.assertApproxEqual(e.divide_mm[double](x_np.T, y_np.T), expected_xy.T) 598 | self.assertApproxEqual(e.divide_mm[double](y_np.T, x_np.T), expected_yx.T) 599 | out_np = np.empty((1, 3)) 600 | out2 = e.divide_mm[double](x_np, y_np, out_np) 601 | self.assertApproxEqual(out_np, expected_xy) # test that it actually uses out 602 | self.assertApproxEqual(out2, expected_xy) 603 | 604 | cdef double[:, :] x = x_np 605 | cdef double[:, :] y = y_np 606 | self.assertApproxEqual(e.divide_mm(x, y), expected_xy) 607 | self.assertApproxEqual(e.divide_mm(y, x), expected_yx) 608 | self.assertApproxEqual(e.divide_mm(x.T, y.T), expected_xy.T) 609 | self.assertApproxEqual(e.divide_mm(y.T, x.T), expected_yx.T) 610 | cdef double[:, :] out = out_np 611 | out2 = e.divide_mm(y, x, out) 612 | self.assertApproxEqual(out, expected_yx) 613 | self.assertApproxEqual(out2, expected_yx) 614 | 615 | def test_divide_mm_baddims(self): 616 | x = np.array([[1., 2., 3.]]) 617 | y = np.array([[3., 2., 1.]]) 618 | out = np.empty((1, 3)) 619 | 620 | for X in (x, np.array([[1., 2.]]), np.array([[1.], [2.], [3.]])): 621 | for Y in (y, np.array([[1., 2., 3., 4.]]), np.array([[1.], [2.], [3.], [4.]])): 622 | for OUT in (out, np.empty((1, 2)), np.empty((3, 1)), np.empty((1, 4))): 623 | if X is x and Y is y and OUT is out: 624 | e.divide_mm[double](X, Y, OUT) # this should be valid 625 | continue 626 | with self.assertRaises(ValueError): 627 | e.divide_mm[double](X, Y, OUT) 628 | 629 | def test_divide_mm_none(self): 630 | x = np.array([[1., 2., 3.]]) 631 | y = np.array([[3., 2., 1.]]) 632 | 633 | for X in (x, None): 634 | for Y in (y, None): 635 | if X is x and Y is y: 636 | e.divide_mm[double](X, Y) # this should be valid 637 | continue 638 | with self.assertRaises(ValueError): 639 | e.divide_mm[double](X, Y) 640 | -------------------------------------------------------------------------------- /ceygen/tests/test_llt.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from support import CeygenTestCase, skipIfEigenOlderThan 6 | cimport ceygen.llt as llt 7 | 8 | 9 | class TestLlt(CeygenTestCase): 10 | 11 | def test_cholesky(self): 12 | x_np = np.array([[1., 0., 0.], [0., 4., 0.], [0., 0., 25.]]) 13 | expected = np.array([[1., 0., 0.], [0., 2., 0.], [0., 0., 5.]]) 14 | 15 | self.assertApproxEqual(llt.cholesky[double](x_np), expected) 16 | out_np = np.empty((3, 3)) 17 | out2 = llt.cholesky[double](x_np, out_np) 18 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 19 | self.assertApproxEqual(out2, expected) 20 | 21 | cdef double[:, :] x = x_np 22 | self.assertApproxEqual(llt.cholesky(x), expected) 23 | cdef double[:, :] out = np.empty((3, 3)) 24 | out2 = llt.cholesky(x, out) 25 | self.assertApproxEqual(out, expected) 26 | self.assertApproxEqual(out2, expected) 27 | 28 | @skipIfEigenOlderThan(3, 1, 0) 29 | def test_cholesky_badinput(self): 30 | x = 0.5 * np.eye(2) 31 | out = np.zeros((2, 2)) 32 | 
for X in (x, None, np.array([1.]), np.array([[1.], [2.]])): 33 | for OUT in (out, None, np.empty(2), np.empty((3, 2))): 34 | if X is x and (OUT is out or OUT is None): 35 | llt.cholesky[double](X, OUT) # this should be valid 36 | else: 37 | with self.assertRaises(ValueError): 38 | llt.cholesky[double](X, OUT) # this should raise ValueError 39 | -------------------------------------------------------------------------------- /ceygen/tests/test_lu.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from support import CeygenTestCase, skipIfEigenOlderThan 6 | cimport ceygen.lu as l 7 | 8 | 9 | class TestLu(CeygenTestCase): 10 | 11 | def test_inv(self): 12 | x_np = np.array([[1., -1.], [2., -1.]]) 13 | expected = np.array([[-1., 1.], [-2., 1.]]) 14 | 15 | self.assertApproxEqual(l.inv[double](x_np), expected) 16 | out_np = np.empty((2, 2)) 17 | out2 = l.inv[double](x_np, out_np) 18 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 19 | self.assertApproxEqual(out2, expected) 20 | 21 | cdef double[:, :] x = x_np 22 | self.assertApproxEqual(l.inv(x), expected) 23 | cdef double[:, :] out = np.empty((2, 2)) 24 | out2 = l.inv(x, out) 25 | self.assertApproxEqual(out, expected) 26 | self.assertApproxEqual(out2, expected) 27 | 28 | def test_inv_badinput(self): 29 | x = 0.5 * np.eye(2) 30 | out = np.zeros((2, 2)) 31 | for X in (x, None, np.array([1.]), np.array([[1.], [2.]])): 32 | for OUT in (out, None, np.empty(2), np.empty((3, 2))): 33 | if X is x and (OUT is out or OUT is None): 34 | l.inv[double](X, OUT) # this should be valid 35 | else: 36 | with self.assertRaises(ValueError): 37 | l.inv[double](X, OUT) # this should raise ValueError 38 | 39 | def test_iinv(self): 40 | for size in (1, 2, 3, 5, 9, 12, 15, 31): 41 | x = np.random.rand(size, size) 42 | x_copy = x.copy() 43 | l.iinv[double](x) 44 | self.assertApproxEqual(x, np.linalg.inv(x_copy)) 45 | 46 | def test_iinv_badinput(self): 47 | # l.iinv(None) doesn't fail, shouldn't matter 48 | for X in (np.array([1., 2.]), np.array([[1.], [2.]])): 49 | with self.assertRaises(ValueError): 50 | l.iinv[double](X) 51 | 52 | def test_det(self): 53 | self.assertApproxEqual(l.det[double](np.array([[1., 2.], [3., 4.]])), -2.) 54 | self.assertApproxEqual(l.det[double](np.array([[1., 2.], [2., 4.]])), 0.) 55 | self.assertApproxEqual(l.det[double](np.array([[17.]])), 17.) 56 | 57 | @skipIfEigenOlderThan(3, 1, 90) 58 | def test_det_badinput(self): 59 | for X in (np.array([1.]), np.array([[1., 2.]])): 60 | with self.assertRaises(ValueError): 61 | l.det[double](X) 62 | -------------------------------------------------------------------------------- /ceygen/tests/test_reductions.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from support import NoMallocTestCase 6 | cimport ceygen.reductions as r 7 | 8 | 9 | class TestReductions(NoMallocTestCase): 10 | 11 | def test_sum_v(self): 12 | x_np = np.array([1., 3., 5.]) 13 | expected = 9. 14 | 15 | self.assertApproxEqual(r.sum_v[double](x_np), expected) 16 | cdef double[:] x = x_np 17 | self.assertApproxEqual(r.sum_v(x), expected) 18 | # sum of None (which can be interpreted as an empty vector) is defined to be 0.0 19 | self.assertApproxEqual(r.sum_v[double](None), 0.) 
20 | 21 | def test_sum_v_badargs(self): 22 | with self.assertRaises(ValueError): 23 | r.sum_v[double](np.array([[1.]])) 24 | with self.assertRaises(TypeError): 25 | l = [1, 2, 3] 26 | r.sum_v[double](l) 27 | 28 | def test_sum_m(self): 29 | x_np = np.array([[1., 3., 5.]]) 30 | expected = 9. 31 | 32 | self.assertApproxEqual(r.sum_m[double](x_np), expected) 33 | cdef double[:, :] x = x_np 34 | self.assertApproxEqual(r.sum_m(x), expected) 35 | # sum of None (which can be interpreted as an empty matrix) is defined to be 0.0 36 | self.assertApproxEqual(r.sum_m[double](None), 0.) 37 | 38 | def test_sum_m_badargs(self): 39 | with self.assertRaises(ValueError): 40 | r.sum_m[double](np.array([1.])) 41 | with self.assertRaises(TypeError): 42 | l = [[1, 2, 3]] 43 | r.sum_m[double](l) 44 | 45 | def test_rowwise_sum(self): 46 | x_np = np.array([[2., 4., 6.], 47 | [1., 3., 5.]]) 48 | expected, expected_t = [12., 9.], [3., 7., 11.] 49 | 50 | self.assertApproxEqual(r.rowwise_sum[double](x_np), expected) 51 | self.assertApproxEqual(r.rowwise_sum[double](x_np.T), expected_t) 52 | out_np = np.empty(2) 53 | out2 = r.rowwise_sum[double](x_np, out_np) 54 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 55 | self.assertApproxEqual(out2, expected) 56 | 57 | cdef double[:, :] x = x_np 58 | self.assertApproxEqual(r.rowwise_sum(x), expected) 59 | out_np[:] = -123. # reset so that we would catch errors 60 | cdef double[:] out = out_np 61 | out2 = r.rowwise_sum(x, out) 62 | self.assertApproxEqual(out, expected) 63 | self.assertApproxEqual(out2, expected) 64 | 65 | def test_rowwise_sum_badargs(self): 66 | x = np.array([[1., 2., 3.]]) 67 | out = np.empty(1) 68 | 69 | for X in (x, np.array([1., 2.]), np.array([[1.], [2.]]), None): 70 | for OUT in (out, np.array([[1.]])): 71 | if X is x and OUT is out: 72 | r.rowwise_sum[double](X, OUT) # this should be valid 73 | continue 74 | with self.assertRaises(ValueError): 75 | r.rowwise_sum[double](X, OUT) 76 | 77 | def test_colwise_sum(self): 78 | x_np = np.array([[2., 4., 6.], 79 | [1., 3., 5.]]) 80 | expected, expected_t = [3., 7., 11.], [12., 9.] 81 | 82 | self.assertApproxEqual(r.colwise_sum[double](x_np), expected) 83 | self.assertApproxEqual(r.colwise_sum[double](x_np.T), expected_t) 84 | out_np = np.empty(3) 85 | out2 = r.colwise_sum[double](x_np, out_np) 86 | self.assertApproxEqual(out_np, expected) # test that it actually uses out 87 | self.assertApproxEqual(out2, expected) 88 | 89 | cdef double[:, :] x = x_np 90 | self.assertApproxEqual(r.colwise_sum(x), expected) 91 | out_np[:] = -123. 
# reset so that we would catch errors 92 | cdef double[:] out = out_np 93 | out2 = r.colwise_sum(x, out) 94 | self.assertApproxEqual(out, expected) 95 | self.assertApproxEqual(out2, expected) 96 | 97 | def test_colwise_sum_badargs(self): 98 | x = np.array([[1.], [2.], [3.]]) 99 | out = np.empty(1) 100 | 101 | for X in (x, np.array([1., 2.]), np.array([[1., 2.]]), None): 102 | for OUT in (out, np.array([[1.]])): 103 | if X is x and OUT is out: 104 | r.colwise_sum[double](X, OUT) # this should be valid 105 | continue 106 | with self.assertRaises(ValueError): 107 | r.colwise_sum[double](X, OUT) 108 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | /_build/ 2 | -------------------------------------------------------------------------------- /doc/ChangeLog.rst: -------------------------------------------------------------------------------- 1 | ../ChangeLog.rst -------------------------------------------------------------------------------- /doc/HACKING.rst: -------------------------------------------------------------------------------- 1 | ../HACKING.rst -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Ceygen.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Ceygen.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Ceygen" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Ceygen" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /doc/README.rst: -------------------------------------------------------------------------------- 1 | ../README.rst -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Ceygen documentation build configuration file, created by 4 | # sphinx-quickstart on Sun Jan 27 12:37:42 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import subprocess, sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.pngmath', 'sphinx.ext.viewcode'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'Ceygen' 44 | copyright = u'2013, Matěj Laitl' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = subprocess.check_output([sys.executable, os.path.join(os.path.dirname(os.path.abspath('.')), 'setup.py'), '--version']) 52 | # The full version, including alpha/beta/rc tags. 53 | release = version 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 
57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | # Be nitpicky - warn about all undefined references 90 | nitpicky = True 91 | 92 | 93 | # -- Options for HTML output --------------------------------------------------- 94 | 95 | # The theme to use for HTML and HTML Help pages. See the documentation for 96 | # a list of builtin themes. 97 | html_theme = 'default' 98 | 99 | # Theme options are theme-specific and customize the look and feel of a theme 100 | # further. For a list of options available for each theme, see the 101 | # documentation. 102 | #html_theme_options = {} 103 | 104 | # Add any paths that contain custom themes here, relative to this directory. 105 | #html_theme_path = [] 106 | 107 | # The name for this set of Sphinx documents. If None, it defaults to 108 | # " v documentation". 109 | #html_title = None 110 | 111 | # A shorter title for the navigation bar. Default is the same as html_title. 112 | #html_short_title = None 113 | 114 | # The name of an image file (relative to this directory) to place at the top 115 | # of the sidebar. 116 | #html_logo = None 117 | 118 | # The name of an image file (within the static path) to use as favicon of the 119 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 120 | # pixels large. 121 | #html_favicon = None 122 | 123 | # Add any paths that contain custom static files (such as style sheets) here, 124 | # relative to this directory. They are copied after the builtin static files, 125 | # so a file named "default.css" will overwrite the builtin "default.css". 126 | html_static_path = ['_static'] 127 | 128 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 129 | # using the given strftime format. 130 | #html_last_updated_fmt = '%b %d, %Y' 131 | 132 | # If true, SmartyPants will be used to convert quotes and dashes to 133 | # typographically correct entities. 134 | #html_use_smartypants = True 135 | 136 | # Custom sidebar templates, maps document names to template names. 137 | #html_sidebars = {} 138 | 139 | # Additional templates that should be rendered to pages, maps page names to 140 | # template names. 141 | #html_additional_pages = {} 142 | 143 | # If false, no module index is generated. 144 | #html_domain_indices = True 145 | 146 | # If false, no index is generated. 
147 | #html_use_index = True 148 | 149 | # If true, the index is split into individual pages for each letter. 150 | #html_split_index = False 151 | 152 | # If true, links to the reST sources are added to the pages. 153 | #html_show_sourcelink = True 154 | 155 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 156 | #html_show_sphinx = True 157 | 158 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 159 | #html_show_copyright = True 160 | 161 | # If true, an OpenSearch description file will be output, and all pages will 162 | # contain a tag referring to it. The value of this option must be the 163 | # base URL from which the finished HTML is served. 164 | #html_use_opensearch = '' 165 | 166 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 167 | #html_file_suffix = None 168 | 169 | # Output file base name for HTML help builder. 170 | htmlhelp_basename = 'Ceygendoc' 171 | 172 | 173 | # -- Options for LaTeX output -------------------------------------------------- 174 | 175 | latex_elements = { 176 | # The paper size ('letterpaper' or 'a4paper'). 177 | #'papersize': 'letterpaper', 178 | 179 | # The font size ('10pt', '11pt' or '12pt'). 180 | #'pointsize': '10pt', 181 | 182 | # Additional stuff for the LaTeX preamble. 183 | #'preamble': '', 184 | } 185 | 186 | # Grouping the document tree into LaTeX files. List of tuples 187 | # (source start file, target name, title, author, documentclass [howto/manual]). 188 | latex_documents = [ 189 | ('index', 'Ceygen.tex', u'Ceygen Documentation', 190 | u'Matěj Laitl', 'manual'), 191 | ] 192 | 193 | # The name of an image file (relative to this directory) to place at the top of 194 | # the title page. 195 | #latex_logo = None 196 | 197 | # For "manual" documents, if this is true, then toplevel headings are parts, 198 | # not chapters. 199 | #latex_use_parts = False 200 | 201 | # If true, show page references after internal links. 202 | #latex_show_pagerefs = False 203 | 204 | # If true, show URL addresses after external links. 205 | #latex_show_urls = False 206 | 207 | # Documents to append as an appendix to all manuals. 208 | #latex_appendices = [] 209 | 210 | # If false, no module index is generated. 211 | #latex_domain_indices = True 212 | 213 | 214 | # -- Options for manual page output -------------------------------------------- 215 | 216 | # One entry per manual page. List of tuples 217 | # (source start file, name, description, authors, manual section). 218 | man_pages = [ 219 | ('index', 'ceygen', u'Ceygen Documentation', 220 | [u'Matěj Laitl'], 1) 221 | ] 222 | 223 | # If true, show URL addresses after external links. 224 | #man_show_urls = False 225 | 226 | 227 | # -- Options for Texinfo output ------------------------------------------------ 228 | 229 | # Grouping the document tree into Texinfo files. List of tuples 230 | # (source start file, target name, title, author, 231 | # dir menu entry, description, category) 232 | texinfo_documents = [ 233 | ('index', 'Ceygen', u'Ceygen Documentation', 234 | u'Matěj Laitl', 'Ceygen', 'One line description of project.', 235 | 'Miscellaneous'), 236 | ] 237 | 238 | # Documents to append as an appendix to all manuals. 239 | #texinfo_appendices = [] 240 | 241 | # If false, no module index is generated. 242 | #texinfo_domain_indices = True 243 | 244 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
245 | #texinfo_show_urls = 'footnote' 246 | 247 | 248 | # Example configuration for intersphinx: refer to the Python standard library. 249 | intersphinx_mapping = { 250 | 'http://docs.python.org/': None, 251 | 'http://docs.scipy.org/doc/numpy/': None, 252 | } 253 | -------------------------------------------------------------------------------- /doc/core.rst: -------------------------------------------------------------------------------- 1 | ============================= 2 | Core Data Types and Functions 3 | ============================= 4 | 5 | This module provides basic linear algebra operations such as vector and matrix 6 | products as provided by the <`Eigen/Core`_> include. 7 | 8 | Core Data Types 9 | =============== 10 | 11 | .. module:: ceygen.dtype 12 | 13 | .. data:: dtype 14 | 15 | Cython `fused type`_, a selection of C char, short, int, long, float and double 16 | (Python :obj:`float`). 17 | 18 | .. data:: nonint_dtype 19 | 20 | Cython `fused type`_ for methods that cannot work with integer types (such as 21 | :func:`~ceygen.lu.inv`). 22 | 23 | .. function:: vector(size, like) 24 | 25 | Convenience function to create a new vector (*cython.view.array*) and return a 26 | memoryview of it. This function is declared *with gil* (it can be called without the 27 | GIL_ held, but acquires it during execution) and is rather expensive (as many Python 28 | calls are done). 29 | 30 | :param int size: number of elements of the desired vector 31 | :param like: dummy pointer to desired data type; value not used 32 | :type like: :obj:`dtype * ` 33 | :rtype: |vector| 34 | 35 | .. function:: matrix(rows, cols, like) 36 | 37 | Convenience function to create a new matrix (*cython.view.array*) and return a 38 | memoryview of it. This function is declared *with gil* (it can be called without the 39 | GIL_ held, but acquires it during execution) and is rather expensive (as many Python 40 | calls are done). 41 | 42 | :param int rows: number of rows of the desired matrix 43 | :param int cols: number of columns of the desired matrix 44 | :param like: dummy pointer to desired data type; value not used 45 | :type like: :obj:`dtype * ` 46 | :rtype: |matrix| 47 | 48 | Linear Algebra Functions 49 | ======================== 50 | 51 | .. module:: ceygen.core 52 | 53 | .. function:: dot_vv(x, y) 54 | 55 | Vector-vector dot product, returns a scalar of appropriate type. 56 | 57 | :param x: first factor 58 | :type x: |vector| 59 | :param y: second factor 60 | :type y: |vector| 61 | :raises: |valueerror| 62 | :raises: |typeerror| 63 | :rtype: |scalar| 64 | 65 | .. function:: dot_mv(x, y[, out=None]) 66 | 67 | Matrix-(column) vector product, returns a vector of appropriate type. 68 | 69 | :param x: first factor (matrix) 70 | :type x: |matrix| 71 | :param y: second factor (vector) 72 | :type y: |vector| 73 | :param out: |out| 74 | :type out: |vector| 75 | :raises: |valueerror| 76 | :raises: |typeerror| 77 | :rtype: |vector| 78 | 79 | .. function:: dot_vm(x, y[, out=None]) 80 | 81 | (Row) vector-matrix product, returns a vector of appropriate type. This is equivalent 82 | to dot_mv(*y*.T, *x*) because there's no distinction between row and column vectors in 83 | Cython memoryviews, but calling this function directly may incur slightly less 84 | overhead. 85 | 86 | :param x: first factor (vector) 87 | :type x: |vector| 88 | :param y: second factor (matrix) 89 | :type y: |matrix| 90 | :param out: |out| 91 | :type out: |vector| 92 | :raises: |valueerror| 93 | :raises: |typeerror| 94 | :rtype: |vector| 95 |
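As a small illustration of the dot products above (a sketch only, assuming the functions are called from Python with NumPy double arrays standing in for the memoryview arguments; the concrete values are made up)::

    import numpy as np
    import ceygen.core

    x = np.array([1., 2., 3.])
    A = np.array([[1., 0., 0.],
                  [0., 2., 0.]])

    ceygen.core.dot_vv(x, x)                  # 1 + 4 + 9 == 14.0
    np.asarray(ceygen.core.dot_mv(A, x))      # array([1., 4.])
    np.asarray(ceygen.core.dot_vm(x[:2], A))  # array([1., 4., 0.])

The returned vectors are Cython memoryviews; ``np.asarray`` is used above only to view their contents.

96 | ..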
function:: dot_mm(x, y[, out=None]) 97 | 98 | Matrix-matrix product, returns a matrix of appropriate type and dimensions. You may of 99 | course use this function to multiply matrices that are in fact vectors, you just need 100 | to pay attention to column-vector vs. row-vector distinction this time. 101 | 102 | If both *x* and *y* are contiguous in some way (either C or Fortran, independently), 103 | this function takes optimized code path that doesn't involve memory allocation in 104 | Eigen; speed gains are around 40% for matrices around 2\*2 -- 24\*24 size. No special 105 | markup is needed to trigger this. See also :func:`set_is_malloc_allowed`. 106 | 107 | :param x: first factor 108 | :type x: |matrix| 109 | :param y: second factor 110 | :type y: |matrix| 111 | :param out: |out| 112 | :type out: |matrix| 113 | :raises: |valueerror| 114 | :raises: |typeerror| 115 | :rtype: |matrix| 116 | 117 | Miscellaneous Functions 118 | ======================= 119 | 120 | .. function:: set_is_malloc_allowed(allowed) 121 | 122 | Set the internal Eigen flag whether it is allowed to allocate memory on heap. 123 | 124 | If this flag is :obj:`False` and Eigen will try to allocate memory on heap, it will 125 | assert which causes :obj:`~exceptions.ValueError` to be raised by Ceygen. This is 126 | useful to ensure you use the most optimized code path. Defaults to :obj:`True`. 127 | Note: for this to work, Ceygen defines *EIGEN_RUNTIME_NO_MALLOC* preprocessor 128 | directive before including Eigen. 129 | 130 | See http://eigen.tuxfamily.org/dox/TopicPreprocessorDirectives.html 131 | 132 | .. function:: eigen_version() 133 | 134 | Return version of Eigen which Ceygen was compiled against as a tuple of three integers, 135 | for example (3, 1, 2). 136 | 137 | :rtype: :obj:`tuple` of 3 :obj:`ints ` 138 | 139 | .. _`Eigen/Core`: http://eigen.tuxfamily.org/dox/QuickRefPage.html#QuickRef_Headers 140 | .. _`fused type`: http://docs.cython.org/src/userguide/fusedtypes.html 141 | 142 | .. include:: definitions.rst 143 | -------------------------------------------------------------------------------- /doc/definitions.rst: -------------------------------------------------------------------------------- 1 | .. Definitions to be shared by other documentation documents. 2 | 3 | .. |scalar| replace:: :obj:`~ceygen.dtype.dtype` 4 | .. |nonint_scalar| replace:: :obj:`~ceygen.dtype.nonint_dtype` 5 | .. |vector| replace:: :obj:`dtype[:] ` 6 | .. |nonint_vector| replace:: :obj:`nonint_dtype[:] ` 7 | .. |matrix| replace:: :obj:`dtype[:, :] ` 8 | .. |nonint_matrix| replace:: :obj:`nonint_dtype[:, :] ` 9 | .. |out| replace:: memory view to write the result to. Specifying this optional argument 10 | means that Ceygen doesn't have to allocate memory for the result (allocating memory 11 | involves acquiring the GIL_ and calling many expensive Python functions). Once 12 | specified, it must must have correct dimensions to store the result of this operation 13 | (otherwise you get :obj:`~exceptions.ValueError`); the same *out* instance will be also 14 | returned. **Warning**: don't repeat *x* (or *y*) here, it `would give incorrect result 15 | without any error`_. Perhaps there's an in-place variant instead? 16 | .. |out_elemwise| replace:: memory view to write the result to. Specifying this optional 17 | argument means that Ceygen doesn't have to allocate memory for the result (allocating 18 | memory involves acquiring the GIL_ and calling many expensive Python functions). 
Once 19 | specified, it must have correct dimensions to store the result of this operation 20 | (otherwise you get :obj:`~exceptions.ValueError`); the same *out* instance will also be 21 | returned. *As an exception from the general rule*, you **may repeat** *x* (or *y*) here 22 | `for this element-wise operation`_. 23 | .. |valueerror| replace:: :obj:`~exceptions.ValueError` if argument dimensions aren't 24 | appropriate for this operation or if arguments are otherwise invalid. 25 | .. |typeerror| replace:: :obj:`~exceptions.TypeError` if you pass an argument that doesn't 26 | support the buffer interface (e.g. a plain list). Preferably use a `Cython memoryview`_; 27 | a :obj:`Python array `, `Cython array`_ or a 28 | :obj:`NumPy array ` will also do. 29 | .. |alwaystrue| replace:: Always :obj:`True` to allow fast exception propagation. 30 | .. |arrayexprs| replace:: This module exists only as a stop-gap until support for 31 | element-wise operations with memoryviews is implemented in Cython. It will be phased 32 | out once a Cython release with Mark Florisson's `array expressions`_ `pull request`_ 33 | merged is available. 34 | 35 | .. _`would give incorrect result without any error`: http://eigen.tuxfamily.org/dox/TopicAliasing.html 36 | .. _`for this element-wise operation`: http://eigen.tuxfamily.org/dox/TopicAliasing.html 37 | .. _`Cython memoryview`: http://docs.cython.org/src/userguide/memoryviews.html 38 | .. _`Cython array`: http://docs.cython.org/src/userguide/memoryviews.html#cython-arrays 39 | .. _`GIL`: http://docs.python.org/glossary.html#term-global-interpreter-lock 40 | .. _`array expressions`: https://github.com/markflorisson88/minivect/raw/master/thesis/thesis.pdf 41 | .. _`pull request`: https://github.com/cython/cython/pull/144 42 | -------------------------------------------------------------------------------- /doc/elemwise.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | Element-wise Operations 3 | ======================= 4 | 5 | This module implements some basic element-wise operations such as addition or division. 6 | 7 | Because aliasing is not a problem for element-wise operations, you can make the operations 8 | in-place simply by repeating *x* or *y* in *out*. The following examples are therefore valid 9 | and produce expected results:: 10 | 11 | ceygen.elemwise.add_mm(x, y, x) 12 | ceygen.elemwise.multiply_vv(a, b, b) 13 | 14 | .. note:: |arrayexprs| 15 | 16 | .. module:: ceygen.elemwise 17 | 18 | Vector-scalar Operations 19 | ======================== 20 | 21 | .. function:: add_vs(x, y[, out=None]) 22 | 23 | Add scalar *y* to each coefficient of vector *x* and return the resulting vector. 24 | 25 | Note: there's no **subtract_vs**, just add the opposite number. 26 | 27 | :param x: first addend (vector) 28 | :type x: |vector| 29 | :param y: second addend (scalar) 30 | :type y: |scalar| 31 | :param out: |out_elemwise| 32 | :type out: |vector| 33 | :raises: |valueerror| 34 | :raises: |typeerror| 35 | :rtype: |vector| 36 | 37 | .. function:: multiply_vs(x, y[, out=None]) 38 | 39 | Multiply each coefficient of vector *x* by scalar *y* and return the resulting vector. 40 | 41 | Note: there's no **divide_vs**, just multiply by the inverse number. 42 | 43 | :param x: first factor (vector) 44 | :type x: |vector| 45 | :param y: second factor (scalar) 46 | :type y: |scalar| 47 | :param out: |out_elemwise| 48 | :type out: |vector| 49 | :raises: |valueerror| 50 | :raises: |typeerror| 51 | :rtype: |vector| 52 |
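To illustrate the vector-scalar operations above (only a sketch; NumPy double arrays are assumed here as convenient buffer-interface inputs and the values are made up)::

    import numpy as np
    import ceygen.elemwise

    x = np.array([1., 2., 3.])
    out = np.empty_like(x)

    ceygen.elemwise.add_vs(x, 10., out)     # out now holds [11., 12., 13.]
    ceygen.elemwise.multiply_vs(x, 0.5, x)  # in-place: x becomes [0.5, 1., 1.5]

Passing *x* itself as *out* is fine here because these are element-wise operations (see the aliasing note at the top of this page).

53 | ..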
function:: power_vs(x, y[, out=None]) 54 | 55 | Compute *y*-th power of each coefficient of vector *x*. 56 | 57 | :param x: base (vector) 58 | :type x: |vector| 59 | :param y: exponent (scalar) 60 | :type y: |scalar| 61 | :param out: |out_elemwise| 62 | :type out: |vector| 63 | :raises: |valueerror| 64 | :raises: |typeerror| 65 | :rtype: |vector| 66 | 67 | Vector-vector Operations 68 | ======================== 69 | 70 | .. function:: add_vv(x, y[, out=None]) 71 | 72 | Vector-vector addition: *x* + *y* 73 | 74 | :param x: first addend 75 | :type x: |vector| 76 | :param y: second addend 77 | :type y: |vector| 78 | :param out: |out_elemwise| 79 | :type out: |vector| 80 | :raises: |valueerror| 81 | :raises: |typeerror| 82 | :rtype: |vector| 83 | 84 | .. function:: subtract_vv(x, y[, out=None]) 85 | 86 | Vector-vector subtraction: *x* - *y* 87 | 88 | :param x: minuend 89 | :type x: |vector| 90 | :param y: subtrahend 91 | :type y: |vector| 92 | :param out: |out_elemwise| 93 | :type out: |vector| 94 | :raises: |valueerror| 95 | :raises: |typeerror| 96 | :rtype: |vector| 97 | 98 | .. function:: multiply_vv(x, y[, out=None]) 99 | 100 | Vector-vector element-wise multiplication: *x* \* *y* 101 | 102 | :param x: first factor 103 | :type x: |vector| 104 | :param y: second factor 105 | :type y: |vector| 106 | :param out: |out_elemwise| 107 | :type out: |vector| 108 | :raises: |valueerror| 109 | :raises: |typeerror| 110 | :rtype: |vector| 111 | 112 | .. function:: divide_vv(x, y[, out=None]) 113 | 114 | Vector-vector element-wise division: *x* / *y* 115 | 116 | :param x: numerator 117 | :type x: |vector| 118 | :param y: denominator 119 | :type y: |vector| 120 | :param out: |out_elemwise| 121 | :type out: |vector| 122 | :raises: |valueerror| 123 | :raises: |typeerror| 124 | :rtype: |vector| 125 | 126 | Matrix-scalar Operations 127 | ======================== 128 | 129 | .. function:: add_ms(x, y[, out=None]) 130 | 131 | Add scalar *y* to each coefficient of matrix *x* and return the resulting matrix. 132 | 133 | Note: there's no **subtract_ms**, just add opposite number. 134 | 135 | :param x: first addend (matrix) 136 | :type x: |matrix| 137 | :param y: second addend (scalar) 138 | :type y: |scalar| 139 | :param out: |out_elemwise| 140 | :type out: |matrix| 141 | :raises: |valueerror| 142 | :raises: |typeerror| 143 | :rtype: |matrix| 144 | 145 | .. function:: multiply_ms(x, y[, out=None]) 146 | 147 | Multiply each coefficient of matrix *x* by scalar *y* and return the resulting matrix. 148 | 149 | Note: there's no **divide_ms**, just multiply by inverse number. 150 | 151 | :param x: first factor (vector) 152 | :type x: |matrix| 153 | :param y: second factor (scalar) 154 | :type y: |scalar| 155 | :param out: |out_elemwise| 156 | :type out: |matrix| 157 | :raises: |valueerror| 158 | :raises: |typeerror| 159 | :rtype: |matrix| 160 | 161 | .. function:: power_ms(x, y[, out=None]) 162 | 163 | Compute *y*-th power of each coefficient of matrix *x*. 164 | 165 | :param x: base (matrix) 166 | :type x: |matrix| 167 | :param y: exponent (scalar) 168 | :type y: |scalar| 169 | :param out: |out_elemwise| 170 | :type out: |matrix| 171 | :raises: |valueerror| 172 | :raises: |typeerror| 173 | :rtype: |matrix| 174 | 175 | Matrix-matrix Operations 176 | ======================== 177 | 178 | .. 
function:: add_mm(x, y[, out=None]) 179 | 180 | Matrix-matrix addition: *x* + *y* 181 | 182 | :param x: first addend 183 | :type x: |matrix| 184 | :param y: second addend 185 | :type y: |matrix| 186 | :param out: |out_elemwise| 187 | :type out: |matrix| 188 | :raises: |valueerror| 189 | :raises: |typeerror| 190 | :rtype: |matrix| 191 | 192 | .. function:: subtract_mm(x, y[, out=None]) 193 | 194 | Matrix-matrix subtraction: *x* - *y* 195 | 196 | :param x: minuend 197 | :type x: |matrix| 198 | :param y: subtrahend 199 | :type y: |matrix| 200 | :param out: |out_elemwise| 201 | :type out: |matrix| 202 | :raises: |valueerror| 203 | :raises: |typeerror| 204 | :rtype: |matrix| 205 | 206 | .. function:: multiply_mm(x, y[, out=None]) 207 | 208 | Matrix-matrix element-wise multiplication: *x* \* *y* 209 | 210 | :param x: first factor 211 | :type x: |matrix| 212 | :param y: second factor 213 | :type y: |matrix| 214 | :param out: |out_elemwise| 215 | :type out: |matrix| 216 | :raises: |valueerror| 217 | :raises: |typeerror| 218 | :rtype: |matrix| 219 | 220 | .. function:: divide_mm(x, y[, out=None]) 221 | 222 | Matrix-matrix element-wise division: *x* / *y* 223 | 224 | :param x: numerator 225 | :type x: |matrix| 226 | :param y: denominator 227 | :type y: |matrix| 228 | :param out: |out_elemwise| 229 | :type out: |matrix| 230 | :raises: |valueerror| 231 | :raises: |typeerror| 232 | :rtype: |matrix| 233 | 234 | .. include:: definitions.rst 235 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | Ceygen API Documentation 3 | ======================== 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | README 9 | ChangeLog 10 | core 11 | elemwise 12 | lu 13 | llt 14 | reductions 15 | HACKING 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /doc/llt.rst: -------------------------------------------------------------------------------- 1 | ======================================== 2 | Cholesky Decomposition-powered Functions 3 | ======================================== 4 | 5 | This module contains algebraic functions powered by the Cholesky matrix decomposition (as 6 | provided by the <`Eigen/Cholesky`_> include). 7 | 8 | .. module:: ceygen.llt 9 | 10 | .. function:: cholesky(x[, out=None]) 11 | 12 | Compute Cholesky decomposition of matrix *x* (which must be square, Hermitian and 13 | positive-definite) so that *x* = *out* \* *out*.H (*out*.H being conjugate transpose of 14 | *out*) 15 | 16 | :param x: matrix to decompose 17 | :type x: |nonint_matrix| 18 | :param out: |out| 19 | :type out: |nonint_matrix| 20 | :raises: |valueerror| 21 | :raises: |typeerror| 22 | :rtype: |nonint_matrix| 23 | 24 | .. _`Eigen/Cholesky`: http://eigen.tuxfamily.org/dox/QuickRefPage.html#QuickRef_Headers 25 | 26 | .. include:: definitions.rst 27 | -------------------------------------------------------------------------------- /doc/lu.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | LU Decomposition-powered Functions 3 | ================================== 4 | 5 | This module contains algebraic functions powered by the LU matrix decomposition (as 6 | provided by the <`Eigen/LU`_> include), most notably matrix inverse and determinant. 7 | 8 | .. 
module:: ceygen.lu 9 | 10 | .. function:: inv(x[, out=None]) 11 | 12 | Return matrix inverse computed using LU decomposition with partial pivoting. It is your 13 | responsibility to ensure that *x* is invertible, otherwise you get an undefined result 14 | without any warning. 15 | 16 | :param x: matrix to invert 17 | :type x: |nonint_matrix| 18 | :param out: |out| 19 | :type out: |nonint_matrix| 20 | :raises: |valueerror| 21 | :raises: |typeerror| 22 | :rtype: |nonint_matrix| 23 | 24 | .. function:: iinv(x) 25 | 26 | Compute matrix inverse using LU decomposition with partial pivoting in-place. Equivalent 27 | to *x* = :obj:`inv(x) `, but without overhead. It is your responsibility to ensure 28 | that *x* is invertible, otherwise you get an undefined result without any warning. 29 | 30 | :param x: matrix to invert in-place 31 | :type x: |nonint_matrix| 32 | :raises: |valueerror| 33 | :raises: |typeerror| 34 | :returns: |alwaystrue| 35 | 36 | .. function:: det(x) 37 | 38 | Compute determinant of a square matrix *x* using LU decomposition. 39 | 40 | :param x: matrix whose determinant to compute 41 | :type x: |matrix| 42 | :raises: |valueerror| 43 | :raises: |typeerror| 44 | :rtype: |scalar| 45 | 46 | .. _`Eigen/LU`: http://eigen.tuxfamily.org/dox/QuickRefPage.html#QuickRef_Headers 47 | 48 | .. include:: definitions.rst 49 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished.
The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Ceygen.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Ceygen.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 
176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /doc/reductions.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Reductions 3 | ========== 4 | 5 | This module provides various reductions from matrices and vectors to scalars and from 6 | matrices to vectors. 7 | 8 | .. module:: ceygen.reductions 9 | 10 | .. function:: sum_v(x) 11 | 12 | Return the sum of the vector *x*. 13 | 14 | :param x: vector to sum up 15 | :type x: |vector| 16 | :raises: |valueerror| 17 | :raises: |typeerror| 18 | :rtype: |scalar| 19 | 20 | .. function:: sum_m(x) 21 | 22 | Return the sum of the matrix *x*. 23 | 24 | :param x: matrix to sum up 25 | :type x: |matrix| 26 | :raises: |valueerror| 27 | :raises: |typeerror| 28 | :rtype: |scalar| 29 | 30 | .. function:: rowwise_sum(x[, out]) 31 | 32 | Compute the sums of the individual rows of matrix *x*. 33 | 34 | :param x: matrix to sum up 35 | :type x: |matrix| 36 | :param out: |out| 37 | :type out: |vector| 38 | :raises: |valueerror| 39 | :raises: |typeerror| 40 | :rtype: |vector| 41 | 42 | .. function:: colwise_sum(x[, out]) 43 | 44 | Compute the sums of the individual columns of matrix *x*. 45 | 46 | :param x: matrix to sum up 47 | :type x: |matrix| 48 | :param out: |out| 49 | :type out: |vector| 50 | :raises: |valueerror| 51 | :raises: |typeerror| 52 | :rtype: |vector| 53 |
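A short usage sketch of the reductions above (illustrative only; NumPy double arrays are assumed as inputs and the values are made up)::

    import numpy as np
    import ceygen.reductions

    A = np.array([[1., 2.],
                  [3., 4.]])

    ceygen.reductions.sum_m(A)                    # 10.0
    np.asarray(ceygen.reductions.rowwise_sum(A))  # array([3., 7.])
    np.asarray(ceygen.reductions.colwise_sum(A))  # array([4., 6.])

54 | ..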
include:: definitions.rst 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from Cython.Build.Dependencies import create_extension_list 5 | 6 | from distutils.core import setup 7 | from os.path import basename, dirname, join, splitext 8 | import re 9 | 10 | from support.dist import CeygenDistribution 11 | 12 | 13 | modules = create_extension_list(['ceygen/*.pyx', 'ceygen/tests/*.pyx']) 14 | for module in modules: 15 | module.language = "c++" 16 | 17 | # list of pxd files that belong to a corresponding module directly in the ceygen package 18 | ceygen_pxds = [splitext(basename(m.sources[0]))[0] + '.pxd' for m in modules if re.match('ceygen\.[^.]*$', m.name)] 19 | 20 | with open(join(dirname(__file__) ,'README.rst')) as file: 21 | long_description = file.read() 22 | 23 | setup( 24 | packages=['ceygen', 'ceygen.tests'], 25 | package_data={'ceygen': ceygen_pxds}, 26 | distclass=CeygenDistribution, 27 | ext_modules=modules, 28 | include_dirs=['/usr/include/eigen3'], # default overridable by setup.cfg 29 | cflags=['-O2', '-march=native', '-fopenmp'], # ditto 30 | ldflags=['-fopenmp'], # ditto 31 | 32 | # meta-data; see http://docs.python.org/distutils/setupscript.html#additional-meta-data 33 | name='Ceygen', 34 | version="0.4-pre", 35 | author='Matěj Laitl', 36 | author_email='matej@laitl.cz', 37 | maintainer='Matěj Laitl', 38 | maintainer_email='matej@laitl.cz', 39 | url='https://github.com/strohel/Ceygen', 40 | description='Cython helper for linear algebra with typed memoryviews built atop the Eigen C++ library', 41 | long_description=long_description, 42 | download_url='http://pypi.python.org/pypi/Ceygen', 43 | platforms='cross-platform', 44 | license='MIT', 45 | classifiers=[ 46 | 'Development Status :: 3 - Alpha', 47 | 'Intended Audience :: Developers', 48 | 'License :: OSI Approved :: MIT License', 49 | 'Operating System :: OS Independent', 50 | 'Programming Language :: C++', 51 | 'Programming Language :: Cython', 52 | 'Programming Language :: Python', 53 | 'Programming Language :: Python :: 2', 54 | 'Programming Language :: Python :: 3', 55 | 'Topic :: Scientific/Engineering :: Mathematics', 56 | 'Topic :: Software Development :: Libraries :: Python Modules', 57 | ], 58 | ) 59 | -------------------------------------------------------------------------------- /support/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | A package with supportive Python modules that are not part of the Ceygen itself, but 5 | faciliate Ceygen building, installation, testing etc. 
6 | """ 7 | -------------------------------------------------------------------------------- /support/compare_ceygen_numpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from mpl_toolkits.mplot3d import Axes3D 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | from fnmatch import fnmatch 9 | from glob import glob 10 | from os.path import splitext 11 | import pickle 12 | import re 13 | 14 | 15 | def natural_sort(l): 16 | convert = lambda text: int(text) if text.isdigit() else text.lower() 17 | alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] 18 | return l.sort(key=alphanum_key) 19 | 20 | def main(): 21 | files = glob('*-*.pickle') 22 | #funcs = set([filename.split('.', 2)[0] for filename in files]) 23 | funcs = ('add_vv', 'det', 'dot_mm', 'dot_mv') 24 | markers = ('x', 'o', 's', 'v') 25 | linestyles = ('-', '--', '-.', ':') 26 | 27 | maxsizeindex = 13 28 | for func in funcs: 29 | funcfiles = [filename for filename in files if fnmatch(filename, func + '.*')] 30 | natural_sort(funcfiles) 31 | sizes = None 32 | Z = [] 33 | for filename in funcfiles: 34 | with open(filename) as f: 35 | contents = pickle.load(f) 36 | if sizes is None: 37 | sizes = contents['sizes'][0:maxsizeindex] 38 | else: 39 | assert sizes == contents['sizes'][0:maxsizeindex] 40 | Z.append(contents['percall'][0:maxsizeindex]) 41 | 42 | variants = [filename.split('.')[1].split('-')[0] for filename in funcfiles] 43 | fig = plt.figure() 44 | fig.canvas.manager.set_window_title(func) 45 | X = range(len(sizes)) 46 | 47 | # compute relative times 48 | for i in range(len(sizes)): 49 | maximum = max((z[i] for z in Z)) 50 | for z in Z: 51 | z[i] /= maximum 52 | 53 | ax = fig.add_subplot(111, xlabel='matrix/vector size (one side)', xticks=X, xticklabels=sizes, 54 | ylabel="relative time per call", title=func) 55 | for (variant, y, marker, linestyle) in zip(variants, Z, markers, linestyles): 56 | ax.plot(X, y, label=variant, marker=marker, linestyle=linestyle) 57 | ax.set_ylim((0, 1.1)) 58 | ax.legend(loc=0) 59 | plt.show() 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /support/dist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from distutils.dist import Distribution 4 | 5 | from .dist_cmd_build_ext import build_ext 6 | from .dist_cmd_test import test 7 | 8 | 9 | class CeygenDistribution(Distribution): 10 | 11 | def __init__(self, attrs): 12 | self.cflags = None # Default CFLAGS overridable by setup.cfg 13 | self.ldflags = None # Default LDFLAGS overridable by setup.cfg 14 | Distribution.__init__(self, attrs) 15 | self.cmdclass['build_ext'] = build_ext 16 | self.cmdclass['test'] = test 17 | -------------------------------------------------------------------------------- /support/dist_cmd_build_ext.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from Cython.Build import cythonize 4 | 5 | import os 6 | from distutils.command.build_ext import build_ext as orig_build_ext 7 | 8 | 9 | class build_ext(orig_build_ext): 10 | 11 | user_options = orig_build_ext.user_options + [ 12 | ('cflags=', None, "specify extra CFLAGS to pass to C and C++ compiler"), 13 | ('ldflags=', None, "specify extra LDFLAGS to pass to linker"), 14 | ('annotate', None, "pass --annotate to Cython when 
building extensions"), 15 | ] 16 | 17 | boolean_options = orig_build_ext.boolean_options + ['annotate'] 18 | 19 | def initialize_options(self): 20 | orig_build_ext.initialize_options(self) 21 | self.cflags = None 22 | self.ldflags = None 23 | self.annotate = None 24 | 25 | def finalize_options(self): 26 | orig_build_ext.finalize_options(self) 27 | ceygenpath = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'ceygen') 28 | self.include_dirs.insert(0, ceygenpath) 29 | 30 | if self.cflags is None: 31 | self.cflags = self.distribution.cflags or [] 32 | if isinstance(self.cflags, str): 33 | self.cflags = self.cflags.split() 34 | 35 | if self.ldflags is None: 36 | self.ldflags = self.distribution.ldflags or [] 37 | if isinstance(self.ldflags, str): 38 | self.ldflags = self.ldflags.split() 39 | 40 | def run(self): 41 | self.distribution.ext_modules = cythonize(self.distribution.ext_modules, 42 | annotate=self.annotate, force=self.force, build_dir=self.build_temp) 43 | self.extensions = self.distribution.ext_modules # orig_build_ext caches the list 44 | orig_build_ext.run(self) 45 | 46 | def build_extension(self, ext): 47 | """HACK to actually apply cflags, ldflags""" 48 | orig_compile_args = ext.extra_compile_args 49 | ext.extra_compile_args = orig_compile_args or [] 50 | ext.extra_compile_args.extend(self.cflags) 51 | orig_link_args = ext.extra_link_args 52 | ext.extra_link_args = orig_link_args or [] 53 | ext.extra_link_args.extend(self.ldflags) 54 | 55 | orig_build_ext.build_extension(self, ext) 56 | 57 | ext.extra_compile_args = orig_compile_args 58 | ext.extra_link_args = orig_link_args 59 | -------------------------------------------------------------------------------- /support/dist_cmd_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | A custom command for distutils to facilitate stress-testing of Ceygen 5 | """ 6 | 7 | from distutils.cmd import Command 8 | from distutils.errors import DistutilsExecError 9 | 10 | from os.path import abspath, dirname, join 11 | import sys 12 | import unittest 13 | 14 | 15 | class test(Command): 16 | """Test Ceygen in the build directory""" 17 | 18 | description = 'run unit test-suite of Ceygen within the build directory' 19 | user_options = [] 20 | 21 | def initialize_options(self): 22 | self.build_lib = None 23 | 24 | def finalize_options(self): 25 | self.set_undefined_options('build', ('build_lib', 'build_lib')) 26 | 27 | def run(self): 28 | self.run_command('build') # build if not alredy run 29 | orig_path = sys.path[:] 30 | try: 31 | build_path = abspath(self.build_lib) 32 | sys.path.insert(0, build_path) 33 | import ceygen.tests as t 34 | assert dirname(t.__file__) == join(build_path, 'ceygen', 'tests') 35 | suite = unittest.TestLoader().loadTestsFromModule(t) 36 | result = unittest.TextTestRunner(verbosity=self.verbose).run(suite) 37 | if not result.wasSuccessful(): 38 | raise Exception("There were test failures") 39 | except Exception as e: 40 | raise DistutilsExecError(e) 41 | finally: 42 | sys.path = orig_path 43 | -------------------------------------------------------------------------------- /support/visualize_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from mpl_toolkits.mplot3d import Axes3D 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | from fnmatch import fnmatch 9 | from glob import glob 10 | from os.path import splitext 11 
| import pickle 12 | import re 13 | 14 | 15 | def natural_sort(l): 16 | convert = lambda text: int(text) if text.isdigit() else text.lower() 17 | alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] 18 | return l.sort(key=alphanum_key) 19 | 20 | def main(): 21 | files = glob('*-*.pickle') 22 | funcs = set([filename.split('-', 2)[0] for filename in files]) 23 | for func in funcs: 24 | funcfiles = [filename for filename in files if fnmatch(filename, func + '-*')] 25 | natural_sort(funcfiles) 26 | sizes = None 27 | Z = [] 28 | for filename in funcfiles: 29 | with open(filename) as f: 30 | contents = pickle.load(f) 31 | if sizes is None: 32 | sizes = contents['sizes'] 33 | else: 34 | assert sizes == contents['sizes'] 35 | Z.append(contents['stats']) 36 | 37 | yticklabels = [splitext(filename)[0][len(func) + 1:] for filename in funcfiles] 38 | fig = plt.figure() 39 | fig.canvas.manager.set_window_title(func) 40 | X = range(len(sizes)) 41 | Y = range(len(yticklabels)) 42 | ax = fig.add_subplot(111, xlabel='size', xticks=X, xticklabels=sizes, yticks=Y, 43 | yticklabels=yticklabels, zlabel="GFLOPS", projection='3d') 44 | X, Y = np.meshgrid(X, Y) 45 | surf = ax.plot_wireframe(X, Y, Z, rstride=1, cstride=1) 46 | plt.show() 47 | 48 | if __name__ == '__main__': 49 | main() 50 | --------------------------------------------------------------------------------