├── .appveyor.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── docs ├── _static │ └── theme_overrides.css ├── _templates │ └── page.html ├── advanced-01.svg ├── advanced-02.svg ├── advanced-03.svg ├── advanced.rst ├── autodiff-01.svg ├── autodiff-02.svg ├── autodiff-03.svg ├── autodiff-04.svg ├── autodiff.rst ├── basics-01.svg ├── basics-02.svg ├── basics-03.svg ├── basics-04.svg ├── basics.rst ├── calls.rst ├── changelog.rst ├── color.rst ├── complex.rst ├── conf.py ├── custom.rst ├── demo.rst ├── dynamic-01.svg ├── dynamic-02.svg ├── dynamic-03.svg ├── dynamic-04.svg ├── dynamic-05.svg ├── dynamic-06.svg ├── dynamic.rst ├── enoki-logo.png ├── enoki-logo.svg ├── gpu.rst ├── half.rst ├── index.rst ├── intro-01.png ├── intro-01.svg ├── intro-02.png ├── intro-02.svg ├── intro-03.png ├── intro.rst ├── matrix.rst ├── morton-01.svg ├── morton.rst ├── nested-01.svg ├── nested-02.svg ├── nested-03.svg ├── nested-04.svg ├── nested-05.svg ├── nested.rst ├── python.rst ├── quaternions.rst ├── random.rst ├── reference.rst ├── requirements.txt ├── sh.rst ├── stl.rst └── transform.rst ├── include └── enoki │ ├── array.h │ ├── array_avx.h │ ├── array_avx2.h │ ├── array_avx512.h │ ├── array_base.h │ ├── array_call.h │ ├── array_enum.h │ ├── array_fallbacks.h │ ├── array_generic.h │ ├── array_idiv.h │ ├── array_intrin.h │ ├── array_kmask.h │ ├── array_macro.h │ ├── array_masked.h │ ├── array_math.h │ ├── array_neon.h │ ├── array_recursive.h │ ├── array_round.h │ ├── array_router.h │ ├── array_sse42.h │ ├── array_static.h │ ├── array_struct.h │ ├── array_traits.h │ ├── array_utils.h │ ├── autodiff.h │ ├── color.h │ ├── complex.h │ ├── cuda.h │ ├── dynamic.h │ ├── fwd.h │ ├── half.h │ ├── matrix.h │ ├── morton.h │ ├── python.h │ ├── quaternion.h │ ├── random.h │ ├── sh.h │ ├── special.h │ ├── stl.h │ └── transform.h ├── resources ├── FindSphinx.cmake ├── __init__.py ├── archflags_unix.cpp ├── archflags_win32.cpp ├── check-style.sh ├── enoki_gdb.py 
└── enoki_lldb.py ├── src ├── autodiff │ └── autodiff.cpp ├── cuda │ ├── common.cu │ ├── common.cuh │ ├── horiz.cu │ └── jit.cu └── python │ ├── common.h │ ├── complex.h │ ├── cuda.cpp │ ├── cuda_0d.cpp │ ├── cuda_1d.cpp │ ├── cuda_2d.cpp │ ├── cuda_3d.cpp │ ├── cuda_4d.cpp │ ├── cuda_autodiff.cpp │ ├── cuda_autodiff_0d.cpp │ ├── cuda_autodiff_1d.cpp │ ├── cuda_autodiff_2d.cpp │ ├── cuda_autodiff_3d.cpp │ ├── cuda_autodiff_4d.cpp │ ├── cuda_autodiff_complex.cpp │ ├── cuda_autodiff_matrix.cpp │ ├── cuda_complex.cpp │ ├── cuda_matrix.cpp │ ├── cuda_pcg32.cpp │ ├── docstr.h │ ├── dynamic.cpp │ ├── dynamic_0d.cpp │ ├── dynamic_1d.cpp │ ├── dynamic_2d.cpp │ ├── dynamic_3d.cpp │ ├── dynamic_4d.cpp │ ├── dynamic_complex.cpp │ ├── dynamic_matrix.cpp │ ├── dynamic_pcg32.cpp │ ├── main.cpp │ ├── matrix.h │ ├── quat.h │ ├── random.h │ ├── scalar.cpp │ ├── scalar_0d.cpp │ ├── scalar_1d.cpp │ ├── scalar_2d.cpp │ ├── scalar_3d.cpp │ ├── scalar_4d.cpp │ ├── scalar_complex.cpp │ ├── scalar_matrix.cpp │ ├── scalar_pcg32.cpp │ └── scalar_quat.cpp └── tests ├── CMakeLists.txt ├── autodiff.cpp ├── basic.cpp ├── call.cpp ├── color.cpp ├── complex.cpp ├── conv.cpp ├── custom.cpp ├── dynamic.cpp ├── explog.cpp ├── float.cpp ├── histogram.cpp ├── horiz.cpp ├── hyperbolic.cpp ├── idiv.cpp ├── integer.cpp ├── memory.cpp ├── memory2.cpp ├── morton.cpp ├── nested.cpp ├── python ├── test.cpp └── test_pytorch.py ├── ray.h ├── sh.cpp ├── special.cpp ├── sphere.cpp ├── test.h ├── trig.cpp └── vector.cpp /.appveyor.yml: -------------------------------------------------------------------------------- 1 | version: 1.0.{build} 2 | image: 3 | - Visual Studio 2017 4 | test: off 5 | skip_branch_with_pr: true 6 | platform: 7 | - x64 8 | matrix: 9 | fast_finish: true 10 | configuration: 11 | - Debug 12 | - Release 13 | environment: 14 | matrix: 15 | - TEST_NAME: basic 16 | - TEST_NAME: call 17 | - TEST_NAME: color 18 | - TEST_NAME: complex 19 | - TEST_NAME: conv 20 | - TEST_NAME: custom 21 | - TEST_NAME: 
dynamic 22 | - TEST_NAME: explog 23 | - TEST_NAME: float 24 | - TEST_NAME: histogram 25 | - TEST_NAME: horiz 26 | - TEST_NAME: hyperbolic 27 | - TEST_NAME: idiv 28 | - TEST_NAME: integer 29 | - TEST_NAME: memory 30 | - TEST_NAME: memory2 31 | - TEST_NAME: morton 32 | - TEST_NAME: nested 33 | - TEST_NAME: sh 34 | - TEST_NAME: special 35 | - TEST_NAME: sphere 36 | - TEST_NAME: trig 37 | - TEST_NAME: vector 38 | build: 39 | parallel:true 40 | build_script: 41 | - set preferredToolArchitecture=x64 42 | - cmake -G "Visual Studio 15 2017" -A "x64" -DCMAKE_SUPPRESS_REGENERATION=1 -DENOKI_TEST=1 -DENOKI_TEST_NAME=%TEST_NAME% . 43 | - set MSBuildLogger="C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" 44 | - cmake --build . --config %CONFIGURATION% -- /v:m /m /logger:%MSBuildLogger% 45 | - ctest 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # CMake 2 | /CMakeCache.txt 3 | /CPackConfig.cmake 4 | /CPackSourceConfig.cmake 5 | CMakeFiles 6 | cmake_install.cmake 7 | CTestTestfile.cmake 8 | CPackConfig.cmake 9 | CPackSourceConfig.cmake 10 | Makefile 11 | 12 | # Ninja 13 | .ninja_deps 14 | .ninja_log 15 | *.ninja 16 | 17 | # Miscellaneous; 18 | /build* 19 | /Testing 20 | /tests/Testing 21 | *.dir 22 | *~ 23 | \.DS_Store 24 | *.dSYM 25 | 26 | # Visual Studio 27 | *.vcxproj 28 | enoki.sdf 29 | enoki.sln 30 | enoki.opensdf 31 | enoki.VC.VC.opendb 32 | enoki.VC.db 33 | Debug 34 | Release 35 | *.filters 36 | /.vs 37 | /x64 38 | 39 | # Visual Studio Code 40 | /.vscode 41 | 42 | # Build products 43 | *_none 44 | *_sse42 45 | *_avx 46 | *_avx2 47 | *_avx512_knl 48 | *_avx512_skx 49 | *_neon 50 | *.ppm 51 | libenoki-cuda.so 52 | libenoki-cuda.dylib 53 | enoki-cuda.dll 54 | libenoki-autodiff.so 55 | libenoki-autodiff.dylib 56 | enoki-autodiff.dll 57 | enoki.cpython* 58 | /enoki 59 | compile_commands.json 60 | \.clangd 61 | 62 | # 
Documentation 63 | /html 64 | /docs/.build 65 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ext/cub"] 2 | path = ext/cub 3 | url = https://github.com/NVlabs/cub 4 | 5 | [submodule "ext/pybind11"] 6 | path = ext/pybind11 7 | url = https://github.com/pybind/pybind11 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Wenzel Jakob , All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | You are under no obligation whatsoever to provide any bug fixes, patches, or 29 | upgrades to the features, functionality or performance of the source code 30 | ("Enhancements") to anyone; however, if you choose to make your Enhancements 31 | available either publicly, or directly to the author of this software, without 32 | imposing a separate written license agreement for such Enhancements, then you 33 | hereby grant the following license: a non-exclusive, royalty-free perpetual 34 | license to install, use, modify, prepare derivative works, incorporate into 35 | other computer software, distribute, and sublicense such enhancements or 36 | derivative works thereof, in binary and source code form. 
37 | -------------------------------------------------------------------------------- /docs/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | table.docutils td, table.docutils th { 2 | padding:.5em; 3 | } 4 | 5 | table.docutils th { 6 | vertical-align:middle; 7 | } 8 | 9 | .toggle .header { 10 | display: block; 11 | clear: both; 12 | padding-bottom: 1em; 13 | } 14 | 15 | .toggle .header:after { 16 | content: " ▼"; 17 | } 18 | 19 | .toggle .header.open:after { 20 | content: " ▲"; 21 | } 22 | 23 | .MathJax .mo { color: inherit } 24 | .MathJax .mi { color: inherit } 25 | 26 | div.sphinxsidebar h4 { 27 | font-family: "Open Sans", Helvetica, Arial, sans-serif; 28 | font-size: 14px; 29 | font-weight: bold; 30 | text-transform: uppercase; 31 | color: #606060; 32 | } 33 | 34 | div.sphinxsidebar { 35 | margin: 1em; 36 | } 37 | 38 | div.sidebar-wrapper { 39 | padding:0px; 40 | } 41 | -------------------------------------------------------------------------------- /docs/_templates/page.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "!page.html" %} 3 | 4 | {% block footer %} 5 | 15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | Change log 2 | ========== 3 | 4 | Version 0.1.0 (Sep 2, 2019) 5 | ------------------- 6 | 7 | - First public release of Enoki 8 | -------------------------------------------------------------------------------- /docs/color.rst: -------------------------------------------------------------------------------- 1 | .. cpp:namespace:: enoki 2 | 3 | Color space transformations 4 | =========================== 5 | 6 | Enoki provides a set of helper functions for color space transformations. For 7 | now, only sRGB and inverse sRGB gamma correction are available. 
To use them, 8 | include the following header file: 9 | 10 | .. code-block:: cpp 11 | 12 | #include 13 | 14 | 15 | Functions 16 | ********* 17 | 18 | .. cpp:function:: template Value linear_to_srgb(Value value) 19 | 20 | Efficiently applies the sRGB gamma correction 21 | 22 | .. math :: 23 | 24 | x\mapsto\begin{cases}12.92x,&x\leq 0.0031308\\1.055x^{1/2.4}-0.055,&x>0.0031308\end{cases} 25 | 26 | to an input value in the interval :math:`(0, 1)`. 27 | 28 | .. cpp:function:: template Value srgb_to_linear(Value value) 29 | 30 | Efficiently applies the inverse sRGB gamma correction 31 | 32 | .. math :: 33 | 34 | x\mapsto{\begin{cases}{\frac {x}{12.92}},&x\leq 0.04045\\\left({\frac {x+0.055}{1.055}}\right)^{2.4},&x>0.04045\end{cases}} 35 | 36 | to an input value in the interval :math:`(0, 1)`. 37 | 38 | -------------------------------------------------------------------------------- /docs/enoki-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitsuba-renderer/enoki/141cf4bd18eee674841a7c3e3c28f3db44adc6fa/docs/enoki-logo.png -------------------------------------------------------------------------------- /docs/half.rst: -------------------------------------------------------------------------------- 1 | .. cpp:namespace:: enoki 2 | 3 | Half-precision floats 4 | ===================== 5 | 6 | Enoki provides a compact implementation of a 16 bit *half-precision* floating 7 | point type that is compatible with the FP16 format on GPUs and high dynamic 8 | range image libraries such as OpenEXR. To use this feature, include the 9 | following header: 10 | 11 | .. code-block:: cpp 12 | 13 | #include 14 | 15 | 16 | Current processors don't natively implement half precision arithmetic, hence 17 | mathematical operations involving this type always involve a 18 | ``half``:math:`\to` ``float``:math:`\to` ``half`` roundtrip. For this reason, 19 | it is unwise to rely on it for expensive parts of a computation. 
20 | 21 | The main reason for including a dedicated half precision type in Enoki is that 22 | it provides an ideal storage format for floating point data that does not 23 | require the full accuracy of the single precision representation, which leads 24 | to immediate storage savings of :math:`2\times`. 25 | 26 | .. note:: 27 | 28 | If supported by the target architecture, Enoki uses the *F16C* instruction 29 | set to perform efficient vectorized conversion between half and single 30 | precision variables (however, this only affects conversion and no other 31 | arithmetic operations). ARM NEON also provides native conversion 32 | instructions. 33 | 34 | Usage 35 | ----- 36 | 37 | The following example shows how to use the :cpp:class:`enoki::half` type in a 38 | typical use case. 39 | 40 | .. code-block:: cpp 41 | 42 | using Color4f = Array<float, 4>; 43 | using Color4h = Array<half, 4>; 44 | 45 | uint8_t *image_ptr = ...; 46 | 47 | Color4f pixel(load<Color4h>(image_ptr)); // <- conversion vectorized using F16C 48 | 49 | /* ... update 'pixel' using single-precision arithmetic ... */ 50 | 51 | store(image_ptr, Color4h(pixel)); // <- conversion vectorized using F16C 52 | 53 | Reference 54 | --------- 55 | 56 | .. cpp:class:: half 57 | 58 | A :cpp:class:`half` instance encodes a sign bit, an exponent width of 5 59 | bits, and 10 explicitly stored mantissa bits. 60 | 61 | All standard mathematical operators are overloaded and implemented using 62 | the processor's floating point unit after a conversion to IEEE 754 single 63 | precision. The result of the operation is then converted back to half 64 | precision. 65 | 66 | .. cpp:var:: uint16_t value 67 | 68 | Stores the represented half precision value as an unsigned 16-bit integer. 69 | 70 | .. cpp:function:: half(float value) 71 | 72 | Constructs a half-precision value from the given single precision 73 | argument. 74 | 75 | .. cpp:function:: operator float() const 76 | 77 | Implicit ``half`` to ``float`` conversion operator. 78 | 79 | .. 
cpp:function:: static half from_binary(uint16_t value) 80 | 81 | Reinterpret a 16-bit unsigned integer as a half-precision variable. 82 | 83 | .. cpp:function:: half operator+(half h) const 84 | 85 | Addition operator. 86 | 87 | .. cpp:function:: half& operator+=(half h) 88 | 89 | Addition compound assignment operator. 90 | 91 | .. cpp:function:: half operator-() const 92 | 93 | Unary minus operator 94 | 95 | .. cpp:function:: half operator*(half h) const 96 | 97 | Multiplication operator. 98 | 99 | .. cpp:function:: half& operator*=(half h) 100 | 101 | Multiplication compound assignment operator. 102 | 103 | .. cpp:function:: half operator/(half h) const 104 | 105 | Division operator. 106 | 107 | .. cpp:function:: half& operator/=(half h) 108 | 109 | Division compound assignment operator. 110 | 111 | .. cpp:function:: bool operator<(half h) const 112 | 113 | Less-than comparison operator. 114 | 115 | .. cpp:function:: bool operator<=(half h) const 116 | 117 | Less-than-or-equal comparison operator. 118 | 119 | .. cpp:function:: bool operator>(half h) const 120 | 121 | Greater-than comparison operator. 122 | 123 | .. cpp:function:: bool operator>=(half h) const 124 | 125 | Greater-than-or-equal comparison operator. 126 | 127 | .. cpp:function:: bool operator==(half h) const 128 | 129 | Equality operator. 130 | 131 | .. cpp:function:: bool operator!=(half h) const 132 | 133 | Inequality operator. 134 | 135 | .. cpp:function:: friend std::ostream& operator<<(std::ostream &os, const half &h) 136 | 137 | Stream insertion operator. 138 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Enoki: structured vectorization and differentiation on modern processor architectures 2 | ====================================================================================== 3 | 4 | .. only:: not latex 5 | 6 | .. 
image:: enoki-logo.svg 7 | :width: 400px 8 | :align: center 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | :caption: Core features 13 | 14 | intro 15 | demo 16 | basics 17 | nested 18 | gpu 19 | autodiff 20 | calls 21 | custom 22 | dynamic 23 | advanced 24 | changelog 25 | reference 26 | → GitHub 27 | 28 | .. toctree:: 29 | :maxdepth: 1 30 | :caption: Extras 31 | 32 | random 33 | morton 34 | complex 35 | quaternions 36 | matrix 37 | transform 38 | sh 39 | color 40 | half 41 | stl 42 | python 43 | 44 | -------------------------------------------------------------------------------- /docs/intro-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitsuba-renderer/enoki/141cf4bd18eee674841a7c3e3c28f3db44adc6fa/docs/intro-01.png -------------------------------------------------------------------------------- /docs/intro-01.svg: -------------------------------------------------------------------------------- 1 | soastruct Vector3f { float x; float y; float z;};Vector3f vectors[N];struct Vector3fP { float x[N]; float y[N]; float z[N];};Vector3fP vectors;Array of Structures”Structure of Arrays” -------------------------------------------------------------------------------- /docs/intro-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitsuba-renderer/enoki/141cf4bd18eee674841a7c3e3c28f3db44adc6fa/docs/intro-02.png -------------------------------------------------------------------------------- /docs/intro-02.svg: -------------------------------------------------------------------------------- 1 | soa2inline __m128 sample_tea(__m128i v0, __m128i v1) {    const __m128i C1 = _mm_set1_epi32(0xA341316C);    const __m128i C2 = _mm_set1_epi32(0xC8013EA4);    const __m128i C3 = _mm_set1_epi32(0xAD90777D);    const __m128i C4 = _mm_set1_epi32(0x7E95761E);    __m128i sum =_mm_set1_epi32(0x9e3779b9);    v0 = _mm_add_epi32(v0, _mm_xor_si128(    
    _mm_xor_si128(            _mm_add_epi32(_mm_slli_epi32(v1, 4), C1),            _mm_add_epi32(v1, sum)),            _mm_add_epi32(_mm_srli_epi32(v1, 5), C2)));    v1 = _mm_add_epi32(v1, _mm_xor_si128(        _mm_xor_si128(            _mm_add_epi32(_mm_slli_epi32(v0, 4), C3),            _mm_add_epi32(v0, sum)),            _mm_add_epi32(_mm_srli_epi32(v0, 5), C4)));    .... -------------------------------------------------------------------------------- /docs/intro-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitsuba-renderer/enoki/141cf4bd18eee674841a7c3e3c28f3db44adc6fa/docs/intro-03.png -------------------------------------------------------------------------------- /docs/morton-01.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | 15 | 24 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /docs/morton.rst: -------------------------------------------------------------------------------- 1 | .. cpp:namespace:: enoki 2 | 3 | Morton/Z-order indexing 4 | ======================= 5 | 6 | Enoki provides efficient support for encoding and decoding of Morton/Z-order 7 | indices of arbitrary dimension. Both scalar indices and index vectors are 8 | supported. Z-order indexing can improve the locality accesses when two- or 9 | higher-dimensional data is arranged in memory. 10 | 11 | .. figure:: morton-01.svg 12 | :width: 300px 13 | :align: center 14 | 15 | (Figure by `David Eppstein `_) 16 | 17 | To use this feature, include the following header: 18 | 19 | .. code-block:: cpp 20 | 21 | #include 22 | 23 | 24 | Usage 25 | ----- 26 | 27 | The following shows a round trip, encoding a 32-bit position as a Morton index 28 | and decoding it again. 29 | 30 | .. 
code-block:: cpp 31 | 32 | using Vector2u = Array<uint32_t, 2>; 33 | 34 | Vector2u pos(123u, 456u); 35 | uint32_t encoded = morton_encode(pos); 36 | Vector2u decoded = morton_decode<Vector2u>(encoded); 37 | 38 | std::cout << "Original : " << pos << std::endl; 39 | std::cout << "Encoded : " << encoded << std::endl; 40 | std::cout << "Decoded : " << decoded << std::endl; 41 | 42 | /* Prints: 43 | Original : [123, 456] 44 | Encoded : 177605 45 | Decoded : [123, 456] 46 | */ 47 | 48 | Depending on hardware support, Enoki implements these operations using BMI2 49 | instructions or bit shifts with precomputed magic constants. 50 | 51 | The same also works for nested vectors: 52 | 53 | .. code-block:: cpp 54 | 55 | using UInt32P = Packet<uint32_t, 8>; 56 | using Vector2uP = Array<UInt32P, 2>; 57 | 58 | Vector2uP pos(123u, 456u); 59 | UInt32P encoded = morton_encode(pos); 60 | Vector2uP decoded = morton_decode<Vector2uP>(encoded); 61 | 62 | std::cout << "Original : " << pos << std::endl; 63 | std::cout << "Encoded : " << encoded << std::endl; 64 | std::cout << "Decoded : " << decoded << std::endl; 65 | 66 | /* Prints: 67 | Original : [[123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456]] 68 | Encoded : [177605, 177605, 177605, 177605, 177605, 177605, 177605, 177605] 69 | Decoded : [[123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456]] 70 | */ 71 | 72 | Reference 73 | --------- 74 | 75 | .. cpp:function:: template <typename Array> value_t<Array> morton_encode(Array array) 76 | 77 | Converts a potentially nested N-dimensional array into its corresponding 78 | Morton/Z-order index by interleaving the bits of the component values. The 79 | array must have an unsigned integer as its underlying scalar type. 80 | 81 | .. cpp:function:: template <typename Array> Array morton_decode(value_t<Array> array) 82 | 83 | Converts a Morton/Z-order index or index array into a potentially nested 84 | N-dimensional array. The array must have an unsigned integer as its 85 | underlying scalar type. 
86 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | guzzle_sphinx_theme 2 | pygments-mathematica 3 | -------------------------------------------------------------------------------- /docs/sh.rst: -------------------------------------------------------------------------------- 1 | .. cpp:namespace:: enoki 2 | 3 | Spherical harmonics 4 | =================== 5 | 6 | Enoki can efficiently evaluate real spherical harmonics basis functions for 7 | both scalar and vector arguments. To use this feature, include the following 8 | header file: 9 | 10 | .. code-block:: cpp 11 | 12 | #include <enoki/sh.h> 13 | 14 | The evaluation routines rely on efficient pre-generated branch-free code 15 | processed using aggressive constant folding and common subexpression 16 | elimination passes. Evaluation routines are provided up to and including order 9. 17 | 18 | The generated code is based on the paper `Efficient Spherical Harmonic 19 | Evaluation `_, *Journal of Computer 20 | Graphics Techniques (JCGT)*, vol. 2, no. 2, 84-90, 2013 by `Peter-Pike Sloan 21 | `_. 22 | 23 | .. note:: 24 | 25 | The directions provided to ``sh_eval_*`` must be normalized 3D vectors 26 | (i.e. using Cartesian instead of spherical coordinates). 27 | 28 | The Mathematica equivalent of the real spherical harmonic basis implemented 29 | in :file:`enoki/sh.h` is given by the following definition: 30 | 31 | .. 
code-block:: wolfram-language 32 | 33 | SphericalHarmonicQ[l_, m_, d_] := Block[{θ, ϕ}, 34 | θ = ArcCos[d[[3]]]; 35 | ϕ = ArcTan[d[[1]], d[[2]]]; 36 | Piecewise[{ 37 | {SphericalHarmonicY[l, m, θ, ϕ], m == 0}, 38 | {Sqrt[2] * Re[SphericalHarmonicY[l, m, θ, ϕ]], m > 0}, 39 | {Sqrt[2] * Im[SphericalHarmonicY[l, -m, θ, ϕ]], m < 0} 40 | }] 41 | ] 42 | 43 | Usage 44 | ----- 45 | 46 | The following example shows how to evaluate the spherical harmonics basis up to 47 | and including order 2 producing a total of 9 function evaluations. 48 | 49 | .. code-block:: cpp 50 | 51 | using Vector3f = Array; 52 | Vector3f d = normalize(Vector3f(1, 2, 3)); 53 | 54 | float coeffs[9]; 55 | sh_eval(d, 2, coeffs); 56 | 57 | // Prints: [0.282095, -0.261169, 0.391754, -0.130585, 0.156078, -0.468235, 0.292864, -0.234118, -0.117059] 58 | std::cout << load>(coeffs) << std::endl; 59 | 60 | 61 | Reference 62 | --------- 63 | 64 | .. cpp:function:: template void sh_eval(const Array &d, size_t order, expr_t> *out) 65 | 66 | Evaluates the real spherical harmonics basis functions up to and including 67 | order ``order``. The output array must have room for ``(order + 1)*(order + 68 | 1)`` entries. This function dispatches to one of the ``sh_eval_*`` 69 | implementations and throws an exception if ``order > 9``. 70 | 71 | .. cpp:function:: template void sh_eval_0(const Array &d, expr_t> *out) 72 | 73 | Evaluates the real spherical harmonics basis functions up to and including 74 | order 0. The output array must have room for ``1`` entry. 75 | 76 | .. cpp:function:: template void sh_eval_1(const Array &d, expr_t> *out) 77 | 78 | Evaluates the real spherical harmonics basis functions up to and including 79 | order 1. The output array must have room for ``4`` entries. 80 | 81 | .. cpp:function:: template void sh_eval_2(const Array &d, expr_t> *out) 82 | 83 | Evaluates the real spherical harmonics basis functions up to and including 84 | order 2. The output array must have room for ``9`` entries. 
85 | 86 | .. cpp:function:: template void sh_eval_3(const Array &d, expr_t> *out) 87 | 88 | Evaluates the real spherical harmonics basis functions up to and including 89 | order 3. The output array must have room for ``16`` entries. 90 | 91 | .. cpp:function:: template void sh_eval_4(const Array &d, expr_t> *out) 92 | 93 | Evaluates the real spherical harmonics basis functions up to and including 94 | order 4. The output array must have room for ``25`` entries. 95 | 96 | .. cpp:function:: template void sh_eval_5(const Array &d, expr_t> *out) 97 | 98 | Evaluates the real spherical harmonics basis functions up to and including 99 | order 5. The output array must have room for ``36`` entries. 100 | 101 | .. cpp:function:: template void sh_eval_6(const Array &d, expr_t> *out) 102 | 103 | Evaluates the real spherical harmonics basis functions up to and including 104 | order 6. The output array must have room for ``49`` entries. 105 | 106 | .. cpp:function:: template void sh_eval_7(const Array &d, expr_t> *out) 107 | 108 | Evaluates the real spherical harmonics basis functions up to and including 109 | order 7. The output array must have room for ``64`` entries. 110 | 111 | .. cpp:function:: template void sh_eval_8(const Array &d, expr_t> *out) 112 | 113 | Evaluates the real spherical harmonics basis functions up to and including 114 | order 8. The output array must have room for ``81`` entries. 115 | 116 | .. cpp:function:: template void sh_eval_9(const Array &d, expr_t> *out) 117 | 118 | Evaluates the real spherical harmonics basis functions up to and including 119 | order 9. The output array must have room for ``100`` entries. 120 | -------------------------------------------------------------------------------- /docs/stl.rst: -------------------------------------------------------------------------------- 1 | .. 
cpp:namespace:: enoki 2 | 3 | Standard Template Library 4 | ========================= 5 | 6 | When Enoki extracts packets or slices through custom data structures, it also 7 | handles STL data structures including ``std::array``, ``std::pair``, and 8 | ``std::tuple``. Please review the section on :ref:`dynamic arrays ` 9 | for general details on vectorizing over dynamic arrays and working with slices. 10 | 11 | To use this feature, include the following header file: 12 | 13 | .. code-block:: cpp 14 | 15 | #include <enoki/stl.h> 16 | 17 | Usage 18 | ----- 19 | 20 | Consider the following example, where a function returns a ``std::tuple`` 21 | containing a 3D position and a mask specifying whether the computation was 22 | successful. When the :file:`enoki/stl.h` header file is included, Enoki's 23 | dynamic vectorization machinery can be applied to vectorize such functions over 24 | arbitrarily large inputs. 25 | 26 | .. code-block:: cpp 27 | :emphasize-lines: 2,3,4,5,6,30,36 28 | 29 | /// Return value of 'my_function' 30 | template 31 | using Return = std::tuple< 32 | Array, 33 | mask_t 34 | >; 35 | 36 | template Return my_function(T theta, T phi) { 37 | /* Turn spherical -> cartesian coordinates */ 38 | Array pos( 39 | sin(theta) * cos(phi), 40 | sin(theta) * sin(phi), 41 | cos(theta) 42 | ); 43 | 44 | /* Only points on the top hemisphere are 'valid' */ 45 | return std::make_pair(pos, pos.z() > 0); 46 | } 47 | 48 | /// Packet of floats 49 | using FloatP = Packet; 50 | 51 | /// Arbitrarily large sequence of floats 52 | using FloatX = DynamicArray; 53 | 54 | /// Tuple containing a packet of results 55 | using ReturnP = Return; 56 | 57 | /// Tuple containing dynamic arrays with arbitrarily many results 58 | using ReturnX = Return; 59 | 60 | int main(int argc, char *argv[]) { 61 | FloatX theta = linspace(-10.f, 10.f, 10); 62 | FloatX phi = linspace(0.f, 60.f, 10); 63 | 64 | ReturnX result = vectorize(my_function, theta, phi); 65 | 66 | /* Prints: 67 | [[0.544021, 0, -0.839072], 
68 | [-0.924676, -0.373065, 0.0761302], 69 | [0.478888, 0.461548, 0.746753], 70 | [0.0777672, 0.173978, -0.981674], 71 | [-0.0330365, -0.895583, 0.443666], 72 | [-0.304446, 0.842896, 0.443666], 73 | [0.127097, -0.141995, -0.981674], 74 | [0.596782, -0.293616, 0.746753], 75 | [-0.994388, 0.0734624, 0.0761293], 76 | [0.518133, 0.165823, -0.839072]] 77 | */ 78 | 79 | std::cout << std::get<0>(result) << std::endl; 80 | 81 | /* Prints: 82 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] 83 | */ 84 | std::cout << std::get<1>(result) << std::endl; 85 | } 86 | -------------------------------------------------------------------------------- /docs/transform.rst: -------------------------------------------------------------------------------- 1 | .. cpp:namespace:: enoki 2 | 3 | Homogeneous transformations 4 | =========================== 5 | 6 | Enoki provides a number of convenience functions to construct 3D homogeneous 7 | coordinate transformations (rotations, translations, scales, perspective 8 | transformation matrices, etc.). To use them, include the following header file: 9 | 10 | .. code-block:: cpp 11 | 12 | #include 13 | 14 | Reference 15 | --------- 16 | 17 | .. cpp:function:: template Matrix translate(Vector3 v) 18 | 19 | Constructs a homogeneous coordinate transformation, which translates points by ``v``. 20 | 21 | .. cpp:function:: template Matrix scale(Vector3 v) 22 | 23 | Constructs a homogeneous coordinate transformation, which scales points by ``v``. 24 | 25 | .. cpp:function:: template Matrix rotate(Vector3 v, Float angle) 26 | 27 | Constructs a homogeneous coordinate transformation, which rotates by ``angle`` radians 28 | around the axis ``v``. The function requires ``v`` to be normalized. 29 | 30 | .. cpp:function:: template auto transform_decompose(Matrix m) 31 | 32 | Performs a polar decomposition of a non-perspective 4x4 homogeneous 33 | coordinate matrix and returns a tuple of 34 | 35 | 1. 
A positive definite 3x3 matrix containing an inhomogeneous scaling operation 36 | 37 | 2. A rotation quaternion 38 | 39 | 3. A 3D translation vector 40 | 41 | This representation is helpful when working with keyframe animations. 42 | 43 | The function also handles singular inputs ``m``, in which case the rotation 44 | component is set to the identity quaternion and the scaling part simply 45 | copies the input matrix. 46 | 47 | .. cpp:function:: template auto transform_compose(Matrix3 scale, Quaternion rotation, Vector3 translate) 48 | 49 | This function composes a 4x4 homogeneous coordinate transformation from the 50 | given scale, rotation, and translation. It performs the reverse of 51 | ``transform_decompose``. 52 | 53 | .. cpp:function:: template auto transform_compose_inverse(Matrix3 scale, Quaternion rotation, Vector3 translate) 54 | 55 | This function composes a 4x4 homogeneous *inverse* coordinate 56 | transformation from the given scale, rotation, and translation. It is 57 | equivalent to (but more efficient than) the expression 58 | ``inverse(transform_compose(...))``. 59 | 60 | .. cpp:function:: template Matrix look_at(Point3 origin, Point3 target, Vector3 up) 61 | 62 | Constructs a homogeneous coordinate transformation, which translates to 63 | :math:`\mathrm{origin}`, maps the negative :math:`z` axis to 64 | :math:`\mathrm{target}-\mathrm{origin}` (normalized) and the positive 65 | :math:`y` axis to :math:`\mathrm{up}` (if orthogonal to 66 | :math:`\mathrm{target}-\mathrm{origin}`). The algorithm performs 67 | Gram-Schmidt orthogonalization to ensure that the returned matrix is 68 | orthonormal. 69 | 70 | .. cpp:function:: template Matrix perspective(Float fov, Float near, Float far) 71 | 72 | Constructs an OpenGL-compatible perspective projection matrix with the 73 | specified field of view (in radians) and near and far clip planes. The 74 | returned matrix performs the transformation 75 | 76 | .. 
math:: 77 | 78 | \begin{pmatrix} 79 | x\\y\\z\end{pmatrix} 80 | \mapsto 81 | \begin{pmatrix} 82 | -c\,x/z\\ -c\,y/z\\ 83 | \frac{2\,\mathrm{far}\,\mathrm{near}\,+\,z\,(\mathrm{far}+\mathrm{near})}{z\, (\mathrm{far}-\mathrm{near})} 84 | \end{pmatrix}, 85 | 86 | where 87 | 88 | .. math:: 89 | 90 | c = \mathrm{cot}\!\left(0.5\, \textrm{fov}\right), 91 | 92 | which maps :math:`(0, 0, -\mathrm{near})^T` to :math:`(0, 0, -1)^T` and 93 | :math:`(0, 0, -\mathrm{far})^T` to :math:`(0, 0, 1)^T`. 94 | 95 | .. cpp:function:: template Matrix frustum(Float left, Float right, Float bottom, Float top, Float near, Float far) 96 | 97 | Constructs an OpenGL-compatible perspective projection matrix. The provided 98 | parameters specify the intersection of the camera frustum with the near 99 | clipping plane. Specifically, the returned transformation maps 100 | :math:`(\mathrm{left}, \mathrm{bottom}, -\mathrm{near})` to :math:`(-1, -1, 101 | -1)` and :math:`(\mathrm{right}, \mathrm{top}, -\mathrm{near})` to 102 | :math:`(1, 1, -1)`. 103 | 104 | .. cpp:function:: template Matrix ortho(Float left, Float right, Float bottom, Float top, Float near, Float far) 105 | 106 | Constructs an OpenGL-compatible orthographic projection matrix. The 107 | provided parameters specify the intersection of the camera frustum with the 108 | near clipping plane. Specifically, the returned transformation maps 109 | :math:`(\mathrm{left}, \mathrm{bottom}, -\mathrm{near})` to :math:`(-1, -1, 110 | -1)` and :math:`(\mathrm{right}, \mathrm{top}, -\mathrm{near})` to 111 | :math:`(1, 1, -1)`. 112 | -------------------------------------------------------------------------------- /include/enoki/array_enum.h: -------------------------------------------------------------------------------- 1 | /* 2 | enoki/array_enum.h -- Support for Enoki arrays of enumeration 3 | types 4 | 5 | Copyright (c) 2019 Wenzel Jakob 6 | 7 | All rights reserved. 
Use of this source code is governed by a BSD-style 8 | license that can be found in the LICENSE file. 9 | */ 10 | 11 | #pragma once 12 | 13 | NAMESPACE_BEGIN(enoki) 14 | 15 | template 16 | struct StaticArrayImpl::use_enum_impl>> 18 | : StaticArrayImpl, Size_, IsMask_, Derived_> { 19 | 20 | using UnderlyingType = std::underlying_type_t; 21 | 22 | using Base = StaticArrayImpl; 23 | 24 | ENOKI_ARRAY_DEFAULTS(StaticArrayImpl) 25 | using Base::derived; 26 | 27 | using Value = std::conditional_t; 28 | using Scalar = std::conditional_t; 29 | 30 | StaticArrayImpl() = default; 31 | StaticArrayImpl(Value value) : Base(UnderlyingType(value)) { } 32 | 33 | template > = 0> 34 | StaticArrayImpl(const T &b) : Base(b) { } 35 | 36 | template > = 0> 37 | StaticArrayImpl(const T &v1, const T &v2) : Base(v1, v2) { } 38 | 39 | template 40 | StaticArrayImpl(const T &b, detail::reinterpret_flag) 41 | : Base(b, detail::reinterpret_flag()) { } 42 | 43 | template == array_depth_v && array_size_v == Base::Size1 && 45 | array_depth_v == array_depth_v && array_size_v == Base::Size2 && 46 | Base::Size2 != 0> = 0> 47 | StaticArrayImpl(const T1 &a1, const T2 &a2) 48 | : Base(a1, a2) { } 49 | 50 | ENOKI_INLINE decltype(auto) coeff(size_t i) const { 51 | using Coeff = decltype(Base::coeff(i)); 52 | if constexpr (std::is_same_v) 53 | return (const Value &) Base::coeff(i); 54 | else 55 | return Base::coeff(i); 56 | } 57 | 58 | ENOKI_INLINE decltype(auto) coeff(size_t i) { 59 | using Coeff = decltype(Base::coeff(i)); 60 | if constexpr (std::is_same_v) 61 | return (Value &) Base::coeff(i); 62 | else 63 | return Base::coeff(i); 64 | } 65 | 66 | template 67 | ENOKI_INLINE size_t compress_(T *&ptr, const Mask &mask) const { 68 | return Base::compress_((UnderlyingType *&) ptr, mask); 69 | } 70 | 71 | template Derived_& operator=(T&& t) { 72 | ENOKI_MARK_USED(t); 73 | if constexpr (std::is_same_v) 74 | return (Derived_ &) Base::operator=(UnderlyingType(0)); 75 | else if constexpr (std::is_convertible_v) 76 
| return (Derived_ &) Base::operator=(UnderlyingType(t)); 77 | else 78 | return (Derived_ &) Base::operator=(std::forward(t)); 79 | } 80 | }; 81 | 82 | NAMESPACE_END(enoki) 83 | -------------------------------------------------------------------------------- /include/enoki/array_masked.h: -------------------------------------------------------------------------------- 1 | /* 2 | enoki/array_masked.h -- Helper classes for masked assignments and 3 | in-place operators 4 | 5 | Enoki is a C++ template library that enables transparent vectorization 6 | of numerical kernels using SIMD instruction sets available on current 7 | processor architectures. 8 | 9 | Copyright (c) 2019 Wenzel Jakob 10 | 11 | All rights reserved. Use of this source code is governed by a BSD-style 12 | license that can be found in the LICENSE file. 13 | */ 14 | 15 | #pragma once 16 | 17 | NAMESPACE_BEGIN(enoki) 18 | 19 | // ----------------------------------------------------------------------- 20 | //! @{ \name Masked array helper classes 21 | // ----------------------------------------------------------------------- 22 | 23 | NAMESPACE_BEGIN(detail) 24 | 25 | template struct MaskedValue { 26 | MaskedValue(T &d, bool m) : d(d), m(m) { } 27 | 28 | template ENOKI_INLINE void operator =(const T2 &value) { if (m) d = value; } 29 | template ENOKI_INLINE void operator+=(const T2 &value) { if (m) d += value; } 30 | template ENOKI_INLINE void operator-=(const T2 &value) { if (m) d -= value; } 31 | template ENOKI_INLINE void operator*=(const T2 &value) { if (m) d *= value; } 32 | template ENOKI_INLINE void operator/=(const T2 &value) { if (m) d /= value; } 33 | template ENOKI_INLINE void operator|=(const T2 &value) { if (m) d |= value; } 34 | template ENOKI_INLINE void operator&=(const T2 &value) { if (m) d &= value; } 35 | template ENOKI_INLINE void operator^=(const T2 &value) { if (m) d ^= value; } 36 | 37 | T &d; 38 | bool m; 39 | }; 40 | 41 | template struct MaskedArray : ArrayBase, MaskedArray> { 42 
| using Mask = mask_t; 43 | using Scalar = MaskedValue>; 44 | using MaskType = MaskedArray; 45 | using Value = std::conditional_t>, 46 | MaskedValue>, 47 | MaskedArray>>; 48 | using UnderlyingValue = value_t; 49 | static constexpr size_t Size = array_size_v; 50 | static constexpr bool IsMaskedArray = true; 51 | 52 | MaskedArray(T &d, const Mask &m) : d(d), m(m) { } 53 | 54 | template ENOKI_INLINE void operator =(const T2 &value) { d.massign_(value, m); } 55 | template ENOKI_INLINE void operator+=(const T2 &value) { d.madd_(value, m); } 56 | template ENOKI_INLINE void operator-=(const T2 &value) { d.msub_(value, m); } 57 | template ENOKI_INLINE void operator*=(const T2 &value) { d.mmul_(value, m); } 58 | template ENOKI_INLINE void operator/=(const T2 &value) { d.mdiv_(value, m); } 59 | template ENOKI_INLINE void operator|=(const T2 &value) { d.mor_(value, m); } 60 | template ENOKI_INLINE void operator&=(const T2 &value) { d.mand_(value, m); } 61 | template ENOKI_INLINE void operator^=(const T2 &value) { d.mxor_(value, m); } 62 | 63 | /// Type alias for a similar-shaped array over a different type 64 | template using ReplaceValue = MaskedArray>; 65 | 66 | T &d; 67 | Mask m; 68 | }; 69 | 70 | NAMESPACE_END(detail) 71 | 72 | template 73 | struct Array, Size_> 74 | : detail::MaskedArray> { 75 | using Base = detail::MaskedArray>; 76 | using Base::Base; 77 | using Base::operator=; 78 | Array(const Base &b) : Base(b) { } 79 | }; 80 | 81 | template 82 | ENOKI_INLINE auto masked(T &value, const Mask &mask) { 83 | if constexpr (std::is_same_v) 84 | return detail::MaskedValue{ value, mask }; 85 | else 86 | return struct_support_t::masked(value, mask); 87 | } 88 | 89 | //! 
@} 90 | // ----------------------------------------------------------------------- 91 | 92 | NAMESPACE_END(enoki) 93 | -------------------------------------------------------------------------------- /include/enoki/array_round.h: -------------------------------------------------------------------------------- 1 | /* 2 | enoki/array_round.h -- Fallback for nonstandard rounding modes 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 12 | */ 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | NAMESPACE_BEGIN(enoki) 19 | 20 | #if defined(ENOKI_X86_64) || defined(ENOKI_X86_32) 21 | /// RAII wrapper that saves and restores the FP Control/Status Register 22 | template struct set_rounding_mode { 23 | set_rounding_mode() : value(_mm_getcsr()) { 24 | unsigned int csr = value & ~(unsigned int) _MM_ROUND_MASK; 25 | switch (Mode) { 26 | case RoundingMode::Nearest: csr |= _MM_ROUND_NEAREST; break; 27 | case RoundingMode::Down: csr |= _MM_ROUND_DOWN; break; 28 | case RoundingMode::Up: csr |= _MM_ROUND_UP; break; 29 | case RoundingMode::Zero: csr |= _MM_ROUND_TOWARD_ZERO; break; 30 | } 31 | _mm_setcsr(csr); 32 | } 33 | 34 | ~set_rounding_mode() { 35 | _mm_setcsr(value); 36 | } 37 | 38 | unsigned int value; 39 | }; 40 | #else 41 | template struct set_rounding_mode { 42 | // Don't know how to change rounding modes on this platform :( 43 | }; 44 | #endif 45 | 46 | template 47 | struct StaticArrayImpl::use_rounding_fallback_impl>> 49 | : StaticArrayImpl { 50 | 51 | using Base = StaticArrayImpl; 52 | using Derived = Derived_; 53 | 54 | using Base::derived; 55 | 56 | /// Rounding mode of arithmetic operations 57 | static constexpr RoundingMode Mode = Mode_; 58 | 59 | template , Value_>> 
= 0> 60 | ENOKI_INLINE StaticArrayImpl(Arg&& arg) : Base(std::forward(arg)) { } 61 | 62 | template 63 | ENOKI_INLINE StaticArrayImpl(Args&&... args) : Base(std::forward(args)...) { } 64 | 65 | template , Value_>> = 0> 66 | ENOKI_NOINLINE StaticArrayImpl(Arg&& arg) { 67 | set_rounding_mode mode; (void) mode; 68 | using Base2 = std::conditional_t, 70 | Packet>; 71 | Base::operator=(Base2(std::forward(arg))); 72 | } 73 | 74 | template , Value_>> = 0> 75 | ENOKI_NOINLINE Derived& operator=(Arg&& arg) { 76 | Base::operator=(std::forward(arg)); 77 | return derived(); 78 | } 79 | 80 | template , Value_>> = 0> 81 | ENOKI_NOINLINE Derived& operator=(Arg&& arg) { 82 | set_rounding_mode mode; (void) mode; 83 | using Base2 = std::conditional_t, 85 | Packet>; 86 | Base::operator=(Base2(std::forward(arg))); 87 | return derived(); 88 | } 89 | 90 | ENOKI_NOINLINE Derived add_(const Derived &a) const { 91 | set_rounding_mode mode; (void) mode; 92 | return Base::add_(a); 93 | } 94 | 95 | ENOKI_NOINLINE Derived sub_(const Derived &a) const { 96 | set_rounding_mode mode; (void) mode; 97 | return Base::sub_(a); 98 | } 99 | 100 | ENOKI_NOINLINE Derived mul_(const Derived &a) const { 101 | set_rounding_mode mode; (void) mode; 102 | return Base::mul_(a); 103 | } 104 | 105 | ENOKI_NOINLINE Derived div_(const Derived &a) const { 106 | set_rounding_mode mode; (void) mode; 107 | return Base::div_(a); 108 | } 109 | 110 | ENOKI_NOINLINE Derived sqrt_() const { 111 | set_rounding_mode mode; (void) mode; 112 | return Base::sqrt_(); 113 | } 114 | 115 | ENOKI_NOINLINE Derived fmadd_(const Derived &b, const Derived &c) const { 116 | set_rounding_mode mode; (void) mode; 117 | return Base::fmadd_(b, c); 118 | } 119 | 120 | ENOKI_NOINLINE Derived fmsub_(const Derived &b, const Derived &c) const { 121 | set_rounding_mode mode; (void) mode; 122 | return Base::fmsub_(b, c); 123 | } 124 | 125 | ENOKI_NOINLINE Derived fnmadd_(const Derived &b, const Derived &c) const { 126 | set_rounding_mode mode; (void) 
mode; 127 | return Base::fnmadd_(b, c); 128 | } 129 | 130 | ENOKI_NOINLINE Derived fnmsub_(const Derived &b, const Derived &c) const { 131 | set_rounding_mode mode; (void) mode; 132 | return Base::fnmsub_(b, c); 133 | } 134 | 135 | ENOKI_NOINLINE Derived fmsubadd_(const Derived &b, const Derived &c) const { 136 | set_rounding_mode mode; (void) mode; 137 | return Base::fmsubadd_(b, c); 138 | } 139 | 140 | ENOKI_NOINLINE Derived fmaddsub_(const Derived &b, const Derived &c) const { 141 | set_rounding_mode mode; (void) mode; 142 | return Base::fmaddsub_(b, c); 143 | } 144 | 145 | ENOKI_NOINLINE Value_ hsum() const { 146 | set_rounding_mode mode; (void) mode; 147 | return Base::hsum_(); 148 | } 149 | 150 | ENOKI_NOINLINE Value_ hprod() const { 151 | set_rounding_mode mode; (void) mode; 152 | return Base::hprod_(); 153 | } 154 | }; 155 | 156 | NAMESPACE_END(enoki) 157 | -------------------------------------------------------------------------------- /include/enoki/color.h: -------------------------------------------------------------------------------- 1 | /* 2 | enoki/color.h -- Color space transformations (only sRGB so far) 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 
12 | */ 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | NAMESPACE_BEGIN(enoki) 19 | 20 | template expr_t linear_to_srgb(const T &x) { 21 | using Value = expr_t; 22 | using Mask = mask_t; 23 | using Scalar = scalar_t; 24 | constexpr bool Single = std::is_same_v; 25 | 26 | Value r = Scalar(12.92); 27 | Mask large_mask = x > Scalar(0.0031308); 28 | 29 | if (ENOKI_LIKELY(any(large_mask))) { 30 | Value y = sqrt(x), p, q; 31 | 32 | if constexpr (Single) { 33 | p = poly5(y, -0.0016829072605308378, 0.03453868659826638, 34 | 0.7642611304733891, 2.0041169284241644, 35 | 0.7551545191665577, -0.016202083165206348); 36 | q = poly5(y, 4.178892964897981e-7, -0.00004375359692957097, 37 | 0.03467195408529984, 0.6085338522168684, 38 | 1.8970238036421054, 1.); 39 | } else { 40 | p = poly10(y, -3.7113872202050023e-6, -0.00021805827098915798, 41 | 0.002531335520959116, 0.2263810267005674, 42 | 3.0477578489880823, 15.374469584296442, 43 | 32.44669922192121, 27.901125077137042, 8.450947414259522, 44 | 0.5838023820686707, -0.0031151377052754843); 45 | q = poly10(y, 2.2380622409188757e-11, -8.387527630781522e-9, 46 | 0.00007045228641004039, 0.007244514696840552, 47 | 0.21749170309546628, 2.575446652731678, 48 | 13.297981743005433, 30.50364355650628, 29.70548706952188, 49 | 10.723011300050162, 1.); 50 | } 51 | 52 | masked(r, large_mask) = p / q; 53 | } 54 | 55 | return r * x; 56 | } 57 | 58 | template expr_t srgb_to_linear(const T &x) { 59 | using Value = expr_t; 60 | using Mask = mask_t; 61 | using Scalar = scalar_t; 62 | constexpr bool Single = std::is_same_v; 63 | 64 | Value r = Scalar(1.0 / 12.92); 65 | Mask large_mask = x > Scalar(0.04045); 66 | 67 | if (ENOKI_LIKELY(any(large_mask))) { 68 | Value p, q; 69 | 70 | if constexpr (Single) { 71 | p = poly4(x, -0.0163933279112946, -0.7386328024653209, 72 | -11.199318357635072, -47.46726633009393, 73 | -36.04572663838034); 74 | q = poly4(x, -0.004261480793199332, -19.140923959601675, 75 | -59.096406619244426, -18.225745396846637, 1.); 76 
| } else { 77 | p = poly9(x, -0.008042950896814532, -0.5489744177844188, 78 | -14.786385491859248, -200.19589605282445, 79 | -1446.951694673217, -5548.704065887224, 80 | -10782.158977031822, -9735.250875334352, 81 | -3483.4445569178347, -342.62884098034357); 82 | q = poly9(x, -2.2132610916769585e-8, -9.646075249097724, 83 | -237.47722999429413, -2013.8039726540235, 84 | -7349.477378676199, -11916.470977597566, 85 | -8059.219012060384, -1884.7738197074218, 86 | -84.8098437770271, 1.); 87 | } 88 | 89 | masked(r, large_mask) = p / q; 90 | } 91 | 92 | return r * x; 93 | } 94 | 95 | NAMESPACE_END(enoki) 96 | -------------------------------------------------------------------------------- /include/enoki/morton.h: -------------------------------------------------------------------------------- 1 | /* 2 | enoki/morton.h -- Morton/Z-order curve encoding and decoding routines 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | Includes contributions by Sebastien Speierer 10 | 11 | All rights reserved. Use of this source code is governed by a BSD-style 12 | license that can be found in the LICENSE file. 
13 | */ 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #if defined(_MSC_VER) 20 | # pragma warning (push) 21 | # pragma warning (disable: 4310) // cast truncates constant value 22 | #endif 23 | 24 | NAMESPACE_BEGIN(enoki) 25 | NAMESPACE_BEGIN(detail) 26 | 27 | /// Generate bit masks for the functions \ref scatter_bits() and \ref gather_bits() 28 | template constexpr Value morton_magic(size_t dim, size_t level) { 29 | size_t n_bits = sizeof(Value) * 8; 30 | size_t max_block_size = n_bits / dim; 31 | size_t block_size = std::min(size_t(1) << (level - 1), max_block_size); 32 | size_t count = 0; 33 | 34 | Value mask = Value(1) << (n_bits - 1), 35 | value = Value(0); 36 | 37 | for (size_t i = 0; i < n_bits; ++i) { 38 | value >>= 1; 39 | 40 | if (count < max_block_size && (i / block_size) % dim == 0) { 41 | count++; 42 | value |= mask; 43 | } 44 | } 45 | 46 | return value; 47 | } 48 | 49 | /// Bit scatter function. \c Dimension defines the final distance between two output bits 50 | template = 0> 51 | ENOKI_INLINE Value scatter_bits(Value x) { return x; } 52 | 53 | template )> = 0> 56 | ENOKI_INLINE Value scatter_bits(Value x) { 57 | using Scalar = scalar_t; 58 | 59 | constexpr Scalar magic = morton_magic(Dimension, Level); 60 | constexpr size_t shift_maybe = (1 << (Level - 1)) * (Dimension - 1); 61 | constexpr size_t shift = (shift_maybe < sizeof(Scalar) * 8) ? shift_maybe : 0; 62 | 63 | if constexpr (shift != 0) 64 | x |= sl(x); 65 | 66 | x &= magic; 67 | 68 | return scatter_bits(x); 69 | } 70 | 71 | template = 0> 73 | ENOKI_INLINE Value gather_bits(Value x) { return x; } 74 | 75 | /// Bit gather function. 
\c Dimension defines the final distance between two input bits 76 | template )> = 0> 79 | ENOKI_INLINE Value gather_bits(Value x) { 80 | using Scalar = scalar_t; 81 | 82 | constexpr size_t ilevel = clog2i(sizeof(Value) * 8) - Level + 1; 83 | constexpr Scalar magic = morton_magic(Dimension, ilevel); 84 | constexpr size_t shift_maybe = (1 << (ilevel - 1)) * (Dimension - 1); 85 | constexpr size_t shift = (shift_maybe < sizeof(Scalar) * 8) ? shift_maybe : 0; 86 | 87 | x &= magic; 88 | 89 | if constexpr (shift != 0) 90 | x |= sr(x); 91 | 92 | return gather_bits(x); 93 | } 94 | 95 | #if defined(ENOKI_X86_AVX2) && defined(ENOKI_X86_64) 96 | template > = 0> 98 | ENOKI_INLINE Value scatter_bits(Value x) { 99 | constexpr Value magic = morton_magic(Dimension, 1); 100 | if constexpr (sizeof(Value) <= 4) 101 | return Value(_pdep_u32((uint32_t) x, (uint32_t) magic)); 102 | else 103 | return Value(_pdep_u64((uint64_t) x, (uint64_t) magic)); 104 | } 105 | 106 | template > = 0> 108 | ENOKI_INLINE Value gather_bits(Value x) { 109 | constexpr Value magic = morton_magic(Dimension, 1); 110 | if constexpr (sizeof(Value) <= 4) 111 | return Value(_pext_u32((uint32_t) x, (uint32_t) magic)); 112 | else 113 | return Value(_pext_u64((uint64_t) x, (uint64_t) magic)); 114 | } 115 | #endif 116 | 117 | template = 0> 119 | ENOKI_INLINE void morton_decode_helper(value_t value, Array &out) { 120 | out.coeff(0) = gather_bits(value); 121 | } 122 | 123 | template - 1, 124 | enable_if_t = 0> 125 | ENOKI_INLINE void morton_decode_helper(value_t value, Array &out) { 126 | out.coeff(Index) = gather_bits(sr(value)); 127 | morton_decode_helper(value, out); 128 | } 129 | 130 | NAMESPACE_END(detail) 131 | 132 | /// Convert a N-dimensional integer array into the Morton/Z-order curve encoding 133 | template , 134 | enable_if_t = 0> 135 | ENOKI_INLINE Return morton_encode(Array a) { 136 | return detail::scatter_bits(a.coeff(0)); 137 | } 138 | 139 | /// Convert a N-dimensional integer array into the Morton/Z-order 
curve encoding 140 | template - 1, 141 | typename Return = value_t, enable_if_t = 0> 142 | ENOKI_INLINE Return morton_encode(Array a) { 143 | static_assert(std::is_unsigned_v>, "morton_encode() requires unsigned arguments"); 144 | return sl(detail::scatter_bits(a.coeff(Index))) | 145 | morton_encode(a); 146 | } 147 | 148 | /// Convert Morton/Z-order curve encoding into a N-dimensional integer array 149 | template > 150 | ENOKI_INLINE Array morton_decode(Value value) { 151 | static_assert(std::is_unsigned_v>, "morton_decode() requires unsigned arguments"); 152 | Array result; 153 | detail::morton_decode_helper(value, result); 154 | return result; 155 | } 156 | 157 | NAMESPACE_END(enoki) 158 | 159 | #if defined(_MSC_VER) 160 | # pragma warning (pop) 161 | #endif 162 | -------------------------------------------------------------------------------- /resources/FindSphinx.cmake: -------------------------------------------------------------------------------- 1 | find_program(SPHINX_EXECUTABLE NAMES sphinx-build 2 | HINTS 3 | $ENV{SPHINX_DIR} 4 | PATH_SUFFIXES bin 5 | DOC "Sphinx documentation generator" 6 | ) 7 | 8 | include(FindPackageHandleStandardArgs) 9 | 10 | find_package_handle_standard_args(Sphinx DEFAULT_MSG 11 | SPHINX_EXECUTABLE 12 | ) 13 | 14 | mark_as_advanced(SPHINX_EXECUTABLE) 15 | -------------------------------------------------------------------------------- /resources/__init__.py: -------------------------------------------------------------------------------- 1 | __import__("enoki.core") 2 | -------------------------------------------------------------------------------- /resources/archflags_unix.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) { 4 | #if defined(__AVX512DQ__) 5 | std::cout << "-march=skx" << std::endl; 6 | #elif defined(__AVX512ER__) 7 | std::cout << "-march=knl" << std::endl; 8 | #elif defined(__AVX2__) 9 | std::cout << "-mavx2" << std::endl; 
10 | #elif defined(__AVX__) 11 | std::cout << "-mavx" << std::endl; 12 | #elif defined(__SSE4_2__) 13 | std::cout << "-msse4.2" << std::endl; 14 | #elif defined(__aarch64__) 15 | std::cout << "-march=armv8-a+simd -mtune=cortex-a53" << std::endl; 16 | #elif defined(__arm__) 17 | std::cout << "-march=armv7-a -mtune=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard -mfp16-format=ieee" << std::endl; 18 | #endif 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /resources/archflags_win32.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | struct Flags { 8 | std::bitset<32> eax; 9 | std::bitset<32> ebx; 10 | std::bitset<32> ecx; 11 | std::bitset<32> edx; 12 | 13 | Flags(int eax, int ebx, int ecx, int edx) : 14 | eax(eax), ebx(ebx), ecx(ecx), edx(edx) { } 15 | }; 16 | 17 | int main(int argc, char *argv[]) { 18 | std::array buffer; 19 | std::vector flags; 20 | 21 | __cpuid(buffer.data(), 0); 22 | int nIDs = buffer[0]; 23 | 24 | for (int i = 0; i <= nIDs; ++i) { 25 | __cpuidex(buffer.data(), i, 0); 26 | 27 | flags.emplace_back(buffer[0], buffer[1], buffer[2], buffer[3]); 28 | } 29 | /* 'flags' holds one entry per CPUID leaf 0..nIDs; only index leaves that the CPU actually reported */ 30 | if (flags.size() > 7 && flags[7].ebx[5]) 31 | std::cout << "/arch:AVX2"; 32 | else if (flags.size() > 1 && flags[1].ecx[28]) 33 | std::cout << "/arch:AVX"; 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /resources/check-style.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Script to check include/test code for common pybind11 code style errors. 4 | # 5 | # This script currently checks for 6 | # 7 | # 1. use of tabs instead of spaces 8 | # 2. MSDOS-style CRLF endings 9 | # 3. trailing spaces 10 | # 4. missing space between keyword and parenthesis, e.g.: for(, if(, while( 11 | # 5. Missing space between right parenthesis and brace, e.g. 
'for (...){' 12 | # 6. opening brace on its own line. It should always be on the same line as the 13 | # if/while/for/do statement. 14 | # 7. Leftover markers denoting incomplete implementations/tasks 15 | # 16 | # Invoke as: resources/check-style.sh 17 | # 18 | shopt -s lastpipe # bash >= 4.2: run the trailing 'while' of each pipeline in this shell so that 'errors=1' reaches the final 'exit' 19 | errors=0 20 | IFS=$'\n' 21 | found= 22 | # The mt=41 sets a red background for matched tabs: 23 | GREP_COLORS='mt=41' GREP_COLOR='41' grep $'\t' include/ src/ tests/*.{h,cpp} docs/*.rst -rn --color=always | 24 | while read f; do 25 | if [ -z "$found" ]; then 26 | echo -e '\033[31m\033[01mError: found tabs instead of spaces in the following files:\033[0m' 27 | found=1 28 | errors=1 29 | fi 30 | 31 | echo " $f" 32 | done 33 | 34 | found= 35 | # The mt=41 sets a red background for matched MS-DOS CRLF characters 36 | GREP_COLORS='mt=41' GREP_COLOR='41' grep -IUlr $'\r' include/ src/ tests/*.{h,cpp} docs/*.rst --color=always | 37 | while read f; do 38 | if [ -z "$found" ]; then 39 | echo -e '\033[31m\033[01mError: found CRLF characters in the following files:\033[0m' 40 | found=1 41 | errors=1 42 | fi 43 | 44 | echo " $f" 45 | done 46 | 47 | found= 48 | # The mt=41 sets a red background for matched trailing spaces 49 | GREP_COLORS='mt=41' GREP_COLOR='41' grep '[[:blank:]]\+$' include/ src/ tests/*.{h,cpp} docs/*.rst -rn --color=always | 50 | while read f; do 51 | if [ -z "$found" ]; then 52 | echo -e '\033[31m\033[01mError: found trailing spaces in the following files:\033[0m' 53 | found=1 54 | errors=1 55 | fi 56 | 57 | echo " $f" 58 | done 59 | 60 | found= 61 | grep '\<\(if\|for\|while\|catch\)(\|){' include/ src/ tests/*.{h,cpp} -rn --color=always | 62 | while read f; do 63 | if [ -z "$found" ]; then 64 | echo -e '\033[31m\033[01mError: found the following coding style problems:\033[0m' 65 | found=1 66 | errors=1 67 | fi 68 | 69 | echo " $f" 70 | done 71 | 72 | found= 73 | GREP_COLORS='mt=41' GREP_COLOR='41' grep '^\s*{\s*$' include/ src/ tests/*.{h,cpp} -rn --color=always | 74 | while read f; do 75 | if [ -z 
"$found" ]; then 76 | echo -e '\033[31m\033[01mError: braces should occur on the same line as the if/while/.. statement. Found issues in the following files: \033[0m' 77 | found=1 78 | errors=1 79 | fi 80 | 81 | echo " $f" 82 | done 83 | 84 | found= 85 | GREP_COLORS='mt=41' GREP_COLOR='41' grep '\<\(TODO\|XXX\)' include/ src/ tests/*.{h,cpp} -rn --color=always | 86 | while read f; do 87 | if [ -z "$found" ]; then 88 | echo -e '\033[31m\033[01mError: Incomplete implementation markers in code. Found issues in the following files: \033[0m' 89 | found=1 90 | errors=1 91 | fi 92 | 93 | echo " $f" 94 | done 95 | 96 | exit $errors 97 | -------------------------------------------------------------------------------- /resources/enoki_gdb.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # GDB Script to improve introspection of array types when debugging software 3 | # using Enoki. 
Copy this file to "~/.gdb" (creating the directory, if not 4 | # present) and then append the following lines to the file "~/.gdbinit" 5 | # (again, creating it if not already present): 6 | ############################################################################### 7 | # set print pretty 8 | # source ~/.gdb/enoki_gdb.py 9 | ############################################################################### 10 | 11 | import gdb 12 | 13 | simple_types = { 14 | 'bool', 15 | 'char', 'unsigned char', 16 | 'short', 'unsigned short', 17 | 'int', 'unsigned int', 18 | 'long', 'unsigned long', 19 | 'long long', 'unsigned long long', 20 | 'float', 'double' 21 | } 22 | 23 | 24 | class EnokiIterator: 25 | def __init__(self, instance, size): 26 | self.instance = instance 27 | self.size = size 28 | self.index = 0 29 | 30 | def __iter__(self): 31 | return self 32 | 33 | def __next__(self): 34 | if self.index >= self.size: 35 | raise StopIteration 36 | result = ('[%i]' % self.index, self.instance[self.index]) 37 | self.index += 1 38 | return result 39 | 40 | def next(self): 41 | return self.__next__() 42 | 43 | 44 | class EnokiStaticArrayPrinter: 45 | def __init__(self, instance): 46 | self.instance = instance 47 | itype = self.instance.type.strip_typedefs() 48 | 49 | # Extract derived type 50 | if 'StaticArrayImpl' in str(itype): 51 | itype = itype.template_argument(3) 52 | 53 | try: 54 | data = self.instance['m_data']['_M_elems'] 55 | self.data_type = data.type.strip_typedefs().target() 56 | except Exception: 57 | self.data_type = itype.template_argument(0) 58 | 59 | # Determine the size and data type 60 | self.size = int(str(itype.template_argument(1))) 61 | self.is_simple = str(self.data_type) in simple_types 62 | self.type_size = self.data_type.sizeof 63 | self.is_mask = 'Mask' in str(itype) 64 | 65 | try: 66 | _ = instance['k'] 67 | self.kmask = True 68 | except Exception: 69 | self.kmask = False 70 | 71 | def entry(self, i): 72 | if i < 0 or i >= self.size: 73 | return None 
74 | addr = int(self.instance.address) + self.type_size * i 75 | cmd = '*((%s *) 0x%x)' % (str(self.data_type), addr) 76 | return str(gdb.parse_and_eval(cmd)) 77 | 78 | def children(self): 79 | if self.is_simple: 80 | return [] 81 | else: 82 | return EnokiIterator(self.instance['m_data']['_M_elems'], self.size) 83 | 84 | def to_string(self): 85 | if self.is_simple: 86 | if not self.is_mask: 87 | result = [self.entry(i) for i in range(self.size)] 88 | else: 89 | if self.kmask: 90 | # AVX512 mask register 91 | result = list(reversed(format(int(self.instance['k']), '0%ib' % self.size))) 92 | else: 93 | result = [None] * self.size 94 | for i in range(self.size): 95 | value = self.entry(i) 96 | result[i] = '0' if (value == '0' or value == 'false') else '1' 97 | return '[' + ', '.join(result) + ']' 98 | else: 99 | return '' 100 | 101 | 102 | class EnokiDynamicArrayPrinter: 103 | def __init__(self, instance): 104 | self.instance = instance 105 | itype = self.instance.type.strip_typedefs() 106 | self.size = int(str(self.instance['m_size'])) 107 | self.packet_count = int(str(self.instance['m_packets_allocated'])) 108 | self.packet_type = itype.template_argument(0) 109 | self.packet_size = self.packet_type.sizeof 110 | self.data = int(str(instance['m_packets']['_M_t']['_M_t']['_M_head_impl']), 0) 111 | self.limit = 20 112 | 113 | def to_string(self): 114 | values = [] 115 | for i in range(self.packet_count): 116 | addr = int(self.data) + self.packet_size * i 117 | cmd = '*((%s *) 0x%x)' % (str(self.packet_type), addr) 118 | value = str(gdb.parse_and_eval(cmd)) 119 | assert value[-1] == ']' 120 | values += value[value.rfind('[')+1:-1].split(', ') 121 | if len(values) > self.size: 122 | values = values[0:self.size] 123 | break 124 | if len(values) > self.limit: 125 | break 126 | if len(values) > self.limit: 127 | values = values[0:self.limit] 128 | values.append(".. %i skipped .." 
% (self.size - self.limit)) 129 | return '[' + ', '.join(values) + ']' 130 | 131 | # Static Enoki arrays 132 | regexp_1 = r'(enoki::)?(Array|Packet|Complex|Matrix|' \ 133 | 'Quaternion|StaticArrayImpl)(Mask)?<.+>' 134 | 135 | # Mitsuba 2 is one of the main users of Enoki. For convenience, also 136 | # declare its custom array types here 137 | regexp_2 = r'(mitsuba::)?(Vector|Point|Normal|Spectrum|Color)<.+>' 138 | 139 | regexp_combined = r'^(%s)|(%s)$' % (regexp_1, regexp_2) 140 | 141 | p = gdb.printing.RegexpCollectionPrettyPrinter("enoki") 142 | p.add_printer("static", regexp_combined, EnokiStaticArrayPrinter) 143 | p.add_printer("dynamic", r'^(enoki::)?DynamicArray(Impl)?<.+>$', EnokiDynamicArrayPrinter) 144 | 145 | o = gdb.current_objfile() 146 | gdb.printing.register_pretty_printer(o, p) 147 | -------------------------------------------------------------------------------- /src/cuda/common.cuh: -------------------------------------------------------------------------------- 1 | #if !defined(NAMESPACE_BEGIN) 2 | # define NAMESPACE_BEGIN(name) namespace name { 3 | #endif 4 | 5 | #if !defined(NAMESPACE_END) 6 | # define NAMESPACE_END(name) } 7 | #endif 8 | # 9 | #if defined(_MSC_VER) 10 | # define ENOKI_EXPORT __declspec(dllexport) 11 | # define ENOKI_LIKELY(x) x 12 | # define ENOKI_UNLIKELY(x) x 13 | #else 14 | # define ENOKI_EXPORT __attribute__ ((visibility("default"))) 15 | # define ENOKI_LIKELY(x) __builtin_expect(!!(x), 1) 16 | # define ENOKI_UNLIKELY(x) __builtin_expect(!!(x), 0) 17 | #endif 18 | 19 | #if defined(__SSE4_2__) 20 | # include 21 | #endif 22 | 23 | NAMESPACE_BEGIN(enoki) 24 | 25 | enum EnokiType { Invalid = 0, Int8, UInt8, Int16, UInt16, 26 | Int32, UInt32, Int64, UInt64, Float16, 27 | Float32, Float64, Bool, Pointer }; 28 | 29 | #define cuda_check(err) cuda_check_impl(err, __FILE__, __LINE__) 30 | ENOKI_EXPORT extern void cuda_check_impl(CUresult errval, const char *file, const int line); 31 | ENOKI_EXPORT extern void 
/**
 * \brief Hash functor for \c std::string keys.
 *
 * When SSE4.2 is available, the string is folded through the hardware CRC32
 * instructions: 8 bytes at a time while more than 8 bytes remain, then one
 * byte at a time. Only the low 32 bits of the running state are returned.
 * Without SSE4.2 the functor forwards to \c std::hash.
 */
struct StringHasher {
    size_t operator()(const std::string& k) const {
#if defined(__SSE4_2__)
        const char *ptr = k.data(),
                   *end = ptr + k.length();

        uint64_t state64 = 0;
        // Compare the remaining length instead of computing 'ptr + 8', which
        // would form a pointer beyond one-past-the-end for short strings.
        // The bound is strict: a final run of exactly 8 bytes is consumed by
        // the byte loop below.
        while (end - ptr > 8) {
            // Assemble the chunk from individual bytes rather than
            // dereferencing a casted pointer: string storage is not
            // guaranteed to be 8-byte aligned, and reading it through a
            // 'uint64_t *' violates the aliasing rules. Bytes are placed
            // least-significant first, which matches a direct 8-byte load on
            // the little-endian x86 targets this branch is compiled for.
            uint64_t chunk = 0;
            for (int i = 0; i < 8; ++i)
                chunk |= static_cast<uint64_t>(static_cast<unsigned char>(ptr[i])) << (8 * i);
            state64 = _mm_crc32_u64(state64, chunk);
            ptr += 8;
        }

        uint32_t state32 = static_cast<uint32_t>(state64);
        while (ptr < end)
            state32 = _mm_crc32_u8(state32, static_cast<unsigned char>(*ptr++));
        return static_cast<size_t>(state32);
#else
        return std::hash<std::string>()(k);
#endif
    }
};
once 2 | #include "common.h" 3 | #include 4 | 5 | template 6 | py::class_ bind_complex(py::module &m, py::module &s, const char *name) { 7 | using Value = value_t; 8 | using Scalar = scalar_t; 9 | using Mask = mask_t; 10 | 11 | auto cls = py::class_(s, name) 12 | .def(py::init<>()) 13 | .def(py::init()); 14 | if constexpr (!std::is_same_v) 15 | cls.def(py::init()); 16 | cls.def(py::init(), "real"_a, "imag"_a) 17 | .def(py::self == py::self) 18 | .def(py::self != py::self) 19 | .def(py::self - py::self) 20 | .def(py::self + py::self) 21 | .def(py::self * py::self) 22 | .def(py::self / py::self) 23 | .def(-py::self) 24 | .def("__repr__", [](const Complex &a) -> std::string { 25 | if (*implicit_conversion) 26 | return ""; 27 | std::ostringstream oss; 28 | oss << a; 29 | return oss.str(); 30 | }) 31 | .def("__getitem__", [](const Complex &a, size_t index) { 32 | if (index >= 2) 33 | throw py::index_error(); 34 | return a.coeff(index); 35 | }) 36 | .def("__setitem__", [](Complex &a, size_t index, const Value &value) { 37 | if (index >= 2) 38 | throw py::index_error(); 39 | a.coeff(index) = value; 40 | }) 41 | .def_static("identity", [](size_t size) { return identity(size); }, "size"_a = 1) 42 | .def_static("zero", [](size_t size) { return zero(size); }, "size"_a = 1) 43 | .def_static("full", [](Scalar value, size_t size) { return full(value, size); }, 44 | "value"_a, "size"_a = 1); 45 | 46 | m.def("real", [](const Complex &z) { return real(z); }); 47 | m.def("imag", [](const Complex &z) { return imag(z); }); 48 | m.def("norm", [](const Complex &z) { return norm(z); }); 49 | m.def("squared_norm", [](const Complex &z) { return squared_norm(z); }); 50 | m.def("rcp", [](const Complex &z) { return rcp(z); }); 51 | m.def("conj", [](const Complex &z) { return conj(z); }); 52 | m.def("exp", [](const Complex &z) { return exp(z); }); 53 | m.def("log", [](const Complex &z) { return log(z); }); 54 | m.def("arg", [](const Complex &z) { return arg(z); }); 55 | m.def("pow", [](const 
Complex &z1, const Complex &z2) { return pow(z1, z2); }); 56 | m.def("sqrt", [](const Complex &z) { return sqrt(z); }); 57 | m.def("sin", [](const Complex &z) { return sin(z); }); 58 | m.def("cos", [](const Complex &z) { return cos(z); }); 59 | m.def("sincos", [](const Complex &z) { return sincos(z); }); 60 | m.def("tan", [](const Complex &z) { return tan(z); }); 61 | m.def("asin", [](const Complex &z) { return asin(z); }); 62 | m.def("acos", [](const Complex &z) { return acos(z); }); 63 | m.def("atan", [](const Complex &z) { return atan(z); }); 64 | m.def("sinh", [](const Complex &z) { return sinh(z); }); 65 | m.def("cosh", [](const Complex &z) { return cosh(z); }); 66 | m.def("sincosh", [](const Complex &z) { return sincosh(z); }); 67 | m.def("tanh", [](const Complex &z) { return tanh(z); }); 68 | m.def("asinh", [](const Complex &z) { return asinh(z); }); 69 | m.def("acosh", [](const Complex &z) { return acosh(z); }); 70 | m.def("atanh", [](const Complex &z) { return atanh(z); }); 71 | 72 | m.def("isfinite", [](const Complex &z) -> Mask { return enoki::isfinite(z); }); 73 | m.def("isnan", [](const Complex &z) -> Mask { return enoki::isnan(z); }); 74 | m.def("isinf", [](const Complex &z) -> Mask { return enoki::isinf(z); }); 75 | 76 | if constexpr (is_diff_array_v) { 77 | using Detached = expr_t()))>; 78 | 79 | m.def("detach", [](const Complex &a) -> Detached { return detach(a); }); 80 | m.def("requires_gradient", 81 | [](const Complex &a) { return requires_gradient(a); }, 82 | "array"_a); 83 | 84 | m.def("set_requires_gradient", 85 | [](Complex &a, bool value) { set_requires_gradient(a, value); }, 86 | "array"_a, "value"_a = true); 87 | 88 | m.def("gradient", [](Complex &a) { return eval(gradient(a)); }); 89 | m.def("set_gradient", 90 | [](Complex &a, const Detached &g, bool b) { set_gradient(a, g, b); }, 91 | "array"_a, "gradient"_a, "backward"_a = true); 92 | 93 | m.def("graphviz", [](const Complex &a) { return graphviz(a); }); 94 | 95 | m.def("set_label", 
// Entry point of the 'cuda' extension module. 's' is the module being
// initialized; 'm' is the 'enoki' package module obtained via import. Both
// are handed to every bind_cuda_* helper below.
PYBIND11_MODULE(cuda, s) {
    py::module m = py::module::import("enoki");
    // Imported for its side effects only; the returned module is discarded.
    py::module::import("enoki.scalar");

    // Look up the flag registered under the key "implicit_conversion".
    // NOTE(review): the result is stored without a null check, and the
    // bindings dereference this pointer later -- confirm the key is always
    // registered before this module is loaded.
    implicit_conversion = (bool *) py::get_shared_data("implicit_conversion");

    // NOTE(review): the template argument list of py::class_ is not visible
    // in this view of the file.
    py::class_>(m, "GPUBuffer");

    cuda_sync();
    bind_cuda_1d(m, s);
    bind_cuda_0d(m, s); // must run after bind_cuda_1d ("after FloatC")
    bind_cuda_2d(m, s);
    bind_cuda_3d(m, s);
    bind_cuda_4d(m, s);
    bind_cuda_complex(m, s);
    bind_cuda_matrix(m, s);
    bind_cuda_pcg32(m, s);

    // NOTE(review): the template arguments of py::call_guard are not visible
    // in this view (here and for "cuda_sync" below).
    m.def("cuda_eval", &cuda_eval, "log_assembly"_a = false,
          py::call_guard());

    m.def("cuda_sync", &cuda_sync,
          py::call_guard());

    m.def("cuda_malloc_trim", &cuda_malloc_trim);

    // Prints the buffer returned by cuda_whos() and then releases it with
    // free(). NOTE(review): if py::print throws, free(w) is skipped and the
    // buffer leaks.
    m.def("cuda_whos", []() { char *w = cuda_whos(); py::print(w); free(w); });

    // Returns a (free, total) pair as filled in by cuda_mem_get_info().
    // The local variable named 'free' shadows ::free inside this lambda only.
    m.def("cuda_mem_get_info", []() {
        size_t free = 0, total = 0;
        cuda_mem_get_info(&free, &total);
        return std::make_pair(free, total);
    });

    m.def("cuda_set_log_level", &cuda_set_log_level,
          "Sets the current log level (0: none, 1: kernel launches, 2: +ptxas "
          "statistics, 3: +ptx source, 4: +jit trace, 5: +ref counting)");

    m.def("cuda_log_level", &cuda_log_level);
}
.def(py::init()) 25 | .def(py::init()); 26 | 27 | int32_class 28 | .def(py::init()) 29 | .def(py::init()) 30 | .def(py::init()) 31 | .def(py::init()) 32 | .def(py::init()); 33 | 34 | int64_class 35 | .def(py::init()) 36 | .def(py::init()) 37 | .def(py::init()) 38 | .def(py::init()) 39 | .def(py::init()); 40 | 41 | uint32_class 42 | .def(py::init()) 43 | .def(py::init()) 44 | .def(py::init()) 45 | .def(py::init()) 46 | .def(py::init()); 47 | 48 | uint64_class 49 | .def(py::init()) 50 | .def(py::init()) 51 | .def(py::init()) 52 | .def(py::init()) 53 | .def(py::init()); 54 | 55 | auto vector1m_class = bind(m, s, "Vector1m"); 56 | auto vector1i_class = bind(m, s, "Vector1i"); 57 | auto vector1u_class = bind(m, s, "Vector1u"); 58 | auto vector1f_class = bind(m, s, "Vector1f"); 59 | auto vector1d_class = bind(m, s, "Vector1d"); 60 | 61 | vector1f_class 62 | .def(py::init()) 63 | .def(py::init()) 64 | .def(py::init()) 65 | .def(py::init()); 66 | 67 | vector1d_class 68 | .def(py::init()) 69 | .def(py::init()) 70 | .def(py::init()) 71 | .def(py::init()); 72 | 73 | vector1i_class 74 | .def(py::init()) 75 | .def(py::init()) 76 | .def(py::init()) 77 | .def(py::init()); 78 | 79 | vector1u_class 80 | .def(py::init()) 81 | .def(py::init()) 82 | .def(py::init()) 83 | .def(py::init()); 84 | 85 | m.def( 86 | "binary_search", 87 | [](uint32_t start, 88 | uint32_t end, 89 | const std::function &pred) { 90 | return enoki::binary_search(start, end, pred); 91 | }, 92 | "start"_a, "end"_a, "pred"_a); 93 | 94 | m.def("meshgrid", [](const Float32C &x, const Float32C &y) { 95 | auto result = meshgrid(x, y); 96 | return std::make_pair(std::move(result.x()), std::move(result.y())); 97 | }); 98 | 99 | m.def("meshgrid", [](const Float64C &x, const Float64C &y) { 100 | auto result = meshgrid(x, y); 101 | return std::make_pair(std::move(result.x()), std::move(result.y())); 102 | }); 103 | 104 | m.def("partition", [](const UInt64C &x) { 105 | return partition(x); 106 | }); 107 | } 108 | 
-------------------------------------------------------------------------------- /src/python/cuda_2d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | void bind_cuda_2d(py::module& m, py::module& s) { 4 | auto vector2m_class = bind(m, s, "Vector2m"); 5 | auto vector2i_class = bind(m, s, "Vector2i"); 6 | auto vector2u_class = bind(m, s, "Vector2u"); 7 | auto vector2f_class = bind(m, s, "Vector2f"); 8 | auto vector2d_class = bind(m, s, "Vector2d"); 9 | 10 | vector2f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector2d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()); 21 | 22 | vector2i_class 23 | .def(py::init()) 24 | .def(py::init()) 25 | .def(py::init()) 26 | .def(py::init()); 27 | 28 | vector2u_class 29 | .def(py::init()) 30 | .def(py::init()) 31 | .def(py::init()) 32 | .def(py::init()); 33 | } 34 | -------------------------------------------------------------------------------- /src/python/cuda_3d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | void bind_cuda_3d(py::module& m, py::module& s) { 4 | auto vector3m_class = bind(m, s, "Vector3m"); 5 | auto vector3i_class = bind(m, s, "Vector3i"); 6 | auto vector3u_class = bind(m, s, "Vector3u"); 7 | auto vector3f_class = bind(m, s, "Vector3f"); 8 | auto vector3d_class = bind(m, s, "Vector3d"); 9 | 10 | vector3f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector3d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()); 21 | 22 | vector3i_class 23 | .def(py::init()) 24 | .def(py::init()) 25 | .def(py::init()) 26 | .def(py::init()); 27 | 28 | vector3u_class 29 | .def(py::init()) 30 | .def(py::init()) 31 | .def(py::init()) 32 | .def(py::init()); 33 | } 34 | 
-------------------------------------------------------------------------------- /src/python/cuda_4d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | void bind_cuda_4d(py::module& m, py::module& s) { 4 | auto vector4m_class = bind(m, s, "Vector4m"); 5 | auto vector4i_class = bind(m, s, "Vector4i"); 6 | auto vector4u_class = bind(m, s, "Vector4u"); 7 | auto vector4f_class = bind(m, s, "Vector4f"); 8 | auto vector4d_class = bind(m, s, "Vector4d"); 9 | 10 | vector4f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector4d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()); 21 | 22 | vector4i_class 23 | .def(py::init()) 24 | .def(py::init()) 25 | .def(py::init()) 26 | .def(py::init()); 27 | 28 | vector4u_class 29 | .def(py::init()) 30 | .def(py::init()) 31 | .def(py::init()) 32 | .def(py::init()); 33 | } 34 | -------------------------------------------------------------------------------- /src/python/cuda_autodiff.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | extern void bind_cuda_autodiff_0d(py::module&, py::module&); 4 | extern void bind_cuda_autodiff_1d(py::module&, py::module&); 5 | extern void bind_cuda_autodiff_2d(py::module&, py::module&); 6 | extern void bind_cuda_autodiff_3d(py::module&, py::module&); 7 | extern void bind_cuda_autodiff_4d(py::module&, py::module&); 8 | extern void bind_cuda_autodiff_complex(py::module&, py::module&); 9 | extern void bind_cuda_autodiff_matrix(py::module&, py::module&); 10 | 11 | bool *implicit_conversion = nullptr; 12 | 13 | PYBIND11_MODULE(cuda_autodiff, s) { 14 | py::module m = py::module::import("enoki"); 15 | py::module::import("enoki.cuda"); 16 | 17 | implicit_conversion = (bool *) py::get_shared_data("implicit_conversion"); 18 | 19 | bind_cuda_autodiff_1d(m, s); 20 | 
bind_cuda_autodiff_0d(m, s); // after FloatD 21 | bind_cuda_autodiff_2d(m, s); 22 | bind_cuda_autodiff_3d(m, s); 23 | bind_cuda_autodiff_4d(m, s); 24 | bind_cuda_autodiff_complex(m, s); 25 | bind_cuda_autodiff_matrix(m, s); 26 | 27 | m.def("set_requires_gradient", 28 | [](py::object o, bool value) { 29 | throw py::type_error("set_requires_gradient(): requires a differentiable type as input!"); 30 | }, "array"_a, "value"_a = true); 31 | } 32 | -------------------------------------------------------------------------------- /src/python/cuda_autodiff_0d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | void bind_cuda_autodiff_0d(py::module& m, py::module& s) { 4 | auto vector0m_class = bind(m, s, "Vector0m"); 5 | auto vector0i_class = bind(m, s, "Vector0i"); 6 | auto vector0u_class = bind(m, s, "Vector0u"); 7 | auto vector0f_class = bind(m, s, "Vector0f"); 8 | auto vector0d_class = bind(m, s, "Vector0d"); 9 | 10 | vector0f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()) 15 | .def(py::init()); 16 | 17 | vector0d_class 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()) 21 | .def(py::init()) 22 | .def(py::init()); 23 | 24 | vector0i_class 25 | .def(py::init()) 26 | .def(py::init()) 27 | .def(py::init()) 28 | .def(py::init()) 29 | .def(py::init()); 30 | 31 | vector0u_class 32 | .def(py::init()) 33 | .def(py::init()) 34 | .def(py::init()) 35 | .def(py::init()) 36 | .def(py::init()); 37 | } 38 | -------------------------------------------------------------------------------- /src/python/cuda_autodiff_1d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include 3 | 4 | void bind_cuda_autodiff_1d(py::module& m, py::module& s) { 5 | auto mask_class = bind>(m, s, "Mask"); 6 | auto uint32_class = bind(m, s, "UInt32"); 7 | auto uint64_class = bind(m, s, "UInt64"); 8 | auto int32_class = 
bind(m, s, "Int32"); 9 | auto int64_class = bind(m, s, "Int64"); 10 | auto float32_class = bind(m, s, "Float32"); 11 | auto float64_class = bind(m, s, "Float64"); 12 | 13 | mask_class 14 | .def(py::init()); 15 | 16 | float32_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()) 21 | .def(py::init()) 22 | .def(py::init()) 23 | .def("set_graph_simplification", [](bool value) { Float32D::set_graph_simplification_(value); }) 24 | .def("whos", []() { py::print(Float32D::whos_()); }) 25 | .def_static("set_log_level", 26 | [](int log_level) { Float32D::set_log_level_(log_level); }, 27 | "Sets the current log level (0 == none, 1 == minimal, 2 == moderate, 3 == high, 4 == everything)") 28 | .def_static("log_level", []() { return Float32D::log_level_(); }) 29 | .def_static("simplify_graph", []() { Float32D::simplify_graph_(); }) 30 | .def_static("backward", 31 | [](bool free_graph) { backward(free_graph); }, 32 | "free_graph"_a = true) 33 | .def_static("forward", 34 | [](bool free_graph) { forward(free_graph); }, 35 | "free_graph"_a = true); 36 | 37 | float64_class 38 | .def(py::init()) 39 | .def(py::init()) 40 | .def(py::init()) 41 | .def(py::init()) 42 | .def(py::init()) 43 | .def(py::init()) 44 | .def("set_graph_simplification", [](bool value) { Float64D::set_graph_simplification_(value); }) 45 | .def("whos", []() { py::print(Float64D::whos_()); }) 46 | .def_static("set_log_level", 47 | [](int log_level) { Float64D::set_log_level_(log_level); }, 48 | "Sets the current log level (0 == none, 1 == minimal, 2 == moderate, 3 == high, 4 == everything)") 49 | .def_static("log_level", []() { return Float64D::log_level_(); }) 50 | .def_static("simplify_graph", []() { Float64D::simplify_graph_(); }) 51 | .def_static("backward", 52 | [](bool free_graph) { backward(free_graph); }, 53 | "free_graph"_a = true) 54 | .def_static("forward", 55 | [](bool free_graph) { forward(free_graph); }, 56 | "free_graph"_a = true); 57 | 58 | int32_class 59 | 
.def(py::init()) 60 | .def(py::init()) 61 | .def(py::init()) 62 | .def(py::init()) 63 | .def(py::init()) 64 | .def(py::init()); 65 | 66 | int64_class 67 | .def(py::init()) 68 | .def(py::init()) 69 | .def(py::init()) 70 | .def(py::init()) 71 | .def(py::init()) 72 | .def(py::init()); 73 | 74 | uint32_class 75 | .def(py::init()) 76 | .def(py::init()) 77 | .def(py::init()) 78 | .def(py::init()) 79 | .def(py::init()) 80 | .def(py::init()); 81 | 82 | uint64_class 83 | .def(py::init()) 84 | .def(py::init()) 85 | .def(py::init()) 86 | .def(py::init()) 87 | .def(py::init()) 88 | .def(py::init()); 89 | 90 | auto vector1m_class = bind(m, s, "Vector1m"); 91 | auto vector1i_class = bind(m, s, "Vector1i"); 92 | auto vector1u_class = bind(m, s, "Vector1u"); 93 | auto vector1f_class = bind(m, s, "Vector1f"); 94 | auto vector1d_class = bind(m, s, "Vector1d"); 95 | 96 | vector1f_class 97 | .def(py::init()) 98 | .def(py::init()) 99 | .def(py::init()) 100 | .def(py::init()) 101 | .def(py::init()); 102 | 103 | vector1d_class 104 | .def(py::init()) 105 | .def(py::init()) 106 | .def(py::init()) 107 | .def(py::init()) 108 | .def(py::init()); 109 | 110 | vector1i_class 111 | .def(py::init()) 112 | .def(py::init()) 113 | .def(py::init()) 114 | .def(py::init()) 115 | .def(py::init()); 116 | 117 | vector1u_class 118 | .def(py::init()) 119 | .def(py::init()) 120 | .def(py::init()) 121 | .def(py::init()) 122 | .def(py::init()); 123 | 124 | m.def( 125 | "binary_search", 126 | [](uint32_t start, 127 | uint32_t end, 128 | const std::function &pred) { 129 | return enoki::binary_search(start, end, pred); 130 | }, 131 | "start"_a, "end"_a, "pred"_a); 132 | 133 | m.def("meshgrid", [](const Float32D &x, const Float32D &y) { 134 | auto result = meshgrid(x, y); 135 | return std::make_pair(std::move(result.x()), std::move(result.y())); 136 | }); 137 | 138 | m.def("meshgrid", [](const Float64D &x, const Float64D &y) { 139 | auto result = meshgrid(x, y); 140 | return std::make_pair(std::move(result.x()), 
std::move(result.y())); 141 | }); 142 | 143 | struct Scope { 144 | Scope(const std::string &name) : name(name) { } 145 | 146 | void enter() { Float32D::push_prefix_(name.c_str()); } 147 | void exit(py::handle, py::handle, py::handle) { Float32D::pop_prefix_(); } 148 | 149 | std::string name; 150 | }; 151 | 152 | py::class_(float32_class, "Scope") 153 | .def(py::init()) 154 | .def("__enter__", &Scope::enter) 155 | .def("__exit__", &Scope::exit); 156 | } 157 | -------------------------------------------------------------------------------- /src/python/cuda_autodiff_2d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | void bind_cuda_autodiff_2d(py::module& m, py::module& s) { 4 | auto vector2m_class = bind(m, s, "Vector2m"); 5 | auto vector2i_class = bind(m, s, "Vector2i"); 6 | auto vector2u_class = bind(m, s, "Vector2u"); 7 | auto vector2f_class = bind(m, s, "Vector2f"); 8 | auto vector2d_class = bind(m, s, "Vector2d"); 9 | 10 | vector2f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()) 15 | .def(py::init()); 16 | 17 | vector2d_class 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()) 21 | .def(py::init()) 22 | .def(py::init()); 23 | 24 | vector2i_class 25 | .def(py::init()) 26 | .def(py::init()) 27 | .def(py::init()) 28 | .def(py::init()) 29 | .def(py::init()); 30 | 31 | vector2u_class 32 | .def(py::init()) 33 | .def(py::init()) 34 | .def(py::init()) 35 | .def(py::init()) 36 | .def(py::init()); 37 | } 38 | -------------------------------------------------------------------------------- /src/python/cuda_autodiff_3d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | void bind_cuda_autodiff_3d(py::module& m, py::module& s) { 4 | auto vector3m_class = bind(m, s, "Vector3m"); 5 | auto vector3i_class = bind(m, s, "Vector3i"); 6 | auto vector3u_class = bind(m, s, "Vector3u"); 7 | 
auto vector3f_class = bind(m, s, "Vector3f"); 8 | auto vector3d_class = bind(m, s, "Vector3d"); 9 | 10 | vector3f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()) 15 | .def(py::init()); 16 | 17 | vector3d_class 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()) 21 | .def(py::init()) 22 | .def(py::init()); 23 | 24 | vector3i_class 25 | .def(py::init()) 26 | .def(py::init()) 27 | .def(py::init()) 28 | .def(py::init()) 29 | .def(py::init()); 30 | 31 | vector3u_class 32 | .def(py::init()) 33 | .def(py::init()) 34 | .def(py::init()) 35 | .def(py::init()) 36 | .def(py::init()); 37 | } 38 | -------------------------------------------------------------------------------- /src/python/cuda_autodiff_4d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | void bind_cuda_autodiff_4d(py::module& m, py::module& s) { 4 | auto vector4m_class = bind(m, s, "Vector4m"); 5 | auto vector4i_class = bind(m, s, "Vector4i"); 6 | auto vector4u_class = bind(m, s, "Vector4u"); 7 | auto vector4f_class = bind(m, s, "Vector4f"); 8 | auto vector4d_class = bind(m, s, "Vector4d"); 9 | 10 | vector4f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()) 15 | .def(py::init()); 16 | 17 | vector4d_class 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()) 21 | .def(py::init()) 22 | .def(py::init()); 23 | 24 | vector4i_class 25 | .def(py::init()) 26 | .def(py::init()) 27 | .def(py::init()) 28 | .def(py::init()) 29 | .def(py::init()); 30 | 31 | vector4u_class 32 | .def(py::init()) 33 | .def(py::init()) 34 | .def(py::init()) 35 | .def(py::init()) 36 | .def(py::init()); 37 | } 38 | -------------------------------------------------------------------------------- /src/python/cuda_autodiff_complex.cpp: -------------------------------------------------------------------------------- 1 | #include "complex.h" 2 | 3 | void 
bind_cuda_autodiff_complex(py::module& m, py::module& s) { 4 | bind_complex(m, s, "Complex2f") 5 | .def(py::init()) 6 | .def(py::init()); 7 | 8 | bind_complex(m, s, "Complex24f") 9 | .def(py::init()) 10 | .def(py::init()); 11 | 12 | bind_complex(m, s, "Complex2d") 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | bind_complex(m, s, "Complex24d") 17 | .def(py::init()) 18 | .def(py::init()); 19 | } 20 | -------------------------------------------------------------------------------- /src/python/cuda_autodiff_matrix.cpp: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | 3 | void bind_cuda_autodiff_matrix(py::module& m, py::module& s) { 4 | bind_matrix_mask(m, s, "Matrix2m"); 5 | bind_matrix_mask(m, s, "Matrix3m"); 6 | bind_matrix_mask(m, s, "Matrix4m"); 7 | bind_matrix_mask(m, s, "Matrix44m"); 8 | 9 | bind_matrix(m, s, "Matrix2f"); 10 | bind_matrix(m, s, "Matrix3f"); 11 | bind_matrix(m, s, "Matrix4f"); 12 | bind_matrix(m, s, "Matrix44f"); 13 | 14 | bind_matrix(m, s, "Matrix2d"); 15 | bind_matrix(m, s, "Matrix3d"); 16 | bind_matrix(m, s, "Matrix4d"); 17 | bind_matrix(m, s, "Matrix44d"); 18 | } 19 | -------------------------------------------------------------------------------- /src/python/cuda_complex.cpp: -------------------------------------------------------------------------------- 1 | #include "complex.h" 2 | 3 | void bind_cuda_complex(py::module& m, py::module& s) { 4 | bind_complex(m, s, "Complex2f") 5 | .def(py::init()); 6 | 7 | bind_complex(m, s, "Complex24f") 8 | .def(py::init()); 9 | 10 | bind_complex(m, s, "Complex2d") 11 | .def(py::init()); 12 | 13 | bind_complex(m, s, "Complex24d") 14 | .def(py::init()); 15 | } 16 | -------------------------------------------------------------------------------- /src/python/cuda_matrix.cpp: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | 3 | void bind_cuda_matrix(py::module& m, py::module& s) { 4 | 
bind_matrix_mask(m, s, "Matrix2m"); 5 | bind_matrix_mask(m, s, "Matrix3m"); 6 | bind_matrix_mask(m, s, "Matrix4m"); 7 | bind_matrix_mask(m, s, "Matrix44m"); 8 | 9 | bind_matrix(m, s, "Matrix2f"); 10 | bind_matrix(m, s, "Matrix3f"); 11 | bind_matrix(m, s, "Matrix4f"); 12 | bind_matrix(m, s, "Matrix44f"); 13 | 14 | bind_matrix(m, s, "Matrix2d"); 15 | bind_matrix(m, s, "Matrix3d"); 16 | bind_matrix(m, s, "Matrix4d"); 17 | bind_matrix(m, s, "Matrix44d"); 18 | } 19 | -------------------------------------------------------------------------------- /src/python/cuda_pcg32.cpp: -------------------------------------------------------------------------------- 1 | #include "random.h" 2 | 3 | void bind_cuda_pcg32(py::module& m, py::module& s) { 4 | bind_pcg32>(m, s, "PCG32"); 5 | } 6 | -------------------------------------------------------------------------------- /src/python/docstr.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file contains docstrings for the Python bindings. 3 | Do not edit! These were automatically extracted by mkdoc.py 4 | */ 5 | 6 | #define __EXPAND(x) x 7 | #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT 8 | #define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1)) 9 | #define __CAT1(a, b) a ## b 10 | #define __CAT2(a, b) __CAT1(a, b) 11 | #define __DOC1(n1) __doc_##n1 12 | #define __DOC2(n1, n2) __doc_##n1##_##n2 13 | #define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3 14 | #define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4 15 | #define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5 16 | #define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6 17 | #define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7 18 | #define DOC(...) 
__EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) 19 | 20 | #if defined(__GNUG__) 21 | #pragma GCC diagnostic push 22 | #pragma GCC diagnostic ignored "-Wunused-variable" 23 | #endif 24 | 25 | static const char *__doc_PCG32 = R"doc(PCG32 pseudorandom number generator proposed by Melissa O'Neill)doc"; 26 | 27 | static const char *__doc_PCG32_PCG32 = R"doc(Initialize the pseudorandom number generator with the seed() function)doc"; 28 | 29 | static const char *__doc_PCG32_advance = 30 | R"doc(Multi-step advance function (jump-ahead, jump-back) 31 | 32 | The method used here is based on Brown, "Random Number Generation with 33 | Arbitrary Stride", Transactions of the American Nuclear Society (Nov. 34 | 1994). The algorithm is very similar to fast exponentiation.)doc"; 35 | 36 | static const char *__doc_PCG32_inc = R"doc()doc"; 37 | 38 | static const char *__doc_PCG32_next_float32 = 39 | R"doc(Generate a single precision floating point value on the interval [0, 40 | 1))doc"; 41 | 42 | static const char *__doc_PCG32_next_float32_2 = R"doc(Masked version of next_float32)doc"; 43 | 44 | static const char *__doc_PCG32_next_float64 = 45 | R"doc(Generate a double precision floating point value on the interval [0, 46 | 1) 47 | 48 | Remark: 49 | Since the underlying random number generator produces 32 bit 50 | output, only the first 32 mantissa bits will be filled (however, 51 | the resolution is still finer than in next_float(), which only 52 | uses 23 mantissa bits))doc"; 53 | 54 | static const char *__doc_PCG32_next_float64_2 = R"doc(Masked version of next_float64)doc"; 55 | 56 | static const char *__doc_PCG32_next_uint32 = R"doc(Generate a uniformly distributed unsigned 32-bit random number)doc"; 57 | 58 | static const char *__doc_PCG32_next_uint32_2 = R"doc(Masked version of next_uint32)doc"; 59 | 60 | static const char *__doc_PCG32_next_uint32_bounded = R"doc(Generate a uniformly distributed integer r, where 0 <= r < bound)doc"; 61 | 62 | static const 
char *__doc_PCG32_next_uint64 = R"doc(Generate a uniformly distributed unsigned 64-bit random number)doc"; 63 | 64 | static const char *__doc_PCG32_next_uint64_2 = R"doc(Masked version of next_uint64)doc"; 65 | 66 | static const char *__doc_PCG32_next_uint64_bounded = R"doc(Generate a uniformly distributed integer r, where 0 <= r < bound)doc"; 67 | /* Operator docstrings. In random.h only the subtraction operator is bound with a docstring (D(PCG32, operator, sub)); the == and != bindings there pass none, so the eq/ne strings are unused by the bindings visible in this file. */ 68 | static const char *__doc_PCG32_operator_eq = R"doc(Equality operator)doc"; 69 | 70 | static const char *__doc_PCG32_operator_ne = R"doc(Inequality operator)doc"; 71 | 72 | static const char *__doc_PCG32_operator_sub = R"doc(Compute the distance between two PCG32 pseudorandom number generators)doc"; 73 | 74 | static const char *__doc_PCG32_seed = 75 | R"doc(Seed the pseudorandom number generator 76 | 77 | Specified in two parts: a state initializer and a sequence selection 78 | constant (a.k.a. stream id))doc"; 79 | /* NOTE(review): bind_pcg32 in random.h registers no "shuffle" method, so this docstring has no Python-side consumer in the code visible here. */ 80 | static const char *__doc_PCG32_shuffle = 81 | R"doc(Draw uniformly distributed permutation and permute the given container 82 | 83 | From: Knuth, TAoCP Vol.
2 (3rd 3d), Section 3.4.2)doc"; 84 | /* NOTE(review): "(3rd 3d)" in the shuffle docstring above looks like a typo for "(3rd ed.)". It is part of a runtime string, so it is left unchanged here. */ 85 | static const char *__doc_PCG32_state = R"doc()doc"; 86 | 87 | static const char *__doc_operator_lshift = R"doc(Prints the canonical representation of a PCG32 object.)doc"; 88 | /* NOTE(review): the file ends here without a "#pragma GCC diagnostic pop" matching the push near its top -- confirm against the upstream header. */ 89 | -------------------------------------------------------------------------------- /src/python/dynamic.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | /* Binding routines implemented in the dynamic_*.cpp files that follow; each receives the same two modules (m, s) that the module init below passes in. */ 3 | extern void bind_dynamic_0d(py::module&, py::module&); 4 | extern void bind_dynamic_1d(py::module&, py::module&); 5 | extern void bind_dynamic_2d(py::module&, py::module&); 6 | extern void bind_dynamic_3d(py::module&, py::module&); 7 | extern void bind_dynamic_4d(py::module&, py::module&); 8 | extern void bind_dynamic_complex(py::module&, py::module&); 9 | extern void bind_dynamic_matrix(py::module&, py::module&); 10 | extern void bind_dynamic_pcg32(py::module&, py::module&); 11 | /* Pointer to the flag published under the shared-data key "implicit_conversion" (main.cpp registers &__implicit_conversion under that key). NOTE(review): it stays nullptr if nothing was registered before this module is initialised, and quat.h's __repr__ dereferences it without a check -- confirm the import order guarantees registration. */ 12 | bool *implicit_conversion = nullptr; 13 | /* Module init for "dynamic": imports the "enoki" package as m and "enoki.scalar", fetches the shared implicit_conversion pointer, then runs the bind_dynamic_* routines with (m, s). The 1d bindings run before the 0d ones (see the "after FloatX" remark). */ 14 | PYBIND11_MODULE(dynamic, s) { 15 | py::module m = py::module::import("enoki"); 16 | py::module::import("enoki.scalar"); 17 | 18 | implicit_conversion = (bool *) py::get_shared_data("implicit_conversion"); 19 | 20 | bind_dynamic_1d(m, s); 21 | bind_dynamic_0d(m, s); // after FloatX 22 | bind_dynamic_2d(m, s); 23 | bind_dynamic_3d(m, s); 24 | bind_dynamic_4d(m, s); 25 | bind_dynamic_complex(m, s); 26 | bind_dynamic_matrix(m, s); 27 | bind_dynamic_pcg32(m, s); 28 | } 29 | -------------------------------------------------------------------------------- /src/python/dynamic_0d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | /* Registers Vector0m, Vector0i, Vector0u, Vector0f and Vector0d through bind(m, s, name), then adds extra constructor overloads to the four non-mask classes. NOTE(review): bind(...) and py::init() appear to have lost their template arguments in this dump -- confirm the actual types against the upstream file. */ 3 | void bind_dynamic_0d(py::module& m, py::module& s) { 4 | auto vector0m_class = bind(m, s, "Vector0m"); 5 | auto vector0i_class = bind(m, s, "Vector0i"); 6 | auto vector0u_class = bind(m, s, "Vector0u"); 7 | auto vector0f_class = bind(m, s, "Vector0f"); 8 | auto vector0d_class = bind(m, s, "Vector0d"); 9 | 10 |
vector0f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector0d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()); 21 | 22 | vector0i_class 23 | .def(py::init()) 24 | .def(py::init()) 25 | .def(py::init()) 26 | .def(py::init()); 27 | 28 | vector0u_class 29 | .def(py::init()) 30 | .def(py::init()) 31 | .def(py::init()) 32 | .def(py::init()); 33 | } 34 | -------------------------------------------------------------------------------- /src/python/dynamic_1d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include 3 | /* Registers the dynamic 1D types (Mask, Mask64, UInt32, UInt64, Int32, Int64, Float32, Float64), the Vector1{m,i,u,f,d} types, and the module-level functions binary_search and meshgrid on m. NOTE(review): the bare "#include" above and the empty bind(...) / py::init() / implicitly_convertible() argument lists indicate that angle-bracket text was stripped from this dump -- confirm types against the upstream file. */ 4 | void bind_dynamic_1d(py::module& m, py::module& s) { 5 | auto mask_class = bind(m, s, "Mask"); 6 | auto mask64_class = bind(m, s, "Mask64"); 7 | auto uint32_class = bind(m, s, "UInt32"); 8 | auto uint64_class = bind(m, s, "UInt64"); 9 | auto int32_class = bind(m, s, "Int32"); 10 | auto int64_class = bind(m, s, "Int64"); 11 | auto float32_class = bind(m, s, "Float32"); 12 | auto float64_class = bind(m, s, "Float64"); 13 | /* Each mask class gets one extra constructor overload. */ 14 | mask_class 15 | .def(py::init()); 16 | 17 | mask64_class 18 | .def(py::init()); 19 | /* Two implicit conversions are registered; the source and target types are not recoverable from this dump. */ 20 | implicitly_convertible(); 21 | implicitly_convertible(); 22 | /* Each numeric class below gets five extra constructor overloads. */ 23 | float32_class 24 | .def(py::init()) 25 | .def(py::init()) 26 | .def(py::init()) 27 | .def(py::init()) 28 | .def(py::init()); 29 | 30 | float64_class 31 | .def(py::init()) 32 | .def(py::init()) 33 | .def(py::init()) 34 | .def(py::init()) 35 | .def(py::init()); 36 | 37 | int32_class 38 | .def(py::init()) 39 | .def(py::init()) 40 | .def(py::init()) 41 | .def(py::init()) 42 | .def(py::init()); 43 | 44 | int64_class 45 | .def(py::init()) 46 | .def(py::init()) 47 | .def(py::init()) 48 | .def(py::init()) 49 | .def(py::init()); 50 | 51 | uint32_class 52 | .def(py::init()) 53 | .def(py::init()) 54 | .def(py::init()) 55 | .def(py::init()) 56 | .def(py::init()); 57 | 58 | uint64_class 59 | .def(py::init()) 60 |
.def(py::init()) 61 | .def(py::init()) 62 | .def(py::init()) 63 | .def(py::init()); 64 | /* Single-component vector types, each with four extra constructor overloads on the non-mask classes. */ 65 | auto vector1m_class = bind(m, s, "Vector1m"); 66 | auto vector1i_class = bind(m, s, "Vector1i"); 67 | auto vector1u_class = bind(m, s, "Vector1u"); 68 | auto vector1f_class = bind(m, s, "Vector1f"); 69 | auto vector1d_class = bind(m, s, "Vector1d"); 70 | 71 | vector1f_class 72 | .def(py::init()) 73 | .def(py::init()) 74 | .def(py::init()) 75 | .def(py::init()); 76 | 77 | vector1d_class 78 | .def(py::init()) 79 | .def(py::init()) 80 | .def(py::init()) 81 | .def(py::init()); 82 | 83 | vector1i_class 84 | .def(py::init()) 85 | .def(py::init()) 86 | .def(py::init()) 87 | .def(py::init()); 88 | 89 | vector1u_class 90 | .def(py::init()) 91 | .def(py::init()) 92 | .def(py::init()) 93 | .def(py::init()); 94 | /* Module-level binary_search: forwards (start, end, pred) unchanged to enoki::binary_search. NOTE(review): the std::function signature of pred was stripped from this dump; scalar_1d.cpp registers a binary_search with the same visible parameters on m. */ 95 | m.def( 96 | "binary_search", 97 | [](uint32_t start, 98 | uint32_t end, 99 | const std::function &pred) { 100 | return enoki::binary_search(start, end, pred); 101 | }, 102 | "start"_a, "end"_a, "pred"_a); 103 | /* meshgrid overloads for Float32X and Float64X: the x() and y() components of the result are moved into a std::pair, which is returned. */ 104 | m.def("meshgrid", [](const Float32X &x, const Float32X &y) { 105 | auto result = meshgrid(x, y); 106 | return std::make_pair(std::move(result.x()), std::move(result.y())); 107 | }); 108 | 109 | m.def("meshgrid", [](const Float64X &x, const Float64X &y) { 110 | auto result = meshgrid(x, y); 111 | return std::make_pair(std::move(result.x()), std::move(result.y())); 112 | }); 113 | } 114 | -------------------------------------------------------------------------------- /src/python/dynamic_2d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | /* Registers Vector2m, Vector2i, Vector2u, Vector2f and Vector2d through bind(m, s, name) and adds four extra constructor overloads to each non-mask class. NOTE(review): template arguments of bind(...) and py::init() are missing in this dump. */ 3 | void bind_dynamic_2d(py::module& m, py::module& s) { 4 | auto vector2m_class = bind(m, s, "Vector2m"); 5 | auto vector2i_class = bind(m, s, "Vector2i"); 6 | auto vector2u_class = bind(m, s, "Vector2u"); 7 | auto vector2f_class = bind(m, s, "Vector2f"); 8 | auto vector2d_class = bind(m, s, "Vector2d"); 9 | 10 | vector2f_class 11 | .def(py::init()) 12 |
.def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector2d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()); 21 | 22 | vector2i_class 23 | .def(py::init()) 24 | .def(py::init()) 25 | .def(py::init()) 26 | .def(py::init()); 27 | 28 | vector2u_class 29 | .def(py::init()) 30 | .def(py::init()) 31 | .def(py::init()) 32 | .def(py::init()); 33 | } 34 | -------------------------------------------------------------------------------- /src/python/dynamic_3d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | /* Registers Vector3m, Vector3i, Vector3u, Vector3f and Vector3d through bind(m, s, name) and adds four extra constructor overloads to each non-mask class. NOTE(review): template arguments of bind(...) and py::init() are missing in this dump. */ 3 | void bind_dynamic_3d(py::module& m, py::module& s) { 4 | auto vector3m_class = bind(m, s, "Vector3m"); 5 | auto vector3i_class = bind(m, s, "Vector3i"); 6 | auto vector3u_class = bind(m, s, "Vector3u"); 7 | auto vector3f_class = bind(m, s, "Vector3f"); 8 | auto vector3d_class = bind(m, s, "Vector3d"); 9 | 10 | vector3f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector3d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()); 21 | 22 | vector3i_class 23 | .def(py::init()) 24 | .def(py::init()) 25 | .def(py::init()) 26 | .def(py::init()); 27 | 28 | vector3u_class 29 | .def(py::init()) 30 | .def(py::init()) 31 | .def(py::init()) 32 | .def(py::init()); 33 | } 34 | -------------------------------------------------------------------------------- /src/python/dynamic_4d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | /* Registers Vector4m, Vector4i, Vector4u, Vector4f and Vector4d through bind(m, s, name) and adds four extra constructor overloads to each non-mask class. NOTE(review): template arguments of bind(...) and py::init() are missing in this dump. */ 3 | void bind_dynamic_4d(py::module& m, py::module& s) { 4 | auto vector4m_class = bind(m, s, "Vector4m"); 5 | auto vector4i_class = bind(m, s, "Vector4i"); 6 | auto vector4u_class = bind(m, s, "Vector4u"); 7 | auto vector4f_class = bind(m, s, "Vector4f"); 8 | auto vector4d_class = bind(m, s, "Vector4d"); 9 | 10 | vector4f_class 11 | .def(py::init()) 12 |
.def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector4d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()) 20 | .def(py::init()); 21 | 22 | vector4i_class 23 | .def(py::init()) 24 | .def(py::init()) 25 | .def(py::init()) 26 | .def(py::init()); 27 | 28 | vector4u_class 29 | .def(py::init()) 30 | .def(py::init()) 31 | .def(py::init()) 32 | .def(py::init()); 33 | } 34 | -------------------------------------------------------------------------------- /src/python/dynamic_complex.cpp: -------------------------------------------------------------------------------- 1 | #include "complex.h" 2 | /* Registers Complex2f, Complex24f, Complex2d and Complex24d through bind_complex(m, s, name); unlike the scalar variant in scalar_complex.cpp, each class here receives one additional constructor overload. NOTE(review): template arguments of bind_complex(...) and py::init() are missing in this dump. */ 3 | void bind_dynamic_complex(py::module& m, py::module& s) { 4 | bind_complex(m, s, "Complex2f") 5 | .def(py::init()); 6 | 7 | bind_complex(m, s, "Complex24f") 8 | .def(py::init()); 9 | 10 | bind_complex(m, s, "Complex2d") 11 | .def(py::init()); 12 | 13 | bind_complex(m, s, "Complex24d") 14 | .def(py::init()); 15 | } 16 | -------------------------------------------------------------------------------- /src/python/dynamic_matrix.cpp: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | /* Registers the mask matrices (Matrix2m, Matrix3m, Matrix4m, Matrix44m) through bind_matrix_mask and the single/double precision matrices (Matrix{2,3,4,44}{f,d}) through bind_matrix. NOTE(review): template arguments are missing in this dump. */ 3 | void bind_dynamic_matrix(py::module& m, py::module& s) { 4 | bind_matrix_mask(m, s, "Matrix2m"); 5 | bind_matrix_mask(m, s, "Matrix3m"); 6 | bind_matrix_mask(m, s, "Matrix4m"); 7 | bind_matrix_mask(m, s, "Matrix44m"); 8 | 9 | bind_matrix(m, s, "Matrix2f"); 10 | bind_matrix(m, s, "Matrix3f"); 11 | bind_matrix(m, s, "Matrix4f"); 12 | bind_matrix(m, s, "Matrix44f"); 13 | 14 | bind_matrix(m, s, "Matrix2d"); 15 | bind_matrix(m, s, "Matrix3d"); 16 | bind_matrix(m, s, "Matrix4d"); 17 | bind_matrix(m, s, "Matrix44d"); 18 | } 19 | -------------------------------------------------------------------------------- /src/python/dynamic_pcg32.cpp: -------------------------------------------------------------------------------- 1 | #include "random.h" 2 | /* Registers the random number generator under the name "PCG32" via bind_pcg32 from random.h. */ 3 | void bind_dynamic_pcg32(py::module& m, py::module& s) {
4 | bind_pcg32>(m, s, "PCG32"); 5 | } 6 | -------------------------------------------------------------------------------- /src/python/main.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include 3 | /* Flag whose address is published to other extension modules under the shared-data key "implicit_conversion" in the core module init below. */ 4 | bool __implicit_conversion = false; 5 | /* Python-level allclose(a, b): true when |a - b| <= atol + |b| * rtol holds for every entry. Entries where both sides are infinite also pass (signs are not compared), and entries where both sides are NaN pass only if equal_nan is set. An Enoki operand paired with a numpy.ndarray or a plain number first converts the other operand to the Enoki type; other sized objects are compared element by element through recursion. Throws std::runtime_error on length mismatch or unsupported operand types. */ 6 | bool allclose_py(const py::object &a, const py::object &b, 7 | const py::float_ &rtol, const py::float_ &atol, 8 | bool equal_nan) { 9 | const char *tp_name_a = a.ptr()->ob_type->tp_name, 10 | *tp_name_b = b.ptr()->ob_type->tp_name; 11 | /* A length of -1 marks an operand for which PyObject_Length failed, i.e. one that has no length. */ 12 | ssize_t la = PyObject_Length(a.ptr()), 13 | lb = PyObject_Length(b.ptr()); 14 | /* Classify both operands: plain number (numeric and without a length), Enoki type (type name starts with "enoki."), or numpy.ndarray (exact type name match). */ 15 | bool num_a = PyNumber_Check(a.ptr()) && la == -1, 16 | num_b = PyNumber_Check(b.ptr()) && lb == -1, 17 | enoki_a = strncmp(tp_name_a, "enoki.", 6) == 0, 18 | enoki_b = strncmp(tp_name_b, "enoki.", 6) == 0, 19 | ndarray_a = strcmp(tp_name_a, "numpy.ndarray") == 0, 20 | ndarray_b = strcmp(tp_name_b, "numpy.ndarray") == 0; 21 | /* Drop the Python error left behind by a failed length query above. */ 22 | if (la == -1 || lb == -1) 23 | PyErr_Clear(); 24 | /* Mixed case: build an instance of the Enoki operand's type from the other operand and retry. */ 25 | if (enoki_a && (ndarray_b || num_b)) 26 | return allclose_py(a, a.get_type()(b), rtol, atol, equal_nan); 27 | else if (enoki_b && (ndarray_a || num_a)) 28 | return allclose_py(b.get_type()(a), b, rtol, atol, equal_nan); 29 | /* Lengths may differ only when one side is a number or has length 1 while the other side has positive length. */ 30 | if (la != lb && !(((num_a || la == 1) && lb > 0) || ((num_b || lb == 1) && la > 0))) 31 | throw std::runtime_error("enoki.allclose(): length mismatch!"); 32 | 33 | if ((enoki_a && enoki_b) || (num_a && num_b)) { 34 | py::module ek = py::module::import("enoki"); 35 | /* Helpers are looked up on the Python "enoki" module. NOTE(review): `eq` is fetched but not used anywhere in this function. */ 36 | py::object abs = ek.attr("abs"), 37 | eq = ek.attr("eq"), 38 | isnan = ek.attr("isnan"), 39 | isinf = ek.attr("isinf"), 40 | full = ek.attr("full"), 41 | all_nested = ek.attr("all_nested"); 42 | 43 | py::object lhs = abs(a - b), 44 | rhs = (num_b ?
atol + abs(b) * rtol 45 | : full(b.get_type(), atol) + abs(b) * rtol); 46 | /* Evaluate lhs <= rhs through Python's rich comparison; a null result means a Python exception is pending, which is rethrown below. */ 47 | py::object cond = 48 | py::reinterpret_steal(PyObject_RichCompare(lhs.ptr(), rhs.ptr(), Py_LE)); 49 | 50 | if (!cond) 51 | throw py::error_already_set(); 52 | /* Entries where both operands are infinite are accepted regardless of the comparison above. */ 53 | cond = cond | (isinf(a) & isinf(b)); 54 | 55 | if (equal_nan) 56 | cond = cond | (isnan(a) & isnan(b)); 57 | 58 | return py::cast(all_nested(cond)); 59 | } else if (la >= 0) { /* Generic sized object: iterate over a's length; b is indexed with the same key unless it is a plain number. NOTE(review): when a is a plain number (la == -1) and b is a non-Enoki sequence, this branch is skipped and the "unsupported type" error below is raised even though the length check above admits that combination; likewise b[key] can go out of range when lb == 1 and la > 1 -- confirm intended. */ 60 | for (size_t i = 0; i < (size_t) la; ++i) { 61 | py::int_ key(i); 62 | py::object ai = num_a ? a : a[key], 63 | bi = num_b ? b : b[key]; 64 | if (!allclose_py(ai, bi, rtol, atol, equal_nan)) 65 | return false; 66 | } 67 | } else { 68 | throw std::runtime_error("enoki.allclose(): unsupported type!"); 69 | } 70 | 71 | return true; 72 | } 73 | /* True when h is a Python type object whose tp_name starts with "enoki.". */ 74 | bool is_enoki_type(py::handle h) { 75 | return PyType_Check(h.ptr()) && 76 | strncmp(((PyTypeObject *) h.ptr())->tp_name, "enoki.", 6) == 0; 77 | } 78 | /* Module init for "core": everything is attached to the imported "enoki" package (m); the extension's own module object m_ is only marked as used. */ 79 | PYBIND11_MODULE(core, m_) { 80 | ENOKI_MARK_USED(m_); 81 | py::module m = py::module::import("enoki"); 82 | 83 | m.attr("__version__") = ENOKI_VERSION; 84 | py::set_shared_data("implicit_conversion", &__implicit_conversion); 85 | 86 | py::class_>(m, "CPUBuffer"); 87 | /* empty / zero / arange / full share one pattern: for a non-Enoki type with size == 1 the type itself is called (with no argument, 0, 0 and value respectively); otherwise the call is forwarded to the attribute of the same name on the type. */ 88 | m.def("empty", 89 | [](py::handle h, size_t size) { 90 | if (!is_enoki_type(h) && size == 1) 91 | return h(); 92 | else 93 | return h.attr("empty")(size); 94 | }, 95 | "type"_a, "size"_a = 1); 96 | 97 | m.def("zero", 98 | [](py::handle h, size_t size) { 99 | if (!is_enoki_type(h) && size == 1) 100 | return h(0); 101 | else 102 | return h.attr("zero")(size); 103 | }, 104 | "type"_a, "size"_a = 1); 105 | 106 | m.def("arange", 107 | [](py::handle h, size_t size) { 108 | if (!is_enoki_type(h) && size == 1) 109 | return h(0); 110 | else 111 | return h.attr("arange")(size); 112 | }, 113 | "type"_a, "size"_a = 1); 114 | 115 | m.def("full", 116 | [](py::handle h, py::handle value, size_t size) { 117 | if (!is_enoki_type(h) && size == 1) 118 | return h(value); 119 | else 120 | return
h.attr("full")(value, size); 121 | }, 122 | "type"_a, "value"_a, "size"_a = 1); 123 | /* NOTE(review): unlike empty/zero/arange/full, the non-Enoki branch of linspace does not test size == 1; it returns h(start) and ignores both end and size -- confirm this is intended. */ 124 | m.def("linspace", 125 | [](py::handle h, py::handle start, py::handle end, size_t size) { 126 | if (!is_enoki_type(h)) 127 | return h(start); 128 | else 129 | return h.attr("linspace")(start, end, size); 130 | }, 131 | "type"_a, "start"_a, "end"_a, "size"_a = 1); 132 | /* Default tolerances: rtol = 1e-5, atol = 1e-8, equal_nan = false. */ 133 | m.def("allclose", &allclose_py, 134 | "a"_a, "b"_a, "rtol"_a = 1e-5, "atol"_a = 1e-8, 135 | "equal_nan"_a = false 136 | ); 137 | /* Numeric constants exposed as module attributes. NOTE(review): the template argument of std::numeric_limits is missing in this dump. */ 138 | m.attr("pi") = M_PI; 139 | m.attr("e") = M_E; 140 | m.attr("inf") = std::numeric_limits::infinity(); 141 | m.attr("nan") = std::numeric_limits::quiet_NaN(); 142 | } 143 | -------------------------------------------------------------------------------- /src/python/quat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | #include 4 | /* Binds a quaternion type under `name` in module s (constructors, comparison and arithmetic operators, __repr__, indexed and masked item access, x/y/z/w properties, identity/zero/full factories) and registers the matching free functions (real, imag, norm, squared_norm, rcp, normalize, dot, abs, sqrt, exp, log, pow, isfinite, isnan, isinf, slerp, quat_to_euler, quat_to_matrix, matrix_to_quat, rotate) on module m. Returns the class handle. NOTE(review): `template`, `py::class_`, value_t/scalar_t/mask_t and py::init() have lost their angle-bracket arguments in this dump. */ 5 | template 6 | py::class_ bind_quaternion(py::module &m, py::module &s, const char *name) { 7 | using Value = value_t; 8 | using Scalar = scalar_t; 9 | using Mask = mask_t; 10 | 11 | auto cls = py::class_(s, name) 12 | .def(py::init<>()) 13 | .def(py::init(), "w"_a) 14 | .def(py::init(), "x"_a, "y"_a, "z"_a, "w"_a) 15 | .def(py::self == py::self) 16 | .def(py::self != py::self) 17 | .def(py::self - py::self) 18 | .def(py::self + py::self) 19 | .def(py::self * Value()) 20 | .def(py::self / Value()) 21 | .def(py::self * py::self) 22 | .def(py::self / py::self) 23 | .def(-py::self) 24 | /* __repr__ returns an empty string while the shared *implicit_conversion flag is set; otherwise it streams the value through operator<<. The reason for the short-circuit is not visible in this file. */ .def("__repr__", [](const Quat &a) -> std::string { 25 | if (*implicit_conversion) 26 | return ""; 27 | std::ostringstream oss; 28 | oss << a; 29 | return oss.str(); 30 | }) 31 | /* Component access by index: indices 0..3 are valid, anything else raises py::index_error. */ .def("__getitem__", [](const Quat &a, size_t index) { 32 | if (index >= 4) 33 | throw py::index_error(); 34 | return a.coeff(index); 35 | }) 36 | .def("__setitem__", [](Quat &a, size_t index, const Value &value) { 37 | if (index >= 4) 38 | throw py::index_error(); 39 | a.coeff(index) = value; 40 | }) 41 |
.def("__setitem__", [](Quat &a, const mask_t &m, const Quat &b) { 42 | a[m] = b; 43 | }) 44 | /* Static factories; `size` defaults to 1. */ .def_static("identity", [](size_t size) { return identity(size); }, "size"_a = 1) 45 | .def_static("zero", [](size_t size) { return zero(size); }, "size"_a = 1) 46 | .def_static("full", [](Scalar value, size_t size) { return full(value, size); }, 47 | "value"_a, "size"_a = 1); 48 | /* Free functions taking Quat arguments, registered as overloads on module m; each lambda forwards to the C++ function of the same name. */ 49 | m.def("real", [](const Quat &a) { return real(a); }); 50 | m.def("imag", [](const Quat &a) { return imag(a); }); 51 | m.def("norm", [](const Quat &a) { return norm(a); }); 52 | m.def("squared_norm", [](const Quat &a) { return squared_norm(a); }); 53 | m.def("rcp", [](const Quat &a) { return rcp(a); }); 54 | m.def("normalize", [](const Quat &a) { return normalize(a); }); 55 | m.def("dot", [](const Quat &a, const Quat &b) { return dot(a, b); }); 56 | 57 | m.def("abs", [](const Quat &a) { return abs(a); }); 58 | m.def("sqrt", [](const Quat &a) { return sqrt(a); }); 59 | m.def("exp", [](const Quat &a) { return exp(a); }); 60 | m.def("log", [](const Quat &a) { return log(a); }); 61 | m.def("pow", [](const Quat &a, const Quat &b) { return pow(a, b); }); 62 | /* Components exposed as read/write Python properties x, y, z, w. */ 63 | cls.def_property("x", [](const Quat &a) { return a.x(); }, 64 | [](Quat &a, const Value &v) { a.x() = v; }); 65 | cls.def_property("y", [](const Quat &a) { return a.y(); }, 66 | [](Quat &a, const Value &v) { a.y() = v; }); 67 | cls.def_property("z", [](const Quat &a) { return a.z(); }, 68 | [](Quat &a, const Value &v) { a.z() = v; }); 69 | cls.def_property("w", [](const Quat &a) { return a.w(); }, 70 | [](Quat &a, const Value &v) { a.w() = v; }); 71 | /* Classification predicates; the result is explicitly returned as Mask. */ 72 | m.def("isfinite", [](const Quat &a) -> Mask { return enoki::isfinite(a); }); 73 | m.def("isnan", [](const Quat &a) -> Mask { return enoki::isnan(a); }); 74 | m.def("isinf", [](const Quat &a) -> Mask { return enoki::isinf(a); }); 75 | /* Local aliases used by matrix_to_quat and rotate below. NOTE(review): the template arguments of Array and Matrix are missing in this dump. */ 76 | using Vector3f = Array; 77 | using Matrix4f = Matrix; 78 | 79 | m.def("slerp", 80 | [](const Quat &a, const Quat &b, const Value &t) { 81 | return
slerp(a, b, t); 82 | }, 83 | "a"_a, "b"_a, "t"_a); 84 | /* Conversions between quaternions, Euler angles and matrices, plus axis/angle construction; each lambda forwards to the C++ function of the same name. */ 85 | m.def("quat_to_euler", [](const Quat &q) { return quat_to_euler(q); }); 86 | m.def("quat_to_matrix", [](const Quat &q) { return quat_to_matrix(q); }); 87 | m.def("matrix_to_quat", [](const Matrix4f &m) { return matrix_to_quat(m); }); 88 | 89 | m.def("rotate", [](const Vector3f &axis, const Value &angle) { 90 | return rotate(axis, angle); 91 | }, "axis"_a, "angle"_a); 92 | 93 | return cls; 94 | } 95 | -------------------------------------------------------------------------------- /src/python/random.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "common.h" 5 | #include "docstr.h" 6 | /* Shorthand for the DOC(...) docstring lookup macro (presumably provided by docstr.h, included above). */ 7 | #define D(...) DOC(__VA_ARGS__) 8 | /* Binds a PCG32 generator type under `name` in module s: constructor and seed() with default state/stream arguments, the distance (-) and ==/!= operators, advance, unmasked and masked next_uint32 / next_uint64 / next_uint32_bounded / next_uint64_bounded / next_float32, read/write `state` and `inc`, and __repr__. NOTE(review): m is not used in the visible body; s is taken by value while m is taken by reference; docstrings for next_float64 exist in the docstring table but no next_float64 binding is registered here -- confirm whether these are intended. The template parameter list was stripped from this dump. */ 9 | template 10 | void bind_pcg32(py::module &m, py::module s, const char *name) { 11 | using UInt64 = typename PCG32::UInt64; 12 | using Mask = mask_t; 13 | 14 | py::class_(s, name, D(PCG32)) 15 | .def(py::init(), 16 | "initstate"_a = PCG32_DEFAULT_STATE, 17 | "initseq"_a = PCG32_DEFAULT_STREAM, 18 | D(PCG32, PCG32)) 19 | .def("seed", &PCG32::seed, 20 | "initstate"_a = PCG32_DEFAULT_STATE, 21 | "initseq"_a = PCG32_DEFAULT_STREAM, 22 | D(PCG32, seed)) 23 | .def(py::self - py::self, D(PCG32, operator, sub)) 24 | .def(py::self == py::self) 25 | .def(py::self != py::self) 26 | .def("advance", &PCG32::advance, D(PCG32, advance)) 27 | /* Each generator method is bound twice: once without arguments and once taking a mask; the masked overload uses the docstring with index 2. */ .def("next_uint32", 28 | [](PCG32 &pcg) { return pcg.next_uint32(); }, 29 | D(PCG32, next_uint32)) 30 | .def("next_uint32", 31 | [](PCG32 &pcg, const Mask &mask) { 32 | return pcg.next_uint32(mask); 33 | }, 34 | "mask"_a, D(PCG32, next_uint32, 2)) 35 | .def("next_uint64", 36 | [](PCG32 &pcg) { return pcg.next_uint64(); }, 37 | D(PCG32, next_uint64)) 38 | .def("next_uint64", 39 | [](PCG32 &pcg, const Mask &mask) { 40 | return pcg.next_uint64(mask); 41 | }, 42 | "mask"_a, D(PCG32, next_uint64, 2)) 43 | .def("next_uint32_bounded", 44 | [](PCG32 &pcg, uint32_t bound) { 45 | return
pcg.next_uint32_bounded(bound); 46 | }, 47 | "bound"_a, D(PCG32, next_uint32_bounded)) 48 | /* Masked bounded overload; it is registered without a docstring (same for the 64-bit variant below). */ .def("next_uint32_bounded", 49 | [](PCG32 &pcg, uint32_t bound, const Mask &mask) { 50 | return pcg.next_uint32_bounded(bound, mask); 51 | }, 52 | "bound"_a, "mask"_a) 53 | .def("next_uint64_bounded", 54 | [](PCG32 &pcg, uint64_t bound) { 55 | return pcg.next_uint64_bounded(bound); 56 | }, 57 | "bound"_a, D(PCG32, next_uint64_bounded)) 58 | .def("next_uint64_bounded", 59 | [](PCG32 &pcg, uint64_t bound, const Mask &mask) { 60 | return pcg.next_uint64_bounded(bound, mask); 61 | }, 62 | "bound"_a, "mask"_a) 63 | .def("next_float32", 64 | [](PCG32 &pcg) { return pcg.next_float32(); }, 65 | D(PCG32, next_float32)) 66 | .def("next_float32", 67 | [](PCG32 &pcg, const Mask &mask) { 68 | return pcg.next_float32(mask); 69 | }, 70 | "mask"_a, D(PCG32, next_float32, 2)) 71 | /* Raw generator fields exposed read/write; __repr__ prints both of them. */ .def_readwrite("state", &PCG32::state) 72 | .def_readwrite("inc", &PCG32::inc) 73 | .def("__repr__", [](const PCG32 &pcg) { 74 | std::ostringstream oss; 75 | oss << "PCG32[state=" << pcg.state << ", inc=" << pcg.inc << "]"; 76 | return oss.str(); 77 | }); 78 | } 79 | -------------------------------------------------------------------------------- /src/python/scalar_0d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | /* Scalar counterpart of bind_dynamic_0d: registers Vector0m, Vector0i, Vector0u, Vector0f and Vector0d through bind(m, s, name) and adds three extra constructor overloads to each non-mask class (the dynamic variant adds four). NOTE(review): template arguments of bind(...) and py::init() are missing in this dump. */ 3 | void bind_scalar_0d(py::module& m, py::module& s) { 4 | auto vector0m_class = bind(m, s, "Vector0m"); 5 | auto vector0i_class = bind(m, s, "Vector0i"); 6 | auto vector0u_class = bind(m, s, "Vector0u"); 7 | auto vector0f_class = bind(m, s, "Vector0f"); 8 | auto vector0d_class = bind(m, s, "Vector0d"); 9 | 10 | vector0f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()); 14 | 15 | vector0d_class 16 | .def(py::init()) 17 | .def(py::init()) 18 | .def(py::init()); 19 | 20 | vector0i_class 21 | .def(py::init()) 22 | .def(py::init()) 23 | .def(py::init()); 24 | 25 | vector0u_class 26 | .def(py::init()) 27 |
.def(py::init()) 28 | .def(py::init()); 29 | } 30 | -------------------------------------------------------------------------------- /src/python/scalar_1d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include 3 | /* Registers Vector1m, Vector1i, Vector1u, Vector1f and Vector1d through bind(m, s, name), adds three extra constructor overloads to each non-mask class, and registers binary_search on m. NOTE(review): the bare "#include" and the empty bind(...) / py::init() / std::function argument lists show that angle-bracket text was stripped from this dump. */ 4 | void bind_scalar_1d(py::module& m, py::module& s) { 5 | auto vector1m_class = bind(m, s, "Vector1m"); 6 | auto vector1i_class = bind(m, s, "Vector1i"); 7 | auto vector1u_class = bind(m, s, "Vector1u"); 8 | auto vector1f_class = bind(m, s, "Vector1f"); 9 | auto vector1d_class = bind(m, s, "Vector1d"); 10 | 11 | vector1f_class 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector1d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()); 20 | 21 | vector1i_class 22 | .def(py::init()) 23 | .def(py::init()) 24 | .def(py::init()); 25 | 26 | vector1u_class 27 | .def(py::init()) 28 | .def(py::init()) 29 | .def(py::init()); 30 | /* Module-level binary_search: forwards (start, end, pred) unchanged to enoki::binary_search. NOTE(review): dynamic_1d.cpp registers a binary_search with the same visible parameters on m; the stripped std::function signatures presumably distinguish the two overloads -- confirm. */ 31 | m.def( 32 | "binary_search", 33 | [](uint32_t start, 34 | uint32_t end, 35 | const std::function &pred) { 36 | return enoki::binary_search(start, end, pred); 37 | }, 38 | "start"_a, "end"_a, "pred"_a); 39 | } 40 | -------------------------------------------------------------------------------- /src/python/scalar_2d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | /* Registers Vector2m, Vector2i, Vector2u, Vector2f and Vector2d through bind(m, s, name) and adds three extra constructor overloads to each non-mask class. NOTE(review): template arguments of bind(...) and py::init() are missing in this dump. */ 3 | void bind_scalar_2d(py::module& m, py::module& s) { 4 | auto vector2m_class = bind(m, s, "Vector2m"); 5 | auto vector2i_class = bind(m, s, "Vector2i"); 6 | auto vector2u_class = bind(m, s, "Vector2u"); 7 | auto vector2f_class = bind(m, s, "Vector2f"); 8 | auto vector2d_class = bind(m, s, "Vector2d"); 9 | 10 | vector2f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()); 14 | 15 | vector2d_class 16 | .def(py::init()) 17 | .def(py::init()) 18 | .def(py::init()); 19 | 20 | vector2i_class 21 | .def(py::init()) 22 | .def(py::init()) 23 | .def(py::init()); 24 | 25 |
vector2u_class 26 | .def(py::init()) 27 | .def(py::init()) 28 | .def(py::init()); 29 | } 30 | -------------------------------------------------------------------------------- /src/python/scalar_3d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | /* Registers Vector3m, Vector3i, Vector3u, Vector3f and Vector3d through bind(m, s, name) and adds three extra constructor overloads to each non-mask class. NOTE(review): template arguments of bind(...) and py::init() are missing in this dump. */ 3 | void bind_scalar_3d(py::module& m, py::module& s) { 4 | auto vector3m_class = bind(m, s, "Vector3m"); 5 | auto vector3i_class = bind(m, s, "Vector3i"); 6 | auto vector3u_class = bind(m, s, "Vector3u"); 7 | auto vector3f_class = bind(m, s, "Vector3f"); 8 | auto vector3d_class = bind(m, s, "Vector3d"); 9 | 10 | vector3f_class 11 | .def(py::init()) 12 | .def(py::init()) 13 | .def(py::init()); 14 | 15 | vector3d_class 16 | .def(py::init()) 17 | .def(py::init()) 18 | .def(py::init()); 19 | 20 | vector3i_class 21 | .def(py::init()) 22 | .def(py::init()) 23 | .def(py::init()); 24 | 25 | vector3u_class 26 | .def(py::init()) 27 | .def(py::init()) 28 | .def(py::init()); 29 | } 30 | -------------------------------------------------------------------------------- /src/python/scalar_4d.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | /* Registers Vector4m, Vector4i, Vector4u, Vector4f and Vector4d through bind(m, s, name) and adds three extra constructor overloads to each non-mask class. NOTE(review): template arguments of bind(...) and py::init() are missing in this dump. */ 4 | void bind_scalar_4d(py::module& m, py::module& s) { 5 | auto vector4m_class = bind(m, s, "Vector4m"); 6 | auto vector4i_class = bind(m, s, "Vector4i"); 7 | auto vector4u_class = bind(m, s, "Vector4u"); 8 | auto vector4f_class = bind(m, s, "Vector4f"); 9 | auto vector4d_class = bind(m, s, "Vector4d"); 10 | 11 | vector4f_class 12 | .def(py::init()) 13 | .def(py::init()) 14 | .def(py::init()); 15 | 16 | vector4d_class 17 | .def(py::init()) 18 | .def(py::init()) 19 | .def(py::init()); 20 | 21 | vector4i_class 22 | .def(py::init()) 23 | .def(py::init()) 24 | .def(py::init()); 25 | 26 | vector4u_class 27 | .def(py::init()) 28 | .def(py::init()) 29 | .def(py::init()); 30 | } 31 | --------------------------------------------------------------------------------
/src/python/scalar_complex.cpp: -------------------------------------------------------------------------------- 1 | #include "complex.h" 2 | /* Registers Complex2f, Complex24f, Complex2d and Complex24d through bind_complex(m, s, name); no extra constructors are added here (the dynamic variant in dynamic_complex.cpp adds one per class). NOTE(review): the template arguments of bind_complex(...) are missing in this dump. */ 3 | void bind_scalar_complex(py::module& m, py::module& s) { 4 | bind_complex(m, s, "Complex2f"); 5 | bind_complex(m, s, "Complex24f"); 6 | 7 | bind_complex(m, s, "Complex2d"); 8 | bind_complex(m, s, "Complex24d"); 9 | } 10 | -------------------------------------------------------------------------------- /src/python/scalar_matrix.cpp: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include 3 | /* Registers the same matrix names as dynamic_matrix.cpp (Matrix{2,3,4,44}{m,f,d}) plus the scalar-only Matrix41{m,f,d}, and adds the module-level functions transform_decompose and transform_compose on m. NOTE(review): the bare "#include" and the missing template arguments (bind_matrix, Quaternion, Array) come from angle-bracket text being stripped in this dump. */ 4 | void bind_scalar_matrix(py::module& m, py::module& s) { 5 | bind_matrix_mask(m, s, "Matrix2m"); 6 | bind_matrix_mask(m, s, "Matrix3m"); 7 | bind_matrix_mask(m, s, "Matrix4m"); 8 | bind_matrix_mask(m, s, "Matrix44m"); 9 | 10 | bind_matrix(m, s, "Matrix2f"); 11 | bind_matrix(m, s, "Matrix3f"); 12 | bind_matrix(m, s, "Matrix4f"); 13 | bind_matrix(m, s, "Matrix44f"); 14 | 15 | bind_matrix(m, s, "Matrix2d"); 16 | bind_matrix(m, s, "Matrix3d"); 17 | bind_matrix(m, s, "Matrix4d"); 18 | bind_matrix(m, s, "Matrix44d"); 19 | /* Matrix41* names are registered only in this scalar translation unit. */ 20 | bind_matrix_mask(m, s, "Matrix41m"); 21 | bind_matrix(m, s, "Matrix41f"); 22 | bind_matrix(m, s, "Matrix41d"); 23 | /* transform_decompose takes a Matrix4f; transform_compose takes a Matrix3f, a quaternion and an array (presumably the pieces produced by transform_decompose -- confirm). Both lambdas forward to the C++ function of the same name. */ 24 | m.def("transform_decompose", [](const Matrix4f &m) { return transform_decompose(m); }); 25 | m.def("transform_compose", 26 | [](const Matrix3f &m, const Quaternion &q, 27 | const Array &v) { return transform_compose(m, q, v); }); 28 | } 29 | -------------------------------------------------------------------------------- /src/python/scalar_pcg32.cpp: -------------------------------------------------------------------------------- 1 | #include "random.h" 2 | /* Registers the random number generator under the name "PCG32" via bind_pcg32 from random.h. NOTE(review): the template argument of bind_pcg32 is truncated in this dump (only a trailing ">" is left). */ 3 | void bind_scalar_pcg32(py::module& m, py::module& s) { 4 | bind_pcg32>(m, s, "PCG32"); 5 | } 6 | -------------------------------------------------------------------------------- /src/python/scalar_quat.cpp:
-------------------------------------------------------------------------------- 1 | #include "quat.h" 2 | /* Registers the quaternion types "Quaternion4f" and "Quaternion4d" via bind_quaternion from quat.h. NOTE(review): the template arguments of bind_quaternion(...) are missing in this dump. */ 3 | void bind_scalar_quaternion(py::module& m, py::module& s) { 4 | bind_quaternion(m, s, "Quaternion4f"); 5 | bind_quaternion(m, s, "Quaternion4d"); 6 | } 7 | -------------------------------------------------------------------------------- /tests/call.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/call.cpp -- tests vectorized function calls 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 12 | */ 13 | 14 | #include "test.h" 15 | #include 16 | #include "ray.h" 17 | #include 18 | /* Forward declarations so that the pointer-array aliases below can refer to the test classes before they are defined. NOTE(review): the template arguments of the Array / DynamicArray / mask_t / Packet / Ray aliases are missing in this dump. */ 19 | struct Test; 20 | struct TestChild; 21 | 22 | using Int32P = Array; 23 | using TestP = Array; 24 | using ChildP = Array; 25 | 26 | using TestX = DynamicArray; 27 | using Int32X = DynamicArray; 28 | 29 | using TestPMask = mask_t; 30 | using TestXMask = mask_t; 31 | 32 | using FloatP = Packet; 33 | using Vector3f = Array; 34 | using Vector3fP = Array; 35 | using Ray3fP = Ray; 36 | 37 | /* Test fixture whose methods are invoked through arrays of Test pointers (see the ENOKI_CALL_SUPPORT_* declarations further down in this file). */ 38 | struct Test { 39 | ENOKI_CALL_SUPPORT_FRIEND() 40 | 41 | Test(int32_t value) : value(value) { } 42 | virtual ~Test() { } 43 | 44 | // Vectorized function (accepts a mask, which is ignored here) 45 | virtual Int32P func1(Int32P i, TestPMask /* unused */) const { return i + value; } 46 | virtual Int32X func1(Int32X i, TestXMask /* unused */) const { return i + value; } 47 | 48 | // Vectorized function (the mask selects which entries of i are incremented by value) 49 | virtual void func2(Int32P &i, TestPMask mask) const { i[mask] += value; } 50 | 51 | bool func3() const { return value == 20; } 52 | 53 | std::pair func4(TestPMask) const { return
std::make_pair(value, value+1); } 54 | 55 | Ray3fP make_ray(TestPMask) const { return Ray3fP(Vector3f(1, 1, 1), Vector3f(1, 2, 3));} 56 | 57 | protected: 58 | int32_t value; 59 | }; 60 | /* Subclass that always constructs its base with value 42; is_child() tests for exactly that value. */ 61 | struct TestChild : public Test { 62 | TestChild() : Test(42) { } 63 | 64 | bool is_child() const { return value == 42; } 65 | }; 66 | 67 | // Allow Enoki arrays containing pointers to transparently forward function 68 | // calls (with the appropriate masks). 69 | ENOKI_CALL_SUPPORT_BEGIN(Test) 70 | ENOKI_CALL_SUPPORT_METHOD(func1) 71 | ENOKI_CALL_SUPPORT_METHOD(func2) 72 | ENOKI_CALL_SUPPORT_METHOD(func3) 73 | ENOKI_CALL_SUPPORT_GETTER(get_value, value) 74 | ENOKI_CALL_SUPPORT_METHOD(func4) 75 | ENOKI_CALL_SUPPORT_METHOD(make_ray) 76 | ENOKI_CALL_SUPPORT_END(Test) 77 | 78 | ENOKI_CALL_SUPPORT_BEGIN(TestChild) 79 | ENOKI_CALL_SUPPORT_METHOD(is_child) 80 | ENOKI_CALL_SUPPORT_END(TestChild) 81 | 82 | /* Every entry of `pointers` refers to a (value 10) except entry `offset`, which refers to b (value 20). The expected results are therefore "+10 everywhere, +20 at offset" for func1/func2 and "10 everywhere, 20 at offset" for get_value/func4. */ 83 | ENOKI_TEST(test01_call) { 84 | size_t offset = std::min((size_t) 2, TestP::Size-1); 85 | Test *a = new Test(10); 86 | Test *b = new Test(20); 87 | 88 | TestP pointers(a); 89 | pointers.coeff(offset) = b; 90 | 91 | Int32P index = arange(); 92 | Int32P ref = arange() + 10; 93 | if (offset < Int32P::Size) 94 | ref.coeff(offset) += 10; 95 | 96 | Int32P result = pointers->func1(index); 97 | assert(result == ref); 98 | 99 | Int32P ref2 = 10; 100 | if (offset < Int32P::Size) 101 | ref2.coeff(offset) += 10; 102 | 103 | assert(pointers->get_value() == ref2); 104 | 105 | std::pair result2 = pointers->func4(); 106 | assert(result2.first == ref2); 107 | assert(result2.second == ref2+1); 108 | /* Same func1 call through the dynamic array types: one packet's worth of slices is allocated and filled from the packet variables above. */ 109 | TestX pointers_x; 110 | Int32X index_x; 111 | set_slices(pointers_x, TestP::Size); 112 | set_slices(index_x, TestP::Size); 113 | packet(pointers_x, 0) = pointers; 114 | packet(index_x, 0) = index; 115 | Int32X result_x = pointers_x->func1(index_x); 116 | assert(packet(result_x, 0) == ref); 117 | /* func2 updates `index` in place, so afterwards it must equal the reference computed for func1. */ 118 | pointers->func2(index); 119 | assert(index == ref); 120 | 121 | auto mask =
mask_t(pointers->func3()); 122 | assert(mask == eq(pointers, b)); 123 | 124 | /* The following should not crash */ 125 | pointers.coeff(0) = nullptr; 126 | pointers->func3(); 127 | 128 | delete a; 129 | delete b; 130 | } 131 | 132 | 133 | ENOKI_TEST(test02_reinterpret_pointer_array) { 134 | using Mask = mask_t; 135 | Test *a = new Test(1); 136 | Test *b = new TestChild(); 137 | 138 | TestP objects(b); 139 | objects[std::min((size_t) 2, TestP::Size-1)] = a; 140 | 141 | auto children = reinterpret_array(objects); 142 | // is_child returns an Array of bools, need to cast to a mask type for the 143 | // comparison to be correct. 144 | assert(all(Mask(children->is_child()) == eq(objects, b))); 145 | 146 | delete a; 147 | delete b; 148 | } 149 | 150 | ENOKI_TEST(test03_call_with_structure) { 151 | Test *a = new Test(1); 152 | TestP objects(a); 153 | Vector3fP t = objects->make_ray()(1); 154 | assert(all_nested(eq(t, Vector3f(2, 3, 4)))); 155 | delete a; 156 | } 157 | -------------------------------------------------------------------------------- /tests/color.cpp: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | #include 3 | 4 | ENOKI_TEST_FLOAT(test01_linear_to_srgb) { 5 | test::probe_accuracy( 6 | [](const T &a) -> T { return linear_to_srgb(a); }, 7 | [](double value) { 8 | auto branch1 = 12.92 * value; 9 | auto branch2 = 1.055 * std::pow(value, 1.0 / 2.4) - 0.055; 10 | return select(value <= 0.0031308, branch1, branch2); 11 | }, 12 | Value(0), Value(1), 13 | 60, 14 | false 15 | ); 16 | } 17 | 18 | ENOKI_TEST_FLOAT(test02_srgb_to_linear) { 19 | test::probe_accuracy( 20 | [](const T &a) -> T { return srgb_to_linear(a); }, 21 | [](double value) { 22 | auto branch1 = (1.0 / 12.92) * value; 23 | auto branch2 = std::pow((value + 0.055) * (1.0 / 1.055), 2.4); 24 | return select(value <= 0.04045, branch1, branch2); 25 | }, 26 | Value(0), Value(1), 27 | 60, 28 | false 29 | ); 30 | } 31 | 32 | 
-------------------------------------------------------------------------------- /tests/complex.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/complex.cpp -- tests complex numbers and quaternions 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 12 | */ 13 | 14 | #include "test.h" 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | using Cf = Complex; 21 | using Qf = Quaternion; 22 | using V3 = Array; 23 | 24 | ENOKI_TEST(test00_complex_str) { 25 | assert(to_string(Cf(1.0)) == "1 + 0i"); 26 | assert(to_string(Cf(1.0, 2.0)) == "1 + 2i"); 27 | assert(to_string(conj(Cf(1.0, 2.0))) == "1 - 2i"); 28 | } 29 | 30 | ENOKI_TEST(test01_quat_str) { 31 | assert(to_string(Qf(2.0, 3.0, 4.0, 1.0)) == "1 + 2i + 3j + 4k"); 32 | assert(to_string(conj(Qf(2.0, 3.0, 4.0, 1.0))) == "1 - 2i - 3j - 4k"); 33 | } 34 | 35 | ENOKI_TEST(test02_complex_mult) { 36 | assert(to_string(Cf(1.0, 2.0) * Cf(-4.f, 3.0)) == "-10 - 5i"); 37 | } 38 | 39 | ENOKI_TEST(test03_quat_mult) { 40 | assert(to_string(Qf(1.0, 2.0, -1.0, 3.0) * Qf(-4.f, 3.0, 5.f, -2.f)) == "-3 - 1i + 4j + 28k"); 41 | } 42 | 43 | ENOKI_TEST(test04_complex_rcp) { 44 | auto a = Cf(2.f, 1.f); 45 | assert(to_string(rcp(a)) == "0.4 - 0.2i"); 46 | assert(abs(a/a - Cf(1.f)) < 1e-5); 47 | } 48 | 49 | ENOKI_TEST(test05_quat_rcp) { 50 | auto a = normalize(Qf(1.f, 2.f, 3.f, 4.f)); 51 | auto b = rcp(a); 52 | auto c = a*b - Qf(1.f); 53 | assert(abs(c) < 1e-5); 54 | assert(abs(a/a - Qf(1.f)) < 1e-5); 55 | } 56 | 57 | ENOKI_TEST(test06_complex_decomp) { 58 | assert(real(Cf(1, 2)) == 1); 59 | assert(imag(Cf(1, 2)) == 2); 60 | } 61 | 62 | ENOKI_TEST(test07_quat_decomp) { 63 | 
assert(real(Qf(1, 2, 3, 4)) == 4); 64 | assert(imag(Qf(1, 2, 3, 4)) == V3(1, 2, 3)); 65 | } 66 | 67 | ENOKI_TEST(test08_complex_exp) { 68 | assert(abs(exp(Cf(1, 2)) - Cf(-1.1312, 2.47173)) < 1e-5); 69 | } 70 | 71 | ENOKI_TEST(test09_quat_exp) { 72 | assert(abs(exp(Qf(2, 3, 4, 1)) - Qf(-0.78956, -1.18434, -1.57912, 1.69392)) < 1e-5); 73 | } 74 | 75 | ENOKI_TEST(test10_complex_log) { 76 | assert(abs(log(Cf(1, 2)) - Cf(0.804719, 1.10715)) < 1e-5); 77 | } 78 | 79 | ENOKI_TEST(test11_quat_log) { 80 | assert(abs(log(Qf(2, 3, 4, 1)) - Qf(0.51519, 0.772785, 1.03038, 1.7006)) < 1e-5); 81 | } 82 | 83 | ENOKI_TEST(test12_complex_sqrt) { 84 | assert(abs(sqrt(Cf(1, 2)) - Cf(1.27202, 0.786151)) < 1e-6); 85 | } 86 | 87 | ENOKI_TEST(test13_quat_sqrt) { 88 | assert(abs(sqrt(Qf(2, 3, 4, 1)) - Qf(0.555675, 0.833512, 1.11135, 1.79961)) < 1e-5); 89 | } 90 | 91 | ENOKI_TEST(test14_complex_sin_cos_tan) { 92 | assert(abs(sin(Cf(1, 2)) - Cf(3.16578, 1.9596)) < 1e-5); 93 | assert(abs(cos(Cf(1, 2)) - Cf(2.03272, -3.0519)) < 1e-5); 94 | assert(abs(tan(Cf(1, 2)) - Cf(0.0338128, 1.01479)) < 1e-5); 95 | auto sc = sincos(Cf(1, 2)); 96 | assert(abs(sc.first - Cf(3.16578, 1.9596)) < 1e-5); 97 | assert(abs(sc.second - Cf(2.03272, - 3.0519)) < 1e-5); 98 | } 99 | 100 | ENOKI_TEST(test15_complex_sinh_cosh_tanh) { 101 | assert(abs(sinh(Cf(1, 2)) - Cf(-0.489056, 1.40312)) < 1e-5); 102 | assert(abs(cosh(Cf(1, 2)) - Cf(-0.642148, 1.06861)) < 1e-5); 103 | assert(abs(tanh(Cf(1, 2)) - Cf(1.16674, -0.243458)) < 1e-5); 104 | auto sc = sincosh(Cf(1, 2)); 105 | assert(abs(sc.first - Cf(-0.489056, 1.40312)) < 1e-5); 106 | assert(abs(sc.second - Cf(-0.642148, 1.06861)) < 1e-5); 107 | } 108 | 109 | ENOKI_TEST(test16_complex_asin_acos_atan) { 110 | assert(abs(asin(Cf(1, 2)) - Cf(0.427079, 1.52857)) < 1e-5); 111 | assert(abs(acos(Cf(1, 2)) - Cf(1.14372, -1.52857)) < 1e-5); 112 | assert(abs(atan(Cf(1, 2)) - Cf(1.33897, 0.402359)) < 1e-5); 113 | } 114 | 115 | ENOKI_TEST(test17_complex_asinh_acosh_atanh) { 116 | 
assert(abs(asinh(Cf(1, 2)) - Cf(1.46935, 1.06344)) < 1e-5); 117 | assert(abs(acosh(Cf(1, 2)) - Cf(1.52857, 1.14372)) < 1e-5); 118 | assert(abs(atanh(Cf(1, 2)) - Cf(0.173287, 1.1781)) < 1e-5); 119 | } 120 | 121 | using FloatP = Packet; 122 | using FloatX = DynamicArray; 123 | using Quaternion4f = Quaternion; 124 | using Quaternion4X = Quaternion; 125 | using Matrix4X = Matrix; 126 | using Matrix4f = Matrix; 127 | using Matrix4fP = Matrix; 128 | using Vector3f = Array; 129 | using Vector4f = Array; 130 | 131 | Matrix4X slerp_matrix(const Quaternion4X &x, const Quaternion4X &y, float t) { 132 | return vectorize([t](auto &&x, auto &&y) { return quat_to_matrix(slerp(x, y, t)); }, x, y); 133 | }; 134 | 135 | Quaternion4X to_quat(const Matrix4X &m) { 136 | return vectorize([](auto &&m) { return matrix_to_quat(m); }, m); 137 | }; 138 | 139 | ENOKI_TEST(test18_complex_vectorize_scalar) { 140 | Quaternion4f a = normalize(Quaternion4f(1, 2, 3, 4)); 141 | Quaternion4f b = normalize(Quaternion4f(0, 0, 0, 1)); 142 | 143 | Quaternion4X x, y; 144 | set_slices(x, 1); 145 | set_slices(y, 1); 146 | slice(x, 0) = a; 147 | slice(y, 0) = b; 148 | auto tmp0 = slerp_matrix(x, y, 0.5f); 149 | auto tmp1 = to_quat(tmp0); 150 | Quaternion4f result = slice(tmp1, 0); 151 | Quaternion4f ref = normalize(a+b); 152 | assert(abs(result - ref) < 1e-5f); 153 | } 154 | 155 | ENOKI_TEST(test19_rotation) { 156 | auto axis = normalize(Vector3f(1.f, 2.f, 3.f)); 157 | Vector4f input(0.8f, 0.3f, 0.2f, 0.0f); 158 | float angle = 0.5f; 159 | 160 | auto quat1 = rotate(axis, angle); 161 | auto r1 = Vector4f(quat1 * Quaternion4f(input) * conj(quat1)); 162 | 163 | auto mtx2 = rotate(axis, angle); 164 | auto r2 = mtx2 * input; 165 | 166 | auto mtx1 = quat_to_matrix(quat1); 167 | auto r3 = mtx1 * input; 168 | 169 | auto quat2 = matrix_to_quat(mtx2); 170 | auto r4 = Vector4f(quat2 * Quaternion4f(input) * conj(quat2)); 171 | 172 | assert(norm(r1-r2) < 1e-6f); 173 | assert(norm(r1-r3) < 1e-6f); 174 | assert(norm(r1-r4) 
< 1e-6f); 175 | } 176 | 177 | ENOKI_TEST(test20_sincos_arg) { 178 | auto result = sincos_arg_diff(Cf(-1.01264771f, 1.1261553f), Cf(-0.70017226f, 1.24072149f)); 179 | assert(abs(result.first - 0.2168644f) < 1e-6f); 180 | assert(abs(result.second - 0.97620174f) < 1e-6f); 181 | 182 | result = sincos_arg_diff(Cf(-0.08012004f, 0.86251237f), Cf(-1.22284338f, 0.86829703f)); 183 | assert(abs(result.first + 0.75831358f) < 1e-6f); 184 | assert(abs(result.second - 0.65188996f) < 1e-6f); 185 | } 186 | -------------------------------------------------------------------------------- /tests/conv.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/conv.cpp -- tests value and mask conversion routines 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 12 | */ 13 | 14 | #if defined(__GNUG__) 15 | # pragma GCC diagnostic ignored "-Wconversion" 16 | #endif 17 | 18 | #include "test.h" 19 | 20 | template void convtest() { 21 | using T2 = replace_scalar_t; 22 | auto value1 = arange(); 23 | auto value2 = T2(value1); 24 | auto value3 = T(value2); 25 | assert(value1 == value3); 26 | } 27 | 28 | template void masktest() { 29 | using Value = typename T::Value; 30 | using T2 = replace_scalar_t; 31 | for (size_t i = 0; i < T::Size; ++i) { 32 | mask_t mask = eq(arange() - T(Value(i)), T(0)); 33 | mask_t mask2(mask); 34 | T2 result = select(mask2, T2(Value2(1)), T2(Value2(0))); 35 | Value2 out[T::Size]; 36 | store_unaligned(out, result); 37 | for (size_t j = 0; j < T::Size; ++j) 38 | assert(out[j] == ((j == i) ? 
Value2(1) : Value2(0))); 39 | } 40 | } 41 | 42 | ENOKI_TEST_ALL(test01_conv_int32_t) { convtest(); } 43 | ENOKI_TEST_ALL(test02_conv_uint32_t) { convtest(); } 44 | ENOKI_TEST_ALL(test03_conv_int64_t) { convtest(); } 45 | ENOKI_TEST_ALL(test04_conv_uint64_t) { convtest(); } 46 | ENOKI_TEST_ALL(test05_conv_half) { convtest(); } 47 | ENOKI_TEST_ALL(test06_conv_float) { convtest(); } 48 | ENOKI_TEST_ALL(test07_conv_double) { convtest(); } 49 | 50 | ENOKI_TEST_ALL(test08_mask_int32_t) { masktest(); } 51 | ENOKI_TEST_ALL(test09_mask_uint32_t) { masktest(); } 52 | ENOKI_TEST_ALL(test10_mask_int64_t) { masktest(); } 53 | ENOKI_TEST_ALL(test11_mask_uint64_t) { masktest(); } 54 | ENOKI_TEST_ALL(test12_mask_float) { masktest(); } 55 | ENOKI_TEST_ALL(test13_mask_double) { masktest(); } 56 | ENOKI_TEST_ALL(test14_mask_half) { masktest(); } 57 | 58 | ENOKI_TEST_ALL(test15_bool_conv) { 59 | for (size_t i = 0; i < T::Size; ++i) { 60 | mask_t mask = eq(arange() - T(Value(i)), T(0)); 61 | bool_array_t mask3(mask); 62 | mask_t mask4(mask3); 63 | T result = select(mask, T(Value(1)), T(Value(0))); 64 | T result2 = select(mask4, T(Value(1)), T(Value(0))); 65 | 66 | assert(result == result2); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /tests/custom.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/custom.cpp -- tests operations involving custom data structures 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 
12 | */ 13 | 14 | #include "test.h" 15 | #include 16 | #include 17 | 18 | template struct Custom { 19 | using Value = Value_; 20 | using FloatVector = Array; 21 | using DoubleVector = float64_array_t; 22 | using IntVector = int32_array_t; 23 | 24 | FloatVector o; 25 | DoubleVector d; 26 | IntVector i = 0; 27 | 28 | template 29 | bool operator==(const Custom &other) const { 30 | return other.o == o && other.d == d && other.i == i; 31 | } 32 | 33 | template 34 | bool operator!=(const Custom &other) const { 35 | return !operator==(other); 36 | } 37 | 38 | ENOKI_STRUCT(Custom, o, d, i) 39 | }; 40 | 41 | ENOKI_STRUCT_SUPPORT(Custom, o, d, i) 42 | 43 | ENOKI_TEST(test01_mask_slice_custom) { 44 | using FloatP = Packet; 45 | using Vector3f = Array; 46 | using Vector3d = Array; 47 | using Custom3f = Custom; 48 | using Custom3fP = Custom; 49 | 50 | Custom3fP x = zero(); 51 | Custom3fP y; 52 | y.o = Vector3f(1, 2, 3); 53 | y.d = Vector3d(4, 5, 6); 54 | auto mask = arange() > 0.f; 55 | 56 | masked(x, mask) = y; 57 | 58 | assert((slice(x, 0) == Custom3f(Vector3f(0, 0, 0), Vector3f(0, 0, 0), 0))); 59 | if (FloatP::Size > 1) 60 | assert((slice(x, 1) == Custom3f(Vector3f(1, 2, 3), Vector3f(4, 5, 6), 0))); 61 | } 62 | 63 | ENOKI_TEST(test02_mask_slice_custom_scalar) { 64 | using Custom3f = Custom; 65 | using Vector3f = Array; 66 | 67 | Custom3f x = zero(); 68 | Custom3f y(Vector3f(1, 2, 3), Vector3f(4, 5, 6), 0); 69 | Custom3f z = zero(); 70 | masked(z, true) = y; 71 | 72 | assert(y != x); 73 | assert(y == y); 74 | } 75 | 76 | struct Test { }; 77 | 78 | template struct TrickyStruct { 79 | using Ptr = replace_scalar_t; 80 | using Mask = mask_t; 81 | 82 | Ptr ptr; 83 | Mask mask; 84 | 85 | ENOKI_STRUCT(TrickyStruct, ptr, mask); 86 | }; 87 | 88 | ENOKI_STRUCT_SUPPORT(TrickyStruct, ptr, mask); 89 | 90 | ENOKI_TEST(test03_tricky) { 91 | using FloatP = Packet; 92 | using FloatX = DynamicArray; 93 | using Tricky = TrickyStruct; 94 | using TrickyP = TrickyStruct; 95 | using TrickyX = 
TrickyStruct; 96 | 97 | TrickyP x; 98 | for (size_t i = 0; i); 111 | *pslice.ptr = (Test *) 0xdeadbeef; 112 | assert(slice(xl.ptr, 1) == (Test *) 0xdeadbeef); 113 | } 114 | 115 | ENOKI_TEST(test04_gather_custom_struct) { 116 | using FloatP = Packet; 117 | using UInt32P = Packet; 118 | using FloatX = DynamicArray; 119 | using UInt32X = DynamicArray; 120 | using Custom3f = Custom; 121 | using Custom3fP = Custom; 122 | using Custom3fX = Custom; 123 | 124 | Custom3fX z; 125 | z.o.x() = arange(20) + 1.f; 126 | z.o.y() = arange(20) + 100.f; 127 | z.o.z() = arange(20) + 1000.f; 128 | z.d.x() = arange(20) + 2.f; 129 | z.d.y() = arange(20) + 200.f; 130 | z.d.z() = arange(20) + 2000.f; 131 | z.i = arange(20) + 1234u; 132 | 133 | Custom3fP p = gather(z, arange() + 1u); 134 | 135 | assert(p.o.x() == arange() + 2.f); 136 | assert(p.o.y() == arange() + 101.f); 137 | assert(p.o.z() == arange() + 1001.f); 138 | 139 | assert(p.d.x() == arange() + 3.f); 140 | assert(p.d.y() == arange() + 201.f); 141 | assert(p.d.z() == arange() + 2001.f); 142 | 143 | assert(p.i == arange() + 1235u); 144 | 145 | Custom3f s = gather(z, 1u); 146 | 147 | assert(s.o.x() == 2.f); 148 | assert(s.o.y() == 101.f); 149 | assert(s.o.z() == 1001.f); 150 | 151 | assert(s.d.x() == 3.f); 152 | assert(s.d.y() == 201.f); 153 | assert(s.d.z() == 2001.f); 154 | 155 | assert(s.i == 1235u); 156 | } 157 | -------------------------------------------------------------------------------- /tests/explog.cpp: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | ENOKI_TEST_FLOAT(test01_ldexp) { 4 | const Value inf = std::numeric_limits::infinity(); 5 | const Value nan = std::numeric_limits::quiet_NaN(); 6 | 7 | for (int i = -10; i < 10; ++i) { 8 | for (int j = -10; j < 10; ++j) { 9 | T f = T(std::ldexp(Value(i), j)); 10 | T f2 = ldexp(T(Value(i)), T(Value(j))); 11 | assert(f == f2); 12 | } 13 | } 14 | 15 | assert(T(ldexp(T(inf), T(Value(2)))) == T(inf)); 16 | 
assert(T(ldexp(T(-inf), T(Value(2)))) == T(-inf)); 17 | assert(all(enoki::isnan(ldexp(T(nan), T(Value(2)))))); 18 | } 19 | 20 | // AVX512F frexp() uses slightly different conventions 21 | // It is used by log() where this is not a problem though 22 | ENOKI_TEST_FLOAT(test02_frexp) { 23 | const Value inf = std::numeric_limits::infinity(); 24 | const Value nan = std::numeric_limits::quiet_NaN(); 25 | using int_array_t = enoki::int_array_t; 26 | using Int = typename int_array_t::Value; 27 | 28 | for (int i = -10; i < 10; ++i) { 29 | if (i == 0) 30 | continue; 31 | int e; 32 | Value f = std::frexp(Value(i), &e); 33 | T e2, f2; 34 | std::tie(f2, e2) = frexp(T(Value(i))); 35 | assert(T(Value(e)) == e2 + 1.f); 36 | assert(T(f) == f2); 37 | } 38 | 39 | T e, f; 40 | 41 | std::tie(f, e) = frexp(T(inf)); 42 | assert((std::isinf(f[0]) && !std::isinf(e[0])) || 43 | (std::isinf(e[0]) && !std::isinf(f[0]))); 44 | assert(!std::isnan(f[0]) && !std::isnan(e[0])); 45 | assert(f[0] > 0); 46 | 47 | std::tie(f, e) = frexp(T(-inf)); 48 | assert((std::isinf(f[0]) && !std::isinf(e[0])) || 49 | (std::isinf(e[0]) && !std::isinf(f[0]))); 50 | assert(!std::isnan(f[0]) && !std::isnan(e[0])); 51 | assert(f[0] < 0); 52 | 53 | if (!has_avx512f) { 54 | std::tie(f, e) = frexp(T(+0.f)); 55 | assert((reinterpret_array(f) == int_array_t(memcpy_cast(Value(+0.f))))); 56 | 57 | std::tie(f, e) = frexp(T(-0.f)); 58 | assert((reinterpret_array(f) == int_array_t(memcpy_cast(Value(-0.f))))); 59 | } 60 | 61 | std::tie(f, e) = frexp(T(nan)); 62 | assert(std::isnan(f[0]) || std::isnan(e[0])); 63 | } 64 | 65 | ENOKI_TEST_FLOAT(test03_exp) { 66 | test::probe_accuracy( 67 | [](const T &a) -> T { return exp(a); }, 68 | [](double a) { return std::exp(a); }, 69 | Value(-20), Value(30), 70 | #if defined(ENOKI_X86_AVX512ER) 71 | 27 72 | #else 73 | 3 74 | #endif 75 | ); 76 | 77 | Array x((Value) M_PI); 78 | Array y(x); 79 | assert(exp(x) == exp(y)); 80 | } 81 | 82 | ENOKI_TEST_FLOAT(test04_log) { 83 | 
test::probe_accuracy( 84 | [](const T &a) -> T { return log(a); }, 85 | [](double a) { return std::log(a); }, 86 | Value(0), Value(2e30), 87 | 2 88 | ); 89 | 90 | Array x((Value) M_PI); 91 | Array y(x); 92 | assert(log(x) == log(y)); 93 | } 94 | 95 | ENOKI_TEST_FLOAT(test05_pow) { 96 | assert(T(abs(pow(T(Value(M_PI)), T(Value(-2))) - 97 | T(Value(0.101321183642338))))[0] < 1e-6f); 98 | } 99 | -------------------------------------------------------------------------------- /tests/histogram.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/histogram.cpp -- Test which uses scatter_add() to build a histogram 3 | of a set of normally distributed pseudorandom samples 4 | 5 | Enoki is a C++ template library that enables transparent vectorization 6 | of numerical kernels using SIMD instruction sets available on current 7 | processor architectures. 8 | 9 | Copyright (c) 2019 Wenzel Jakob 10 | 11 | All rights reserved. Use of this source code is governed by a BSD-style 12 | license that can be found in the LICENSE file. 
13 | */ 14 | 15 | #if defined(NDEBUG) 16 | # undef NDEBUG 17 | #endif 18 | 19 | #include 20 | #include 21 | 22 | using namespace enoki; 23 | 24 | int main(int /* argc */, char * /* argv */[]) { 25 | using UInt32 = Packet; 26 | using UInt32Mask = mask_t; 27 | using RNG = PCG32; 28 | using Float32 = RNG::Float32; 29 | using UInt64 = RNG::UInt64; 30 | 31 | /* Bin configuration */ 32 | const float min_value = -4; 33 | const float max_value = 4; 34 | const uint32_t bin_count = 31; 35 | uint32_t bins[bin_count] { }; 36 | 37 | for (size_t j = 0; j < 16 / UInt32::Size; ++j) { 38 | RNG rng(PCG32_DEFAULT_STATE, arange() + (j * UInt32::Size)); 39 | 40 | for (size_t i = 0; i < 1024 * 1024; ++i) { 41 | /* Generate a uniform variate */ 42 | Float32 x = rng.next_float32(); 43 | 44 | /* Importance sample a normal distribution */ 45 | Float32 y = float(M_SQRT2) * erfinv(2.f*x - 1.f); 46 | 47 | /* Compute bin index */ 48 | UInt32 idx((y - min_value) * float(bin_count) / (max_value - min_value)); 49 | 50 | /* Discard samples that are out of bounds */ 51 | UInt32Mask mask = idx >= zero() && idx < bin_count; 52 | 53 | /* Increment the bin indices */ 54 | scatter_add(bins, UInt32(1), idx, mask); 55 | } 56 | } 57 | 58 | uint32_t sum = 0; 59 | for (uint32_t i = 0; i < bin_count; ++i) { 60 | std::cout << "bin[" << i << "] = "; 61 | for (uint32_t j = 0; j < bins[i] / 50000; ++j) 62 | std::cout << "*"; 63 | std::cout << " " << bins[i] << std::endl; 64 | sum += bins[i]; 65 | } 66 | 67 | #if defined(__aarch64__) 68 | assert(std::abs(int(16 * 1024 * 1024 - sum) - 743) <= 200); 69 | #else 70 | assert(std::abs(int(16 * 1024 * 1024 - sum) - 743) <= 3); 71 | #endif 72 | assert(bins[1] == 2558); 73 | assert(bins[2] == 6380); 74 | 75 | return 0; 76 | } 77 | -------------------------------------------------------------------------------- /tests/hyperbolic.cpp: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | ENOKI_TEST_FLOAT(test01_sinh) { 
4 | test::probe_accuracy( 5 | [](const T &a) -> T { return sinh(a); }, 6 | [](double a) { return std::sinh(a); }, 7 | Value(-10), Value(10), 8 | 8 9 | ); 10 | 11 | Array x((Value) 1); 12 | Array y(x); 13 | assert(sinh(x) == sinh(y)); 14 | } 15 | 16 | ENOKI_TEST_FLOAT(test02_cosh) { 17 | test::probe_accuracy( 18 | [](const T &a) -> T { return cosh(a); }, 19 | [](double a) { return std::cosh(a); }, 20 | Value(-10), Value(10), 21 | 8 22 | ); 23 | 24 | Array x((Value) 1); 25 | Array y(x); 26 | assert(cosh(x) == cosh(y)); 27 | } 28 | 29 | ENOKI_TEST_FLOAT(test03_sincosh_sin) { 30 | test::probe_accuracy( 31 | [](const T &a) -> T { return sincosh(a).first; }, 32 | [](double a) { return std::sinh(a); }, 33 | Value(-10), Value(10), 34 | 8 35 | ); 36 | 37 | Array x((Value) 1), s, c; 38 | Array y(x); 39 | auto result = sincosh(y); 40 | #if !defined(_WIN32) 41 | assert(result.first == sinh(y) && result.second == cosh(y)); 42 | #else 43 | assert(all_nested(abs(result.first - sinh(y)) < T(1e-6f)) && 44 | all_nested(abs(result.second - cosh(y)) < T(1e-6f))); 45 | #endif 46 | } 47 | 48 | ENOKI_TEST_FLOAT(test04_sincosh_cos) { 49 | test::probe_accuracy( 50 | [](const T &a) -> T { return sincosh(a).second; }, 51 | [](double a) { return std::cosh(a); }, 52 | Value(-10), Value(10), 53 | 8 54 | ); 55 | } 56 | 57 | ENOKI_TEST_FLOAT(test05_tanh) { 58 | test::probe_accuracy( 59 | [](const T &a) -> T { return tanh(a); }, 60 | [](double a) { return std::tanh(a); }, 61 | Value(-10), Value(10), 62 | 7 63 | ); 64 | 65 | Array x((Value) 1); 66 | Array y(x); 67 | assert(tanh(x) == tanh(y)); 68 | } 69 | 70 | ENOKI_TEST_FLOAT(test06_csch) { 71 | test::probe_accuracy( 72 | [](const T &a) -> T { return csch(a); }, 73 | [](double a) { return 1/std::sinh(a); }, 74 | Value(-10), Value(10), 75 | 8 76 | ); 77 | } 78 | 79 | ENOKI_TEST_FLOAT(test07_sech) { 80 | test::probe_accuracy( 81 | [](const T &a) -> T { return sech(a); }, 82 | [](double a) { return 1/std::cosh(a); }, 83 | Value(-10), Value(10), 84 | 
9 85 | ); 86 | } 87 | 88 | ENOKI_TEST_FLOAT(test08_coth) { 89 | test::probe_accuracy( 90 | [](const T &a) -> T { return coth(a); }, 91 | [](double a) { return 1/std::tanh(a); }, 92 | Value(-10), Value(10), 93 | 8 94 | ); 95 | } 96 | 97 | ENOKI_TEST_FLOAT(test09_asinh) { 98 | test::probe_accuracy( 99 | [](const T &a) -> T { return asinh(a); }, 100 | [](double a) { return std::asinh(a); }, 101 | Value(-30), Value(30), 102 | 3 103 | ); 104 | Array x((Value) 2); 105 | Array y(x); 106 | assert(asinh(x) == asinh(y)); 107 | } 108 | 109 | ENOKI_TEST_FLOAT(test11_acosh) { 110 | if (Size == 2 && has_avx512er) 111 | return; /// Skip for KNL, Clang 7 generates an unsupported SKX+ instruction :( 112 | test::probe_accuracy( 113 | [](const T &a) -> T { return acosh(a); }, 114 | [](double a) { return std::acosh(a); }, 115 | Value(1), Value(10), 116 | 5 117 | ); 118 | Array x((Value) 2); 119 | Array y(x); 120 | assert(acosh(x) == acosh(y)); 121 | } 122 | 123 | ENOKI_TEST_FLOAT(test12_atanh) { 124 | test::probe_accuracy( 125 | [](const T &a) -> T { return atanh(a); }, 126 | [](double a) { return std::atanh(a); }, 127 | Value(-1 + 0.001), Value(1 - 0.001), 128 | 3 129 | ); 130 | Array x((Value) 0.5); 131 | Array y(x); 132 | assert(atanh(x) == atanh(y)); 133 | } 134 | 135 | -------------------------------------------------------------------------------- /tests/idiv.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/idiv.cpp -- tests integer division by constants 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 
12 | */ 13 | 14 | #include "test.h" 15 | #include 16 | 17 | #define ITERATIONS 1000000 18 | 19 | ENOKI_TEST(test01_idiv_u64) { 20 | std::mt19937_64 mt; 21 | 22 | for (uint64_t i = 2; i < ITERATIONS; ++i) { 23 | uint64_t x = (uint64_t) mt(), y = (uint64_t) mt(), z = y / x; 24 | 25 | divisor precomp(x); 26 | uint64_t q = precomp(y); 27 | if (q != z) 28 | std::cout << y << " / " << x << " = " << q << " vs " << z 29 | << std::endl; 30 | assert(q == z); 31 | 32 | divisor precomp2(i); 33 | q = precomp2(y); 34 | z = y / i; 35 | if (q != z) 36 | std::cout << y << " / " << i << " = " << q << " vs " << z 37 | << std::endl; 38 | assert(q == z); 39 | } 40 | } 41 | 42 | ENOKI_TEST(test02_idiv_u32) { 43 | std::mt19937 mt; 44 | 45 | for (uint32_t i = 2; i < ITERATIONS; ++i) { 46 | uint32_t x = (uint32_t) mt(), y = (uint32_t) mt(), z = y / x; 47 | 48 | divisor precomp(x); 49 | uint32_t q = precomp(y); 50 | if (q != z) 51 | std::cout << y << " / " << x << " = " << q << " vs " << z 52 | << std::endl; 53 | assert(q == z); 54 | 55 | divisor precomp2(i); 56 | q = precomp2(y); 57 | z = y / i; 58 | if (q != z) 59 | std::cout << y << " / " << i << " = " << q << " vs " << z 60 | << std::endl; 61 | assert(q == z); 62 | } 63 | } 64 | 65 | ENOKI_TEST(test03_idiv_s64) { 66 | std::mt19937_64 mt; 67 | 68 | for (uint64_t i = 2; i < ITERATIONS; ++i) { 69 | int64_t x = (int64_t) mt(), y = (int64_t) mt(), z = y / x; 70 | 71 | divisor precomp(x); 72 | int64_t q = precomp(y); 73 | if (q != z) 74 | std::cout << y << " / " << x << " = " << q << " vs " << z 75 | << std::endl; 76 | assert(q == z); 77 | 78 | divisor precomp2((int64_t) i); 79 | q = precomp2(y); 80 | z = y / (int64_t) i; 81 | if (q != z) 82 | std::cout << y << " / " << i << " = " << q << " vs " << z 83 | << std::endl; 84 | assert(q == z); 85 | 86 | divisor precomp3(-(int64_t) i); 87 | q = precomp3(y); 88 | z = y / -(int64_t) i; 89 | if (q != z) 90 | std::cout << y << " / " << i << " = " << q << " vs " << z 91 | << std::endl; 92 | assert(q == 
z); 93 | } 94 | } 95 | 96 | ENOKI_TEST(test03_idiv_s32) { 97 | std::mt19937 mt; 98 | 99 | for (uint32_t i = 2; i < ITERATIONS; ++i) { 100 | int32_t x = (int32_t) mt(), y = (int32_t) mt(), z = y / x; 101 | 102 | divisor precomp(x); 103 | int32_t q = precomp(y); 104 | if (q != z) 105 | std::cout << y << " / " << x << " = " << q << " vs " << z 106 | << std::endl; 107 | assert(q == z); 108 | 109 | divisor precomp2((int32_t) i); 110 | q = precomp2(y); 111 | z = y / (int32_t) i; 112 | if (q != z) 113 | std::cout << y << " / " << i << " = " << q << " vs " << z 114 | << std::endl; 115 | assert(q == z); 116 | 117 | divisor precomp3(-(int32_t) i); 118 | q = precomp3(y); 119 | z = y / -(int32_t) i; 120 | if (q != z) 121 | std::cout << y << " / " << i << " = " << q << " vs " << z 122 | << std::endl; 123 | assert(q == z); 124 | } 125 | } 126 | 127 | ENOKI_TEST_INT(test04_idiv_vector) { 128 | std::mt19937_64 mt; 129 | for (Value i = 2; i < 1000; ++i) { 130 | Value x = (Value) mt(), y = (Value) mt(); 131 | assert((T(y) / x)[0] == y / x); 132 | assert((T(y) / i)[0] == y / i); 133 | assert((T(y) % x)[0] == y % x); 134 | assert((T(y) % i)[0] == y % i); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /tests/morton.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/morton.cpp -- tests Morton/Z-order curve encoding and decoding 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 
12 | */ 13 | 14 | #include "test.h" 15 | #include 16 | 17 | ENOKI_TEST(test01_morton_u32_2d_scalar) { 18 | using T = uint32_t; 19 | using T2 = enoki::Array; 20 | 21 | T2 value = T2(123u, 456u); 22 | T value2 = morton_encode(value); 23 | T2 value3 = morton_decode(value2); 24 | 25 | assert(value == value3); 26 | } 27 | 28 | ENOKI_TEST(test02_morton_u32_2d_array) { 29 | using T = enoki::Array; 30 | using T2 = enoki::Array; 31 | 32 | T2 value = T2(123u, 456u); 33 | T value2 = morton_encode(value); 34 | T2 value3 = morton_decode(value2); 35 | 36 | assert(value == value3); 37 | } 38 | 39 | ENOKI_TEST(test03_morton_u32_3d_scalar) { 40 | using T = uint32_t; 41 | using T2 = enoki::Array; 42 | 43 | T2 value = T2(123u, 456u, 789u); 44 | T value2 = morton_encode(value); 45 | T2 value3 = morton_decode(value2); 46 | 47 | assert(value == value3); 48 | } 49 | 50 | ENOKI_TEST(test04_morton_u32_3d_array) { 51 | using T = enoki::Array; 52 | using T2 = enoki::Array; 53 | 54 | T2 value = T2(123u, 456u, 789u); 55 | T value2 = morton_encode(value); 56 | T2 value3 = morton_decode(value2); 57 | 58 | assert(value == value3); 59 | } 60 | 61 | ENOKI_TEST(test05_morton_u64_2d_scalar) { 62 | using T = uint64_t; 63 | using T2 = enoki::Array; 64 | 65 | T2 value = T2(123u, 456u); 66 | T value2 = morton_encode(value); 67 | T2 value3 = morton_decode(value2); 68 | 69 | assert(value == value3); 70 | } 71 | 72 | ENOKI_TEST(test06_morton_u64_2d_array) { 73 | using T = enoki::Array; 74 | using T2 = enoki::Array; 75 | 76 | T2 value = T2(123u, 456u); 77 | T value2 = morton_encode(value); 78 | T2 value3 = morton_decode(value2); 79 | 80 | assert(value == value3); 81 | } 82 | 83 | ENOKI_TEST(test07_morton_u64_3d_scalar) { 84 | using T = uint64_t; 85 | using T2 = enoki::Array; 86 | 87 | T2 value = T2(123u, 456u, 789u); 88 | T value2 = morton_encode(value); 89 | T2 value3 = morton_decode(value2); 90 | 91 | assert(value == value3); 92 | } 93 | 94 | ENOKI_TEST(test08_morton_u64_3d_array) { 95 | using T = 
enoki::Array; 96 | using T2 = enoki::Array; 97 | 98 | T2 value = T2(123u, 456u, 789u); 99 | T value2 = morton_encode(value); 100 | T2 value3 = morton_decode(value2); 101 | 102 | assert(value == value3); 103 | } 104 | -------------------------------------------------------------------------------- /tests/nested.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/nested.cpp -- tests nested arrays and other fancy scalar types 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 12 | */ 13 | 14 | #include "test.h" 15 | 16 | ENOKI_TEST(test01_string) { /* Arrays can be instantiated for all sorts of types */ 17 | Array v1("Hello ", " How are "); 18 | Array v2("world!", "you?"); 19 | 20 | assert(v1.x() == "Hello "); 21 | assert(to_string(v1) == "[Hello , How are ]"); 22 | assert(to_string(v1 + v2) == "[Hello world!, How are you?]"); 23 | assert(hsum(v1 + v2) == "Hello world! How are you?"); 24 | assert(hsum(v1 + std::string("you!")) == "Hello you! 
How are you!"); 25 | } 26 | 27 | ENOKI_TEST(test02_float_array) { 28 | /* Value initialization */ 29 | Array a(1.f); 30 | assert(to_string(a) == "[1, 1, 1, 1]"); 31 | 32 | /* Value initialization */ 33 | Array b(1.f, 2.f, 3.f, 4.f); 34 | assert(to_string(b) == "[1, 2, 3, 4]"); 35 | assert(b.x() == 1.f && b.y() == 2.f && b.z() == 3.f && b.w() == 4.f); 36 | 37 | /* Copy initialization */ 38 | Array c(b); 39 | assert(to_string(c) == "[1, 2, 3, 4]"); 40 | 41 | /* Operations involving scalars (left) */ 42 | assert(to_string(c + 1.f) == "[2, 3, 4, 5]"); 43 | 44 | /* Operations involving scalars (right) */ 45 | assert(to_string(1.f + c) == "[2, 3, 4, 5]"); 46 | 47 | /* Binary operations */ 48 | assert(to_string(c + c) == "[2, 4, 6, 8]"); 49 | } 50 | 51 | ENOKI_TEST(test03_floatref_array) { 52 | float tmp1 = 1.f; 53 | Array tmp2(1.f, 2.f, 3.f, 4.f); 54 | 55 | /* Value initialization */ 56 | Array a(tmp1, tmp1, tmp1, tmp1); 57 | assert(to_string(a) == "[1, 1, 1, 1]"); 58 | a.x() = 2.f; 59 | assert(to_string(a) == "[2, 2, 2, 2]"); 60 | 61 | /* Reference an existing array */ 62 | Array b(tmp2); 63 | assert(to_string(b) == "[1, 2, 3, 4]"); 64 | assert(to_string(a + b) == "[3, 4, 5, 6]"); 65 | 66 | /* .. 
and reference it once more */ 67 | Array c(b); 68 | 69 | /* Convert back into a regular array */ 70 | Array d(c); 71 | assert(to_string(d) == "[1, 2, 3, 4]"); 72 | 73 | /* Operations involving scalars (left) */ 74 | assert(to_string(c + 1.f) == "[2, 3, 4, 5]"); 75 | 76 | /* Operations involving scalars (right) */ 77 | assert(to_string(1.f + c) == "[2, 3, 4, 5]"); 78 | 79 | /* Binary operations */ 80 | assert(to_string(c + c) == "[2, 4, 6, 8]"); 81 | assert(to_string(d + c) == "[2, 4, 6, 8]"); 82 | assert(to_string(c + d) == "[2, 4, 6, 8]"); 83 | 84 | c += c; c += d; c += 1.f; 85 | 86 | assert(to_string(c) == "[4, 7, 10, 13]"); 87 | } 88 | 89 | ENOKI_TEST(test04_array_of_arrays) { 90 | using Vector4f = Array; 91 | using Vector4fP = Array; 92 | 93 | Vector4f a(1, 2, 3, 4); 94 | Vector4f b(1, 1, 1, 1); 95 | Vector4fP c(a, b); 96 | 97 | assert(to_string(c) == "[[1, 1],\n [2, 1],\n [3, 1],\n [4, 1]]"); 98 | assert(to_string(c + c) == "[[2, 2],\n [4, 2],\n [6, 2],\n [8, 2]]"); 99 | assert(to_string(c + c.x()) == "[[2, 2],\n [4, 3],\n [6, 4],\n [8, 5]]"); 100 | assert(to_string(c + 1.f) == "[[2, 2],\n [3, 2],\n [4, 2],\n [5, 2]]"); 101 | assert(to_string(1.f + c) == "[[2, 2],\n [3, 2],\n [4, 2],\n [5, 2]]"); 102 | 103 | assert((std::is_same, Vector4f>::value)); 104 | assert((std::is_same, float>::value)); 105 | } 106 | 107 | ENOKI_TEST(test05_mask_types) { 108 | assert((std::is_same, bool>::value)); 109 | assert((std::is_same, float>::value)); 110 | assert((std::is_same>, float>::value)); 111 | } 112 | 113 | ENOKI_TEST(test06_nested_reductions) { 114 | using FloatP = Array; 115 | using IntP = Array; 116 | using Vector3fP = Array; 117 | 118 | auto my_all = [](Vector3fP x) { return all(x > 4.f); }; 119 | auto my_none = [](Vector3fP x) { return none(x > 4.f); }; 120 | auto my_any = [](Vector3fP x) { return any(x > 4.f); }; 121 | auto my_count = [](Vector3fP x) { return count(x > 4.f); }; 122 | 123 | auto my_all_nested = [](Vector3fP x) { return all_nested(x > 4.f); }; 124 | 
auto my_none_nested = [](Vector3fP x) { return none_nested(x > 4.f); }; 125 | auto my_any_nested = [](Vector3fP x) { return any_nested(x > 4.f); }; 126 | auto my_count_nested = [](Vector3fP x) { return count_nested(x > 4.f); }; 127 | /* Component k of data is arange() + k (k = 0, 1, 2) */ 128 | auto data = 129 | Vector3fP(arange() + 0.f, arange() + 1.f, 130 | arange() + 2.f); 131 | 132 | auto to_string = [](auto value) { 133 | std::ostringstream oss; 134 | detail::print(oss, value, false, shape(value)); 135 | return oss.str(); 136 | }; 137 | /* Renders a mask as 1/0 integers so it can be compared as text */ 138 | auto str = [&](auto x) { 139 | return to_string(select(reinterpret_array>(x), IntP(1), IntP(0))); 140 | }; 141 | /* Expected values are consistent with arange() yielding 0..15, i.e. entry i = (i, i+1, i+2): all three exceed 4 from i = 5, at least one from i = 3, and 1 + 2 + 11*3 = 36 in total */ 142 | assert(str(my_all(data)) == "[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"); 143 | assert(str(my_none(data)) == "[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"); 144 | assert(str(my_any(data)) == "[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"); 145 | assert(to_string(my_count(data)) == "[0, 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]"); 146 | assert(!my_all_nested(data)); 147 | assert(!my_none_nested(data)); 148 | assert(my_any_nested(data)); 149 | assert(my_count_nested(data) == 36); 150 | } 151 | -------------------------------------------------------------------------------- /tests/python/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace enoki; 6 | /* a1/a2 build an array, print it to stdout and return it; a3/a4 print the array passed in */ 7 | Array a1() { 8 | auto result = Array(1, 2, 3, 4); 9 | std::cout << result << std::endl; 10 | return result; 11 | } 12 | 13 | Array, 4> a2() { 14 | auto result = Array, 4>(1, 2, 3, 4); 15 | std::cout << result << std::endl; 16 | return result; 17 | } 18 | 19 | void a3(Array value) { 20 | std::cout << value << std::endl; 21 | } 22 | 23 | void a4(Array, 4> value) { 24 | std::cout << value << std::endl; 25 | } 26 | /* Same pattern for Complex values: c1/c2/c2_b print and return, c3/c4 print their argument */ 27 | Complex c1() { 28 | auto result = Complex(1, 2); 29 | std::cout << result << std::endl; 30 | return result; 31 | } 32 | 33 | Complex> c2() { 34 | auto result = Complex>(1, 2); 35 | 
std::cout << result << std::endl; 36 | return result; 37 | } 38 | 39 | Complex, 4>> c2_b() { 40 | auto result = Complex, 4>>(1.f, 2.f); 41 | std::cout << result << std::endl; 42 | return result; 43 | } 44 | 45 | 46 | void c3(Complex value) { 47 | std::cout << value << std::endl; 48 | } 49 | 50 | void c4(Complex> value) { 51 | std::cout << value << std::endl; 52 | } 53 | /* Thin forwarding template around enoki::atan; registered through enoki::vectorize_wrapper in the module below */ 54 | template Float atan(Float x) { 55 | return enoki::atan(x); 56 | } 57 | /* pybind11 module "test": registers the functions above under their C++ names */ 58 | PYBIND11_MODULE(test, m) { 59 | /* Real */ 60 | m.def("a1", &a1); 61 | m.def("a2", &a2); 62 | m.def("a3", &a3); 63 | m.def("a4", &a4); 64 | 65 | /* Complex */ 66 | m.def("c1", &c1); 67 | m.def("c2", &c2); 68 | m.def("c2_b", &c2_b); 69 | m.def("c3", &c3); 70 | m.def("c4", &c4); 71 | 72 | using FloatP = Packet; 73 | m.def("atan", enoki::vectorize_wrapper(&atan)); 74 | } 75 | 76 | -------------------------------------------------------------------------------- /tests/python/test_pytorch.py: -------------------------------------------------------------------------------- 1 | import enoki as ek 2 | import numpy as np 3 | import pytest 4 | import torch 5 | 6 | class EnokiAtan2(torch.autograd.Function): 7 | """PyTorch function example from the documentation.""" 8 | @staticmethod 9 | def forward(ctx, arg1, arg2): 10 | ctx.in1 = ek.FloatD(arg1) 11 | ctx.in2 = ek.FloatD(arg2) 12 | ek.set_requires_gradient(ctx.in1, arg1.requires_grad) 13 | ek.set_requires_gradient(ctx.in2, arg2.requires_grad) 14 | ctx.out = ek.atan2(ctx.in1, ctx.in2) 15 | out_torch = ctx.out.torch() 16 | ek.cuda_malloc_trim() 17 | return out_torch 18 | 19 | @staticmethod 20 | def backward(ctx, grad_out): 21 | ek.set_gradient(ctx.out, ek.FloatC(grad_out)) 22 | ek.FloatD.backward() 23 | result = (ek.gradient(ctx.in1).torch() 24 | if ek.requires_gradient(ctx.in1) else None, 25 | ek.gradient(ctx.in2).torch() 26 | if ek.requires_gradient(ctx.in2) else None) 27 | del ctx.out, ctx.in1, ctx.in2 28 | ek.cuda_malloc_trim() 29 | return result 30 | 31 | 32 | def 
test01_set_gradient(): 33 | a = ek.FloatD(42, 10) 34 | ek.set_requires_gradient(a) 35 | # A FloatD gradient is expected to be rejected with TypeError; the FloatC gradient below is accepted 36 | with pytest.raises(TypeError): 37 | grad = ek.FloatD(-1, 10) 38 | ek.set_gradient(a, grad) 39 | 40 | grad = ek.FloatC(-1, 10) 41 | ek.set_gradient(a, grad) 42 | assert np.allclose(grad.numpy(), ek.gradient(a).numpy()) 43 | 44 | # Note: if `backward` is not called here, test03 segfaults later. 45 | # TODO: we should not need this, there's most likely some missing cleanup when `a` is destructed 46 | ek.FloatD.backward() 47 | del a, grad 48 | 49 | # Converts to a torch tensor, adds 8 in place, and checks the values (42 + 8 = 50) after a round trip through NumPy 50 | def test02_array_to_torch(): 51 | a = ek.FloatD(42, 10) 52 | a_torch = a.torch() 53 | assert isinstance(a_torch, torch.Tensor) 54 | a_torch += 8 55 | a_np = a_torch.cpu().numpy() 56 | assert isinstance(a_np, np.ndarray) 57 | assert np.allclose(a_np, 50) 58 | 59 | 60 | def test03_pytorch_function(): 61 | enoki_atan2 = EnokiAtan2.apply 62 | # At (y, x) = (1, 2): d/dy atan2 = x / (x^2 + y^2) = 0.4 and d/dx atan2 = -y / (x^2 + y^2) = -0.2 63 | y = torch.tensor(1.0, device='cuda') 64 | x = torch.tensor(2.0, device='cuda') 65 | y.requires_grad_() 66 | x.requires_grad_() 67 | 68 | o = enoki_atan2(y, x) 69 | o.backward() 70 | assert np.allclose(y.grad.cpu(), 0.4) 71 | assert np.allclose(x.grad.cpu(), -0.2) 72 | -------------------------------------------------------------------------------- /tests/ray.h: -------------------------------------------------------------------------------- 1 | /* 2 | tests/ray.h -- showcases how to extend Enoki vectorization to custom 3 | data types 4 | 5 | Enoki is a C++ template library that enables transparent vectorization 6 | of numerical kernels using SIMD instruction sets available on current 7 | processor architectures. 8 | 9 | Copyright (c) 2018 Wenzel Jakob 10 | 11 | All rights reserved. Use of this source code is governed by a BSD-style 12 | license that can be found in the LICENSE file. 
13 | */ 14 | 15 | #include 16 | 17 | using namespace enoki; 18 | 19 | /** 20 | * Generic 3D ray class: can represent either a single ray, a static ray 21 | * bundle, or a dynamic heap-allocated bundle of rays 22 | */ 23 | template struct Ray { 24 | using Vector = Vector_; 25 | using Value = value_t; 26 | /* o and d are the two terms of the ray equation o + t*d evaluated by operator() (origin and direction) */ 27 | Vector o; 28 | Vector d; 29 | 30 | /// Compute a position along a ray 31 | Vector operator()(const Value &t) const { return o + t*d; } 32 | /* Both macros list the members o and d. NOTE(review): presumably this registration is what lets Ray be passed through vectorize() in sphere.cpp -- confirm in the macro documentation */ 33 | ENOKI_STRUCT(Ray, o, d) 34 | }; 35 | 36 | ENOKI_STRUCT_SUPPORT(Ray, o, d) 37 | 38 | -------------------------------------------------------------------------------- /tests/sh.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/sh.cpp -- tests spherical harmonics evaluation code 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 
12 | */ 13 | 14 | #include "test.h" 15 | #include 16 | 17 | ENOKI_TEST(test00_sh) { 18 | using T = Array; 19 | /* 100 reference values = (9 + 1)^2, the largest count read by the comparison loop below (i = 0..9, j < (i+1)^2) */ 20 | /* From mathematica */ 21 | const float ref[100] = { 22 | 0.2820947918f, -0.2611690283f, 0.3917535424f, -0.1305845141f, 23 | 0.1560783472f, -0.4682350417f, 0.2928635963f, -0.2341175208f, 24 | -0.1170587604f, 0.02252796895f, 0.3310921732f, -0.5409527810f, 25 | 0.06411572364f, -0.2704763905f, -0.2483191299f, 0.1239038292f, 26 | -0.07663294720f, 0.05418767663f, 0.4730873479f, -0.4301013494f, 27 | -0.1926808176f, -0.2150506747f, -0.3548155109f, 0.2980322214f, 28 | -0.02235127627f, 0.03401106316f, -0.2037835426f, 0.08939333855f, 29 | 0.5098360937f, -0.1642890433f, -0.3717265730f, -0.08214452164f, 30 | -0.3823770702f, 0.4916633621f, -0.05943686658f, -0.03669614710f, 31 | 0.01095484717f, 0.09832164158f, -0.3751138475f, 0.1148149412f, 32 | 0.4035308753f, 0.1617920839f, -0.4010373398f, 0.08089604195f, 33 | -0.3026481565f, 0.6314821768f, -0.1094082055f, -0.1060838764f, 34 | 0.02912993451f, -0.01914767451f, 0.03401803232f, 0.1978196393f, 35 | -0.5458487747f, 0.1174915463f, 0.1714596820f, 0.4252852218f, 36 | -0.2693765472f, 0.2126426109f, -0.1285947615f, 0.6462035049f, 37 | -0.1592058926f, -0.2134369792f, 0.09045704049f, -0.001997419283f, 38 | 0.006375451838f, -0.06329912897f, 0.07377498039f, 0.3173673505f, 39 | -0.6530654560f, 0.09087000611f, -0.1162228838f, 0.5245254059f, 40 | -0.02984068370f, 0.2622627030f, 0.08716716288f, 0.4997850336f, 41 | -0.1904774247f, -0.3424226677f, 0.1961743797f, -0.006603146547f, 42 | -0.009999592615f, 0.003740870431f, 0.02228152988f, -0.1464888644f, 43 | 0.1282747834f, 0.4253232566f, -0.6393164269f, 0.03874458847f, 44 | -0.3611982142f, 0.4202921001f, 0.2217097319f, 0.2101460500f, 45 | 0.2708986607f, 0.2130952366f, -0.1864672912f, -0.4589014084f, 46 | 0.3410943104f, -0.01528121247f, -0.03494751858f, 0.006246941013f 47 | }; 48 | 49 | using Vector3f = Array; 50 | using Scalar = scalar_t; 51 | 52 | Vector3f d = 
normalize(Vector3f(Scalar(1), Scalar(2), Scalar(3))); 53 | /* For each i (presumably the SH order -- confirm against sh_eval), evaluate into out and compare entry 0 of the first (i+1)^2 results with the reference table */ 54 | T out[100]; 55 | for (size_t i = 0; i < 10; ++i) { 56 | sh_eval(d, i, out); 57 | for (size_t j = 0; j<(i+1)*(i+1); ++j) { 58 | assert(std::abs(out[j].coeff(0) - ref[j]) < 5e-6f); 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /tests/sphere.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | tests/sphere.cpp -- a simple sphere ray tracer 3 | 4 | Enoki is a C++ template library that enables transparent vectorization 5 | of numerical kernels using SIMD instruction sets available on current 6 | processor architectures. 7 | 8 | Copyright (c) 2019 Wenzel Jakob 9 | 10 | All rights reserved. Use of this source code is governed by a BSD-style 11 | license that can be found in the LICENSE file. 12 | */ 13 | 14 | #if defined(_MSC_VER) 15 | # pragma warning(disable: 4723) /// potential divide by 0 16 | #endif 17 | 18 | #include 19 | #include 20 | #include "ray.h" 21 | #include 22 | 23 | // ----------------------------------------------------------------------- 24 | //! @{ \name Convenient type aliases 25 | // ----------------------------------------------------------------------- 26 | 27 | /* Floats and packets of floats */ 28 | using FloatP = Array; 29 | using FloatX = DynamicArray; 30 | 31 | /* 2D vectors and static/dynamic packets of 2D vectors */ 32 | using Vector2f = Array; 33 | using Vector2fP = Array; 34 | using Vector2fX = Array; 35 | 36 | /* 3D vectors and static/dynamic packets of 3D vectors */ 37 | using Vector3f = Array; 38 | using Vector3fP = Array; 39 | using Vector3fX = Array; 40 | 41 | /* rays and static/dynamic packets of rays */ 42 | using Ray3f = Ray; 43 | using Ray3fP = Ray; 44 | using Ray3fX = Ray; 45 | 46 | /* Aliases to create types that are compatible with other type */ 47 | template using vector3f_t = Array, 3>; 48 | template using ray3f_t = Ray>; 49 | 50 | //! 
@} 51 | // ----------------------------------------------------------------------- 52 | 53 | // ----------------------------------------------------------------------- 54 | //! @{ \name Computational kernels of the ray tracer 55 | // ----------------------------------------------------------------------- 56 | 57 | /// "Sensor": trace rays for a given X and Y coordinate 58 | template ray3f_t make_rays(const Vector2f &p) { 59 | using Vector3f = vector3f_t; 60 | using Ray3f = ray3f_t; 61 | /* First argument (p.x, p.y, -1), second argument (0, 0, 1): every ray starts at z = -1 and points along +z */ 62 | return Ray3f(Vector3f(p.x(), p.y(), -1.f), 63 | Vector3f(0.f, 0.f, 1.f)); 64 | } 65 | 66 | /// "Shape": intersect against sphere 67 | template ENOKI_INLINE typename Ray::Vector intersect_rays(const Ray &r) { 68 | /* Coefficients of quadratic */ 69 | auto a = dot(r.d, r.d); 70 | auto b = 2.f * dot(r.o, r.d); 71 | auto c = dot(r.o, r.o) - 1.f; 72 | /* c = |o|^2 - 1, i.e. a unit sphere centered at the origin */ 73 | /* Solve quadratic equation */ 74 | auto discrim = b*b - 4.f*a*c; 75 | auto t = (-b + sqrt(discrim)) / (2.f * a); 76 | /* Uses the (-b + sqrt) root; where the discriminant is negative the result is 0 instead of a position on the ray */ 77 | return select(discrim >= 0.f, r(t), 0.f); 78 | } 79 | 80 | /// "Shader": directional illumination 81 | template ENOKI_INLINE typename Vector3f::Value shade_hits(Vector3f n) { 82 | return 0.2f + max(dot(n, Vector3f(-1.f, -1.f, 2.f)), 0.f) * 90.f; 83 | } 84 | 85 | /// All three kernels combined into one 86 | template ENOKI_INLINE typename Vector2::Value combined(Vector2 n) { 87 | return shade_hits(intersect_rays(make_rays(n))); 88 | } 89 | 90 | //! @} 91 | // ----------------------------------------------------------------------- 92 | 93 | // ----------------------------------------------------------------------- 94 | //! 
@{ \name Wrappers which execute the above kernels for dynamic arrays 95 | // ----------------------------------------------------------------------- 96 | /* Each wrapper passes a generic lambda that calls one kernel, together with the dynamic array argument, to vectorize() */ 97 | Ray3fX make_rays_dynamic(const Vector2fX &p) { 98 | return vectorize([](auto &&p) { return make_rays(p); }, p); 99 | } 100 | 101 | Vector3fX intersect_rays_dynamic(const Ray3fX &r) { 102 | return vectorize([](auto &&r) { return intersect_rays(r); }, r); 103 | } 104 | 105 | FloatX shade_hits_dynamic(const Vector3fX &n) { 106 | return vectorize([](auto &&n) { return shade_hits(n); }, n); 107 | } 108 | 109 | FloatX combined_dynamic(const Vector2fX &p) { 110 | return vectorize([](auto &&p) { return combined(p); }, p); 111 | } 112 | 113 | //! @} 114 | // ----------------------------------------------------------------------- 115 | 116 | auto clk() { return std::chrono::high_resolution_clock::now(); } 117 | /* Difference b - a scaled by 1000; main() prints the result with an "ms" suffix */ 118 | template float clkdiff(T a, T b) { 119 | return std::chrono::duration(b - a).count() * 1000; 120 | } 121 | /* Writes a plain-text PPM ("P3"). The header hard-codes 1024 x 1024, matching the 1024-sample grid built in main(); each value is truncated to int (not clamped) and written three times as a gray pixel */ 122 | void write_image(const std::string &filename, const FloatX &image) { 123 | std::ofstream os(filename); 124 | os << "P3\n1024 1024\n255\n"; 125 | for (float v : image) 126 | os << (int) v << " " << (int) v << " " << (int) v << "\n"; 127 | } 128 | /* Times the three kernels run as separate passes, then the single combined kernel, over the same grid; each result is written to its own image file */ 129 | int main(int /* argc */, char ** /* argv */) { 130 | auto idx = linspace(-1.2f, 1.2f, 1024); 131 | auto grid = meshgrid(idx, idx); 132 | 133 | /* benchmark1 */ { 134 | auto time_start = clk(); 135 | Ray3fX rays = make_rays_dynamic(grid); 136 | Vector3fX hits = intersect_rays_dynamic(rays); 137 | FloatX image = shade_hits_dynamic(hits); 138 | auto time_end = clk(); 139 | std::cerr << "Separate kernels: " << clkdiff(time_start, time_end) << " ms " << std::endl; 140 | write_image("sphere1.ppm", image); 141 | } 142 | 143 | /* benchmark2 */ { 144 | auto time_start = clk(); 145 | FloatX image = combined_dynamic(grid); 146 | auto time_end = clk(); 147 | std::cerr << "Combined kernels: " << clkdiff(time_start, time_end) << " ms " << std::endl; 148 | 
write_image("sphere2.ppm", image); 149 | } 150 | 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /tests/trig.cpp: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | /* Tests 01-06 probe each function against a double-precision std:: reference over [-8192, 8192] */ 3 | ENOKI_TEST_FLOAT(test01_sin) { 4 | test::probe_accuracy( 5 | [](const T &a) -> T { return sin(a); }, 6 | [](double a) { return std::sin(a); }, 7 | Value(-8192), Value(8192), 8 | 19 9 | ); 10 | 11 | Array x((Value) M_PI); 12 | Array y(x); 13 | assert(sin(x) == sin(y)); 14 | } 15 | 16 | ENOKI_TEST_FLOAT(test02_cos) { 17 | test::probe_accuracy( 18 | [](const T &a) -> T { return cos(a); }, 19 | [](double a) { return std::cos(a); }, 20 | Value(-8192), Value(8192), 21 | 47 22 | ); 23 | 24 | Array x((Value) M_PI); 25 | Array y(x); 26 | assert(cos(x) == cos(y)); 27 | } 28 | 29 | ENOKI_TEST_FLOAT(test03_sincos_sin) { 30 | test::probe_accuracy( 31 | [](const T &a) -> T { return sincos(a).first; }, 32 | [](double a) { return std::sin(a); }, 33 | Value(-8192), Value(8192), 34 | 19 35 | ); 36 | } 37 | 38 | ENOKI_TEST_FLOAT(test04_sincos_cos) { 39 | test::probe_accuracy( 40 | [](const T &a) -> T { return sincos(a).second; }, 41 | [](double a) { return std::cos(a); }, 42 | Value(-8192), Value(8192), 43 | 47 44 | ); 45 | /* sincos() must return the same pair of values as separate sin() and cos() calls */ 46 | Array x((Value) M_PI); 47 | Array y(x); 48 | auto result = sincos(y); 49 | assert(result.first == sin(y) && result.second == cos(y)); 50 | } 51 | 52 | ENOKI_TEST_FLOAT(test05_tan) { 53 | test::probe_accuracy( 54 | [](const T &a) -> T { return tan(a); }, 55 | [](double a) { return std::tan(a); }, 56 | Value(-8192), Value(8192), 57 | 30 58 | ); 59 | 60 | Array x((Value) M_PI); 61 | Array y(x); 62 | assert(tan(x) == tan(y)); 63 | } 64 | 65 | ENOKI_TEST_FLOAT(test06_cot) { 66 | test::probe_accuracy( 67 | [](const T &a) -> T { return cot(a); }, 68 | [](double a) { return 1.0 / std::tan(a); }, 69 | Value(-8192), Value(8192), 70 | 47 71 | ); 72 | 73 | Array x((Value) M_PI); 74 
| Array y(x); 75 | assert(cot(x) == cot(y)); /* compare cot(), the function this test covers */ 76 | } 77 | 78 | ENOKI_TEST_FLOAT(test07_asin) { 79 | test::probe_accuracy( 80 | [](const T &a) -> T { return asin(a); }, 81 | [](double a) { return std::asin(a); }, 82 | Value(-1), Value(1), 83 | 61 84 | ); 85 | 86 | Array x((Value) 0.5); 87 | Array y(x); 88 | assert(asin(x) == asin(y)); 89 | } 90 | 91 | ENOKI_TEST_FLOAT(test08_acos) { 92 | test::probe_accuracy( 93 | [](const T &a) -> T { return acos(a); }, 94 | [](double a) { return std::acos(a); }, 95 | Value(-1), Value(1), 96 | 4 97 | ); 98 | 99 | Array x((Value) 0.5); 100 | Array y(x); 101 | assert(acos(x) == acos(y)); 102 | } 103 | 104 | ENOKI_TEST_FLOAT(test09_atan) { 105 | test::probe_accuracy( 106 | [](const T &a) -> T { return atan(a); }, 107 | [](double a) { return std::atan(a); }, 108 | Value(-1), Value(1), 109 | 12 110 | ); 111 | 112 | Array x((Value) 0.5); 113 | Array y(x); 114 | assert(atan(x) == atan(y)); 115 | } 116 | /* Sweeps a 101 x 101 grid over [-1, 1]^2 and compares with std::atan2; points with x == 0 or y == 0 are skipped */ 117 | ENOKI_TEST_FLOAT(test10_atan2) { 118 | for (int ix = 0; ix <= 100; ++ix) { 119 | for (int iy = 0; iy <= 100; ++iy) { 120 | Value x = Value(ix) / Value(100) * 2 - 1; 121 | Value y = Value(iy) / Value(100) * 2 - 1; 122 | T atan2_ = T(atan2(T(y), T(x)))[0]; 123 | Value atan2_ref = std::atan2(y, x); 124 | if (x == 0 || y == 0) 125 | continue; 126 | assert(std::abs(atan2_[0] - atan2_ref) < 3.58e-6f); 127 | } 128 | } 129 | } 130 | /* Spot-checks csc, sec and cot at 1 against the reciprocals of std::sin, std::cos and std::tan */ 131 | ENOKI_TEST_FLOAT(test11_csc_sec_cot) { 132 | assert(std::abs(T(csc(T(1.f)) - 1 / std::sin(1.f))[0]) < 1e-6f); 133 | assert(std::abs(T(sec(T(1.f)) - 1 / std::cos(1.f))[0]) < 1e-6f); 134 | assert(std::abs(T(cot(T(1.f)) - 1 / std::tan(1.f))[0]) < 1e-6f); 135 | } 136 | 137 | ENOKI_TEST_FLOAT(test12_safe_math) { 138 | #if defined(_MSC_VER) 139 | // MSVC codegen issue :-| 140 | std::cout << abs(safe_asin(T(Value(-10))) - Value(-M_PI / 2)) << std::endl; 141 | #endif 142 | assert(all(abs(safe_asin(T(Value(-10))) - Value(-M_PI / 2)) < 1e-6f)); 143 | assert(all(abs(safe_asin(T(Value( 10))) - Value( M_PI / 2)) < 
1e-6f)); 144 | assert(all(abs(safe_acos(T(Value(-10))) - Value(M_PI)) < 1e-6f)); 145 | assert(all(abs(safe_acos(T(Value( 10))) - Value(0)) < 1e-6f)); 146 | assert(all(abs(safe_sqrt(T(Value(4))) - Value(2)) < 1e-6f)); 147 | assert(all(abs(safe_sqrt(T(Value(-1))) - Value(0)) < 1e-6f)); /* the expected values for out-of-domain inputs equal the results at the nearest valid input (acos(-1), acos(1), sqrt(0)) */ 148 | } 149 | --------------------------------------------------------------------------------