├── .appveyor.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── docs
    ├── _static
    │   └── theme_overrides.css
    ├── _templates
    │   └── page.html
    ├── advanced-01.svg
    ├── advanced-02.svg
    ├── advanced-03.svg
    ├── advanced.rst
    ├── autodiff-01.svg
    ├── autodiff-02.svg
    ├── autodiff-03.svg
    ├── autodiff-04.svg
    ├── autodiff.rst
    ├── basics-01.svg
    ├── basics-02.svg
    ├── basics-03.svg
    ├── basics-04.svg
    ├── basics.rst
    ├── calls.rst
    ├── changelog.rst
    ├── color.rst
    ├── complex.rst
    ├── conf.py
    ├── custom.rst
    ├── demo.rst
    ├── dynamic-01.svg
    ├── dynamic-02.svg
    ├── dynamic-03.svg
    ├── dynamic-04.svg
    ├── dynamic-05.svg
    ├── dynamic-06.svg
    ├── dynamic.rst
    ├── enoki-logo.png
    ├── enoki-logo.svg
    ├── gpu.rst
    ├── half.rst
    ├── index.rst
    ├── intro-01.png
    ├── intro-01.svg
    ├── intro-02.png
    ├── intro-02.svg
    ├── intro-03.png
    ├── intro.rst
    ├── matrix.rst
    ├── morton-01.svg
    ├── morton.rst
    ├── nested-01.svg
    ├── nested-02.svg
    ├── nested-03.svg
    ├── nested-04.svg
    ├── nested-05.svg
    ├── nested.rst
    ├── python.rst
    ├── quaternions.rst
    ├── random.rst
    ├── reference.rst
    ├── requirements.txt
    ├── sh.rst
    ├── stl.rst
    └── transform.rst
├── include
    └── enoki
    │   ├── array.h
    │   ├── array_avx.h
    │   ├── array_avx2.h
    │   ├── array_avx512.h
    │   ├── array_base.h
    │   ├── array_call.h
    │   ├── array_enum.h
    │   ├── array_fallbacks.h
    │   ├── array_generic.h
    │   ├── array_idiv.h
    │   ├── array_intrin.h
    │   ├── array_kmask.h
    │   ├── array_macro.h
    │   ├── array_masked.h
    │   ├── array_math.h
    │   ├── array_neon.h
    │   ├── array_recursive.h
    │   ├── array_round.h
    │   ├── array_router.h
    │   ├── array_sse42.h
    │   ├── array_static.h
    │   ├── array_struct.h
    │   ├── array_traits.h
    │   ├── array_utils.h
    │   ├── autodiff.h
    │   ├── color.h
    │   ├── complex.h
    │   ├── cuda.h
    │   ├── dynamic.h
    │   ├── fwd.h
    │   ├── half.h
    │   ├── matrix.h
    │   ├── morton.h
    │   ├── python.h
    │   ├── quaternion.h
    │   ├── random.h
    │   ├── sh.h
    │   ├── special.h
    │   ├── stl.h
    │   └── transform.h
├── resources
    ├── FindSphinx.cmake
    ├── __init__.py
    ├── archflags_unix.cpp
    ├── archflags_win32.cpp
    ├── check-style.sh
    ├── enoki_gdb.py
    └── enoki_lldb.py
├── src
    ├── autodiff
    │   └── autodiff.cpp
    ├── cuda
    │   ├── common.cu
    │   ├── common.cuh
    │   ├── horiz.cu
    │   └── jit.cu
    └── python
    │   ├── common.h
    │   ├── complex.h
    │   ├── cuda.cpp
    │   ├── cuda_0d.cpp
    │   ├── cuda_1d.cpp
    │   ├── cuda_2d.cpp
    │   ├── cuda_3d.cpp
    │   ├── cuda_4d.cpp
    │   ├── cuda_autodiff.cpp
    │   ├── cuda_autodiff_0d.cpp
    │   ├── cuda_autodiff_1d.cpp
    │   ├── cuda_autodiff_2d.cpp
    │   ├── cuda_autodiff_3d.cpp
    │   ├── cuda_autodiff_4d.cpp
    │   ├── cuda_autodiff_complex.cpp
    │   ├── cuda_autodiff_matrix.cpp
    │   ├── cuda_complex.cpp
    │   ├── cuda_matrix.cpp
    │   ├── cuda_pcg32.cpp
    │   ├── docstr.h
    │   ├── dynamic.cpp
    │   ├── dynamic_0d.cpp
    │   ├── dynamic_1d.cpp
    │   ├── dynamic_2d.cpp
    │   ├── dynamic_3d.cpp
    │   ├── dynamic_4d.cpp
    │   ├── dynamic_complex.cpp
    │   ├── dynamic_matrix.cpp
    │   ├── dynamic_pcg32.cpp
    │   ├── main.cpp
    │   ├── matrix.h
    │   ├── quat.h
    │   ├── random.h
    │   ├── scalar.cpp
    │   ├── scalar_0d.cpp
    │   ├── scalar_1d.cpp
    │   ├── scalar_2d.cpp
    │   ├── scalar_3d.cpp
    │   ├── scalar_4d.cpp
    │   ├── scalar_complex.cpp
    │   ├── scalar_matrix.cpp
    │   ├── scalar_pcg32.cpp
    │   └── scalar_quat.cpp
└── tests
    ├── CMakeLists.txt
    ├── autodiff.cpp
    ├── basic.cpp
    ├── call.cpp
    ├── color.cpp
    ├── complex.cpp
    ├── conv.cpp
    ├── custom.cpp
    ├── dynamic.cpp
    ├── explog.cpp
    ├── float.cpp
    ├── histogram.cpp
    ├── horiz.cpp
    ├── hyperbolic.cpp
    ├── idiv.cpp
    ├── integer.cpp
    ├── memory.cpp
    ├── memory2.cpp
    ├── morton.cpp
    ├── nested.cpp
    ├── python
        ├── test.cpp
        └── test_pytorch.py
    ├── ray.h
    ├── sh.cpp
    ├── special.cpp
    ├── sphere.cpp
    ├── test.h
    ├── trig.cpp
    └── vector.cpp


/.appveyor.yml:
--------------------------------------------------------------------------------
 1 | version: 1.0.{build}
 2 | image:
 3 | - Visual Studio 2017
 4 | test: off
 5 | skip_branch_with_pr: true
 6 | platform:
 7 | - x64
 8 | matrix:
 9 |   fast_finish: true
10 | configuration:
11 |  - Debug
12 |  - Release
13 | environment:
14 |   matrix:
15 |     - TEST_NAME: basic
16 |     - TEST_NAME: call
17 |     - TEST_NAME: color
18 |     - TEST_NAME: complex
19 |     - TEST_NAME: conv
20 |     - TEST_NAME: custom
21 |     - TEST_NAME: dynamic
22 |     - TEST_NAME: explog
23 |     - TEST_NAME: float
24 |     - TEST_NAME: histogram
25 |     - TEST_NAME: horiz
26 |     - TEST_NAME: hyperbolic
27 |     - TEST_NAME: idiv
28 |     - TEST_NAME: integer
29 |     - TEST_NAME: memory
30 |     - TEST_NAME: memory2
31 |     - TEST_NAME: morton
32 |     - TEST_NAME: nested
33 |     - TEST_NAME: sh
34 |     - TEST_NAME: special
35 |     - TEST_NAME: sphere
36 |     - TEST_NAME: trig
37 |     - TEST_NAME: vector
38 | build:
39 |   parallel: true
40 | build_script:
41 | - set preferredToolArchitecture=x64
42 | - cmake -G "Visual Studio 15 2017" -A "x64" -DCMAKE_SUPPRESS_REGENERATION=1 -DENOKI_TEST=1 -DENOKI_TEST_NAME=%TEST_NAME% .
43 | - set MSBuildLogger="C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
44 | - cmake --build . --config %CONFIGURATION% -- /v:m /m /logger:%MSBuildLogger%
45 | - ctest
46 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # CMake
 2 | /CMakeCache.txt
 3 | /CPackConfig.cmake
 4 | /CPackSourceConfig.cmake
 5 | CMakeFiles
 6 | cmake_install.cmake
 7 | CTestTestfile.cmake
 8 | CPackConfig.cmake
 9 | CPackSourceConfig.cmake
10 | Makefile
11 | 
12 | # Ninja
13 | .ninja_deps
14 | .ninja_log
15 | *.ninja
16 | 
17 | # Miscellaneous
18 | /build*
19 | /Testing
20 | /tests/Testing
21 | *.dir
22 | *~
23 | \.DS_Store
24 | *.dSYM
25 | 
26 | # Visual Studio
27 | *.vcxproj
28 | enoki.sdf
29 | enoki.sln
30 | enoki.opensdf
31 | enoki.VC.VC.opendb
32 | enoki.VC.db
33 | Debug
34 | Release
35 | *.filters
36 | /.vs
37 | /x64
38 | 
39 | # Visual Studio Code
40 | /.vscode
41 | 
42 | # Build products
43 | *_none
44 | *_sse42
45 | *_avx
46 | *_avx2
47 | *_avx512_knl
48 | *_avx512_skx
49 | *_neon
50 | *.ppm
51 | libenoki-cuda.so
52 | libenoki-cuda.dylib
53 | enoki-cuda.dll
54 | libenoki-autodiff.so
55 | libenoki-autodiff.dylib
56 | enoki-autodiff.dll
57 | enoki.cpython*
58 | /enoki
59 | compile_commands.json
60 | \.clangd
61 | 
62 | # Documentation
63 | /html
64 | /docs/.build
65 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "ext/cub"]
2 | 	path = ext/cub
3 | 	url = https://github.com/NVlabs/cub
4 | 
5 | [submodule "ext/pybind11"]
6 |     path = ext/pybind11
7 |     url = https://github.com/pybind/pybind11
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are met:
 5 | 
 6 | 1. Redistributions of source code must retain the above copyright notice, this
 7 |    list of conditions and the following disclaimer.
 8 | 
 9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 |    this list of conditions and the following disclaimer in the documentation
11 |    and/or other materials provided with the distribution.
12 | 
13 | 3. Neither the name of the copyright holder nor the names of its contributors
14 |    may be used to endorse or promote products derived from this software
15 |    without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 
28 | You are under no obligation whatsoever to provide any bug fixes, patches, or
29 | upgrades to the features, functionality or performance of the source code
30 | ("Enhancements") to anyone; however, if you choose to make your Enhancements
31 | available either publicly, or directly to the author of this software, without
32 | imposing a separate written license agreement for such Enhancements, then you
33 | hereby grant the following license: a non-exclusive, royalty-free perpetual
34 | license to install, use, modify, prepare derivative works, incorporate into
35 | other computer software, distribute, and sublicense such enhancements or
36 | derivative works thereof, in binary and source code form.
37 | 


--------------------------------------------------------------------------------
/docs/_static/theme_overrides.css:
--------------------------------------------------------------------------------
 1 | table.docutils td, table.docutils th {
 2 |     padding:.5em;
 3 | }
 4 | 
 5 | table.docutils th {
 6 |     vertical-align:middle;
 7 | }
 8 | 
 9 | .toggle .header {
10 |     display: block;
11 |     clear: both;
12 |     padding-bottom: 1em;
13 | }
14 | 
15 | .toggle .header:after {
16 |     content: " ▼";
17 | }
18 | 
19 | .toggle .header.open:after {
20 |     content: " ▲";
21 | }
22 | 
23 | .MathJax .mo { color: inherit }
24 | .MathJax .mi { color: inherit }
25 | 
26 | div.sphinxsidebar h4 {
27 |     font-family: "Open Sans", Helvetica, Arial, sans-serif;
28 |     font-size: 14px;
29 |     font-weight: bold;
30 |     text-transform: uppercase;
31 |     color: #606060;
32 | }
33 | 
34 | div.sphinxsidebar {
35 |     margin: 1em;
36 | }
37 | 
38 | div.sidebar-wrapper {
39 |     padding:0px;
40 | }
41 | 


--------------------------------------------------------------------------------
/docs/_templates/page.html:
--------------------------------------------------------------------------------
 1 | <!-- Based on https://stackoverflow.com/a/25543713/1130282 -->
 2 | {% extends "!page.html" %}
 3 | 
 4 | {% block footer %}
 5 |  <script type="text/javascript">
 6 |     $(document).ready(function() {
 7 |         $(".toggle > *").hide();
 8 |         $(".toggle .header").show();
 9 |         $(".toggle .header").click(function() {
10 |             $(this).parent().children().not(".header").toggle(400);
11 |             $(this).parent().children(".header").toggleClass("open");
12 |         })
13 |     });
14 | </script>
15 | {% endblock %}
16 | 


--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
1 | Change log
2 | ==========
3 | 
4 | Version 0.1.0 (Sep 2, 2019)
5 | ---------------------------
6 | 
7 | - First public release of Enoki
8 | 


--------------------------------------------------------------------------------
/docs/color.rst:
--------------------------------------------------------------------------------
 1 | .. cpp:namespace:: enoki
 2 | 
 3 | Color space transformations
 4 | ===========================
 5 | 
 6 | Enoki provides a set of helper functions for color space transformations. For
 7 | now, only sRGB and inverse sRGB gamma correction are available. To use them,
 8 | include the following header file:
 9 | 
10 | .. code-block:: cpp
11 | 
12 |     #include <enoki/color.h>
13 | 
14 | 
15 | Functions
16 | *********
17 | 
18 | .. cpp:function:: template <typename Value> Value linear_to_srgb(Value value)
19 | 
20 |     Efficiently applies the sRGB gamma correction
21 | 
22 |     .. math ::
23 | 
24 |         x\mapsto\begin{cases}12.92x,&x\leq 0.0031308\\1.055x^{1/2.4}-0.055,&x>0.0031308\end{cases}
25 | 
26 |     to an input value in the interval :math:`(0, 1)`.
27 | 
28 | .. cpp:function:: template <typename Value> Value srgb_to_linear(Value value)
29 | 
30 |     Efficiently applies the inverse sRGB gamma correction
31 | 
32 |     .. math ::
33 | 
34 |         x\mapsto{\begin{cases}{\frac {x}{12.92}},&x\leq 0.04045\\\left({\frac {x+0.055}{1.055}}\right)^{2.4},&x>0.04045\end{cases}}
35 | 
36 |     to an input value in the interval :math:`(0, 1)`.
37 | 
38 | 


--------------------------------------------------------------------------------
/docs/enoki-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitsuba-renderer/enoki/141cf4bd18eee674841a7c3e3c28f3db44adc6fa/docs/enoki-logo.png


--------------------------------------------------------------------------------
/docs/half.rst:
--------------------------------------------------------------------------------
  1 | .. cpp:namespace:: enoki
  2 | 
  3 | Half-precision floats
  4 | =====================
  5 | 
  6 | Enoki provides a compact implementation of a 16 bit *half-precision* floating
  7 | point type that is compatible with the FP16 format on GPUs and high dynamic
  8 | range image libraries such as OpenEXR. To use this feature, include the
  9 | following header:
 10 | 
 11 | .. code-block:: cpp
 12 | 
 13 |     #include <enoki/half.h>
 14 | 
 15 | 
 16 | Current processors don't natively implement half precision arithmetic, hence
 17 | mathematical operations involving this type always involve a
 18 | ``half``:math:`\to` ``float``:math:`\to` ``half`` roundtrip. For this reason,
 19 | it is unwise to rely on it for expensive parts of a computation.
 20 | 
 21 | The main reason for including a dedicated half precision type in Enoki is that
 22 | it provides an ideal storage format for floating point data that does not
 23 | require the full accuracy of the single precision representation, which leads
 24 | to an immediate storage savings of :math:`2\times`.
 25 | 
 26 | .. note::
 27 | 
 28 |     If supported by the target architecture, Enoki uses the *F16C* instruction
 29 |     set to perform efficient vectorized conversion between half and single
 30 |     precision variables (however, this only affects conversion and no other
 31 |     arithmetic operations). ARM NEON also provides native conversion
 32 |     instructions.
 33 | 
 34 | Usage
 35 | -----
 36 | 
 37 | The following example shows how to use the :cpp:class:`enoki::half` type in a
 38 | typical use case.
 39 | 
 40 | .. code-block:: cpp
 41 | 
 42 |     using Color4f = Array<float, 4>;
 43 |     using Color4h = Array<half, 4>;
 44 | 
 45 |     uint8_t *image_ptr = ...;
 46 | 
 47 |     Color4f pixel(load<Color4h>(image_ptr)); // <- conversion vectorized using F16C
 48 | 
 49 |     /* ... update 'pixel' using single-precision arithmetic ... */
 50 | 
 51 |     store(image_ptr, Color4h(pixel)); // <- conversion vectorized using F16C
 52 | 
 53 | Reference
 54 | ---------
 55 | 
 56 | .. cpp:class:: half
 57 | 
 58 |     A :cpp:class:`half` instance encodes a sign bit, an exponent width of 5
 59 |     bits, and 10 explicitly stored mantissa bits.
 60 | 
 61 |     All standard mathematical operators are overloaded and implemented using
 62 |     the processor's floating point unit after a conversion to IEEE 754 single
 63 |     precision. The result of the operation is then converted back to half
 64 |     precision.
 65 | 
 66 |     .. cpp:var:: uint16_t value
 67 | 
 68 |         Stores the represented half precision value as an unsigned 16-bit integer.
 69 | 
 70 |     .. cpp:function:: half(float value)
 71 | 
 72 |         Constructs a half-precision value from the given single precision
 73 |         argument.
 74 | 
 75 |     .. cpp:function:: operator float() const
 76 | 
 77 |         Implicit ``half`` to ``float`` conversion operator.
 78 | 
 79 |     .. cpp:function:: static half from_binary(uint16_t value)
 80 | 
 81 |         Reinterpret a 16-bit unsigned integer as a half-precision variable.
 82 | 
 83 |     .. cpp:function:: half operator+(half h) const
 84 | 
 85 |         Addition operator.
 86 | 
 87 |     .. cpp:function:: half& operator+=(half h)
 88 | 
 89 |         Addition compound assignment operator.
 90 | 
 91 |     .. cpp:function:: half operator-() const
 92 | 
 93 |         Unary minus operator.
 94 | 
 95 |     .. cpp:function:: half operator*(half h) const
 96 | 
 97 |         Multiplication operator.
 98 | 
 99 |     .. cpp:function:: half& operator*=(half h)
100 | 
101 |         Multiplication compound assignment operator.
102 | 
103 |     .. cpp:function:: half operator/(half h) const
104 | 
105 |         Division operator.
106 | 
107 |     .. cpp:function:: half& operator/=(half h)
108 | 
109 |         Division compound assignment operator.
110 | 
111 |     .. cpp:function:: bool operator<(half h) const
112 | 
113 |         Less-than comparison operator.
114 | 
115 |     .. cpp:function:: bool operator<=(half h) const
116 | 
117 |         Less-than-or-equal comparison operator.
118 | 
119 |     .. cpp:function:: bool operator>(half h) const
120 | 
121 |         Greater-than comparison operator.
122 | 
123 |     .. cpp:function:: bool operator>=(half h) const
124 | 
125 |         Greater-than-or-equal comparison operator.
126 | 
127 |     .. cpp:function:: bool operator==(half h) const
128 | 
129 |         Equality operator.
130 | 
131 |     .. cpp:function:: bool operator!=(half h) const
132 | 
133 |         Inequality operator.
134 | 
135 |     .. cpp:function:: friend std::ostream& operator<<(std::ostream &os, const half &h)
136 | 
137 |         Stream insertion operator.
138 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | Enoki: structured vectorization and differentiation on modern processor architectures
 2 | ======================================================================================
 3 | 
 4 | .. only:: not latex
 5 | 
 6 |     .. image:: enoki-logo.svg
 7 |         :width: 400px
 8 |         :align: center
 9 | 
10 | .. toctree::
11 |    :maxdepth: 1
12 |    :caption: Core features
13 | 
14 |    intro
15 |    demo
16 |    basics
17 |    nested
18 |    gpu
19 |    autodiff
20 |    calls
21 |    custom
22 |    dynamic
23 |    advanced
24 |    changelog
25 |    reference
26 |    → GitHub <https://github.com/mitsuba-renderer/enoki>
27 | 
28 | .. toctree::
29 |    :maxdepth: 1
30 |    :caption: Extras
31 | 
32 |    random
33 |    morton
34 |    complex
35 |    quaternions
36 |    matrix
37 |    transform
38 |    sh
39 |    color
40 |    half
41 |    stl
42 |    python
43 | 
44 | 


--------------------------------------------------------------------------------
/docs/intro-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitsuba-renderer/enoki/141cf4bd18eee674841a7c3e3c28f3db44adc6fa/docs/intro-01.png


--------------------------------------------------------------------------------
/docs/intro-01.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 326 151.83"><defs><style>.cls-1{fill:#f8f8f8;}.cls-10,.cls-2,.cls-3,.cls-4,.cls-5{fill:none;stroke-miterlimit:10;}.cls-2,.cls-3,.cls-4,.cls-5{stroke:#ccc;}.cls-3{stroke-dasharray:3.96 3.96;}.cls-4{stroke-dasharray:3.88 3.88;}.cls-5{stroke-dasharray:4.06 4.06;}.cls-11,.cls-6{font-size:12px;}.cls-6,.cls-9{font-family:Consolas-Bold, Consolas;}.cls-7{fill:#45960e;}.cls-7,.cls-9{font-weight:700;}.cls-8{font-family:Consolas, Consolas;}.cls-9{fill:#c1272d;}.cls-10{stroke:#000;stroke-width:2px;}.cls-11{font-family:AvenirNext-Medium, Avenir Next;}.cls-12{letter-spacing:-0.07em;}.cls-13{letter-spacing:0em;}.cls-14{letter-spacing:-0.01em;}.cls-15{letter-spacing:0.02em;}.cls-16{letter-spacing:-0.01em;}.cls-17{letter-spacing:-0.02em;}.cls-18{letter-spacing:-0.02em;}.cls-19{letter-spacing:0em;}</style></defs><title>soa</title><rect class="cls-1" x="181.5" y="2.22" width="142.46" height="116.45"/><polyline class="cls-2" points="323.96 116.67 323.96 118.67 321.96 118.67"/><line class="cls-3" x1="318" y1="118.67" x2="185.48" y2="118.67"/><polyline class="cls-2" points="183.5 118.67 181.5 118.67 181.5 116.67"/><line class="cls-4" x1="181.5" y1="112.79" x2="181.5" y2="6.16"/><polyline class="cls-2" points="181.5 4.22 181.5 2.22 183.5 2.22"/><line class="cls-3" x1="187.46" y1="2.22" x2="319.98" y2="2.22"/><polyline class="cls-2" points="321.96 2.22 323.96 2.22 323.96 4.22"/><line class="cls-4" x1="323.96" y1="8.1" x2="323.96" y2="114.73"/><rect class="cls-1" x="5" y="2.22" width="145.98" height="116.45"/><polyline class="cls-2" points="150.98 116.67 150.98 118.67 148.98 118.67"/><line class="cls-5" x1="144.92" y1="118.67" x2="9.03" y2="118.67"/><polyline class="cls-2" points="7 118.67 5 118.67 5 116.67"/><line class="cls-4" x1="5" y1="112.79" x2="5" y2="6.16"/><polyline class="cls-2" points="5 4.22 5 2.22 7 2.22"/><line class="cls-5" x1="11.06" y1="2.22" x2="146.95" y2="2.22"/><polyline 
class="cls-2" points="148.98 2.22 150.98 2.22 150.98 4.22"/><line class="cls-4" x1="150.98" y1="8.1" x2="150.98" y2="114.73"/><text class="cls-6" transform="translate(12.5 20)"><tspan class="cls-7">struct </tspan><tspan class="cls-8" x="46.18" y="0">Vector3f {</tspan><tspan class="cls-8"><tspan x="0" y="14.4" xml:space="preserve">    </tspan><tspan class="cls-9" x="26.39" y="14.4">float </tspan><tspan x="65.98" y="14.4">x;</tspan><tspan x="0" y="28.8" xml:space="preserve">    </tspan><tspan class="cls-9" x="26.39" y="28.8">float </tspan><tspan x="65.98" y="28.8">y;</tspan><tspan x="0" y="43.2" xml:space="preserve">    </tspan><tspan class="cls-9" x="26.39" y="43.2">float </tspan><tspan x="65.98" y="43.2">z;</tspan><tspan x="0" y="57.6">};</tspan><tspan x="0" y="86.4">Vector3f vectors[N];</tspan></tspan></text><text class="cls-6" transform="translate(191 20)"><tspan class="cls-7">struct </tspan><tspan class="cls-8" x="46.18" y="0">Vector3fP {</tspan><tspan class="cls-8"><tspan x="0" y="14.4" xml:space="preserve">    </tspan><tspan class="cls-9" x="26.39" y="14.4">float </tspan><tspan x="65.98" y="14.4">x[N];</tspan><tspan x="0" y="28.8" xml:space="preserve">    </tspan><tspan class="cls-9" x="26.39" y="28.8">float </tspan><tspan x="65.98" y="28.8">y[N];</tspan><tspan x="0" y="43.2" xml:space="preserve">    </tspan><tspan class="cls-9" x="26.39" y="43.2">float </tspan><tspan x="65.98" y="43.2">z[N];</tspan><tspan x="0" y="57.6">};</tspan><tspan x="0" y="86.4">Vector3fP vectors;</tspan></tspan></text><path class="cls-10" d="M128.5,95c0-8.5-3.09-8.07,50-7.5,84,.9,94.61,1,95.1-16"/><polygon points="277.25 74.26 273.54 72.7 269.85 74.3 273.5 65.5 277.25 74.26"/><text class="cls-11" transform="translate(22.23 137.5)"><tspan class="cls-12">“</tspan><tspan class="cls-13" x="4.25" y="0">Ar</tspan><tspan class="cls-14" x="17.12" y="0">r</tspan><tspan x="21.31" y="0">ay o</tspan><tspan class="cls-15" x="44.18" y="0">f</tspan><tspan x="48.19" y="0"> </tspan><tspan 
class="cls-16" x="51.19" y="0">S</tspan><tspan x="57.95" y="0">tructu</tspan><tspan class="cls-17" x="90.08" y="0">r</tspan><tspan x="94.21" y="0">es”</tspan></text><text class="cls-11" transform="translate(196.64 137.5)">“<tspan class="cls-16" x="5.11" y="0">S</tspan><tspan class="cls-13" x="11.87" y="0">tructu</tspan><tspan class="cls-18" x="44" y="0">r</tspan><tspan x="48.13" y="0">e o</tspan><tspan class="cls-15" x="65.33" y="0">f</tspan><tspan class="cls-18" x="69.33" y="0"> </tspan><tspan class="cls-19" x="72.12" y="0">Ar</tspan><tspan class="cls-14" x="85" y="0">r</tspan><tspan x="89.18" y="0">ays”</tspan></text></svg>


--------------------------------------------------------------------------------
/docs/intro-02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitsuba-renderer/enoki/141cf4bd18eee674841a7c3e3c28f3db44adc6fa/docs/intro-02.png


--------------------------------------------------------------------------------
/docs/intro-02.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 444.25 319.67"><defs><style>.cls-1{fill:#f7f7f7;}.cls-2,.cls-3,.cls-4{fill:none;stroke:#cbcbcb;stroke-miterlimit:10;}.cls-3{stroke-dasharray:3.97 3.97;}.cls-4{stroke-dasharray:3.97 3.97;}.cls-10,.cls-5,.cls-6,.cls-8,.cls-9{isolation:isolate;}.cls-10,.cls-6,.cls-7,.cls-8,.cls-9{font-size:13.7px;}.cls-6{fill:green;font-family:Consolas-Bold, Consolas;font-weight:700;}.cls-10,.cls-7,.cls-8,.cls-9{font-family:Consolas, Consolas;}.cls-8{fill:blue;}.cls-10,.cls-11{fill:#666;}.cls-11{letter-spacing:0em;}</style></defs><title>soa2</title><rect class="cls-1" x="4.34" y="6.48" width="436.88" height="302.12"/><polyline class="cls-2" points="441.22 306.59 441.22 308.59 439.22 308.59"/><line class="cls-3" x1="435.25" y1="308.59" x2="8.32" y2="308.59"/><polyline class="cls-2" points="6.34 308.59 4.34 308.59 4.34 306.59"/><line class="cls-4" x1="4.34" y1="302.62" x2="4.34" y2="10.47"/><polyline class="cls-2" points="4.34 8.48 4.34 6.48 6.34 6.48"/><line class="cls-3" x1="10.31" y1="6.48" x2="437.24" y2="6.48"/><polyline class="cls-2" points="439.22 6.48 441.22 6.48 441.22 8.48"/><line class="cls-4" x1="441.22" y1="12.45" x2="441.22" y2="304.61"/><g class="cls-5"><text class="cls-6" transform="translate(9.52 23.28)">inline</text><text class="cls-7" transform="translate(54.73 23.28)"> </text><text class="cls-6" transform="translate(62.26 23.28)">__m128</text><text class="cls-7" transform="translate(107.46 23.28)"> </text><text class="cls-8" transform="translate(115 23.28)">sample_tea</text><text class="cls-7" transform="translate(190.34 23.28)">(</text><text class="cls-6" transform="translate(197.87 23.28)">__m128i</text><text class="cls-7" transform="translate(250.61 23.28)"> v0, </text><text class="cls-6" transform="translate(288.28 23.28)">__m128i</text><text class="cls-7" transform="translate(341.02 23.28)"> v1) {</text></g><text class="cls-9" transform="translate(9.52 42.43)">    
</text><text class="cls-6" transform="translate(39.66 42.43)">const</text><text class="cls-7" transform="translate(77.33 42.43)"> </text><text class="cls-6" transform="translate(84.86 42.43)">__m128i</text><text class="cls-7" transform="translate(137.6 42.43)"> C1 </text><text class="cls-10" transform="translate(167.74 42.43)">=</text><text class="cls-7" transform="translate(175.27 42.43)"> _mm_set1_epi32(</text><text class="cls-10" transform="translate(295.81 42.43)">0xA341316C</text><text class="cls-7" transform="translate(371.15 42.43)">);</text><text class="cls-9" transform="translate(9.52 57.5)">    </text><text class="cls-6" transform="translate(39.66 57.5)">const</text><text class="cls-7" transform="translate(77.33 57.5)"> </text><text class="cls-6" transform="translate(84.86 57.5)">__m128i</text><text class="cls-7" transform="translate(137.6 57.5)"> C2 </text><text class="cls-10" transform="translate(167.74 57.5)">=</text><text class="cls-7" transform="translate(175.27 57.5)"> _mm_set1_epi32(</text><text class="cls-10" transform="translate(295.81 57.5)">0xC8013EA4</text><text class="cls-7" transform="translate(371.15 57.5)">);</text><text class="cls-9" transform="translate(9.52 72.58)">    </text><text class="cls-6" transform="translate(39.66 72.58)">const</text><text class="cls-7" transform="translate(77.33 72.58)"> </text><text class="cls-6" transform="translate(84.86 72.58)">__m128i</text><text class="cls-7" transform="translate(137.6 72.58)"> C3 </text><text class="cls-10" transform="translate(167.74 72.58)">=</text><text class="cls-7" transform="translate(175.27 72.58)"> _mm_set1_epi32(</text><text class="cls-10" transform="translate(295.81 72.58)">0xAD90777D</text><text class="cls-7" transform="translate(371.15 72.58)">);</text><text class="cls-9" transform="translate(9.52 87.65)">    </text><text class="cls-6" transform="translate(39.66 87.65)">const</text><text class="cls-7" transform="translate(77.33 87.65)"> </text><text class="cls-6" 
transform="translate(84.86 87.65)">__m128i</text><text class="cls-7" transform="translate(137.6 87.65)"> C4 </text><text class="cls-10" transform="translate(167.74 87.65)">=</text><text class="cls-7" transform="translate(175.27 87.65)"> _mm_set1_epi32(</text><text class="cls-10" transform="translate(295.81 87.65)">0x7E95761E</text><text class="cls-7" transform="translate(371.15 87.65)">);</text><text class="cls-9" transform="translate(9.52 108.94)">    </text><text class="cls-6" transform="translate(39.66 108.94)">__m128i</text><text class="cls-7" transform="translate(92.4 108.94)"> sum </text><text class="cls-10" transform="translate(130.07 108.94)">=</text><text class="cls-7" transform="translate(137.6 108.94)">_mm_set1_epi32(<tspan class="cls-11" x="113.01" y="0">0x9e3779b9</tspan><tspan x="188.35" y="0">);</tspan></text><text class="cls-9" transform="translate(9.52 131.09)">    v0 </text><text class="cls-10" transform="translate(62.26 131.09)">=</text><text class="cls-7" transform="translate(69.79 131.09)"> _mm_add_epi32(v0, _mm_xor_si128(</text><text class="cls-9" transform="translate(9.52 146.16)">        _mm_xor_si128(</text><text class="cls-9" transform="translate(9.52 161.23)">            _mm_add_epi32(_mm_slli_epi32(v1, </text><text class="cls-10" transform="translate(348.55 161.23)">4</text><text class="cls-7" transform="translate(356.08 161.23)">), C1),</text><text class="cls-9" transform="translate(9.52 176.31)">            _mm_add_epi32(v1, sum)),</text><text class="cls-9" transform="translate(9.52 191.38)">            _mm_add_epi32(_mm_srli_epi32(v1, </text><text class="cls-10" transform="translate(348.55 191.38)">5</text><text class="cls-7" transform="translate(356.08 191.38)">), C2)));</text><text class="cls-9" transform="translate(9.52 221.53)">    v1 </text><text class="cls-10" transform="translate(62.26 221.53)">=</text><text class="cls-7" transform="translate(69.79 221.53)"> _mm_add_epi32(v1, _mm_xor_si128(</text><text class="cls-9" 
transform="translate(9.52 236.6)">        _mm_xor_si128(</text><text class="cls-9" transform="translate(9.52 251.67)">            _mm_add_epi32(_mm_slli_epi32(v0, </text><text class="cls-10" transform="translate(348.55 251.67)">4</text><text class="cls-7" transform="translate(356.08 251.67)">), C3),</text><text class="cls-9" transform="translate(9.52 266.75)">            _mm_add_epi32(v0, sum)),</text><text class="cls-9" transform="translate(9.52 281.82)">            _mm_add_epi32(_mm_srli_epi32(v0, </text><text class="cls-10" transform="translate(348.55 281.82)">5</text><text class="cls-7" transform="translate(356.08 281.82)">), C4)));</text><text class="cls-9" transform="translate(9.52 301.11)">    ....</text></svg>


--------------------------------------------------------------------------------
/docs/intro-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitsuba-renderer/enoki/141cf4bd18eee674841a7c3e3c28f3db44adc6fa/docs/intro-03.png


--------------------------------------------------------------------------------
/docs/morton-01.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 3 | <svg  version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="542" height="542" viewBox="0 0 542 542"
 4 | 	 overflow="visible" enable-background="new 0 0 542 542" xml:space="preserve">
 5 | <g>
 6 | 	<rect x="298.777" y="21" fill="#F5FFFF" stroke="#666666" width="222.222" height="222.223"/>
 7 | 	<rect x="21" y="21" fill="#F5FFFF" stroke="#666666" width="222.222" height="222.223"/>
 8 | 	<rect x="21" y="298.778" fill="#F5FFFF" stroke="#666666" width="222.222" height="222.223"/>
 9 | 	<rect x="298.777" y="298.778" fill="#F5FFFF" stroke="#666666" width="222.222" height="222.223"/>
10 | 	<polyline fill="none" stroke="#C86464" stroke-width="8" points="76.556,76.556 187.667,76.556 76.556,187.667 187.667,187.667
11 | 		"/>
12 | 	<polyline fill="none" stroke="#B45050" stroke-width="4" points="324.24,48.006 379.796,48.006 324.24,103.562 379.796,103.562
13 | 		435.352,48.006 490.907,48.006 435.352,103.562 490.907,103.562 324.24,159.117 379.796,159.117 324.24,214.673 379.796,214.673
14 | 		435.352,159.117 490.907,159.117 435.352,214.673 490.907,214.673 	"/>
15 | 	<polyline fill="none" stroke="#A03C3C" stroke-width="2" points="229.333,507.112 201.556,507.112 229.333,479.333
16 | 		201.556,479.333 173.778,507.112 146,507.112 173.778,479.333 146,479.333 229.333,451.555 201.556,451.555 229.333,423.778
17 | 		201.556,423.778 173.778,451.555 146,451.555 173.778,423.778 146,423.778 118.223,507.112 90.444,507.112 118.223,479.333
18 | 		90.444,479.333 62.667,507.112 34.889,507.112 62.667,479.333 34.889,479.333 118.223,451.555 90.444,451.555 118.223,423.778
19 | 		90.444,423.778 62.667,451.555 34.889,451.555 62.667,423.778 34.889,423.778 229.333,396 201.556,396 229.333,368.223
20 | 		201.556,368.223 173.778,396 146,396 173.778,368.223 146,368.223 229.333,340.444 201.556,340.444 229.333,312.667
21 | 		201.556,312.667 173.778,340.444 146,340.444 173.778,312.667 146,312.667 118.223,396 90.444,396 118.223,368.223 90.444,368.223
22 | 		62.667,396 34.889,396 62.667,368.223 34.889,368.223 118.223,340.444 90.444,340.444 118.223,312.667 90.444,312.667
23 | 		62.667,340.444 34.889,340.444 62.667,312.667 34.889,312.667 	"/>
24 | 	<polyline fill="none" stroke="#8C2828" points="514.055,514.055 500.166,514.055 514.055,500.167 500.166,500.167 486.277,514.055
25 | 		472.389,514.055 486.277,500.167 472.389,500.167 514.055,486.278 500.166,486.278 514.055,472.389 500.166,472.389
26 | 		486.277,486.278 472.389,486.278 486.277,472.389 472.389,472.389 458.499,514.055 444.61,514.055 458.499,500.167 444.61,500.167
27 | 		430.722,514.055 416.833,514.055 430.722,500.167 416.833,500.167 458.499,486.278 444.61,486.278 458.499,472.389 444.61,472.389
28 | 		430.722,486.278 416.833,486.278 430.722,472.389 416.833,472.389 514.055,458.5 500.166,458.5 514.055,444.612 500.166,444.612
29 | 		486.277,458.5 472.389,458.5 486.277,444.612 472.389,444.612 514.055,430.723 500.166,430.723 514.055,416.833 500.166,416.833
30 | 		486.277,430.723 472.389,430.723 486.277,416.833 472.389,416.833 458.499,458.5 444.61,458.5 458.499,444.612 444.61,444.612
31 | 		430.722,458.5 416.833,458.5 430.722,444.612 416.833,444.612 458.499,430.723 444.61,430.723 458.499,416.833 444.61,416.833
32 | 		430.722,430.723 416.833,430.723 430.722,416.833 416.833,416.833 402.943,514.055 389.055,514.055 402.943,500.167
33 | 		389.055,500.167 375.166,514.055 361.277,514.055 375.166,500.167 361.277,500.167 402.943,486.278 389.055,486.278
34 | 		402.943,472.389 389.055,472.389 375.166,486.278 361.277,486.278 375.166,472.389 361.277,472.389 347.389,514.055
35 | 		333.499,514.055 347.389,500.167 333.499,500.167 319.61,514.055 305.722,514.055 319.61,500.167 305.722,500.167 347.389,486.278
36 | 		333.499,486.278 347.389,472.389 333.499,472.389 319.61,486.278 305.722,486.278 319.61,472.389 305.722,472.389 402.943,458.5
37 | 		389.055,458.5 402.943,444.612 389.055,444.612 375.166,458.5 361.277,458.5 375.166,444.612 361.277,444.612 402.943,430.723
38 | 		389.055,430.723 402.943,416.833 389.055,416.833 375.166,430.723 361.277,430.723 375.166,416.833 361.277,416.833 347.389,458.5
39 | 		333.499,458.5 347.389,444.612 333.499,444.612 319.61,458.5 305.722,458.5 319.61,444.612 305.722,444.612 347.389,430.723
40 | 		333.499,430.723 347.389,416.833 333.499,416.833 319.61,430.723 305.722,430.723 319.61,416.833 305.722,416.833 514.055,402.944
41 | 		500.166,402.944 514.055,389.055 500.166,389.055 486.277,402.944 472.389,402.944 486.277,389.055 472.389,389.055
42 | 		514.055,375.167 500.166,375.167 514.055,361.278 500.166,361.278 486.277,375.167 472.389,375.167 486.277,361.278
43 | 		472.389,361.278 458.499,402.944 444.61,402.944 458.499,389.055 444.61,389.055 430.722,402.944 416.833,402.944 430.722,389.055
44 | 		416.833,389.055 458.499,375.167 444.61,375.167 458.499,361.278 444.61,361.278 430.722,375.167 416.833,375.167 430.722,361.278
45 | 		416.833,361.278 514.055,347.389 500.166,347.389 514.055,333.5 500.166,333.5 486.277,347.389 472.389,347.389 486.277,333.5
46 | 		472.389,333.5 514.055,319.612 500.166,319.612 514.055,305.723 500.166,305.723 486.277,319.612 472.389,319.612 486.277,305.723
47 | 		472.389,305.723 458.499,347.389 444.61,347.389 458.499,333.5 444.61,333.5 430.722,347.389 416.833,347.389 430.722,333.5
48 | 		416.833,333.5 458.499,319.612 444.61,319.612 458.499,305.723 444.61,305.723 430.722,319.612 416.833,319.612 430.722,305.723
49 | 		416.833,305.723 402.943,402.944 389.055,402.944 402.943,389.055 389.055,389.055 375.166,402.944 361.277,402.944
50 | 		375.166,389.055 361.277,389.055 402.943,375.167 389.055,375.167 402.943,361.278 389.055,361.278 375.166,375.167
51 | 		361.277,375.167 375.166,361.278 361.277,361.278 347.389,402.944 333.499,402.944 347.389,389.055 333.499,389.055
52 | 		319.61,402.944 305.722,402.944 319.61,389.055 305.722,389.055 347.389,375.167 333.499,375.167 347.389,361.278 333.499,361.278
53 | 		319.61,375.167 305.722,375.167 319.61,361.278 305.722,361.278 402.943,347.389 389.055,347.389 402.943,333.5 389.055,333.5
54 | 		375.166,347.389 361.277,347.389 375.166,333.5 361.277,333.5 402.943,319.612 389.055,319.612 402.943,305.723 389.055,305.723
55 | 		375.166,319.612 361.277,319.612 375.166,305.723 361.277,305.723 347.389,347.389 333.499,347.389 347.389,333.5 333.499,333.5
56 | 		319.61,347.389 305.722,347.389 319.61,333.5 305.722,333.5 347.389,319.612 333.499,319.612 347.389,305.723 333.499,305.723
57 | 		319.61,319.612 305.722,319.612 319.61,305.723 305.722,305.723 	"/>
58 | </g>
59 | <rect fill="none" width="542" height="542"/>
60 | </svg>
61 | 


--------------------------------------------------------------------------------
/docs/morton.rst:
--------------------------------------------------------------------------------
 1 | .. cpp:namespace:: enoki
 2 | 
 3 | Morton/Z-order indexing
 4 | =======================
 5 | 
 6 | Enoki provides efficient support for encoding and decoding of Morton/Z-order
 7 | indices of arbitrary dimension. Both scalar indices and index vectors are
 8 | supported. Z-order indexing can improve the locality of accesses when two- or
 9 | higher-dimensional data is arranged in memory.
10 | 
11 | .. figure:: morton-01.svg
12 |     :width: 300px
13 |     :align: center
14 | 
15 |     (Figure by `David Eppstein <https://commons.wikimedia.org/wiki/File:Four-level_Z.svg>`_)
16 | 
17 | To use this feature, include the following header:
18 | 
19 | .. code-block:: cpp
20 | 
21 |     #include <enoki/morton.h>
22 | 
23 | 
24 | Usage
25 | -----
26 | 
27 | The following shows a round trip, encoding a 32-bit position as a Morton index
28 | and decoding it again.
29 | 
30 | .. code-block:: cpp
31 | 
32 |     using Vector2u = Array<uint32_t, 2>;
33 | 
34 |     Vector2u pos(123u, 456u);
35 |     uint32_t encoded = morton_encode(pos);
36 |     Vector2u decoded = morton_decode<Vector2u>(encoded);
37 | 
38 |     std::cout << "Original : " << pos << std::endl;
39 |     std::cout << "Encoded  : " << encoded << std::endl;
40 |     std::cout << "Decoded  : " << decoded << std::endl;
41 | 
42 |     /* Prints:
43 |         Original : [123, 456]
44 |         Encoded  : 177605
45 |         Decoded  : [123, 456]
46 |     */
47 | 
48 | Depending on hardware support, Enoki implements these operations using BMI2
49 | instructions or bit shifts with precomputed magic constants.
50 | 
51 | The same also works for nested vectors:
52 | 
53 | .. code-block:: cpp
54 | 
55 |     using UInt32P = Packet<uint32_t, 8>;
56 |     using Vector2uP = Array<UInt32P, 2>;
57 | 
58 |     Vector2uP pos(123u, 456u);
59 |     UInt32P encoded = morton_encode(pos);
60 |     Vector2uP decoded = morton_decode<Vector2uP>(encoded);
61 | 
62 |     std::cout << "Original : " << pos << std::endl;
63 |     std::cout << "Encoded  : " << encoded << std::endl;
64 |     std::cout << "Decoded  : " << decoded << std::endl;
65 | 
66 |     /* Prints:
67 |         Original : [[123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456]]
68 |         Encoded  : [177605, 177605, 177605, 177605, 177605, 177605, 177605, 177605]
69 |         Decoded  : [[123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456], [123, 456]]
70 |     */
71 | 
72 | Reference
73 | ---------
74 | 
75 | .. cpp:function:: template <typename Array> value_t<Array> morton_encode(Array array)
76 | 
77 |     Converts a potentially nested N-dimensional array into its corresponding
78 |     Morton/Z-order index by interleaving the bits of the component values. The
79 |     array must have an unsigned integer as its underlying scalar type.
80 | 
81 | .. cpp:function:: template <typename Array> Array morton_decode(value_t<Array> array)
82 | 
83 |     Converts a Morton/Z-order index or index array into a potentially nested
84 |     N-dimensional array. The array must have an unsigned integer as its
85 |     underlying scalar type.
86 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | guzzle_sphinx_theme
2 | pygments-mathematica
3 | 


--------------------------------------------------------------------------------
/docs/sh.rst:
--------------------------------------------------------------------------------
  1 | .. cpp:namespace:: enoki
  2 | 
  3 | Spherical harmonics
  4 | ===================
  5 | 
  6 | Enoki can efficiently evaluate real spherical harmonics basis functions for
  7 | both scalar and vector arguments. To use this feature, include the following
  8 | header file:
  9 | 
 10 | .. code-block:: cpp
 11 | 
 12 |     #include <enoki/sh.h>
 13 | 
 14 | The evaluation routines rely on efficient pre-generated branch-free code
 15 | processed using aggressive constant folding and common subexpression
 16 | elimination passes. Evaluation routines are provided up to and including order 9.
 17 | 
 18 | The generated code is based on the paper `Efficient Spherical Harmonic
 19 | Evaluation <http://jcgt.org/published/0002/02/06/>`_, *Journal of Computer
 20 | Graphics Techniques (JCGT)*, vol. 2, no. 2, 84-90, 2013 by `Peter-Pike Sloan
 21 | <http://www.ppsloan.org/publications/>`_.
 22 | 
 23 | .. note::
 24 | 
 25 |     The directions provided to ``sh_eval_*`` must be normalized 3D vectors
 26 |     (i.e. using Cartesian instead of spherical coordinates).
 27 | 
 28 |     The Mathematica equivalent of the real spherical harmonic basis implemented
 29 |     in :file:`enoki/sh.h` is given by the following definition:
 30 | 
 31 |     .. code-block:: wolfram-language
 32 | 
 33 |         SphericalHarmonicQ[l_, m_, d_] := Block[{θ, ϕ},
 34 |           θ = ArcCos[d[[3]]];
 35 |           ϕ = ArcTan[d[[1]], d[[2]]];
 36 |           Piecewise[{
 37 |             {SphericalHarmonicY[l, m, θ, ϕ], m == 0},
 38 |             {Sqrt[2] * Re[SphericalHarmonicY[l,  m, θ, ϕ]], m > 0},
 39 |             {Sqrt[2] * Im[SphericalHarmonicY[l, -m, θ, ϕ]], m < 0}
 40 |           }]
 41 |         ]
 42 | 
 43 | Usage
 44 | -----
 45 | 
 46 | The following example shows how to evaluate the spherical harmonics basis up to
 47 | and including order 2 producing a total of 9 function evaluations.
 48 | 
 49 | .. code-block:: cpp
 50 | 
 51 |     using Vector3f = Array<float, 3>;
 52 |     Vector3f d = normalize(Vector3f(1, 2, 3));
 53 | 
 54 |     float coeffs[9];
 55 |     sh_eval(d, 2, coeffs);
 56 | 
 57 |     // Prints: [0.282095, -0.261169, 0.391754, -0.130585, 0.156078, -0.468235, 0.292864, -0.234118, -0.117059]
 58 |     std::cout << load<Array<float, 9>>(coeffs) << std::endl;
 59 | 
 60 | 
 61 | Reference
 62 | ---------
 63 | 
 64 | .. cpp:function:: template <typename Array> void sh_eval(const Array &d, size_t order, expr_t<value_t<Array>> *out)
 65 | 
 66 |     Evaluates the real spherical harmonics basis functions up to and including
 67 |     order ``order``. The output array must have room for ``(order + 1)*(order +
 68 |     1)`` entries. This function dispatches to one of the ``sh_eval_*``
 69 |     implementations and throws an exception if ``order > 9``.
 70 | 
 71 | .. cpp:function:: template <typename Array> void sh_eval_0(const Array &d, expr_t<value_t<Array>> *out)
 72 | 
 73 |     Evaluates the real spherical harmonics basis functions up to and including
 74 |     order 0. The output array must have room for ``1`` entry.
 75 | 
 76 | .. cpp:function:: template <typename Array> void sh_eval_1(const Array &d, expr_t<value_t<Array>> *out)
 77 | 
 78 |     Evaluates the real spherical harmonics basis functions up to and including
 79 |     order 1. The output array must have room for ``4`` entries.
 80 | 
 81 | .. cpp:function:: template <typename Array> void sh_eval_2(const Array &d, expr_t<value_t<Array>> *out)
 82 | 
 83 |     Evaluates the real spherical harmonics basis functions up to and including
 84 |     order 2. The output array must have room for ``9`` entries.
 85 | 
 86 | .. cpp:function:: template <typename Array> void sh_eval_3(const Array &d, expr_t<value_t<Array>> *out)
 87 | 
 88 |     Evaluates the real spherical harmonics basis functions up to and including
 89 |     order 3. The output array must have room for ``16`` entries.
 90 | 
 91 | .. cpp:function:: template <typename Array> void sh_eval_4(const Array &d, expr_t<value_t<Array>> *out)
 92 | 
 93 |     Evaluates the real spherical harmonics basis functions up to and including
 94 |     order 4. The output array must have room for ``25`` entries.
 95 | 
 96 | .. cpp:function:: template <typename Array> void sh_eval_5(const Array &d, expr_t<value_t<Array>> *out)
 97 | 
 98 |     Evaluates the real spherical harmonics basis functions up to and including
 99 |     order 5. The output array must have room for ``36`` entries.
100 | 
101 | .. cpp:function:: template <typename Array> void sh_eval_6(const Array &d, expr_t<value_t<Array>> *out)
102 | 
103 |     Evaluates the real spherical harmonics basis functions up to and including
104 |     order 6. The output array must have room for ``49`` entries.
105 | 
106 | .. cpp:function:: template <typename Array> void sh_eval_7(const Array &d, expr_t<value_t<Array>> *out)
107 | 
108 |     Evaluates the real spherical harmonics basis functions up to and including
109 |     order 7. The output array must have room for ``64`` entries.
110 | 
111 | .. cpp:function:: template <typename Array> void sh_eval_8(const Array &d, expr_t<value_t<Array>> *out)
112 | 
113 |     Evaluates the real spherical harmonics basis functions up to and including
114 |     order 8. The output array must have room for ``81`` entries.
115 | 
116 | .. cpp:function:: template <typename Array> void sh_eval_9(const Array &d, expr_t<value_t<Array>> *out)
117 | 
118 |     Evaluates the real spherical harmonics basis functions up to and including
119 |     order 9. The output array must have room for ``100`` entries.
120 | 


--------------------------------------------------------------------------------
/docs/stl.rst:
--------------------------------------------------------------------------------
 1 | .. cpp:namespace:: enoki
 2 | 
 3 | Standard Template Library
 4 | =========================
 5 | 
 6 | When Enoki extracts packets or slices through custom data structures, it also
 7 | handles STL data structures including ``std::array``, ``std::pair``, and
 8 | ``std::tuple``. Please review the section on :ref:`dynamic arrays <dynamic>`
 9 | for general details on vectorizing over dynamic arrays and working with slices.
10 | 
11 | To use this feature, include the following header file:
12 | 
13 | .. code-block:: cpp
14 | 
15 |     #include <enoki/stl.h>
16 | 
17 | Usage
18 | -----
19 | 
20 | Consider the following example, where a function returns a ``std::tuple``
21 | containing a 3D position and a mask specifying whether the computation was
22 | successful. When the :file:`enoki/stl.h` header file is included, Enoki's
23 | dynamic vectorization machinery can be applied to vectorize such functions over
24 | arbitrarily large inputs.
25 | 
26 | .. code-block:: cpp
27 |     :emphasize-lines: 2,3,4,5,6,30,36
28 | 
29 |     /// Return value of 'my_function'
30 |     template <typename T>
31 |     using Return = std::tuple<
32 |         Array<T, 3>,
33 |         mask_t<T>
34 |     >;
35 | 
36 |     template <typename T> Return<T> my_function(T theta, T phi) {
37 |         /* Turn spherical -> cartesian coordinates */
38 |         Array<T, 3> pos(
39 |             sin(theta) * cos(phi),
40 |             sin(theta) * sin(phi),
41 |             cos(theta)
42 |         );
43 | 
44 |         /* Only points on the top hemisphere are 'valid' */
45 |         return std::make_pair(pos, pos.z() > 0);
46 |     }
47 | 
48 |     /// Packet of floats
49 |     using FloatP  = Packet<float>;
50 | 
51 |     /// Arbitrarily large sequence of floats
52 |     using FloatX  = DynamicArray<FloatP>;
53 | 
54 |     /// Tuple containing a packet of results
55 |     using ReturnP = Return<FloatP>;
56 | 
57 |     /// Tuple containing dynamic arrays with arbitrarily many results
58 |     using ReturnX = Return<FloatX>;
59 | 
60 |     int main(int argc, char *argv[]) {
61 |         FloatX theta = linspace<FloatX>(-10.f, 10.f, 10);
62 |         FloatX phi = linspace<FloatX>(0.f, 60.f, 10);
63 | 
64 |         ReturnX result = vectorize(my_function<FloatP>, theta, phi);
65 | 
66 |         /* Prints:
67 |             [[0.544021, 0, -0.839072],
68 |              [-0.924676, -0.373065, 0.0761302],
69 |              [0.478888, 0.461548, 0.746753],
70 |              [0.0777672, 0.173978, -0.981674],
71 |              [-0.0330365, -0.895583, 0.443666],
72 |              [-0.304446, 0.842896, 0.443666],
73 |              [0.127097, -0.141995, -0.981674],
74 |              [0.596782, -0.293616, 0.746753],
75 |              [-0.994388, 0.0734624, 0.0761293],
76 |              [0.518133, 0.165823, -0.839072]]
77 |         */
78 | 
79 |         std::cout << std::get<0>(result) << std::endl;
80 | 
81 |         /* Prints:
82 |             [0, 1, 1, 0, 1, 1, 0, 1, 1, 0]
83 |         */
84 |         std::cout << std::get<1>(result) << std::endl;
85 |     }
86 | 


--------------------------------------------------------------------------------
/docs/transform.rst:
--------------------------------------------------------------------------------
  1 | .. cpp:namespace:: enoki
  2 | 
  3 | Homogeneous transformations
  4 | ===========================
  5 | 
  6 | Enoki provides a number of convenience functions to construct 3D homogeneous
  7 | coordinate transformations (rotations, translations, scales, perspective
  8 | transformation matrices, etc.). To use them, include the following header file:
  9 | 
 10 | .. code-block:: cpp
 11 | 
 12 |     #include <enoki/transform.h>
 13 | 
 14 | Reference
 15 | ---------
 16 | 
 17 | .. cpp:function:: template <typename Matrix, typename Vector3> Matrix translate(Vector3 v)
 18 | 
 19 |     Constructs a homogeneous coordinate transformation, which translates points by ``v``.
 20 | 
 21 | .. cpp:function:: template <typename Matrix, typename Vector3> Matrix scale(Vector3 v)
 22 | 
 23 |     Constructs a homogeneous coordinate transformation, which scales points by ``v``.
 24 | 
 25 | .. cpp:function:: template <typename Matrix, typename Vector3, typename Float> Matrix rotate(Vector3 v, Float angle)
 26 | 
 27 |     Constructs a homogeneous coordinate transformation, which rotates by ``angle`` radians
 28 |     around the axis ``v``. The function requires ``v`` to be normalized.
 29 | 
 30 | .. cpp:function:: template <typename Matrix> auto transform_decompose(Matrix m)
 31 | 
 32 |     Performs a polar decomposition of a non-perspective 4x4 homogeneous
 33 |     coordinate matrix and returns a tuple of
 34 | 
 35 |     1. A positive definite 3x3 matrix containing an inhomogeneous scaling operation
 36 | 
 37 |     2. A rotation quaternion
 38 | 
 39 |     3. A 3D translation vector
 40 | 
 41 |     This representation is helpful when interpolating keyframe animations.
 42 | 
 43 |     The function also handles singular inputs ``m``, in which case the rotation
 44 |     component is set to the identity quaternion and the scaling part simply
 45 |     copies the input matrix.
 46 | 
 47 | .. cpp:function:: template <typename Matrix3, typename Quaternion, typename Vector3> auto transform_compose(Matrix3 scale, Quaternion rotation, Vector3 translate)
 48 | 
 49 |     This function composes a 4x4 homogeneous coordinate transformation from the
 50 |     given scale, rotation, and translation. It performs the reverse of
 51 |     ``transform_decompose``.
 52 | 
 53 | .. cpp:function:: template <typename Matrix3, typename Quaternion, typename Vector3> auto transform_compose_inverse(Matrix3 scale, Quaternion rotation, Vector3 translate)
 54 | 
 55 |     This function composes a 4x4 homogeneous *inverse* coordinate
 56 |     transformation from the given scale, rotation, and translation. It is
 57 |     equivalent to (but more efficient than) the expression
 58 |     ``inverse(transform_compose(...))``.
 59 | 
 60 | .. cpp:function:: template <typename Matrix, typename Point3, typename Vector3> Matrix look_at(Point3 origin, Point3 target, Vector3 up)
 61 | 
 62 |     Constructs a homogeneous coordinate transformation, which translates to
 63 |     :math:`\mathrm{origin}`, maps the negative :math:`z` axis to
 64 |     :math:`\mathrm{target}-\mathrm{origin}` (normalized) and the positive
 65 |     :math:`y` axis to :math:`\mathrm{up}` (if orthogonal to
 66 |     :math:`\mathrm{target}-\mathrm{origin}`). The algorithm performs
 67 |     Gram-Schmidt orthogonalization to ensure that the returned matrix is
 68 |     orthonormal.
 69 | 
 70 | .. cpp:function:: template <typename Matrix, typename Float> Matrix perspective(Float fov, Float near, Float far)
 71 | 
 72 |     Constructs an OpenGL-compatible perspective projection matrix with the
 73 |     specified field of view (in radians) and near and far clip planes. The
 74 |     returned matrix performs the transformation
 75 | 
 76 |     .. math::
 77 | 
 78 |         \begin{pmatrix}
 79 |         x\\y\\z\end{pmatrix}
 80 |         \mapsto
 81 |         \begin{pmatrix}
 82 |         -c\,x/z\\ -c\,y/z\\
 83 |         \frac{2\,\mathrm{far}\,\mathrm{near}\,+\,z\,(\mathrm{far}+\mathrm{near})}{z\, (\mathrm{far}-\mathrm{near})}
 84 |         \end{pmatrix},
 85 | 
 86 |     where
 87 | 
 88 |     .. math::
 89 | 
 90 |         c = \mathrm{cot}\!\left(0.5\, \textrm{fov}\right),
 91 | 
 92 |     which maps :math:`(0, 0, -\mathrm{near})^T` to :math:`(0, 0, -1)^T` and
 93 |     :math:`(0, 0, -\mathrm{far})^T` to :math:`(0, 0, 1)^T`.
 94 | 
 95 | .. cpp:function:: template <typename Matrix, typename Float> Matrix frustum(Float left, Float right, Float bottom, Float top, Float near, Float far)
 96 | 
 97 |     Constructs an OpenGL-compatible perspective projection matrix. The provided
 98 |     parameters specify the intersection of the camera frustum with the near
 99 |     clipping plane. Specifically, the returned transformation maps
100 |     :math:`(\mathrm{left}, \mathrm{bottom}, -\mathrm{near})` to :math:`(-1, -1,
101 |     -1)` and :math:`(\mathrm{right}, \mathrm{top}, -\mathrm{near})` to
102 |     :math:`(1, 1, -1)`.
103 | 
104 | .. cpp:function:: template <typename Matrix, typename Float> Matrix ortho(Float left, Float right, Float bottom, Float top, Float near, Float far)
105 | 
106 |     Constructs an OpenGL-compatible orthographic projection matrix. The
107 |     provided parameters specify the intersection of the camera frustum with the
108 |     near clipping plane. Specifically, the returned transformation maps
109 |     :math:`(\mathrm{left}, \mathrm{bottom}, -\mathrm{near})` to :math:`(-1, -1,
110 |     -1)` and :math:`(\mathrm{right}, \mathrm{top}, -\mathrm{near})` to
111 |     :math:`(1, 1, -1)`.
112 | 


--------------------------------------------------------------------------------
/include/enoki/array_enum.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     enoki/array_enum.h -- Support for Enoki arrays of enumeration
 3 |     values, stored using the enumeration's underlying integer type
 4 | 
 5 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
 6 | 
 7 |     All rights reserved. Use of this source code is governed by a BSD-style
 8 |     license that can be found in the LICENSE file.
 9 | */
10 | 
11 | #pragma once
12 | 
13 | NAMESPACE_BEGIN(enoki)
14 | 
15 | template <typename Value_, size_t Size_, bool IsMask_, typename Derived_>
16 | struct StaticArrayImpl<Value_, Size_, IsMask_, Derived_,
17 |                        enable_if_t<detail::array_config<Value_, Size_>::use_enum_impl>>
18 |     : StaticArrayImpl<std::underlying_type_t<Value_>, Size_, IsMask_, Derived_> {
19 |     /// Specialization enabled by 'array_config<...>::use_enum_impl'; inherits from the array over the enum's underlying type
20 |     using UnderlyingType = std::underlying_type_t<Value_>;
21 | 
22 |     using Base = StaticArrayImpl<UnderlyingType, Size_, IsMask_, Derived_>;
23 | 
24 |     ENOKI_ARRAY_DEFAULTS(StaticArrayImpl)
25 |     using Base::derived;
26 |     /// Mask arrays keep the base class types; otherwise entries are exposed as 'Value_'
27 |     using Value = std::conditional_t<IsMask_, typename Base::Value, Value_>;
28 |     using Scalar = std::conditional_t<IsMask_, typename Base::Scalar, Value_>;
29 |     /// Default construction, and construction from a single 'Value' (converted to 'UnderlyingType' for Base)
30 |     StaticArrayImpl() = default;
31 |     StaticArrayImpl(Value value) : Base(UnderlyingType(value)) { }
32 |     /// Any single argument that is not an enumeration is forwarded to Base unchanged
33 |     template <typename T, enable_if_t<!std::is_enum_v<T>> = 0>
34 |     StaticArrayImpl(const T &b) : Base(b) { }
35 |     /// A pair of arguments of the same non-array type is forwarded to Base unchanged
36 |     template <typename T, enable_if_t<!is_array_v<T>> = 0>
37 |     StaticArrayImpl(const T &v1, const T &v2) : Base(v1, v2) { }
38 |     /// Forwards the argument to Base together with a 'reinterpret_flag' tag
39 |     template <typename T>
40 |     StaticArrayImpl(const T &b, detail::reinterpret_flag)
41 |         : Base(b, detail::reinterpret_flag()) { }
42 |     /// Two-array constructor: both arguments must match this array's depth and have sizes Base::Size1 / Base::Size2 (Size2 != 0)
43 |     template <typename T1, typename T2, typename T = StaticArrayImpl, enable_if_t<
44 |               array_depth_v<T1> == array_depth_v<T> && array_size_v<T1> == Base::Size1 &&
45 |               array_depth_v<T2> == array_depth_v<T> && array_size_v<T2> == Base::Size2 &&
46 |               Base::Size2 != 0> = 0>
47 |     StaticArrayImpl(const T1 &a1, const T2 &a2)
48 |         : Base(a1, a2) { }
49 |     /// Read access to entry 'i': a 'const Base::Value &' result is re-typed as 'const Value &', anything else is returned as is
50 |     ENOKI_INLINE decltype(auto) coeff(size_t i) const {
51 |         using Coeff = decltype(Base::coeff(i));
52 |         if constexpr (std::is_same_v<Coeff, const typename Base::Value &>)
53 |             return (const Value &) Base::coeff(i);
54 |         else
55 |             return Base::coeff(i);
56 |     }
57 |     /// Mutable access to entry 'i': a 'Base::Value &' result is re-typed as 'Value &', anything else is returned as is
58 |     ENOKI_INLINE decltype(auto) coeff(size_t i) {
59 |         using Coeff = decltype(Base::coeff(i));
60 |         if constexpr (std::is_same_v<Coeff, typename Base::Value &>)
61 |             return (Value &) Base::coeff(i);
62 |         else
63 |             return Base::coeff(i);
64 |     }
65 |     /// Forwards to Base::compress_ after re-typing the output pointer as 'UnderlyingType *&'
66 |     template <typename T, typename Mask>
67 |     ENOKI_INLINE size_t compress_(T *&ptr, const Mask &mask) const {
68 |         return Base::compress_((UnderlyingType *&) ptr, mask);
69 |     }
70 |     /// Assignment: nullptr stores UnderlyingType(0); values convertible to 'Value' are cast to 'UnderlyingType'; all else is forwarded to Base
71 |     template <typename T> Derived_& operator=(T&& t) {
72 |         ENOKI_MARK_USED(t);
73 |         if constexpr (std::is_same_v<T, std::nullptr_t>)
74 |             return (Derived_ &) Base::operator=(UnderlyingType(0));
75 |         else if constexpr (std::is_convertible_v<T, Value>)
76 |             return (Derived_ &) Base::operator=(UnderlyingType(t));
77 |         else
78 |             return (Derived_ &) Base::operator=(std::forward<T>(t));
79 |     }
80 | };
81 | 
82 | NAMESPACE_END(enoki)
83 | 


--------------------------------------------------------------------------------
/include/enoki/array_masked.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     enoki/array_masked.h -- Helper classes for masked assignments and
 3 |     in-place operators
 4 | 
 5 |     Enoki is a C++ template library that enables transparent vectorization
 6 |     of numerical kernels using SIMD instruction sets available on current
 7 |     processor architectures.
 8 | 
 9 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
10 | 
11 |     All rights reserved. Use of this source code is governed by a BSD-style
12 |     license that can be found in the LICENSE file.
13 | */
14 | 
15 | #pragma once
16 | 
17 | NAMESPACE_BEGIN(enoki)
18 | 
19 | // -----------------------------------------------------------------------
20 | //! @{ \name Masked array helper classes
21 | // -----------------------------------------------------------------------
22 | 
23 | NAMESPACE_BEGIN(detail)
24 | 
25 | template <typename T> struct MaskedValue {
26 |     MaskedValue(T &target, bool enabled) : d(target), m(enabled) { }
27 |     /// Every operator below is applied to 'd' only if the flag 'm' is set; otherwise it does nothing
28 |     template <typename T2> ENOKI_INLINE void operator =(const T2 &rhs) { if (m) { d = rhs; } }
29 |     template <typename T2> ENOKI_INLINE void operator+=(const T2 &rhs) { if (m) { d += rhs; } }
30 |     template <typename T2> ENOKI_INLINE void operator-=(const T2 &rhs) { if (m) { d -= rhs; } }
31 |     template <typename T2> ENOKI_INLINE void operator*=(const T2 &rhs) { if (m) { d *= rhs; } }
32 |     template <typename T2> ENOKI_INLINE void operator/=(const T2 &rhs) { if (m) { d /= rhs; } }
33 |     template <typename T2> ENOKI_INLINE void operator|=(const T2 &rhs) { if (m) { d |= rhs; } }
34 |     template <typename T2> ENOKI_INLINE void operator&=(const T2 &rhs) { if (m) { d &= rhs; } }
35 |     template <typename T2> ENOKI_INLINE void operator^=(const T2 &rhs) { if (m) { d ^= rhs; } }
36 |     /// Reference to the wrapped value, and the flag that enables updates to it
37 |     T &d;
38 |     bool m;
39 | };
40 | 
41 | template <typename T> struct MaskedArray : ArrayBase<value_t<T>, MaskedArray<T>> {
42 |     using Mask = mask_t<T>;
43 |     using Scalar = MaskedValue<scalar_t<T>>;
44 |     using MaskType = MaskedArray<Mask>;
45 |     using Value = std::conditional_t<
46 |         is_scalar_v<value_t<T>>, MaskedValue<value_t<T>>,
47 |         MaskedArray<value_t<T>>>;
48 |     using UnderlyingValue = value_t<T>;
49 |     static constexpr size_t Size = array_size_v<T>;
50 |     static constexpr bool IsMaskedArray = true;
51 |     /// Binds a reference to the array 'd' together with a copy of the mask 'm'
52 |     MaskedArray(T &target, const Mask &active) : d(target), m(active) { }
53 |     /// Assignment and compound operators: each forwards to the matching m*_ method of 'd', passing the mask
54 |     template <typename T2> ENOKI_INLINE void operator =(const T2 &rhs) { d.massign_(rhs, m); }
55 |     template <typename T2> ENOKI_INLINE void operator+=(const T2 &rhs) { d.madd_(rhs, m); }
56 |     template <typename T2> ENOKI_INLINE void operator-=(const T2 &rhs) { d.msub_(rhs, m); }
57 |     template <typename T2> ENOKI_INLINE void operator*=(const T2 &rhs) { d.mmul_(rhs, m); }
58 |     template <typename T2> ENOKI_INLINE void operator/=(const T2 &rhs) { d.mdiv_(rhs, m); }
59 |     template <typename T2> ENOKI_INLINE void operator|=(const T2 &rhs) { d.mor_(rhs, m); }
60 |     template <typename T2> ENOKI_INLINE void operator&=(const T2 &rhs) { d.mand_(rhs, m); }
61 |     template <typename T2> ENOKI_INLINE void operator^=(const T2 &rhs) { d.mxor_(rhs, m); }
62 | 
63 |     /// Masked array wrapping T's 'ReplaceValue<T2>' counterpart (same shape, different value type)
64 |     template <typename T2> using ReplaceValue = MaskedArray<typename T::template ReplaceValue<T2>>;
65 |     /// Referenced array, and the mask handed to every m*_ call
66 |     T &d;
67 |     Mask m;
68 | };
69 | 
70 | NAMESPACE_END(detail)
71 | 
72 | template <typename Value_, size_t Size_>
73 | struct Array<detail::MaskedArray<Value_>, Size_>
74 |     : detail::MaskedArray<Array<Value_, Size_>> {
75 |     using Base = detail::MaskedArray<Array<Value_, Size_>>;
76 |     using Base::Base;
77 |     using Base::operator=;
78 |     Array(const Base &b) : Base(b) { }
79 | };
80 | 
81 | template <typename T, typename Mask>
82 | ENOKI_INLINE auto masked(T &value, const Mask &mask) {
83 |     if constexpr (std::is_same_v<Mask, bool>)
84 |         return detail::MaskedValue<T>{ value, mask };
85 |     else
86 |         return struct_support_t<T>::masked(value, mask);
87 | }
88 | 
89 | //! @}
90 | // -----------------------------------------------------------------------
91 | 
92 | NAMESPACE_END(enoki)
93 | 


--------------------------------------------------------------------------------
/include/enoki/array_round.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |     enoki/array_round.h -- Fallback for nonstandard rounding modes
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 | 
 10 |     All rights reserved. Use of this source code is governed by a BSD-style
 11 |     license that can be found in the LICENSE file.
 12 | */
 13 | 
 14 | #pragma once
 15 | 
 16 | #include <enoki/array_generic.h>
 17 | 
 18 | NAMESPACE_BEGIN(enoki)
 19 | 
 20 | #if defined(ENOKI_X86_64) || defined(ENOKI_X86_32)
 21 | /// RAII wrapper that saves and restores the FP Control/Status Register
 22 | template <RoundingMode Mode> struct set_rounding_mode {
 23 |     set_rounding_mode() : value(_mm_getcsr()) {
 24 |         unsigned int csr = value & ~(unsigned int) _MM_ROUND_MASK;
 25 |         switch (Mode) {
 26 |             case RoundingMode::Nearest: csr |= _MM_ROUND_NEAREST; break;
 27 |             case RoundingMode::Down: csr |= _MM_ROUND_DOWN; break;
 28 |             case RoundingMode::Up: csr |= _MM_ROUND_UP; break;
 29 |             case RoundingMode::Zero: csr |= _MM_ROUND_TOWARD_ZERO; break;
 30 |         }
 31 |         _mm_setcsr(csr);
 32 |     }
 33 | 
 34 |     ~set_rounding_mode() {
 35 |         _mm_setcsr(value);
 36 |     }
 37 | 
 38 |     unsigned int value;
 39 | };
 40 | #else
 41 | template <RoundingMode Mode> struct set_rounding_mode {
 42 |     // Don't know how to change rounding modes on this platform :(
 43 | };
 44 | #endif
 45 | 
 46 | template <typename Value_, size_t Size_, bool Approx_, RoundingMode Mode_, bool IsMask_, typename Derived_>
 47 | struct StaticArrayImpl<Value_, Size_, Approx_, Mode_, IsMask_, Derived_,
 48 |                        enable_if_t<detail::array_config<Value_, Size_, Mode_>::use_rounding_fallback_impl>>
 49 |     : StaticArrayImpl<Value_, Size_, Approx_, RoundingMode::Default, IsMask_, Derived_> {
 50 | 
 51 |     using Base = StaticArrayImpl<Value_, Size_, Approx_, RoundingMode::Default, IsMask_, Derived_>;
 52 |     using Derived = Derived_;
 53 | 
 54 |     using Base::derived;
 55 | 
 56 |     /// Rounding mode of arithmetic operations
 57 |     static constexpr RoundingMode Mode = Mode_;
 58 | 
 59 |     template <typename Arg, enable_if_t<std::is_same_v<value_t<Arg>, Value_>> = 0>
 60 |     ENOKI_INLINE StaticArrayImpl(Arg&& arg) : Base(std::forward<Arg>(arg)) { }
 61 | 
 62 |     template <typename... Args>
 63 |     ENOKI_INLINE StaticArrayImpl(Args&&... args) : Base(std::forward<Args>(args)...) { }
 64 | 
 65 |     template <typename Arg, enable_if_t<!std::is_same_v<value_t<Arg>, Value_>> = 0>
 66 |     ENOKI_NOINLINE StaticArrayImpl(Arg&& arg) {
 67 |         set_rounding_mode<Mode_> mode; (void) mode;
 68 |         using Base2 = std::conditional_t<IsMask_,
 69 |             Array<Value_, Size_, Approx_, RoundingMode::Default>,
 70 |             Packet<Value_, Size_, Approx_, RoundingMode::Default>>;
 71 |         Base::operator=(Base2(std::forward<Arg>(arg)));
 72 |     }
 73 | 
 74 |     template <typename Arg, enable_if_t<std::is_same_v<value_t<Arg>, Value_>> = 0>
 75 |     ENOKI_NOINLINE Derived& operator=(Arg&& arg) {
 76 |         Base::operator=(std::forward<Arg>(arg));
 77 |         return derived();
 78 |     }
 79 | 
 80 |     template <typename Arg, enable_if_t<!std::is_same_v<value_t<Arg>, Value_>> = 0>
 81 |     ENOKI_NOINLINE Derived& operator=(Arg&& arg) {
 82 |         set_rounding_mode<Mode_> mode; (void) mode;
 83 |         using Base2 = std::conditional_t<IsMask_,
 84 |             Array<Value_, Size_, Approx_, RoundingMode::Default>,
 85 |             Packet<Value_, Size_, Approx_, RoundingMode::Default>>;
 86 |         Base::operator=(Base2(std::forward<Arg>(arg)));
 87 |         return derived();
 88 |     }
 89 | 
 90 |     ENOKI_NOINLINE Derived add_(const Derived &a) const {
 91 |         set_rounding_mode<Mode_> mode; (void) mode;
 92 |         return Base::add_(a);
 93 |     }
 94 | 
 95 |     ENOKI_NOINLINE Derived sub_(const Derived &a) const {
 96 |         set_rounding_mode<Mode_> mode; (void) mode;
 97 |         return Base::sub_(a);
 98 |     }
 99 | 
100 |     ENOKI_NOINLINE Derived mul_(const Derived &a) const {
101 |         set_rounding_mode<Mode_> mode; (void) mode;
102 |         return Base::mul_(a);
103 |     }
104 | 
105 |     ENOKI_NOINLINE Derived div_(const Derived &a) const {
106 |         set_rounding_mode<Mode_> mode; (void) mode;
107 |         return Base::div_(a);
108 |     }
109 | 
110 |     ENOKI_NOINLINE Derived sqrt_() const {
111 |         set_rounding_mode<Mode_> mode; (void) mode;
112 |         return Base::sqrt_();
113 |     }
114 | 
115 |     ENOKI_NOINLINE Derived fmadd_(const Derived &b, const Derived &c) const {
116 |         set_rounding_mode<Mode_> mode; (void) mode;
117 |         return Base::fmadd_(b, c);
118 |     }
119 | 
120 |     ENOKI_NOINLINE Derived fmsub_(const Derived &b, const Derived &c) const {
121 |         set_rounding_mode<Mode_> mode; (void) mode;
122 |         return Base::fmsub_(b, c);
123 |     }
124 | 
125 |     ENOKI_NOINLINE Derived fnmadd_(const Derived &b, const Derived &c) const {
126 |         set_rounding_mode<Mode_> mode; (void) mode;
127 |         return Base::fnmadd_(b, c);
128 |     }
129 | 
130 |     ENOKI_NOINLINE Derived fnmsub_(const Derived &b, const Derived &c) const {
131 |         set_rounding_mode<Mode_> mode; (void) mode;
132 |         return Base::fnmsub_(b, c);
133 |     }
134 | 
135 |     ENOKI_NOINLINE Derived fmsubadd_(const Derived &b, const Derived &c) const {
136 |         set_rounding_mode<Mode_> mode; (void) mode;
137 |         return Base::fmsubadd_(b, c);
138 |     }
139 | 
140 |     ENOKI_NOINLINE Derived fmaddsub_(const Derived &b, const Derived &c) const {
141 |         set_rounding_mode<Mode_> mode; (void) mode;
142 |         return Base::fmaddsub_(b, c);
143 |     }
144 | 
145 |     ENOKI_NOINLINE Value_ hsum() const {
146 |         set_rounding_mode<Mode_> mode; (void) mode;
147 |         return Base::hsum_();
148 |     }
149 | 
150 |     ENOKI_NOINLINE Value_ hprod() const {
151 |         set_rounding_mode<Mode_> mode; (void) mode;
152 |         return Base::hprod_();
153 |     }
154 | };
155 | 
156 | NAMESPACE_END(enoki)
157 | 


--------------------------------------------------------------------------------
/include/enoki/color.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     enoki/color.h -- Color space transformations (only sRGB so far)
 3 | 
 4 |     Enoki is a C++ template library that enables transparent vectorization
 5 |     of numerical kernels using SIMD instruction sets available on current
 6 |     processor architectures.
 7 | 
 8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
 9 | 
10 |     All rights reserved. Use of this source code is governed by a BSD-style
11 |     license that can be found in the LICENSE file.
12 | */
13 | 
14 | #pragma once
15 | 
16 | #include <enoki/array.h>
17 | 
18 | NAMESPACE_BEGIN(enoki)
19 | 
20 | template <typename T> expr_t<T> linear_to_srgb(const T &x) {
21 |     using Value  = expr_t<T>;
22 |     using Mask   = mask_t<Value>;
23 |     using Scalar = scalar_t<Value>;
24 |     constexpr bool Single = std::is_same_v<Scalar, float>;
25 | 
26 |     Value r = Scalar(12.92);
27 |     Mask large_mask = x > Scalar(0.0031308);
28 | 
29 |     if (ENOKI_LIKELY(any(large_mask))) {
30 |         Value y = sqrt(x), p, q;
31 | 
32 |         if constexpr (Single) {
33 |             p = poly5(y, -0.0016829072605308378, 0.03453868659826638,
34 |                       0.7642611304733891, 2.0041169284241644,
35 |                       0.7551545191665577, -0.016202083165206348);
36 |             q = poly5(y, 4.178892964897981e-7, -0.00004375359692957097,
37 |                       0.03467195408529984, 0.6085338522168684,
38 |                       1.8970238036421054, 1.);
39 |         } else {
40 |             p = poly10(y, -3.7113872202050023e-6, -0.00021805827098915798,
41 |                        0.002531335520959116, 0.2263810267005674,
42 |                        3.0477578489880823, 15.374469584296442,
43 |                        32.44669922192121, 27.901125077137042, 8.450947414259522,
44 |                        0.5838023820686707, -0.0031151377052754843);
45 |             q = poly10(y, 2.2380622409188757e-11, -8.387527630781522e-9,
46 |                        0.00007045228641004039, 0.007244514696840552,
47 |                        0.21749170309546628, 2.575446652731678,
48 |                        13.297981743005433, 30.50364355650628, 29.70548706952188,
49 |                        10.723011300050162, 1.);
50 |         }
51 | 
52 |         masked(r, large_mask) = p / q;
53 |     }
54 | 
55 |     return r * x;
56 | }
57 | 
58 | template <typename T> expr_t<T> srgb_to_linear(const T &x) {
59 |     using Value  = expr_t<T>;
60 |     using Mask   = mask_t<Value>;
61 |     using Scalar = scalar_t<Value>;
62 |     constexpr bool Single = std::is_same_v<Scalar, float>;
63 | 
64 |     Value r = Scalar(1.0 / 12.92);
65 |     Mask large_mask = x > Scalar(0.04045);
66 | 
67 |     if (ENOKI_LIKELY(any(large_mask))) {
68 |         Value p, q;
69 | 
70 |         if constexpr (Single) {
71 |             p = poly4(x, -0.0163933279112946, -0.7386328024653209,
72 |                       -11.199318357635072, -47.46726633009393,
73 |                       -36.04572663838034);
74 |             q = poly4(x, -0.004261480793199332, -19.140923959601675,
75 |                       -59.096406619244426, -18.225745396846637, 1.);
76 |         } else {
77 |             p = poly9(x, -0.008042950896814532, -0.5489744177844188,
78 |                       -14.786385491859248, -200.19589605282445,
79 |                       -1446.951694673217, -5548.704065887224,
80 |                       -10782.158977031822, -9735.250875334352,
81 |                       -3483.4445569178347, -342.62884098034357);
82 |             q = poly9(x, -2.2132610916769585e-8, -9.646075249097724,
83 |                       -237.47722999429413, -2013.8039726540235,
84 |                       -7349.477378676199, -11916.470977597566,
85 |                       -8059.219012060384, -1884.7738197074218,
86 |                       -84.8098437770271, 1.);
87 |         }
88 | 
89 |         masked(r, large_mask) = p / q;
90 |     }
91 | 
92 |     return r * x;
93 | }
94 | 
95 | NAMESPACE_END(enoki)
96 | 


--------------------------------------------------------------------------------
/include/enoki/morton.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |     enoki/morton.h -- Morton/Z-order curve encoding and decoding routines
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 |     Includes contributions by Sebastien Speierer
 10 | 
 11 |     All rights reserved. Use of this source code is governed by a BSD-style
 12 |     license that can be found in the LICENSE file.
 13 | */
 14 | 
 15 | #pragma once
 16 | 
 17 | #include <enoki/array.h>
 18 | 
 19 | #if defined(_MSC_VER)
 20 | #  pragma warning (push)
 21 | #  pragma warning (disable: 4310) // cast truncates constant value
 22 | #endif
 23 | 
 24 | NAMESPACE_BEGIN(enoki)
 25 | NAMESPACE_BEGIN(detail)
 26 | 
 27 | /// Generate bit masks for the functions \ref scatter_bits() and \ref gather_bits()
 28 | template <typename Value> constexpr Value morton_magic(size_t dim, size_t level) {
 29 |     size_t n_bits = sizeof(Value) * 8;
 30 |     size_t max_block_size = n_bits / dim;
 31 |     size_t block_size = std::min(size_t(1) << (level - 1), max_block_size);
 32 |     size_t count = 0;
 33 | 
 34 |     Value mask  = Value(1) << (n_bits - 1),
 35 |          value = Value(0);
 36 | 
 37 |     for (size_t i = 0; i < n_bits; ++i) {
 38 |         value >>= 1;
 39 | 
 40 |         if (count < max_block_size && (i / block_size) % dim == 0) {
 41 |             count++;
 42 |             value |= mask;
 43 |         }
 44 |     }
 45 | 
 46 |     return value;
 47 | }
 48 | 
 49 | /// Bit scatter function. \c Dimension defines the final distance between two output bits
 50 | template <size_t, typename Value, size_t Level, enable_if_t<Level == 0> = 0>
 51 | ENOKI_INLINE Value scatter_bits(Value x) { return x; }
 52 | 
 53 | template <size_t Dimension, typename Value,
 54 |           size_t Level = clog2i(sizeof(Value) * 8),
 55 |           enable_if_t<Level != 0 && (!(has_avx2 && has_x86_64) || !std::is_integral_v<Value>)> = 0>
 56 | ENOKI_INLINE Value scatter_bits(Value x) {
 57 |     using Scalar = scalar_t<Value>;
 58 | 
 59 |     constexpr Scalar magic = morton_magic<Scalar>(Dimension, Level);
 60 |     constexpr size_t shift_maybe = (1 << (Level - 1)) * (Dimension - 1);
 61 |     constexpr size_t shift = (shift_maybe < sizeof(Scalar) * 8) ? shift_maybe : 0;
 62 | 
 63 |     if constexpr (shift != 0)
 64 |         x |= sl<shift>(x);
 65 | 
 66 |     x &= magic;
 67 | 
 68 |     return scatter_bits<Dimension, Value, Level - 1>(x);
 69 | }
 70 | 
 71 | template <size_t, typename Value, size_t Level,
 72 |           enable_if_t<Level == 0> = 0>
 73 | ENOKI_INLINE Value gather_bits(Value x) { return x; }
 74 | 
 75 | /// Bit gather function. \c Dimension defines the final distance between two input bits
 76 | template <size_t Dimension, typename Value,
 77 |           size_t Level = clog2i(sizeof(Value) * 8),
 78 |           enable_if_t<Level != 0 && (!(has_avx2 && has_x86_64) || !std::is_integral_v<Value>)> = 0>
 79 | ENOKI_INLINE Value gather_bits(Value x) {
 80 |     using Scalar = scalar_t<Value>;
 81 | 
 82 |     constexpr size_t ilevel = clog2i(sizeof(Value) * 8) - Level + 1;
 83 |     constexpr Scalar magic = morton_magic<Scalar>(Dimension, ilevel);
 84 |     constexpr size_t shift_maybe = (1 << (ilevel - 1)) * (Dimension - 1);
 85 |     constexpr size_t shift = (shift_maybe < sizeof(Scalar) * 8) ? shift_maybe : 0;
 86 | 
 87 |     x &= magic;
 88 | 
 89 |     if constexpr (shift != 0)
 90 |         x |= sr<shift>(x);
 91 | 
 92 |     return gather_bits<Dimension, Value, Level - 1>(x);
 93 | }
 94 | 
 95 | #if defined(ENOKI_X86_AVX2) && defined(ENOKI_X86_64)
 96 | template <size_t Dimension, typename Value,
 97 |           enable_if_t<std::is_integral_v<Value>> = 0>
 98 | ENOKI_INLINE Value scatter_bits(Value x) {
 99 |     constexpr Value magic = morton_magic<Value>(Dimension, 1);
100 |     if constexpr (sizeof(Value) <= 4)
101 |         return Value(_pdep_u32((uint32_t) x, (uint32_t) magic));
102 |     else
103 |         return Value(_pdep_u64((uint64_t) x, (uint64_t) magic));
104 | }
105 | 
106 | template <size_t Dimension, typename Value,
107 |           enable_if_t<std::is_integral_v<Value>> = 0>
108 | ENOKI_INLINE Value gather_bits(Value x) {
109 |     constexpr Value magic = morton_magic<Value>(Dimension, 1);
110 |     if constexpr (sizeof(Value) <= 4)
111 |         return Value(_pext_u32((uint32_t) x, (uint32_t) magic));
112 |     else
113 |         return Value(_pext_u64((uint64_t) x, (uint64_t) magic));
114 | }
115 | #endif
116 | 
117 | template <typename Array, size_t Index,
118 |           enable_if_t<Index == 0> = 0>
119 | ENOKI_INLINE void morton_decode_helper(value_t<Array> value, Array &out) {
120 |     out.coeff(0) = gather_bits<Array::Size>(value);
121 | }
122 | 
123 | template <typename Array, size_t Index = array_size_v<Array> - 1,
124 |           enable_if_t<Index != 0> = 0>
125 | ENOKI_INLINE void morton_decode_helper(value_t<Array> value, Array &out) {
126 |     out.coeff(Index) = gather_bits<Array::Size>(sr<Index>(value));
127 |     morton_decode_helper<Array, Index - 1>(value, out);
128 | }
129 | 
130 | NAMESPACE_END(detail)
131 | 
132 | /// Convert a N-dimensional integer array into the Morton/Z-order curve encoding
133 | template <typename Array, size_t Index, typename Return = value_t<Array>,
134 |           enable_if_t<Index == 0> = 0>
135 | ENOKI_INLINE Return morton_encode(Array a) {
136 |     return detail::scatter_bits<Array::Size>(a.coeff(0));
137 | }
138 | 
139 | /// Convert a N-dimensional integer array into the Morton/Z-order curve encoding
140 | template <typename Array, size_t Index = array_size_v<Array> - 1,
141 |           typename Return = value_t<Array>, enable_if_t<Index != 0> = 0>
142 | ENOKI_INLINE Return morton_encode(Array a) {
143 |     static_assert(std::is_unsigned_v<scalar_t<Array>>, "morton_encode() requires unsigned arguments");
144 |     return sl<Index>(detail::scatter_bits<Array::Size>(a.coeff(Index))) |
145 |            morton_encode<Array, Index - 1>(a);
146 | }
147 | 
148 | /// Convert Morton/Z-order curve encoding into a N-dimensional integer array
149 | template <typename Array, typename Value = value_t<Array>>
150 | ENOKI_INLINE Array morton_decode(Value value) {
151 |     static_assert(std::is_unsigned_v<scalar_t<Array>>, "morton_decode() requires unsigned arguments");
152 |     Array result;
153 |     detail::morton_decode_helper(value, result);
154 |     return result;
155 | }
156 | 
157 | NAMESPACE_END(enoki)
158 | 
159 | #if defined(_MSC_VER)
160 | #  pragma warning (pop)
161 | #endif
162 | 


--------------------------------------------------------------------------------
/resources/FindSphinx.cmake:
--------------------------------------------------------------------------------
 1 | find_program(SPHINX_EXECUTABLE NAMES sphinx-build
 2 |     HINTS
 3 |     $ENV{SPHINX_DIR}
 4 |     PATH_SUFFIXES bin
 5 |     DOC "Sphinx documentation generator"
 6 | )
 7 | 
 8 | include(FindPackageHandleStandardArgs)
 9 | 
10 | find_package_handle_standard_args(Sphinx DEFAULT_MSG
11 |     SPHINX_EXECUTABLE
12 | )
13 | 
14 | mark_as_advanced(SPHINX_EXECUTABLE)
15 | 


--------------------------------------------------------------------------------
/resources/__init__.py:
--------------------------------------------------------------------------------
1 | __import__("enoki.core")
2 | 


--------------------------------------------------------------------------------
/resources/archflags_unix.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | int main(int argc, char *argv[]) {
 4 | #if defined(__AVX512DQ__)
 5 |     std::cout << "-march=skx" << std::endl;
 6 | #elif defined(__AVX512ER__)
 7 |     std::cout << "-march=knl" << std::endl;
 8 | #elif defined(__AVX2__)
 9 |     std::cout << "-mavx2" << std::endl;
10 | #elif defined(__AVX__)
11 |     std::cout << "-mavx" << std::endl;
12 | #elif defined(__SSE4_2__)
13 |     std::cout << "-msse4.2" << std::endl;
14 | #elif defined(__aarch64__)
15 |     std::cout << "-march=armv8-a+simd -mtune=cortex-a53" << std::endl;
16 | #elif defined(__arm__)
17 |     std::cout << "-march=armv7-a -mtune=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard -mfp16-format=ieee" << std::endl;
18 | #endif
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/resources/archflags_win32.cpp:
--------------------------------------------------------------------------------
 1 | #include <intrin.h>
 2 | #include <vector>
 3 | #include <bitset>
 4 | #include <array>
 5 | #include <iostream>
 6 | 
 7 | struct Flags {
 8 |     std::bitset<32> eax;
 9 |     std::bitset<32> ebx;
10 |     std::bitset<32> ecx;
11 |     std::bitset<32> edx;
12 | 
13 |     Flags(int eax, int ebx, int ecx, int edx) :
14 |         eax(eax), ebx(ebx), ecx(ecx), edx(edx) { }
15 | };
16 | 
17 | int main(int argc, char *argv[]) {
18 |     std::array<int, 4> buffer;
19 |     std::vector<Flags> flags;
20 | 
21 |     __cpuid(buffer.data(), 0);
22 |     int nIDs = buffer[0];
23 | 
24 |     for (int i = 0; i <= nIDs; ++i) {
25 |         __cpuidex(buffer.data(), i, 0);
26 | 
27 |         flags.emplace_back(buffer[0], buffer[1], buffer[2], buffer[3]);
28 |     }
29 | 
30 |     if (flags[7].ebx[5])
31 |         std::cout << "/arch:AVX2";
32 |     else if (flags[1].ecx[28])
33 |         std::cout << "/arch:AVX";
34 | 
35 |     return 0;
36 | }
37 | 


--------------------------------------------------------------------------------
/resources/check-style.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Script to check include/test code for common pybind11 code style errors.
 4 | #
 5 | # This script currently checks for
 6 | #
 7 | # 1. use of tabs instead of spaces
 8 | # 2. MSDOS-style CRLF endings
 9 | # 3. trailing spaces
10 | # 4. missing space between keyword and parenthesis, e.g.: for(, if(, while(
11 | # 5. Missing space between right parenthesis and brace, e.g. 'for (...){'
12 | # 6. opening brace on its own line. It should always be on the same line as the
13 | #    if/while/for/do statment.
14 | # 7. Leftover markers denoting incomplete implementations/tasks
15 | #
16 | # Invoke as: tools/check-style.sh
17 | #
18 | 
19 | errors=0
20 | IFS=$'\n'
21 | found=
22 | # The mt=41 sets a red background for matched tabs:
23 | GREP_COLORS='mt=41' GREP_COLOR='41' grep $'\t' include/ src/ tests/*.{h,cpp} docs/*.rst -rn --color=always |
24 | while read f; do
25 |     if [ -z "$found" ]; then
26 |         echo -e '\033[31m\033[01mError: found tabs instead of spaces in the following files:\033[0m'
27 |         found=1
28 |         errors=1
29 |     fi
30 | 
31 |     echo "    $f"
32 | done
33 | 
34 | found=
35 | # The mt=41 sets a red background for matched MS-DOS CRLF characters
36 | GREP_COLORS='mt=41' GREP_COLOR='41' grep -IUlr $'\r' include/ src/ tests/*.{h,cpp} docs/*.rst --color=always |
37 | while read f; do
38 |     if [ -z "$found" ]; then
39 |         echo -e '\033[31m\033[01mError: found CRLF characters in the following files:\033[0m'
40 |         found=1
41 |         errors=1
42 |     fi
43 | 
44 |     echo "    $f"
45 | done
46 | 
47 | found=
48 | # The mt=41 sets a red background for matched trailing spaces
49 | GREP_COLORS='mt=41' GREP_COLOR='41' grep '[[:blank:]]\+$' include/ src/ tests/*.{h,cpp} docs/*.rst -rn --color=always |
50 | while read f; do
51 |     if [ -z "$found" ]; then
52 |         echo -e '\033[31m\033[01mError: found trailing spaces in the following files:\033[0m'
53 |         found=1
54 |         errors=1
55 |     fi
56 | 
57 |     echo "    $f"
58 | done
59 | 
60 | found=
61 | grep '\<\(if\|for\|while\|catch\)(\|){' include/ src/ tests/*.{h,cpp} -rn --color=always |
62 | while read f; do
63 |     if [ -z "$found" ]; then
64 |         echo -e '\033[31m\033[01mError: found the following coding style problems:\033[0m'
65 |         found=1
66 |         errors=1
67 |     fi
68 | 
69 |     echo "    $f"
70 | done
71 | 
72 | found=
73 | GREP_COLORS='mt=41' GREP_COLOR='41' grep '^\s*{\s*$' include/ src/ tests/*.{h,cpp} -rn --color=always |
74 | while read f; do
75 |     if [ -z "$found" ]; then
76 |         echo -e '\033[31m\033[01mError: braces should occur on the same line as the if/while/.. statement. Found issues in the following files: \033[0m'
77 |         found=1
78 |         errors=1
79 |     fi
80 | 
81 |     echo "    $f"
82 | done
83 | 
84 | found=
85 | GREP_COLORS='mt=41' GREP_COLOR='41' grep '\<\(TODO\|XXX\)' include/ src/ tests/*.{h,cpp} -rn --color=always |
86 | while read f; do
87 |     if [ -z "$found" ]; then
88 |         echo -e '\033[31m\033[01mError: Incomplete implementation markers in code. Found issues in the following files: \033[0m'
89 |         found=1
90 |         errors=1
91 |     fi
92 | 
93 |     echo "    $f"
94 | done
95 | 
96 | exit $errors
97 | 


--------------------------------------------------------------------------------
/resources/enoki_gdb.py:
--------------------------------------------------------------------------------
  1 | ###############################################################################
  2 | # GDB Script to improve introspection of array types when debugging software
  3 | # using Enoki. Copy this file to "~/.gdb" (creating the directory, if not
  4 | # present) and then append the following lines to the file "~/.gdbinit"
  5 | # (again, creating it, if not already present):
  6 | ###############################################################################
  7 | # set print pretty
  8 | # source ~/.gdb/enoki_gdb.py
  9 | ###############################################################################
 10 | 
 11 | import gdb
 12 | 
 13 | simple_types = {
 14 |     'bool',
 15 |     'char', 'unsigned char',
 16 |     'short', 'unsigned short',
 17 |     'int', 'unsigned int',
 18 |     'long', 'unsigned long',
 19 |     'long long', 'unsigned long long',
 20 |     'float', 'double'
 21 | }
 22 | 
 23 | 
 24 | class EnokiIterator:
 25 |     def __init__(self, instance, size):
 26 |         self.instance = instance
 27 |         self.size = size
 28 |         self.index = 0
 29 | 
 30 |     def __iter__(self):
 31 |         return self
 32 | 
 33 |     def __next__(self):
 34 |         if self.index >= self.size:
 35 |             raise StopIteration
 36 |         result = ('[%i]' % self.index, self.instance[self.index])
 37 |         self.index += 1
 38 |         return result
 39 | 
 40 |     def next(self):
 41 |         return self.__next__()
 42 | 
 43 | 
 44 | class EnokiStaticArrayPrinter:
 45 |     def __init__(self, instance):
 46 |         self.instance = instance
 47 |         itype = self.instance.type.strip_typedefs()
 48 | 
 49 |         # Extract derived type
 50 |         if 'StaticArrayImpl' in str(itype):
 51 |             itype = itype.template_argument(3)
 52 | 
 53 |         try:
 54 |             data = self.instance['m_data']['_M_elems']
 55 |             self.data_type = data.type.strip_typedefs().target()
 56 |         except Exception:
 57 |             self.data_type = itype.template_argument(0)
 58 | 
 59 |         # Determine the size and data type
 60 |         self.size = int(str(itype.template_argument(1)))
 61 |         self.is_simple = str(self.data_type) in simple_types
 62 |         self.type_size = self.data_type.sizeof
 63 |         self.is_mask = 'Mask' in str(itype)
 64 | 
 65 |         try:
 66 |             _ = instance['k']
 67 |             self.kmask = True
 68 |         except Exception:
 69 |             self.kmask = False
 70 | 
 71 |     def entry(self, i):
 72 |         if i < 0 or i >= self.size:
 73 |             return None
 74 |         addr = int(self.instance.address) + self.type_size * i
 75 |         cmd = '*((%s *) 0x%x)' % (str(self.data_type), addr)
 76 |         return str(gdb.parse_and_eval(cmd))
 77 | 
 78 |     def children(self):
 79 |         if self.is_simple:
 80 |             return []
 81 |         else:
 82 |             return EnokiIterator(self.instance['m_data']['_M_elems'], self.size)
 83 | 
 84 |     def to_string(self):
 85 |         if self.is_simple:
 86 |             if not self.is_mask:
 87 |                 result = [self.entry(i) for i in range(self.size)]
 88 |             else:
 89 |                 if self.kmask:
 90 |                     # AVX512 mask register
 91 |                     result = list(reversed(format(int(self.instance['k']), '0%ib' % self.size)))
 92 |                 else:
 93 |                     result = [None] * self.size
 94 |                     for i in range(self.size):
 95 |                         value = self.entry(i)
 96 |                         result[i] = '0' if (value == '0' or value == 'false') else '1'
 97 |             return '[' + ', '.join(result) + ']'
 98 |         else:
 99 |             return ''
100 | 
101 | 
class EnokiDynamicArrayPrinter:
    """GDB pretty-printer for dynamically sized Enoki arrays.

    The array is printed packet by packet; at most ``limit`` entries are
    shown, followed by a note stating how many entries were left out.
    """

    def __init__(self, instance):
        self.instance = instance
        array_type = self.instance.type.strip_typedefs()
        self.size = int(str(self.instance['m_size']))
        self.packet_count = int(str(self.instance['m_packets_allocated']))
        self.packet_type = array_type.template_argument(0)
        self.packet_size = self.packet_type.sizeof
        # Address of the packet storage, taken from the internals of the
        # 'm_packets' member
        head = instance['m_packets']['_M_t']['_M_t']['_M_head_impl']
        self.data = int(str(head), 0)
        self.limit = 20

    def to_string(self):
        entries = []
        for packet_index in range(self.packet_count):
            address = int(self.data) + self.packet_size * packet_index
            expression = '*((%s *) 0x%x)' % (str(self.packet_type), address)
            text = str(gdb.parse_and_eval(expression))
            assert text[-1] == ']'

            # Keep the entries between the last '[' and the closing ']'
            entries.extend(text[text.rfind('[') + 1:-1].split(', '))

            if len(entries) > self.size:
                # The final packet extends beyond the end of the array
                del entries[self.size:]
                break
            if len(entries) > self.limit:
                break

        if len(entries) > self.limit:
            skipped = ".. %i skipped .." % (self.size - self.limit)
            entries = entries[:self.limit] + [skipped]

        return '[' + ', '.join(entries) + ']'
130 | 
# Static Enoki arrays
regexp_1 = (r'(enoki::)?(Array|Packet|Complex|Matrix|'
            r'Quaternion|StaticArrayImpl)(Mask)?<.+>')

# Mitsuba 2 is one of the main users of Enoki. For convenience, also
# declare its custom array types here
regexp_2 = r'(mitsuba::)?(Vector|Point|Normal|Spectrum|Color)<.+>'

# The alternation must be enclosed in a group of its own: '^' and '$' bind
# more tightly than '|', so without the outer group the pattern would read
# "starts with regexp_1 OR ends with regexp_2" and would also match unrelated
# types that merely contain one of the names (e.g. a container of vectors)
regexp_combined = r'^((%s)|(%s))$' % (regexp_1, regexp_2)
140 | 
# 'gdb.printing' is a submodule of 'gdb'; import it explicitly rather than
# relying on some other script having loaded it beforehand
import gdb.printing

# Collect both printers under the name "enoki" ...
p = gdb.printing.RegexpCollectionPrettyPrinter("enoki")
p.add_printer("static", regexp_combined, EnokiStaticArrayPrinter)
p.add_printer("dynamic", r'^(enoki::)?DynamicArray(Impl)?<.+>$', EnokiDynamicArrayPrinter)

# ... and register the collection with the object file that is currently
# being auto-loaded (gdb.current_objfile() is None otherwise)
o = gdb.current_objfile()
gdb.printing.register_pretty_printer(o, p)
147 | 


--------------------------------------------------------------------------------
/src/cuda/common.cuh:
--------------------------------------------------------------------------------
// Namespace open/close helpers; only defined when no earlier header has
// already provided them.
#if !defined(NAMESPACE_BEGIN)
#  define NAMESPACE_BEGIN(name) namespace name {
#endif

#if !defined(NAMESPACE_END)
#  define NAMESPACE_END(name) }
#endif
#
// Compiler-specific symbol export attribute and branch-prediction hints.
// On MSVC the hint macros expand to their argument unchanged.
// NOTE(review): the MSVC variants do not parenthesize `x`; an expression such
// as `!ENOKI_LIKELY(a == b)` would expand to `!a == b` there.
#if defined(_MSC_VER)
#  define ENOKI_EXPORT                 __declspec(dllexport)
#  define ENOKI_LIKELY(x)              x
#  define ENOKI_UNLIKELY(x)            x
#else
#  define ENOKI_EXPORT                 __attribute__ ((visibility("default")))
#  define ENOKI_LIKELY(x)              __builtin_expect(!!(x), 1)
#  define ENOKI_UNLIKELY(x)            __builtin_expect(!!(x), 0)
#endif
18 | 
#include <cstring>

#if defined(__SSE4_2__)
#  include <x86intrin.h>
#endif
22 | 
23 | NAMESPACE_BEGIN(enoki)
24 | 
// Tags for element types. Numbering starts at Invalid = 0 and increases by
// one per entry in the order listed.
enum EnokiType {
    Invalid = 0,
    Int8,
    UInt8,
    Int16,
    UInt16,
    Int32,
    UInt32,
    Int64,
    UInt64,
    Float16,
    Float32,
    Float64,
    Bool,
    Pointer
};
28 | 
// Forwards `err` to cuda_check_impl together with the file name and line
// number of the call site.
#define cuda_check(err) cuda_check_impl(err, __FILE__, __LINE__)
// Two overloads, one per result-code type (CUresult and cudaError_t). They are
// implemented elsewhere; presumably they report a failure when `errval`
// signals an error — confirm against the definition.
ENOKI_EXPORT extern void cuda_check_impl(CUresult errval, const char *file, const int line);
ENOKI_EXPORT extern void cuda_check_impl(cudaError_t errval, const char *file, const int line);
32 | 
// Rounds `n` up to the nearest power of two; a value that already is a power
// of two is returned unchanged. The arithmetic is 32-bit and wraps: for
// n == 0 and for n > 2^31 the result is 0.
inline uint32_t next_power_of_two(uint32_t n) {
    --n;
    // Copy the highest set bit into every lower position (shifts 1,2,4,8,16)
    for (uint32_t shift = 1; shift < 32; shift *= 2)
        n |= n >> shift;
    return n + 1u;
}
43 | 
// Exported allocation helpers, implemented elsewhere. Each takes a byte count
// and returns a raw pointer; judging by the names they hand out device,
// managed and host memory respectively — TODO confirm against the definitions.
ENOKI_EXPORT extern void* cuda_malloc(size_t size);
ENOKI_EXPORT extern void* cuda_managed_malloc(size_t size);
ENOKI_EXPORT extern void* cuda_host_malloc(size_t size);
// Release functions; each comes in a variant with and without a cudaStream_t.
ENOKI_EXPORT extern void cuda_free(void *p, cudaStream_t stream);
ENOKI_EXPORT extern void cuda_free(void *p);
ENOKI_EXPORT extern void cuda_host_free(void *p, cudaStream_t stream);
ENOKI_EXPORT extern void cuda_host_free(void *p);
// Called by cuda_check_maybe_redo (below) after an allocation failure, before
// the failed expression is retried.
ENOKI_EXPORT extern void cuda_malloc_trim();
ENOKI_EXPORT extern void cuda_sync();
// `log_assembly` defaults to false.
ENOKI_EXPORT void cuda_eval(bool log_assembly = false);

// String formatting helpers (not exported). NOTE(review): time_string's
// parameter is also called `size`; presumably it is a duration — confirm.
extern std::string mem_string(size_t size);
extern std::string time_string(size_t size);
57 | 
// Hash functor for std::string keys.
//
// When compiled with SSE4.2 support, the string is folded with the hardware
// CRC32 instructions: 8 bytes at a time while more than 8 bytes remain, then
// byte by byte. Only the low 32 bits of the running state are returned.
// Without SSE4.2 it falls back to std::hash<std::string>.
struct StringHasher {
    size_t operator()(const std::string& k) const {
#if defined(__SSE4_2__)
        const char *ptr = k.c_str(),
                   *end = ptr + k.length();

        uint64_t state64 = 0;
        while (ptr + 8 < end) {
            // Read the next 8 bytes via memcpy: the character buffer carries
            // no alignment guarantee for uint64_t, and dereferencing a casted
            // pointer would be an aliasing violation.
            uint64_t chunk;
            std::memcpy(&chunk, ptr, sizeof(chunk));
            state64 = _mm_crc32_u64(state64, chunk);
            ptr += 8;
        }
        uint32_t state32 = (uint32_t) state64;
        // Remaining 1..8 bytes
        while (ptr < end)
            state32 = _mm_crc32_u8(state32, (unsigned char) *ptr++);
        return (size_t) state32;
#else
        return std::hash<std::string>()(k);
#endif
    }
};
78 | 
// Evaluates `expr` (which must yield a cudaError_t) and checks the result via
// cuda_check. If the first attempt fails with cudaErrorMemoryAllocation,
// cuda_malloc_trim() is called and `expr` is evaluated one more time; the
// result of that second attempt is always passed to cuda_check.
//
// The macro's own locals carry an `enoki_..._` prefix/suffix so that they
// cannot capture identifiers (e.g. a caller's `i` or `rv`) used inside
// `expr`, and `expr` is parenthesized before being assigned.
#define cuda_check_maybe_redo(expr)                                            \
    for (int enoki_attempt_ = 0; enoki_attempt_ < 2; ++enoki_attempt_) {       \
        cudaError_t enoki_rv_ = (expr);                                        \
        if (enoki_rv_ == cudaErrorMemoryAllocation && enoki_attempt_ == 0) {   \
            cuda_malloc_trim();                                                \
        } else {                                                               \
            cuda_check(enoki_rv_);                                             \
            break;                                                             \
        }                                                                      \
    }
89 | 
90 | 
91 | NAMESPACE_END(enoki)
92 | 


--------------------------------------------------------------------------------
/src/python/complex.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "common.h"
  3 | #include <enoki/complex.h>
  4 | 
// Creates the Python bindings for the complex array type `Complex`.
//
//   m     module that receives the free functions (real, imag, exp, ...)
//   s     module in which the class itself is registered
//   name  Python-visible class name
//
// Returns the py::class_ handle of the newly registered class.
template <typename Complex>
py::class_<Complex> bind_complex(py::module &m, py::module &s, const char *name) {
    using Value  = value_t<Complex>;
    using Scalar = scalar_t<Value>;
    using Mask   = mask_t<Complex>;

    // Constructors: default, and from a single Scalar
    auto cls = py::class_<Complex>(s, name)
        .def(py::init<>())
        .def(py::init<const Scalar &>());
        // A separate single-Value constructor is only registered when Value
        // and Scalar are different types
        if constexpr (!std::is_same_v<Value, Scalar>)
            cls.def(py::init<const Value &>());
        // Two-component constructor, comparison/arithmetic operators,
        // element access and static factory functions
        cls.def(py::init<const Value &, const Value &>(), "real"_a, "imag"_a)
        .def(py::self == py::self)
        .def(py::self != py::self)
        .def(py::self - py::self)
        .def(py::self + py::self)
        .def(py::self * py::self)
        .def(py::self / py::self)
        .def(-py::self)
        .def("__repr__", [](const Complex &a) -> std::string {
            // Returns an empty string while the flag behind the global
            // `implicit_conversion` pointer is set
            if (*implicit_conversion)
                return "";
            std::ostringstream oss;
            oss << a;
            return oss.str();
        })
        // Only indices 0 and 1 are valid; anything else raises IndexError
        .def("__getitem__", [](const Complex &a, size_t index) {
            if (index >= 2)
                throw py::index_error();
            return a.coeff(index);
        })
        .def("__setitem__", [](Complex &a, size_t index, const Value &value) {
            if (index >= 2)
                throw py::index_error();
            a.coeff(index) = value;
        })
        // Factories; `size` defaults to 1
        .def_static("identity", [](size_t size) { return identity<Complex>(size); }, "size"_a = 1)
        .def_static("zero", [](size_t size) { return zero<Complex>(size); }, "size"_a = 1)
        .def_static("full", [](Scalar value, size_t size) { return full<Complex>(value, size); },
                    "value"_a, "size"_a = 1);

    // Free functions taking Complex arguments, each forwarding to the
    // same-named function
    m.def("real", [](const Complex &z) { return real(z); });
    m.def("imag", [](const Complex &z) { return imag(z); });
    m.def("norm", [](const Complex &z) { return norm(z); });
    m.def("squared_norm", [](const Complex &z) { return squared_norm(z); });
    m.def("rcp", [](const Complex &z) { return rcp(z); });
    m.def("conj", [](const Complex &z) { return conj(z); });
    m.def("exp", [](const Complex &z) { return exp(z); });
    m.def("log", [](const Complex &z) { return log(z); });
    m.def("arg", [](const Complex &z) { return arg(z); });
    m.def("pow", [](const Complex &z1, const Complex &z2) { return pow(z1, z2); });
    m.def("sqrt", [](const Complex &z) { return sqrt(z); });
    m.def("sin", [](const Complex &z) { return sin(z); });
    m.def("cos", [](const Complex &z) { return cos(z); });
    m.def("sincos", [](const Complex &z) { return sincos(z); });
    m.def("tan", [](const Complex &z) { return tan(z); });
    m.def("asin", [](const Complex &z) { return asin(z); });
    m.def("acos", [](const Complex &z) { return acos(z); });
    m.def("atan", [](const Complex &z) { return atan(z); });
    m.def("sinh", [](const Complex &z) { return sinh(z); });
    m.def("cosh", [](const Complex &z) { return cosh(z); });
    m.def("sincosh", [](const Complex &z) { return sincosh(z); });
    m.def("tanh", [](const Complex &z) { return tanh(z); });
    m.def("asinh", [](const Complex &z) { return asinh(z); });
    m.def("acosh", [](const Complex &z) { return acosh(z); });
    m.def("atanh", [](const Complex &z) { return atanh(z); });

    // Classification predicates; the result is converted to Mask
    m.def("isfinite", [](const Complex &z) -> Mask { return enoki::isfinite(z); });
    m.def("isnan",    [](const Complex &z) -> Mask { return enoki::isnan(z); });
    m.def("isinf",    [](const Complex &z) -> Mask { return enoki::isinf(z); });

    // Gradient-related functions are only bound for differentiable arrays
    if constexpr (is_diff_array_v<Complex>) {
        // Type produced by detach() on a Complex lvalue
        using Detached = expr_t<decltype(detach(std::declval<Complex&>()))>;

        m.def("detach", [](const Complex &a) -> Detached { return detach(a); });
        m.def("requires_gradient",
              [](const Complex &a) { return requires_gradient(a); },
              "array"_a);

        m.def("set_requires_gradient",
              [](Complex &a, bool value) { set_requires_gradient(a, value); },
              "array"_a, "value"_a = true);

        m.def("gradient", [](Complex &a) { return eval(gradient(a)); });
        m.def("set_gradient",
              [](Complex &a, const Detached &g, bool b) { set_gradient(a, g, b); },
              "array"_a, "gradient"_a, "backward"_a = true);

        m.def("graphviz", [](const Complex &a) { return graphviz(a); });

        m.def("set_label", [](const Complex &a, const char *label) {
            set_label(a, label);
        });
    }

    // Implicit conversions into Complex: from Value always, from Scalar only
    // when it is a different type than Value
    implicitly_convertible<Value, Complex>();
    if constexpr (!std::is_same_v<Scalar, Value>)
        implicitly_convertible<Scalar, Complex>();

    return cls;
}
106 | 


--------------------------------------------------------------------------------
/src/python/cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
// Binding routines defined in other translation units; each receives the
// two modules `m` and `s` used in PYBIND11_MODULE below.
extern void bind_cuda_0d(py::module&, py::module&);
extern void bind_cuda_1d(py::module&, py::module&);
extern void bind_cuda_2d(py::module&, py::module&);
extern void bind_cuda_3d(py::module&, py::module&);
extern void bind_cuda_4d(py::module&, py::module&);
extern void bind_cuda_complex(py::module&, py::module&);
extern void bind_cuda_matrix(py::module&, py::module&);
extern void bind_cuda_pcg32(py::module&, py::module&);

// Points to a flag obtained from pybind11 shared data during module
// initialization; null until then.
bool *implicit_conversion = nullptr;
13 | 
// Entry point of the `cuda` extension module. `s` is the module being
// created; `m` is the already existing top-level "enoki" module, which
// receives the free functions.
PYBIND11_MODULE(cuda, s) {
    py::module m = py::module::import("enoki");
    py::module::import("enoki.scalar");

    // Look up the flag published under the key "implicit_conversion"
    implicit_conversion = (bool *) py::get_shared_data("implicit_conversion");

    py::class_<Buffer<true>>(m, "GPUBuffer");

    cuda_sync();
    // Note the order: the 1D bindings are registered before the 0D ones
    bind_cuda_1d(m, s);
    bind_cuda_0d(m, s); // after FloatC
    bind_cuda_2d(m, s);
    bind_cuda_3d(m, s);
    bind_cuda_4d(m, s);
    bind_cuda_complex(m, s);
    bind_cuda_matrix(m, s);
    bind_cuda_pcg32(m, s);

    // cuda_eval and cuda_sync run with the GIL released
    m.def("cuda_eval", &cuda_eval, "log_assembly"_a = false,
          py::call_guard<py::gil_scoped_release>());

    m.def("cuda_sync", &cuda_sync,
          py::call_guard<py::gil_scoped_release>());

    m.def("cuda_malloc_trim", &cuda_malloc_trim);

    // Prints the string returned by cuda_whos() and then releases it with free()
    m.def("cuda_whos", []() { char *w = cuda_whos(); py::print(w); free(w); });

    // Returns the pair (free, total) as filled in by cuda_mem_get_info
    m.def("cuda_mem_get_info", []() {
        size_t free = 0, total = 0;
        cuda_mem_get_info(&free, &total);
        return std::make_pair(free, total);
    });

    m.def("cuda_set_log_level", &cuda_set_log_level,
          "Sets the current log level (0: none, 1: kernel launches, 2: +ptxas "
          "statistics, 3: +ptx source, 4: +jit trace, 5: +ref counting)");

    m.def("cuda_log_level", &cuda_log_level);
}
54 | 


--------------------------------------------------------------------------------
/src/python/cuda_0d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_cuda_0d(py::module& m, py::module& s) {
 4 |     auto vector0m_class = bind<Vector0mC>(m, s, "Vector0m");
 5 |     auto vector0i_class = bind<Vector0iC>(m, s, "Vector0i");
 6 |     auto vector0u_class = bind<Vector0uC>(m, s, "Vector0u");
 7 |     auto vector0f_class = bind<Vector0fC>(m, s, "Vector0f");
 8 |     auto vector0d_class = bind<Vector0dC>(m, s, "Vector0d");
 9 | 
10 |     vector0f_class
11 |         .def(py::init<const Vector0f  &>())
12 |         .def(py::init<const Vector0dC &>())
13 |         .def(py::init<const Vector0uC &>())
14 |         .def(py::init<const Vector0iC &>());
15 | 
16 |     vector0d_class
17 |         .def(py::init<const Vector0d  &>())
18 |         .def(py::init<const Vector0fC &>())
19 |         .def(py::init<const Vector0uC &>())
20 |         .def(py::init<const Vector0iC &>());
21 | 
22 |     vector0i_class
23 |         .def(py::init<const Vector0i  &>())
24 |         .def(py::init<const Vector0fC &>())
25 |         .def(py::init<const Vector0dC &>())
26 |         .def(py::init<const Vector0uC &>());
27 | 
28 |     vector0u_class
29 |         .def(py::init<const Vector0u  &>())
30 |         .def(py::init<const Vector0fC &>())
31 |         .def(py::init<const Vector0dC &>())
32 |         .def(py::init<const Vector0iC &>());
33 | }
34 | 


--------------------------------------------------------------------------------
/src/python/cuda_1d.cpp:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | #include <pybind11/functional.h>
  3 | 
  4 | void bind_cuda_1d(py::module& m, py::module& s) {
  5 |     auto mask_class = bind<MaskC>(m, s, "Mask");
  6 |     auto uint32_class = bind<UInt32C>(m, s, "UInt32");
  7 |     auto uint64_class = bind<UInt64C>(m, s, "UInt64");
  8 |     auto int32_class = bind<Int32C>(m, s, "Int32");
  9 |     auto int64_class = bind<Int64C>(m, s, "Int64");
 10 |     auto float32_class = bind<Float32C>(m, s, "Float32");
 11 |     auto float64_class = bind<Float64C>(m, s, "Float64");
 12 | 
 13 |     float32_class
 14 |         .def(py::init<const Float64C &>())
 15 |         .def(py::init<const Int32C &>())
 16 |         .def(py::init<const Int64C &>())
 17 |         .def(py::init<const UInt32C &>())
 18 |         .def(py::init<const UInt64C &>());
 19 | 
 20 |     float64_class
 21 |         .def(py::init<const Float32C &>())
 22 |         .def(py::init<const Int32C &>())
 23 |         .def(py::init<const Int64C &>())
 24 |         .def(py::init<const UInt32C &>())
 25 |         .def(py::init<const UInt64C &>());
 26 | 
 27 |     int32_class
 28 |         .def(py::init<const Float32C &>())
 29 |         .def(py::init<const Float64C &>())
 30 |         .def(py::init<const Int64C &>())
 31 |         .def(py::init<const UInt32C &>())
 32 |         .def(py::init<const UInt64C &>());
 33 | 
 34 |     int64_class
 35 |         .def(py::init<const Float32C &>())
 36 |         .def(py::init<const Float64C &>())
 37 |         .def(py::init<const Int32C &>())
 38 |         .def(py::init<const UInt32C &>())
 39 |         .def(py::init<const UInt64C &>());
 40 | 
 41 |     uint32_class
 42 |         .def(py::init<const Float32C &>())
 43 |         .def(py::init<const Float64C &>())
 44 |         .def(py::init<const Int32C &>())
 45 |         .def(py::init<const Int64C &>())
 46 |         .def(py::init<const UInt64C &>());
 47 | 
 48 |     uint64_class
 49 |         .def(py::init<const Float32C &>())
 50 |         .def(py::init<const Float64C &>())
 51 |         .def(py::init<const Int32C &>())
 52 |         .def(py::init<const Int64C &>())
 53 |         .def(py::init<const UInt32C &>());
 54 | 
 55 |     auto vector1m_class = bind<Vector1mC>(m, s, "Vector1m");
 56 |     auto vector1i_class = bind<Vector1iC>(m, s, "Vector1i");
 57 |     auto vector1u_class = bind<Vector1uC>(m, s, "Vector1u");
 58 |     auto vector1f_class = bind<Vector1fC>(m, s, "Vector1f");
 59 |     auto vector1d_class = bind<Vector1dC>(m, s, "Vector1d");
 60 | 
 61 |     vector1f_class
 62 |         .def(py::init<const Vector1f  &>())
 63 |         .def(py::init<const Vector1dC &>())
 64 |         .def(py::init<const Vector1uC &>())
 65 |         .def(py::init<const Vector1iC &>());
 66 | 
 67 |     vector1d_class
 68 |         .def(py::init<const Vector1d  &>())
 69 |         .def(py::init<const Vector1fC &>())
 70 |         .def(py::init<const Vector1uC &>())
 71 |         .def(py::init<const Vector1iC &>());
 72 | 
 73 |     vector1i_class
 74 |         .def(py::init<const Vector1i  &>())
 75 |         .def(py::init<const Vector1fC &>())
 76 |         .def(py::init<const Vector1dC &>())
 77 |         .def(py::init<const Vector1uC &>());
 78 | 
 79 |     vector1u_class
 80 |         .def(py::init<const Vector1u  &>())
 81 |         .def(py::init<const Vector1fC &>())
 82 |         .def(py::init<const Vector1dC &>())
 83 |         .def(py::init<const Vector1iC &>());
 84 | 
 85 |     m.def(
 86 |         "binary_search",
 87 |         [](uint32_t start,
 88 |            uint32_t end,
 89 |            const std::function<MaskC(UInt32C)> &pred) {
 90 |             return enoki::binary_search(start, end, pred);
 91 |         },
 92 |         "start"_a, "end"_a, "pred"_a);
 93 | 
 94 |     m.def("meshgrid", [](const Float32C &x, const Float32C &y) {
 95 |         auto result = meshgrid(x, y);
 96 |         return std::make_pair(std::move(result.x()), std::move(result.y()));
 97 |     });
 98 | 
 99 |     m.def("meshgrid", [](const Float64C &x, const Float64C &y) {
100 |         auto result = meshgrid(x, y);
101 |         return std::make_pair(std::move(result.x()), std::move(result.y()));
102 |     });
103 | 
104 |     m.def("partition", [](const UInt64C &x) {
105 |         return partition(x);
106 |     });
107 | }
108 | 


--------------------------------------------------------------------------------
/src/python/cuda_2d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_cuda_2d(py::module& m, py::module& s) {
 4 |     auto vector2m_class = bind<Vector2mC>(m, s, "Vector2m");
 5 |     auto vector2i_class = bind<Vector2iC>(m, s, "Vector2i");
 6 |     auto vector2u_class = bind<Vector2uC>(m, s, "Vector2u");
 7 |     auto vector2f_class = bind<Vector2fC>(m, s, "Vector2f");
 8 |     auto vector2d_class = bind<Vector2dC>(m, s, "Vector2d");
 9 | 
10 |     vector2f_class
11 |         .def(py::init<const Vector2f  &>())
12 |         .def(py::init<const Vector2dC &>())
13 |         .def(py::init<const Vector2uC &>())
14 |         .def(py::init<const Vector2iC &>());
15 | 
16 |     vector2d_class
17 |         .def(py::init<const Vector2d  &>())
18 |         .def(py::init<const Vector2fC &>())
19 |         .def(py::init<const Vector2uC &>())
20 |         .def(py::init<const Vector2iC &>());
21 | 
22 |     vector2i_class
23 |         .def(py::init<const Vector2i  &>())
24 |         .def(py::init<const Vector2fC &>())
25 |         .def(py::init<const Vector2dC &>())
26 |         .def(py::init<const Vector2uC &>());
27 | 
28 |     vector2u_class
29 |         .def(py::init<const Vector2u  &>())
30 |         .def(py::init<const Vector2fC &>())
31 |         .def(py::init<const Vector2dC &>())
32 |         .def(py::init<const Vector2iC &>());
33 | }
34 | 


--------------------------------------------------------------------------------
/src/python/cuda_3d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_cuda_3d(py::module& m, py::module& s) {
 4 |     auto vector3m_class = bind<Vector3mC>(m, s, "Vector3m");
 5 |     auto vector3i_class = bind<Vector3iC>(m, s, "Vector3i");
 6 |     auto vector3u_class = bind<Vector3uC>(m, s, "Vector3u");
 7 |     auto vector3f_class = bind<Vector3fC>(m, s, "Vector3f");
 8 |     auto vector3d_class = bind<Vector3dC>(m, s, "Vector3d");
 9 | 
10 |     vector3f_class
11 |         .def(py::init<const Vector3f  &>())
12 |         .def(py::init<const Vector3dC &>())
13 |         .def(py::init<const Vector3iC &>())
14 |         .def(py::init<const Vector3uC &>());
15 | 
16 |     vector3d_class
17 |         .def(py::init<const Vector3d  &>())
18 |         .def(py::init<const Vector3fC &>())
19 |         .def(py::init<const Vector3iC &>())
20 |         .def(py::init<const Vector3uC &>());
21 | 
22 |     vector3i_class
23 |         .def(py::init<const Vector3i  &>())
24 |         .def(py::init<const Vector3uC &>())
25 |         .def(py::init<const Vector3fC &>())
26 |         .def(py::init<const Vector3dC &>());
27 | 
28 |     vector3u_class
29 |         .def(py::init<const Vector3u  &>())
30 |         .def(py::init<const Vector3iC &>())
31 |         .def(py::init<const Vector3fC &>())
32 |         .def(py::init<const Vector3dC &>());
33 | }
34 | 


--------------------------------------------------------------------------------
/src/python/cuda_4d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_cuda_4d(py::module& m, py::module& s) {
 4 |     auto vector4m_class = bind<Vector4mC>(m, s, "Vector4m");
 5 |     auto vector4i_class = bind<Vector4iC>(m, s, "Vector4i");
 6 |     auto vector4u_class = bind<Vector4uC>(m, s, "Vector4u");
 7 |     auto vector4f_class = bind<Vector4fC>(m, s, "Vector4f");
 8 |     auto vector4d_class = bind<Vector4dC>(m, s, "Vector4d");
 9 | 
10 |     vector4f_class
11 |         .def(py::init<const Vector4f  &>())
12 |         .def(py::init<const Vector4dC &>())
13 |         .def(py::init<const Vector4iC &>())
14 |         .def(py::init<const Vector4uC &>());
15 | 
16 |     vector4d_class
17 |         .def(py::init<const Vector4d  &>())
18 |         .def(py::init<const Vector4fC &>())
19 |         .def(py::init<const Vector4iC &>())
20 |         .def(py::init<const Vector4uC &>());
21 | 
22 |     vector4i_class
23 |         .def(py::init<const Vector4i  &>())
24 |         .def(py::init<const Vector4uC &>())
25 |         .def(py::init<const Vector4fC &>())
26 |         .def(py::init<const Vector4dC &>());
27 | 
28 |     vector4u_class
29 |         .def(py::init<const Vector4u  &>())
30 |         .def(py::init<const Vector4iC &>())
31 |         .def(py::init<const Vector4fC &>())
32 |         .def(py::init<const Vector4dC &>());
33 | }
34 | 


--------------------------------------------------------------------------------
/src/python/cuda_autodiff.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
// Binding routines defined in other translation units; each receives the
// two modules `m` and `s` used in PYBIND11_MODULE below.
extern void bind_cuda_autodiff_0d(py::module&, py::module&);
extern void bind_cuda_autodiff_1d(py::module&, py::module&);
extern void bind_cuda_autodiff_2d(py::module&, py::module&);
extern void bind_cuda_autodiff_3d(py::module&, py::module&);
extern void bind_cuda_autodiff_4d(py::module&, py::module&);
extern void bind_cuda_autodiff_complex(py::module&, py::module&);
extern void bind_cuda_autodiff_matrix(py::module&, py::module&);

// Points to a flag obtained from pybind11 shared data during module
// initialization; null until then.
bool *implicit_conversion = nullptr;
12 | 
// Entry point of the `cuda_autodiff` extension module. `s` is the module
// being created; `m` is the already existing top-level "enoki" module.
PYBIND11_MODULE(cuda_autodiff, s) {
    py::module m = py::module::import("enoki");
    // Make sure the non-differentiable CUDA bindings are loaded first
    py::module::import("enoki.cuda");

    // Look up the flag published under the key "implicit_conversion"
    implicit_conversion = (bool *) py::get_shared_data("implicit_conversion");

    // Note the order: the 1D bindings are registered before the 0D ones
    bind_cuda_autodiff_1d(m, s);
    bind_cuda_autodiff_0d(m, s); // after FloatD
    bind_cuda_autodiff_2d(m, s);
    bind_cuda_autodiff_3d(m, s);
    bind_cuda_autodiff_4d(m, s);
    bind_cuda_autodiff_complex(m, s);
    bind_cuda_autodiff_matrix(m, s);

    // Overload accepting any Python object; it always raises TypeError.
    // Registered after the bind_* calls above.
    m.def("set_requires_gradient",
          [](py::object o, bool value) {
              throw py::type_error("set_requires_gradient(): requires a differentiable type as input!");
          }, "array"_a, "value"_a = true);
}
32 | 


--------------------------------------------------------------------------------
/src/python/cuda_autodiff_0d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_cuda_autodiff_0d(py::module& m, py::module& s) {
 4 |     auto vector0m_class = bind<Vector0mD>(m, s, "Vector0m");
 5 |     auto vector0i_class = bind<Vector0iD>(m, s, "Vector0i");
 6 |     auto vector0u_class = bind<Vector0uD>(m, s, "Vector0u");
 7 |     auto vector0f_class = bind<Vector0fD>(m, s, "Vector0f");
 8 |     auto vector0d_class = bind<Vector0dD>(m, s, "Vector0d");
 9 | 
10 |     vector0f_class
11 |         .def(py::init<const Vector0f  &>())
12 |         .def(py::init<const Vector0fC &>())
13 |         .def(py::init<const Vector0dD &>())
14 |         .def(py::init<const Vector0uD &>())
15 |         .def(py::init<const Vector0iD &>());
16 | 
17 |     vector0d_class
18 |         .def(py::init<const Vector0d  &>())
19 |         .def(py::init<const Vector0dC &>())
20 |         .def(py::init<const Vector0fD &>())
21 |         .def(py::init<const Vector0uD &>())
22 |         .def(py::init<const Vector0iD &>());
23 | 
24 |     vector0i_class
25 |         .def(py::init<const Vector0i  &>())
26 |         .def(py::init<const Vector0iC &>())
27 |         .def(py::init<const Vector0fD &>())
28 |         .def(py::init<const Vector0dD &>())
29 |         .def(py::init<const Vector0uD &>());
30 | 
31 |     vector0u_class
32 |         .def(py::init<const Vector0u  &>())
33 |         .def(py::init<const Vector0uC &>())
34 |         .def(py::init<const Vector0fD &>())
35 |         .def(py::init<const Vector0dD &>())
36 |         .def(py::init<const Vector0iD &>());
37 | }
38 | 


--------------------------------------------------------------------------------
/src/python/cuda_autodiff_1d.cpp:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | #include <pybind11/functional.h>
  3 | 
  4 | void bind_cuda_autodiff_1d(py::module& m, py::module& s) {
  5 |     auto mask_class = bind<mask_t<Float32D>>(m, s, "Mask");
  6 |     auto uint32_class = bind<UInt32D>(m, s, "UInt32");
  7 |     auto uint64_class = bind<UInt64D>(m, s, "UInt64");
  8 |     auto int32_class = bind<Int32D>(m, s, "Int32");
  9 |     auto int64_class = bind<Int64D>(m, s, "Int64");
 10 |     auto float32_class = bind<Float32D>(m, s, "Float32");
 11 |     auto float64_class = bind<Float64D>(m, s, "Float64");
 12 | 
 13 |     mask_class
 14 |         .def(py::init<const MaskC &>());
 15 | 
 16 |     float32_class
 17 |         .def(py::init<const Float32C &>())
 18 |         .def(py::init<const Float64D &>())
 19 |         .def(py::init<const Int64D &>())
 20 |         .def(py::init<const Int32D &>())
 21 |         .def(py::init<const UInt64D &>())
 22 |         .def(py::init<const UInt32D &>())
 23 |         .def("set_graph_simplification", [](bool value) { Float32D::set_graph_simplification_(value); })
 24 |         .def("whos", []() { py::print(Float32D::whos_()); })
 25 |         .def_static("set_log_level",
 26 |              [](int log_level) { Float32D::set_log_level_(log_level); },
 27 |              "Sets the current log level (0 == none, 1 == minimal, 2 == moderate, 3 == high, 4 == everything)")
 28 |         .def_static("log_level", []() { return Float32D::log_level_(); })
 29 |         .def_static("simplify_graph", []() { Float32D::simplify_graph_(); })
 30 |         .def_static("backward",
 31 |                     [](bool free_graph) { backward<Float32D>(free_graph); },
 32 |                     "free_graph"_a = true)
 33 |         .def_static("forward",
 34 |                     [](bool free_graph) { forward<Float32D>(free_graph); },
 35 |                     "free_graph"_a = true);
 36 | 
 37 |     float64_class
 38 |         .def(py::init<const Float64C &>())
 39 |         .def(py::init<const Float32D &>())
 40 |         .def(py::init<const Int64D &>())
 41 |         .def(py::init<const Int32D &>())
 42 |         .def(py::init<const UInt64D &>())
 43 |         .def(py::init<const UInt32D &>())
 44 |         .def("set_graph_simplification", [](bool value) { Float64D::set_graph_simplification_(value); })
 45 |         .def("whos", []() { py::print(Float64D::whos_()); })
 46 |         .def_static("set_log_level",
 47 |              [](int log_level) { Float64D::set_log_level_(log_level); },
 48 |              "Sets the current log level (0 == none, 1 == minimal, 2 == moderate, 3 == high, 4 == everything)")
 49 |         .def_static("log_level", []() { return Float64D::log_level_(); })
 50 |         .def_static("simplify_graph", []() { Float64D::simplify_graph_(); })
 51 |         .def_static("backward",
 52 |                     [](bool free_graph) { backward<Float64D>(free_graph); },
 53 |                     "free_graph"_a = true)
 54 |         .def_static("forward",
 55 |                     [](bool free_graph) { forward<Float64D>(free_graph); },
 56 |                     "free_graph"_a = true);
 57 | 
 58 |     int32_class
 59 |         .def(py::init<const Int32C &>())
 60 |         .def(py::init<const Int64D &>())
 61 |         .def(py::init<const UInt32D &>())
 62 |         .def(py::init<const UInt64D &>())
 63 |         .def(py::init<const Float32D &>())
 64 |         .def(py::init<const Float64D &>());
 65 | 
 66 |     int64_class
 67 |         .def(py::init<const Int32D &>())
 68 |         .def(py::init<const Int64C &>())
 69 |         .def(py::init<const UInt32D &>())
 70 |         .def(py::init<const UInt64D &>())
 71 |         .def(py::init<const Float32D &>())
 72 |         .def(py::init<const Float64D &>());
 73 | 
 74 |     uint32_class
 75 |         .def(py::init<const Int32D &>())
 76 |         .def(py::init<const Int64D &>())
 77 |         .def(py::init<const UInt32C &>())
 78 |         .def(py::init<const UInt64D &>())
 79 |         .def(py::init<const Float32D &>())
 80 |         .def(py::init<const Float64D &>());
 81 | 
 82 |     uint64_class
 83 |         .def(py::init<const Int32D &>())
 84 |         .def(py::init<const Int64D &>())
 85 |         .def(py::init<const UInt32D &>())
 86 |         .def(py::init<const UInt64C &>())
 87 |         .def(py::init<const Float32D &>())
 88 |         .def(py::init<const Float64D &>());
 89 | 
 90 |     auto vector1m_class = bind<Vector1mD>(m, s, "Vector1m");
 91 |     auto vector1i_class = bind<Vector1iD>(m, s, "Vector1i");
 92 |     auto vector1u_class = bind<Vector1uD>(m, s, "Vector1u");
 93 |     auto vector1f_class = bind<Vector1fD>(m, s, "Vector1f");
 94 |     auto vector1d_class = bind<Vector1dD>(m, s, "Vector1d");
 95 | 
 96 |     vector1f_class
 97 |         .def(py::init<const Vector1f  &>())
 98 |         .def(py::init<const Vector1fC &>())
 99 |         .def(py::init<const Vector1dD &>())
100 |         .def(py::init<const Vector1uD &>())
101 |         .def(py::init<const Vector1iD &>());
102 | 
103 |     vector1d_class
104 |         .def(py::init<const Vector1d  &>())
105 |         .def(py::init<const Vector1dC &>())
106 |         .def(py::init<const Vector1fD &>())
107 |         .def(py::init<const Vector1uD &>())
108 |         .def(py::init<const Vector1iD &>());
109 | 
110 |     vector1i_class
111 |         .def(py::init<const Vector1i  &>())
112 |         .def(py::init<const Vector1iC &>())
113 |         .def(py::init<const Vector1fD &>())
114 |         .def(py::init<const Vector1dD &>())
115 |         .def(py::init<const Vector1uD &>());
116 | 
117 |     vector1u_class
118 |         .def(py::init<const Vector1u  &>())
119 |         .def(py::init<const Vector1uC &>())
120 |         .def(py::init<const Vector1fD &>())
121 |         .def(py::init<const Vector1dD &>())
122 |         .def(py::init<const Vector1iD &>());
123 | 
124 |     m.def(
125 |         "binary_search",
126 |         [](uint32_t start,
127 |            uint32_t end,
128 |            const std::function<MaskD(UInt32D)> &pred) {
129 |             return enoki::binary_search(start, end, pred);
130 |         },
131 |         "start"_a, "end"_a, "pred"_a);
132 | 
133 |     m.def("meshgrid", [](const Float32D &x, const Float32D &y) {
134 |         auto result = meshgrid(x, y);
135 |         return std::make_pair(std::move(result.x()), std::move(result.y()));
136 |     });
137 | 
138 |     m.def("meshgrid", [](const Float64D &x, const Float64D &y) {
139 |         auto result = meshgrid(x, y);
140 |         return std::make_pair(std::move(result.x()), std::move(result.y()));
141 |     });
142 | 
143 |     struct Scope {
144 |         Scope(const std::string &name) : name(name) { }
145 | 
146 |         void enter() { Float32D::push_prefix_(name.c_str()); }
147 |         void exit(py::handle, py::handle, py::handle) { Float32D::pop_prefix_(); }
148 | 
149 |         std::string name;
150 |     };
151 | 
152 |     py::class_<Scope>(float32_class, "Scope")
153 |         .def(py::init<const std::string &>())
154 |         .def("__enter__", &Scope::enter)
155 |         .def("__exit__", &Scope::exit);
156 | }
157 | 


--------------------------------------------------------------------------------
/src/python/cuda_autodiff_2d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_cuda_autodiff_2d(py::module& m, py::module& s) { // Registers the Vector2*D classes and their converting constructors.
 4 |     auto cls_m = bind<Vector2mD>(m, s, "Vector2m"); // mask class: registered only, no constructors added below
 5 |     auto cls_i = bind<Vector2iD>(m, s, "Vector2i");
 6 |     auto cls_u = bind<Vector2uD>(m, s, "Vector2u");
 7 |     auto cls_f = bind<Vector2fD>(m, s, "Vector2f");
 8 |     auto cls_d = bind<Vector2dD>(m, s, "Vector2d");
 9 | 
10 |     // Vector2fD: constructible from Vector2f, Vector2fC and the d/u/i D-variants.
11 |     cls_f.def(py::init<const Vector2f  &>());
12 |     cls_f.def(py::init<const Vector2fC &>());
13 |     cls_f.def(py::init<const Vector2dD &>());
14 |     cls_f.def(py::init<const Vector2uD &>());
15 |     cls_f.def(py::init<const Vector2iD &>());
16 | 
17 |     // Vector2dD: constructible from Vector2d, Vector2dC and the f/u/i D-variants.
18 |     cls_d.def(py::init<const Vector2d  &>());
19 |     cls_d.def(py::init<const Vector2dC &>());
20 |     cls_d.def(py::init<const Vector2fD &>());
21 |     cls_d.def(py::init<const Vector2uD &>());
22 |     cls_d.def(py::init<const Vector2iD &>());
23 | 
24 |     // Vector2iD: constructible from Vector2i, Vector2iC and the f/d/u D-variants.
25 |     cls_i.def(py::init<const Vector2i  &>());
26 |     cls_i.def(py::init<const Vector2iC &>());
27 |     cls_i.def(py::init<const Vector2fD &>());
28 |     cls_i.def(py::init<const Vector2dD &>());
29 |     cls_i.def(py::init<const Vector2uD &>());
30 | 
31 |     // Vector2uD: constructible from Vector2u, Vector2uC and the f/d/i D-variants.
32 |     cls_u.def(py::init<const Vector2u  &>());
33 |     cls_u.def(py::init<const Vector2uC &>());
34 |     cls_u.def(py::init<const Vector2fD &>());
35 |     cls_u.def(py::init<const Vector2dD &>());
36 |     cls_u.def(py::init<const Vector2iD &>());
37 | }
38 | 


--------------------------------------------------------------------------------
/src/python/cuda_autodiff_3d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_cuda_autodiff_3d(py::module& m, py::module& s) { // Registers the Vector3*D classes and their converting constructors.
 4 |     auto cls_m = bind<Vector3mD>(m, s, "Vector3m"); // mask class: registered only, no constructors added below
 5 |     auto cls_i = bind<Vector3iD>(m, s, "Vector3i");
 6 |     auto cls_u = bind<Vector3uD>(m, s, "Vector3u");
 7 |     auto cls_f = bind<Vector3fD>(m, s, "Vector3f");
 8 |     auto cls_d = bind<Vector3dD>(m, s, "Vector3d");
 9 | 
10 |     // Vector3fD: constructible from Vector3f, Vector3fC and the d/u/i D-variants.
11 |     cls_f.def(py::init<const Vector3f  &>());
12 |     cls_f.def(py::init<const Vector3fC &>());
13 |     cls_f.def(py::init<const Vector3dD &>());
14 |     cls_f.def(py::init<const Vector3uD &>());
15 |     cls_f.def(py::init<const Vector3iD &>());
16 | 
17 |     // Vector3dD: constructible from Vector3d, Vector3dC and the f/u/i D-variants.
18 |     cls_d.def(py::init<const Vector3d  &>());
19 |     cls_d.def(py::init<const Vector3dC &>());
20 |     cls_d.def(py::init<const Vector3fD &>());
21 |     cls_d.def(py::init<const Vector3uD &>());
22 |     cls_d.def(py::init<const Vector3iD &>());
23 | 
24 |     // Vector3iD: constructible from Vector3i, Vector3iC and the f/d/u D-variants.
25 |     cls_i.def(py::init<const Vector3i  &>());
26 |     cls_i.def(py::init<const Vector3iC &>());
27 |     cls_i.def(py::init<const Vector3fD &>());
28 |     cls_i.def(py::init<const Vector3dD &>());
29 |     cls_i.def(py::init<const Vector3uD &>());
30 | 
31 |     // Vector3uD: constructible from Vector3u, Vector3uC and the f/d/i D-variants.
32 |     cls_u.def(py::init<const Vector3u  &>());
33 |     cls_u.def(py::init<const Vector3uC &>());
34 |     cls_u.def(py::init<const Vector3fD &>());
35 |     cls_u.def(py::init<const Vector3dD &>());
36 |     cls_u.def(py::init<const Vector3iD &>());
37 | }
38 | 


--------------------------------------------------------------------------------
/src/python/cuda_autodiff_4d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_cuda_autodiff_4d(py::module& m, py::module& s) { // Registers the Vector4*D classes and their converting constructors.
 4 |     auto cls_m = bind<Vector4mD>(m, s, "Vector4m"); // mask class: registered only, no constructors added below
 5 |     auto cls_i = bind<Vector4iD>(m, s, "Vector4i");
 6 |     auto cls_u = bind<Vector4uD>(m, s, "Vector4u");
 7 |     auto cls_f = bind<Vector4fD>(m, s, "Vector4f");
 8 |     auto cls_d = bind<Vector4dD>(m, s, "Vector4d");
 9 | 
10 |     // Vector4fD: constructible from Vector4f, Vector4fC and the d/u/i D-variants.
11 |     cls_f.def(py::init<const Vector4f  &>());
12 |     cls_f.def(py::init<const Vector4fC &>());
13 |     cls_f.def(py::init<const Vector4dD &>());
14 |     cls_f.def(py::init<const Vector4uD &>());
15 |     cls_f.def(py::init<const Vector4iD &>());
16 | 
17 |     // Vector4dD: constructible from Vector4d, Vector4dC and the f/u/i D-variants.
18 |     cls_d.def(py::init<const Vector4d  &>());
19 |     cls_d.def(py::init<const Vector4dC &>());
20 |     cls_d.def(py::init<const Vector4fD &>());
21 |     cls_d.def(py::init<const Vector4uD &>());
22 |     cls_d.def(py::init<const Vector4iD &>());
23 | 
24 |     // Vector4iD: constructible from Vector4i, Vector4iC and the f/d/u D-variants.
25 |     cls_i.def(py::init<const Vector4i  &>());
26 |     cls_i.def(py::init<const Vector4iC &>());
27 |     cls_i.def(py::init<const Vector4fD &>());
28 |     cls_i.def(py::init<const Vector4dD &>());
29 |     cls_i.def(py::init<const Vector4uD &>());
30 | 
31 |     // Vector4uD: constructible from Vector4u, Vector4uC and the f/d/i D-variants.
32 |     cls_u.def(py::init<const Vector4u  &>());
33 |     cls_u.def(py::init<const Vector4uC &>());
34 |     cls_u.def(py::init<const Vector4fD &>());
35 |     cls_u.def(py::init<const Vector4dD &>());
36 |     cls_u.def(py::init<const Vector4iD &>());
37 | }
38 | 


--------------------------------------------------------------------------------
/src/python/cuda_autodiff_complex.cpp:
--------------------------------------------------------------------------------
 1 | #include "complex.h"
 2 | 
 3 | void bind_cuda_autodiff_complex(py::module& m, py::module& s) { // Registers the Complex*D classes; each accepts its un-suffixed and C-suffixed twin.
 4 |     auto &&c2f = bind_complex<Complex2fD>(m, s, "Complex2f");
 5 |     c2f.def(py::init<const Complex2f &>());
 6 |     c2f.def(py::init<const Complex2fC &>());
 7 | 
 8 |     auto &&c24f = bind_complex<Complex24fD>(m, s, "Complex24f");
 9 |     c24f.def(py::init<const Complex24f &>());
10 |     c24f.def(py::init<const Complex24fC &>());
11 | 
12 |     auto &&c2d = bind_complex<Complex2dD>(m, s, "Complex2d");
13 |     c2d.def(py::init<const Complex2d &>());
14 |     c2d.def(py::init<const Complex2dC &>());
15 | 
16 |     auto &&c24d = bind_complex<Complex24dD>(m, s, "Complex24d");
17 |     c24d.def(py::init<const Complex24d &>());
18 |     c24d.def(py::init<const Complex24dC &>());
19 | }
20 | 


--------------------------------------------------------------------------------
/src/python/cuda_autodiff_matrix.cpp:
--------------------------------------------------------------------------------
 1 | #include "matrix.h"
 2 | 
 3 | void bind_cuda_autodiff_matrix(py::module& m, py::module& s) { // Registers the D-suffixed matrix classes; helpers presumably come from matrix.h.
 4 |     bind_matrix_mask<Matrix2mD>(m, s, "Matrix2m"); // 'm' (mask) matrices use the dedicated bind_matrix_mask helper
 5 |     bind_matrix_mask<Matrix3mD>(m, s, "Matrix3m");
 6 |     bind_matrix_mask<Matrix4mD>(m, s, "Matrix4m");
 7 |     bind_matrix_mask<Matrix44mD>(m, s, "Matrix44m");
 8 |     // 'f' matrices, registered after the mask types.
 9 |     bind_matrix<Matrix2fD>(m, s, "Matrix2f");
10 |     bind_matrix<Matrix3fD>(m, s, "Matrix3f");
11 |     bind_matrix<Matrix4fD>(m, s, "Matrix4f");
12 |     bind_matrix<Matrix44fD>(m, s, "Matrix44f");
13 |     // 'd' matrices.
14 |     bind_matrix<Matrix2dD>(m, s, "Matrix2d");
15 |     bind_matrix<Matrix3dD>(m, s, "Matrix3d");
16 |     bind_matrix<Matrix4dD>(m, s, "Matrix4d");
17 |     bind_matrix<Matrix44dD>(m, s, "Matrix44d");
18 | }
19 | 


--------------------------------------------------------------------------------
/src/python/cuda_complex.cpp:
--------------------------------------------------------------------------------
 1 | #include "complex.h"
 2 | 
 3 | void bind_cuda_complex(py::module& m, py::module& s) { // Registers the Complex*C classes; each accepts its un-suffixed twin.
 4 |     auto &&c2f = bind_complex<Complex2fC>(m, s, "Complex2f");
 5 |     c2f.def(py::init<const Complex2f &>());
 6 | 
 7 |     auto &&c24f = bind_complex<Complex24fC>(m, s, "Complex24f");
 8 |     c24f.def(py::init<const Complex24f &>());
 9 | 
10 |     auto &&c2d = bind_complex<Complex2dC>(m, s, "Complex2d");
11 |     c2d.def(py::init<const Complex2d &>());
12 | 
13 |     auto &&c24d = bind_complex<Complex24dC>(m, s, "Complex24d");
14 |     c24d.def(py::init<const Complex24d &>());
15 | }
16 | 


--------------------------------------------------------------------------------
/src/python/cuda_matrix.cpp:
--------------------------------------------------------------------------------
 1 | #include "matrix.h"
 2 | 
 3 | void bind_cuda_matrix(py::module& m, py::module& s) { // Registers the C-suffixed matrix classes; helpers presumably come from matrix.h.
 4 |     bind_matrix_mask<Matrix2mC>(m, s, "Matrix2m"); // 'm' (mask) matrices use the dedicated bind_matrix_mask helper
 5 |     bind_matrix_mask<Matrix3mC>(m, s, "Matrix3m");
 6 |     bind_matrix_mask<Matrix4mC>(m, s, "Matrix4m");
 7 |     bind_matrix_mask<Matrix44mC>(m, s, "Matrix44m");
 8 |     // 'f' matrices, registered after the mask types.
 9 |     bind_matrix<Matrix2fC>(m, s, "Matrix2f");
10 |     bind_matrix<Matrix3fC>(m, s, "Matrix3f");
11 |     bind_matrix<Matrix4fC>(m, s, "Matrix4f");
12 |     bind_matrix<Matrix44fC>(m, s, "Matrix44f");
13 |     // 'd' matrices.
14 |     bind_matrix<Matrix2dC>(m, s, "Matrix2d");
15 |     bind_matrix<Matrix3dC>(m, s, "Matrix3d");
16 |     bind_matrix<Matrix4dC>(m, s, "Matrix4d");
17 |     bind_matrix<Matrix44dC>(m, s, "Matrix44d");
18 | }
19 | 


--------------------------------------------------------------------------------
/src/python/cuda_pcg32.cpp:
--------------------------------------------------------------------------------
1 | #include "random.h"
2 | 
3 | void bind_cuda_pcg32(py::module& m, py::module& s) { // Registers the "PCG32" class for the Float32C-based generator.
4 |     bind_pcg32<PCG32<Float32C, 1>>(m, s, "PCG32"); // NOTE(review): meaning of the second template argument (1) is not visible here -- confirm in random.h
5 | }
6 | 


--------------------------------------------------------------------------------
/src/python/docstr.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |   This file contains docstrings for the Python bindings.
 3 |   Do not edit! These were automatically extracted by mkdoc.py
 4 |  */
 5 | 
 6 | #define __EXPAND(x)                                      x
 7 | #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
 8 | #define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
 9 | #define __CAT1(a, b)                                     a ## b
10 | #define __CAT2(a, b)                                     __CAT1(a, b)
11 | #define __DOC1(n1)                                       __doc_##n1
12 | #define __DOC2(n1, n2)                                   __doc_##n1##_##n2
13 | #define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
14 | #define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
15 | #define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
16 | #define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
17 | #define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
18 | #define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
19 | 
20 | #if defined(__GNUG__)
21 | #pragma GCC diagnostic push
22 | #pragma GCC diagnostic ignored "-Wunused-variable"
23 | #endif
24 | 
25 | static const char *__doc_PCG32 = R"doc(PCG32 pseudorandom number generator proposed by Melissa O'Neill)doc";
26 | 
27 | static const char *__doc_PCG32_PCG32 = R"doc(Initialize the pseudorandom number generator with the seed() function)doc";
28 | 
29 | static const char *__doc_PCG32_advance =
30 | R"doc(Multi-step advance function (jump-ahead, jump-back)
31 | 
32 | The method used here is based on Brown, "Random Number Generation with
33 | Arbitrary Stride", Transactions of the American Nuclear Society (Nov.
34 | 1994). The algorithm is very similar to fast exponentiation.)doc";
35 | 
36 | static const char *__doc_PCG32_inc = R"doc()doc";
37 | 
38 | static const char *__doc_PCG32_next_float32 =
39 | R"doc(Generate a single precision floating point value on the interval [0,
40 | 1))doc";
41 | 
42 | static const char *__doc_PCG32_next_float32_2 = R"doc(Masked version of next_float32)doc";
43 | 
44 | static const char *__doc_PCG32_next_float64 =
45 | R"doc(Generate a double precision floating point value on the interval [0,
46 | 1)
47 | 
48 | Remark:
49 |     Since the underlying random number generator produces 32 bit
50 |     output, only the first 32 mantissa bits will be filled (however,
51 |     the resolution is still finer than in next_float(), which only
52 |     uses 23 mantissa bits))doc";
53 | 
54 | static const char *__doc_PCG32_next_float64_2 = R"doc(Masked version of next_float64)doc";
55 | 
56 | static const char *__doc_PCG32_next_uint32 = R"doc(Generate a uniformly distributed unsigned 32-bit random number)doc";
57 | 
58 | static const char *__doc_PCG32_next_uint32_2 = R"doc(Masked version of next_uint32)doc";
59 | 
60 | static const char *__doc_PCG32_next_uint32_bounded = R"doc(Generate a uniformly distributed integer r, where 0 <= r < bound)doc";
61 | 
62 | static const char *__doc_PCG32_next_uint64 = R"doc(Generate a uniformly distributed unsigned 64-bit random number)doc";
63 | 
64 | static const char *__doc_PCG32_next_uint64_2 = R"doc(Masked version of next_uint64)doc";
65 | 
66 | static const char *__doc_PCG32_next_uint64_bounded = R"doc(Generate a uniformly distributed integer r, where 0 <= r < bound)doc";
67 | 
68 | static const char *__doc_PCG32_operator_eq = R"doc(Equality operator)doc";
69 | 
70 | static const char *__doc_PCG32_operator_ne = R"doc(Inequality operator)doc";
71 | 
72 | static const char *__doc_PCG32_operator_sub = R"doc(Compute the distance between two PCG32 pseudorandom number generators)doc";
73 | 
74 | static const char *__doc_PCG32_seed =
75 | R"doc(Seed the pseudorandom number generator
76 | 
77 | Specified in two parts: a state initializer and a sequence selection
78 | constant (a.k.a. stream id))doc";
79 | 
80 | static const char *__doc_PCG32_shuffle =
81 | R"doc(Draw uniformly distributed permutation and permute the given container
82 | 
83 | From: Knuth, TAoCP Vol. 2 (3rd 3d), Section 3.4.2)doc";
84 | 
85 | static const char *__doc_PCG32_state = R"doc()doc";
86 | 
87 | static const char *__doc_operator_lshift = R"doc(Prints the canonical representation of a PCG32 object.)doc";
88 | 
89 | 


--------------------------------------------------------------------------------
/src/python/dynamic.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | extern void bind_dynamic_0d(py::module&, py::module&);
 4 | extern void bind_dynamic_1d(py::module&, py::module&);
 5 | extern void bind_dynamic_2d(py::module&, py::module&);
 6 | extern void bind_dynamic_3d(py::module&, py::module&);
 7 | extern void bind_dynamic_4d(py::module&, py::module&);
 8 | extern void bind_dynamic_complex(py::module&, py::module&);
 9 | extern void bind_dynamic_matrix(py::module&, py::module&);
10 | extern void bind_dynamic_pcg32(py::module&, py::module&);
11 | 
12 | bool *implicit_conversion = nullptr;
13 | 
14 | PYBIND11_MODULE(dynamic, s) { // Entry point of the 'dynamic' extension module; `s` is the module object being initialised.
15 |     py::module m = py::module::import("enoki"); // parent package, handed to every binder as `m`
16 |     py::module::import("enoki.scalar"); // presumably needed so the scalar types used in constructors are registered first -- TODO confirm
17 |     // Fetch the flag pointer published under this key elsewhere (main.cpp calls py::set_shared_data with it).
18 |     implicit_conversion = (bool *) py::get_shared_data("implicit_conversion"); // NOTE(review): stays null if nothing has published the key yet
19 |     // Registration order below is deliberate; do not reorder.
20 |     bind_dynamic_1d(m, s);
21 |     bind_dynamic_0d(m, s); // after FloatX
22 |     bind_dynamic_2d(m, s);
23 |     bind_dynamic_3d(m, s);
24 |     bind_dynamic_4d(m, s);
25 |     bind_dynamic_complex(m, s);
26 |     bind_dynamic_matrix(m, s);
27 |     bind_dynamic_pcg32(m, s);
28 | }
29 | 


--------------------------------------------------------------------------------
/src/python/dynamic_0d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_dynamic_0d(py::module& m, py::module& s) { // Registers the Vector0*X classes and their converting constructors.
 4 |     auto cls_m = bind<Vector0mX>(m, s, "Vector0m"); // mask class: registered only, no constructors added below
 5 |     auto cls_i = bind<Vector0iX>(m, s, "Vector0i");
 6 |     auto cls_u = bind<Vector0uX>(m, s, "Vector0u");
 7 |     auto cls_f = bind<Vector0fX>(m, s, "Vector0f");
 8 |     auto cls_d = bind<Vector0dX>(m, s, "Vector0d");
 9 | 
10 |     // Vector0fX: constructible from Vector0f and the d/u/i X-variants.
11 |     cls_f.def(py::init<const Vector0f  &>());
12 |     cls_f.def(py::init<const Vector0dX &>());
13 |     cls_f.def(py::init<const Vector0uX &>());
14 |     cls_f.def(py::init<const Vector0iX &>());
15 | 
16 |     // Vector0dX: constructible from Vector0d and the f/u/i X-variants.
17 |     cls_d.def(py::init<const Vector0d  &>());
18 |     cls_d.def(py::init<const Vector0fX &>());
19 |     cls_d.def(py::init<const Vector0uX &>());
20 |     cls_d.def(py::init<const Vector0iX &>());
21 | 
22 |     // Vector0iX: constructible from Vector0i and the f/d/u X-variants.
23 |     cls_i.def(py::init<const Vector0i  &>());
24 |     cls_i.def(py::init<const Vector0fX &>());
25 |     cls_i.def(py::init<const Vector0dX &>());
26 |     cls_i.def(py::init<const Vector0uX &>());
27 | 
28 |     // Vector0uX: constructible from Vector0u and the f/d/i X-variants.
29 |     cls_u.def(py::init<const Vector0u  &>());
30 |     cls_u.def(py::init<const Vector0fX &>());
31 |     cls_u.def(py::init<const Vector0dX &>());
32 |     cls_u.def(py::init<const Vector0iX &>());
33 | }
34 | 


--------------------------------------------------------------------------------
/src/python/dynamic_1d.cpp:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | #include <pybind11/functional.h>
  3 | 
  4 | void bind_dynamic_1d(py::module& m, py::module& s) { // Registers the X-suffixed 1D array classes, Vector1*X, binary_search and meshgrid.
  5 |     auto cls_mask   = bind<MaskX>(m, s, "Mask");
  6 |     auto cls_mask64 = bind<Mask64X>(m, s, "Mask64");
  7 |     auto cls_u32    = bind<UInt32X>(m, s, "UInt32");
  8 |     auto cls_u64    = bind<UInt64X>(m, s, "UInt64");
  9 |     auto cls_i32    = bind<Int32X>(m, s, "Int32");
 10 |     auto cls_i64    = bind<Int64X>(m, s, "Int64");
 11 |     auto cls_f32    = bind<Float32X>(m, s, "Float32");
 12 |     auto cls_f64    = bind<Float64X>(m, s, "Float64");
 13 | 
 14 |     // The two mask classes can be constructed from one another ...
 15 |     cls_mask.def(py::init<const Mask64X &>());
 16 | 
 17 |     // ... in both directions ...
 18 |     cls_mask64.def(py::init<const MaskX &>());
 19 | 
 20 |     implicitly_convertible<Mask64X, MaskX>(); // ... and are also registered as implicit conversions.
 21 |     implicitly_convertible<MaskX, Mask64X>();
 22 | 
 23 |     // Float32: converting constructors from every other numeric 1D class.
 24 |     cls_f32.def(py::init<const Float64X &>());
 25 |     cls_f32.def(py::init<const Int32X &>());
 26 |     cls_f32.def(py::init<const Int64X &>());
 27 |     cls_f32.def(py::init<const UInt32X &>());
 28 |     cls_f32.def(py::init<const UInt64X &>());
 29 | 
 30 |     // Float64: likewise.
 31 |     cls_f64.def(py::init<const Float32X &>());
 32 |     cls_f64.def(py::init<const Int32X &>());
 33 |     cls_f64.def(py::init<const Int64X &>());
 34 |     cls_f64.def(py::init<const UInt32X &>());
 35 |     cls_f64.def(py::init<const UInt64X &>());
 36 | 
 37 |     // Int32: likewise.
 38 |     cls_i32.def(py::init<const Float32X &>());
 39 |     cls_i32.def(py::init<const Float64X &>());
 40 |     cls_i32.def(py::init<const Int64X &>());
 41 |     cls_i32.def(py::init<const UInt32X &>());
 42 |     cls_i32.def(py::init<const UInt64X &>());
 43 | 
 44 |     // Int64: likewise.
 45 |     cls_i64.def(py::init<const Float32X &>());
 46 |     cls_i64.def(py::init<const Float64X &>());
 47 |     cls_i64.def(py::init<const Int32X &>());
 48 |     cls_i64.def(py::init<const UInt32X &>());
 49 |     cls_i64.def(py::init<const UInt64X &>());
 50 | 
 51 |     // UInt32: likewise.
 52 |     cls_u32.def(py::init<const Float32X &>());
 53 |     cls_u32.def(py::init<const Float64X &>());
 54 |     cls_u32.def(py::init<const Int32X &>());
 55 |     cls_u32.def(py::init<const Int64X &>());
 56 |     cls_u32.def(py::init<const UInt64X &>());
 57 | 
 58 |     // UInt64: likewise.
 59 |     cls_u64.def(py::init<const Float32X &>());
 60 |     cls_u64.def(py::init<const Float64X &>());
 61 |     cls_u64.def(py::init<const Int32X &>());
 62 |     cls_u64.def(py::init<const Int64X &>());
 63 |     cls_u64.def(py::init<const UInt32X &>());
 64 | 
 65 |     auto vec_m = bind<Vector1mX>(m, s, "Vector1m"); // mask class: registered only, no constructors added below
 66 |     auto vec_i = bind<Vector1iX>(m, s, "Vector1i");
 67 |     auto vec_u = bind<Vector1uX>(m, s, "Vector1u");
 68 |     auto vec_f = bind<Vector1fX>(m, s, "Vector1f");
 69 |     auto vec_d = bind<Vector1dX>(m, s, "Vector1d");
 70 | 
 71 |     // Vector1fX: constructible from Vector1f and the d/u/i X-variants.
 72 |     vec_f.def(py::init<const Vector1f  &>());
 73 |     vec_f.def(py::init<const Vector1dX &>());
 74 |     vec_f.def(py::init<const Vector1uX &>());
 75 |     vec_f.def(py::init<const Vector1iX &>());
 76 | 
 77 |     // Vector1dX: constructible from Vector1d and the f/u/i X-variants.
 78 |     vec_d.def(py::init<const Vector1d  &>());
 79 |     vec_d.def(py::init<const Vector1fX &>());
 80 |     vec_d.def(py::init<const Vector1uX &>());
 81 |     vec_d.def(py::init<const Vector1iX &>());
 82 | 
 83 |     // Vector1iX: constructible from Vector1i and the f/d/u X-variants.
 84 |     vec_i.def(py::init<const Vector1i  &>());
 85 |     vec_i.def(py::init<const Vector1fX &>());
 86 |     vec_i.def(py::init<const Vector1dX &>());
 87 |     vec_i.def(py::init<const Vector1uX &>());
 88 | 
 89 |     // Vector1uX: constructible from Vector1u and the f/d/i X-variants.
 90 |     vec_u.def(py::init<const Vector1u  &>());
 91 |     vec_u.def(py::init<const Vector1fX &>());
 92 |     vec_u.def(py::init<const Vector1dX &>());
 93 |     vec_u.def(py::init<const Vector1iX &>());
 94 | 
 95 |     // binary_search(start, end, pred): forwards to enoki::binary_search with a Python callable as the predicate.
 96 |     m.def("binary_search",
 97 |           [](uint32_t first, uint32_t last,
 98 |              const std::function<MaskX (const UInt32X &)> &predicate) {
 99 |               return enoki::binary_search(first, last, predicate);
100 |           },
101 |           "start"_a, "end"_a, "pred"_a);
102 | 
103 |     // meshgrid(x, y): returns the two components of the meshgrid() result as a pair (Float32 overload).
104 |     m.def("meshgrid", [](const Float32X &x, const Float32X &y) {
105 |         auto grid = meshgrid(x, y);
106 |         return std::make_pair(std::move(grid.x()), std::move(grid.y()));
107 |     });
108 |     // Float64 overload of the same function.
109 |     m.def("meshgrid", [](const Float64X &x, const Float64X &y) {
110 |         auto grid = meshgrid(x, y);
111 |         return std::make_pair(std::move(grid.x()), std::move(grid.y()));
112 |     });
113 | }
114 | 


--------------------------------------------------------------------------------
/src/python/dynamic_2d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_dynamic_2d(py::module& m, py::module& s) { // Registers the Vector2*X classes and their converting constructors.
 4 |     auto cls_m = bind<Vector2mX>(m, s, "Vector2m"); // mask class: registered only, no constructors added below
 5 |     auto cls_i = bind<Vector2iX>(m, s, "Vector2i");
 6 |     auto cls_u = bind<Vector2uX>(m, s, "Vector2u");
 7 |     auto cls_f = bind<Vector2fX>(m, s, "Vector2f");
 8 |     auto cls_d = bind<Vector2dX>(m, s, "Vector2d");
 9 | 
10 |     // Vector2fX: constructible from Vector2f and the d/u/i X-variants.
11 |     cls_f.def(py::init<const Vector2f  &>());
12 |     cls_f.def(py::init<const Vector2dX &>());
13 |     cls_f.def(py::init<const Vector2uX &>());
14 |     cls_f.def(py::init<const Vector2iX &>());
15 | 
16 |     // Vector2dX: constructible from Vector2d and the f/u/i X-variants.
17 |     cls_d.def(py::init<const Vector2d  &>());
18 |     cls_d.def(py::init<const Vector2fX &>());
19 |     cls_d.def(py::init<const Vector2uX &>());
20 |     cls_d.def(py::init<const Vector2iX &>());
21 | 
22 |     // Vector2iX: constructible from Vector2i and the f/d/u X-variants.
23 |     cls_i.def(py::init<const Vector2i  &>());
24 |     cls_i.def(py::init<const Vector2fX &>());
25 |     cls_i.def(py::init<const Vector2dX &>());
26 |     cls_i.def(py::init<const Vector2uX &>());
27 | 
28 |     // Vector2uX: constructible from Vector2u and the f/d/i X-variants.
29 |     cls_u.def(py::init<const Vector2u  &>());
30 |     cls_u.def(py::init<const Vector2fX &>());
31 |     cls_u.def(py::init<const Vector2dX &>());
32 |     cls_u.def(py::init<const Vector2iX &>());
33 | }
34 | 


--------------------------------------------------------------------------------
/src/python/dynamic_3d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_dynamic_3d(py::module& m, py::module& s) { // Registers the Vector3*X classes and their converting constructors.
 4 |     auto cls_m = bind<Vector3mX>(m, s, "Vector3m"); // mask class: registered only, no constructors added below
 5 |     auto cls_i = bind<Vector3iX>(m, s, "Vector3i");
 6 |     auto cls_u = bind<Vector3uX>(m, s, "Vector3u");
 7 |     auto cls_f = bind<Vector3fX>(m, s, "Vector3f");
 8 |     auto cls_d = bind<Vector3dX>(m, s, "Vector3d");
 9 | 
10 |     // Vector3fX: constructible from Vector3f and the d/u/i X-variants.
11 |     cls_f.def(py::init<const Vector3f  &>());
12 |     cls_f.def(py::init<const Vector3dX &>());
13 |     cls_f.def(py::init<const Vector3uX &>());
14 |     cls_f.def(py::init<const Vector3iX &>());
15 | 
16 |     // Vector3dX: constructible from Vector3d and the f/u/i X-variants.
17 |     cls_d.def(py::init<const Vector3d  &>());
18 |     cls_d.def(py::init<const Vector3fX &>());
19 |     cls_d.def(py::init<const Vector3uX &>());
20 |     cls_d.def(py::init<const Vector3iX &>());
21 | 
22 |     // Vector3iX: constructible from Vector3i and the f/d/u X-variants.
23 |     cls_i.def(py::init<const Vector3i  &>());
24 |     cls_i.def(py::init<const Vector3fX &>());
25 |     cls_i.def(py::init<const Vector3dX &>());
26 |     cls_i.def(py::init<const Vector3uX &>());
27 | 
28 |     // Vector3uX: constructible from Vector3u and the f/d/i X-variants.
29 |     cls_u.def(py::init<const Vector3u  &>());
30 |     cls_u.def(py::init<const Vector3fX &>());
31 |     cls_u.def(py::init<const Vector3dX &>());
32 |     cls_u.def(py::init<const Vector3iX &>());
33 | }
34 | 


--------------------------------------------------------------------------------
/src/python/dynamic_4d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_dynamic_4d(py::module& m, py::module& s) { // Registers the Vector4*X classes and their converting constructors.
 4 |     auto cls_m = bind<Vector4mX>(m, s, "Vector4m"); // mask class: registered only, no constructors added below
 5 |     auto cls_i = bind<Vector4iX>(m, s, "Vector4i");
 6 |     auto cls_u = bind<Vector4uX>(m, s, "Vector4u");
 7 |     auto cls_f = bind<Vector4fX>(m, s, "Vector4f");
 8 |     auto cls_d = bind<Vector4dX>(m, s, "Vector4d");
 9 | 
10 |     // Vector4fX: constructible from Vector4f and the d/u/i X-variants.
11 |     cls_f.def(py::init<const Vector4f  &>());
12 |     cls_f.def(py::init<const Vector4dX &>());
13 |     cls_f.def(py::init<const Vector4uX &>());
14 |     cls_f.def(py::init<const Vector4iX &>());
15 | 
16 |     // Vector4dX: constructible from Vector4d and the f/u/i X-variants.
17 |     cls_d.def(py::init<const Vector4d  &>());
18 |     cls_d.def(py::init<const Vector4fX &>());
19 |     cls_d.def(py::init<const Vector4uX &>());
20 |     cls_d.def(py::init<const Vector4iX &>());
21 | 
22 |     // Vector4iX: constructible from Vector4i and the f/d/u X-variants.
23 |     cls_i.def(py::init<const Vector4i  &>());
24 |     cls_i.def(py::init<const Vector4fX &>());
25 |     cls_i.def(py::init<const Vector4dX &>());
26 |     cls_i.def(py::init<const Vector4uX &>());
27 | 
28 |     // Vector4uX: constructible from Vector4u and the f/d/i X-variants.
29 |     cls_u.def(py::init<const Vector4u  &>());
30 |     cls_u.def(py::init<const Vector4fX &>());
31 |     cls_u.def(py::init<const Vector4dX &>());
32 |     cls_u.def(py::init<const Vector4iX &>());
33 | }
34 | 


--------------------------------------------------------------------------------
/src/python/dynamic_complex.cpp:
--------------------------------------------------------------------------------
 1 | #include "complex.h"
 2 | 
 3 | void bind_dynamic_complex(py::module& m, py::module& s) { // Registers the Complex*X classes; each accepts its un-suffixed twin.
 4 |     auto &&c2f = bind_complex<Complex2fX>(m, s, "Complex2f");
 5 |     c2f.def(py::init<const Complex2f &>());
 6 | 
 7 |     auto &&c24f = bind_complex<Complex24fX>(m, s, "Complex24f");
 8 |     c24f.def(py::init<const Complex24f &>());
 9 | 
10 |     auto &&c2d = bind_complex<Complex2dX>(m, s, "Complex2d");
11 |     c2d.def(py::init<const Complex2d &>());
12 | 
13 |     auto &&c24d = bind_complex<Complex24dX>(m, s, "Complex24d");
14 |     c24d.def(py::init<const Complex24d &>());
15 | }
16 | 


--------------------------------------------------------------------------------
/src/python/dynamic_matrix.cpp:
--------------------------------------------------------------------------------
 1 | #include "matrix.h"
 2 | 
 3 | void bind_dynamic_matrix(py::module& m, py::module& s) { // Registers the X-suffixed matrix classes; helpers presumably come from matrix.h.
 4 |     bind_matrix_mask<Matrix2mX>(m, s, "Matrix2m"); // 'm' (mask) matrices use the dedicated bind_matrix_mask helper
 5 |     bind_matrix_mask<Matrix3mX>(m, s, "Matrix3m");
 6 |     bind_matrix_mask<Matrix4mX>(m, s, "Matrix4m");
 7 |     bind_matrix_mask<Matrix44mX>(m, s, "Matrix44m");
 8 |     // 'f' matrices, registered after the mask types.
 9 |     bind_matrix<Matrix2fX>(m, s, "Matrix2f");
10 |     bind_matrix<Matrix3fX>(m, s, "Matrix3f");
11 |     bind_matrix<Matrix4fX>(m, s, "Matrix4f");
12 |     bind_matrix<Matrix44fX>(m, s, "Matrix44f");
13 |     // 'd' matrices.
14 |     bind_matrix<Matrix2dX>(m, s, "Matrix2d");
15 |     bind_matrix<Matrix3dX>(m, s, "Matrix3d");
16 |     bind_matrix<Matrix4dX>(m, s, "Matrix4d");
17 |     bind_matrix<Matrix44dX>(m, s, "Matrix44d");
18 | }
19 | 


--------------------------------------------------------------------------------
/src/python/dynamic_pcg32.cpp:
--------------------------------------------------------------------------------
1 | #include "random.h"
2 | 
3 | void bind_dynamic_pcg32(py::module& m, py::module& s) { // Registers the "PCG32" class for the Float32X-based generator.
4 |     bind_pcg32<PCG32<Float32X, 1>>(m, s, "PCG32"); // NOTE(review): meaning of the second template argument (1) is not visible here -- confirm in random.h
5 | }
6 | 


--------------------------------------------------------------------------------
/src/python/main.cpp:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | #include <pybind11/functional.h>
  3 | 
  4 | bool __implicit_conversion = false;
  5 | 
  6 | bool allclose_py(const py::object &a, const py::object &b,
  7 |                  const py::float_ &rtol, const py::float_ &atol,
  8 |                  bool equal_nan) {
  9 |     // Tests |a - b| <= atol + |b| * rtol on numbers, enoki arrays, NumPy arrays and nested sequences.
 10 |     const char *tp_name_a = a.ptr()->ob_type->tp_name, *tp_name_b = b.ptr()->ob_type->tp_name;
 11 | 
 12 |     // A length of -1 marks an unsized object; the Python error this raises is cleared below.
 13 |     ssize_t la = PyObject_Length(a.ptr()), lb = PyObject_Length(b.ptr());
 14 | 
 15 |     bool num_a     = PyNumber_Check(a.ptr()) && la == -1,
 16 |          num_b     = PyNumber_Check(b.ptr()) && lb == -1,
 17 |          enoki_a   = strncmp(tp_name_a, "enoki.", 6) == 0,
 18 |          enoki_b   = strncmp(tp_name_b, "enoki.", 6) == 0,
 19 |          ndarray_a = strcmp(tp_name_a, "numpy.ndarray") == 0,
 20 |          ndarray_b = strcmp(tp_name_b, "numpy.ndarray") == 0;
 21 | 
 22 |     if (la == -1 || lb == -1)
 23 |         PyErr_Clear();
 24 |     // Mixed enoki / (NumPy array | number) input: convert the other operand to the enoki type and retry.
 25 |     if (enoki_a && (ndarray_b || num_b))
 26 |         return allclose_py(a, a.get_type()(b), rtol, atol, equal_nan);
 27 |     else if (enoki_b && (ndarray_a || num_a))
 28 |         return allclose_py(b.get_type()(a), b, rtol, atol, equal_nan);
 29 |     // Lengths must agree unless one side is a number or has length 1 (it is then broadcast).
 30 |     if (la != lb && !(((num_a || la == 1) && lb > 0) || ((num_b || lb == 1) && la > 0)))
 31 |         throw std::runtime_error("enoki.allclose(): length mismatch!");
 32 | 
 33 |     if ((enoki_a && enoki_b) || (num_a && num_b)) {
 34 |         py::module ek = py::module::import("enoki");
 35 | 
 36 |         py::object abs        = ek.attr("abs"),
 37 |                    eq         = ek.attr("eq"),
 38 |                    isnan      = ek.attr("isnan"),
 39 |                    isinf      = ek.attr("isinf"),
 40 |                    full       = ek.attr("full"),
 41 |                    all_nested = ek.attr("all_nested");
 42 | 
 43 |         py::object lhs = abs(a - b),
 44 |                    rhs = (num_b ? atol + abs(b) * rtol
 45 |                                 : full(b.get_type(), atol) + abs(b) * rtol);
 46 | 
 47 |         py::object cond =
 48 |             py::reinterpret_steal<py::object>(PyObject_RichCompare(lhs.ptr(), rhs.ptr(), Py_LE));
 49 |         // A null result means the comparison raised; propagate that Python exception.
 50 |         if (!cond)
 51 |             throw py::error_already_set();
 52 |         // Entries where both inputs are infinite are accepted.
 53 |         cond = cond | (isinf(a) & isinf(b));
 54 | 
 55 |         if (equal_nan)
 56 |             cond = cond | (isnan(a) & isnan(b));
 57 | 
 58 |         return py::cast<bool>(all_nested(cond));
 59 |     } else if (la >= 0 || lb >= 0) {
 60 |         // Walk the longer operand; a number is reused as-is and a length-1 operand is always read at index 0.
 61 |         size_t n = (size_t) (la > lb ? la : lb);
 62 |         for (size_t i = 0; i < n; ++i) {
 63 |             py::int_ key_a(la == 1 ? 0 : i), key_b(lb == 1 ? 0 : i);
 64 |             py::object ai = num_a ? a : a[key_a], bi = num_b ? b : b[key_b];
 65 |             if (!allclose_py(ai, bi, rtol, atol, equal_nan))
 66 |                 return false;
 67 |         }
 68 |     } else {
 69 |         throw std::runtime_error("enoki.allclose(): unsupported type!");
 70 |     }
 71 |     return true;
 72 | }
 73 | 
 74 | bool is_enoki_type(py::handle h) { // True iff h is a Python type object whose tp_name starts with "enoki.".
 75 |     if (!PyType_Check(h.ptr())) return false;
 76 |     return strncmp(reinterpret_cast<PyTypeObject *>(h.ptr())->tp_name, "enoki.", 6) == 0;
 77 | }
 78 | 
 79 | PYBIND11_MODULE(core, m_) {
 80 |     // All bindings below are attached to the imported `enoki` package; the
 81 |     // extension module handle `m_` itself is only marked as used.
 82 |     ENOKI_MARK_USED(m_);
 83 |     py::module m = py::module::import("enoki");
 84 |     m.attr("__version__") = ENOKI_VERSION;
 85 |     py::set_shared_data("implicit_conversion", &__implicit_conversion);
 86 |     py::class_<Buffer<false>>(m, "CPUBuffer");
 87 | 
 88 |     // empty(type, size=1): a non-Enoki type with size == 1 is constructed
 89 |     // directly; every other case forwards to `type.empty(size)`.
 90 |     m.def("empty",
 91 |         [](py::handle type, size_t count) {
 92 |             const bool plain = !is_enoki_type(type) && count == 1;
 93 |             return plain ? type() : type.attr("empty")(count);
 94 |         },
 95 |         "type"_a, "size"_a = 1);
 96 | 
 97 |     // zero(type, size=1): same dispatch as above, but the direct
 98 |     // construction passes the integer 0 to the type.
 99 |     m.def("zero",
100 |         [](py::handle type, size_t count) {
101 |             const bool plain = !is_enoki_type(type) && count == 1;
102 |             return plain ? type(0) : type.attr("zero")(count);
103 |         },
104 |         "type"_a, "size"_a = 1);
105 | 
106 |     // arange(type, size=1): a single plain element is again `type(0)`;
107 |     // otherwise `type.arange(size)` is called.
108 |     m.def("arange",
109 |         [](py::handle type, size_t count) {
110 |             const bool plain = !is_enoki_type(type) && count == 1;
111 |             return plain ? type(0) : type.attr("arange")(count);
112 |         },
113 |         "type"_a, "size"_a = 1);
114 | 
115 |     // full(type, value, size=1): a single plain element is `type(value)`;
116 |     // otherwise `type.full(value, size)` is called.
117 |     m.def("full",
118 |         [](py::handle type, py::handle value, size_t count) {
119 |             const bool plain = !is_enoki_type(type) && count == 1;
120 |             return plain ? type(value) : type.attr("full")(value, count);
121 |         },
122 |         "type"_a, "value"_a, "size"_a = 1);
123 | 
124 |     // linspace: unlike the helpers above, `size` (and `end`) are not
125 |     // consulted for non-Enoki types -- they always get `type(start)`.
126 |     m.def("linspace",
127 |         [](py::handle type, py::handle start, py::handle end, size_t count) {
128 |             return is_enoki_type(type) ? type.attr("linspace")(start, end, count)
129 |                                        : type(start);
130 |         },
131 |         "type"_a, "start"_a, "end"_a, "size"_a = 1);
132 | 
133 |     m.def("allclose", &allclose_py,
134 |         "a"_a, "b"_a,
135 |         "rtol"_a = 1e-5, "atol"_a = 1e-8, "equal_nan"_a = false);
136 | 
137 |     // Numeric constants; `inf` and `nan` are taken from the `float` limits.
138 |     m.attr("pi")  = M_PI;
139 |     m.attr("e")   = M_E;
140 |     m.attr("inf") = std::numeric_limits<float>::infinity();
141 |     m.attr("nan") = std::numeric_limits<float>::quiet_NaN();
142 | }
143 | 


--------------------------------------------------------------------------------
/src/python/quat.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "common.h"
 3 | #include <enoki/quaternion.h>
 4 | 
 5 | template <typename Quat>
 6 | py::class_<Quat> bind_quaternion(py::module &m, py::module &s, const char *name) {
 7 |     // The class `name` is created inside `s`; free functions are added to `m`.
 8 |     using Value  = value_t<Quat>;
 9 |     using Scalar = scalar_t<Quat>;
10 |     using Mask   = mask_t<Quat>;
11 |     auto quat_cls = py::class_<Quat>(s, name)
12 |         .def(py::init<>())
13 |         .def(py::init<const Value &>(), "w"_a)
14 |         .def(py::init<const Value &, const Value &, const Value &, const Value &>(),
15 |              "x"_a, "y"_a, "z"_a, "w"_a)
16 |         .def(py::self == py::self)
17 |         .def(py::self != py::self)
18 |         .def(py::self - py::self)
19 |         .def(py::self + py::self)
20 |         .def(py::self * Value())
21 |         .def(py::self / Value())
22 |         .def(py::self * py::self)
23 |         .def(py::self / py::self)
24 |         .def(-py::self)
25 |         .def("__repr__", [](const Quat &q) -> std::string {
26 |             std::ostringstream oss;  // left empty when *implicit_conversion is set
27 |             if (!*implicit_conversion)
28 |                 oss << q;
29 |             return oss.str();
30 |         })
31 |         .def("__getitem__", [](const Quat &q, size_t i) {
32 |             if (i < 4)
33 |                 return q.coeff(i);
34 |             throw py::index_error();
35 |         })
36 |         .def("__setitem__", [](Quat &q, size_t i, const Value &value) {
37 |             if (i > 3)
38 |                 throw py::index_error();
39 |             q.coeff(i) = value;
40 |         })
41 |         .def("__setitem__", [](Quat &q, const mask_t<Value> &mask, const Quat &src) {
42 |             q[mask] = src;
43 |         })
44 |         .def_static("identity", [](size_t n) { return identity<Quat>(n); }, "size"_a = 1)
45 |         .def_static("zero", [](size_t n) { return zero<Quat>(n); }, "size"_a = 1)
46 |         .def_static("full", [](Scalar fill, size_t n) { return full<Quat>(fill, n); },
47 |                     "value"_a, "size"_a = 1);
48 | 
49 |     m.def("real", [](const Quat &q) { return real(q); });
50 |     m.def("imag", [](const Quat &q) { return imag(q); });
51 |     m.def("norm", [](const Quat &q) { return norm(q); });
52 |     m.def("squared_norm", [](const Quat &q) { return squared_norm(q); });
53 |     m.def("rcp", [](const Quat &q) { return rcp(q); });
54 |     m.def("normalize", [](const Quat &q) { return normalize(q); });
55 |     m.def("dot", [](const Quat &q, const Quat &r) { return dot(q, r); });
56 | 
57 |     m.def("abs", [](const Quat &q) { return abs(q); });
58 |     m.def("sqrt", [](const Quat &q) { return sqrt(q); });
59 |     m.def("exp", [](const Quat &q) { return exp(q); });
60 |     m.def("log", [](const Quat &q) { return log(q); });
61 |     m.def("pow", [](const Quat &q, const Quat &r) { return pow(q, r); });
62 |     // Component accessors exposed as read/write properties on the class
63 |     quat_cls.def_property("x", [](const Quat &q) { return q.x(); },
64 |                                [](Quat &q, const Value &val) { q.x() = val; });
65 |     quat_cls.def_property("y", [](const Quat &q) { return q.y(); },
66 |                                [](Quat &q, const Value &val) { q.y() = val; });
67 |     quat_cls.def_property("z", [](const Quat &q) { return q.z(); },
68 |                                [](Quat &q, const Value &val) { q.z() = val; });
69 |     quat_cls.def_property("w", [](const Quat &q) { return q.w(); },
70 |                                [](Quat &q, const Value &val) { q.w() = val; });
71 |     // The three predicates below explicitly return `Mask`
72 |     m.def("isfinite", [](const Quat &q) -> Mask { return enoki::isfinite(q); });
73 |     m.def("isnan",    [](const Quat &q) -> Mask { return enoki::isnan(q); });
74 |     m.def("isinf",    [](const Quat &q) -> Mask { return enoki::isinf(q); });
75 | 
76 |     using Vec3 = Array<Value, 3>;
77 |     using Mat4 = Matrix<Value, 4>;
78 | 
79 |     m.def("slerp",
80 |           [](const Quat &q0, const Quat &q1, const Value &t) {
81 |               return slerp(q0, q1, t);
82 |           },
83 |           "a"_a, "b"_a, "t"_a);
84 | 
85 |     m.def("quat_to_euler", [](const Quat &q) { return quat_to_euler<Vec3>(q); });
86 |     m.def("quat_to_matrix", [](const Quat &q) { return quat_to_matrix<Mat4>(q); });
87 |     m.def("matrix_to_quat", [](const Mat4 &mtx) { return matrix_to_quat(mtx); });
88 | 
89 |     m.def("rotate", [](const Vec3 &axis, const Value &angle) {
90 |         return rotate<Quat>(axis, angle);
91 |     }, "axis"_a, "angle"_a);
92 | 
93 |     return quat_cls;
94 | }
95 | 


--------------------------------------------------------------------------------
/src/python/random.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <enoki/random.h>
 4 | #include "common.h"
 5 | #include "docstr.h"
 6 | 
 7 | #define D(...) DOC(__VA_ARGS__)
 8 | 
 9 | template <typename PCG32>
10 | void bind_pcg32(py::module &m, py::module s, const char *name) {
11 |     using UInt64 = typename PCG32::UInt64;
12 |     using Mask   = mask_t<typename PCG32::Float32>;
13 |     // The class is created in `s` under `name`; `m` is not referenced below.
14 |     py::class_<PCG32> rng_cls(s, name, D(PCG32));
15 |     rng_cls.def(py::init<UInt64, UInt64>(),
16 |                 "initstate"_a = PCG32_DEFAULT_STATE,
17 |                 "initseq"_a = PCG32_DEFAULT_STREAM,
18 |                 D(PCG32, PCG32))
19 |         .def("seed", &PCG32::seed,
20 |              "initstate"_a = PCG32_DEFAULT_STATE,
21 |              "initseq"_a = PCG32_DEFAULT_STREAM,
22 |              D(PCG32, seed))
23 |         .def(py::self - py::self, D(PCG32, operator, sub))
24 |         .def(py::self == py::self)
25 |         .def(py::self != py::self)
26 |         .def("advance", &PCG32::advance, D(PCG32, advance));
27 |     rng_cls.def("next_uint32",
28 |              [](PCG32 &rng) { return rng.next_uint32(); },
29 |              D(PCG32, next_uint32))
30 |         .def("next_uint32",
31 |              [](PCG32 &rng, const Mask &active) {
32 |                  return rng.next_uint32(active);
33 |              },
34 |              "mask"_a, D(PCG32, next_uint32, 2))
35 |         .def("next_uint64",
36 |              [](PCG32 &rng) { return rng.next_uint64(); },
37 |              D(PCG32, next_uint64))
38 |         .def("next_uint64",
39 |              [](PCG32 &rng, const Mask &active) {
40 |                  return rng.next_uint64(active);
41 |              },
42 |              "mask"_a, D(PCG32, next_uint64, 2))
43 |         .def("next_uint32_bounded",
44 |              [](PCG32 &rng, uint32_t limit) {
45 |                  return rng.next_uint32_bounded(limit);
46 |              },
47 |              "bound"_a, D(PCG32, next_uint32_bounded))
48 |         .def("next_uint32_bounded",
49 |              [](PCG32 &rng, uint32_t limit, const Mask &active) {
50 |                  return rng.next_uint32_bounded(limit, active);
51 |              },
52 |              "bound"_a, "mask"_a)
53 |         .def("next_uint64_bounded",
54 |              [](PCG32 &rng, uint64_t limit) {
55 |                  return rng.next_uint64_bounded(limit);
56 |              },
57 |              "bound"_a, D(PCG32, next_uint64_bounded))
58 |         .def("next_uint64_bounded",
59 |              [](PCG32 &rng, uint64_t limit, const Mask &active) {
60 |                  return rng.next_uint64_bounded(limit, active);
61 |              },
62 |              "bound"_a, "mask"_a)
63 |         .def("next_float32",
64 |              [](PCG32 &rng) { return rng.next_float32(); },
65 |              D(PCG32, next_float32))
66 |         .def("next_float32",
67 |              [](PCG32 &rng, const Mask &active) {
68 |                  return rng.next_float32(active);
69 |              },
70 |              "mask"_a, D(PCG32, next_float32, 2));
71 |     rng_cls.def_readwrite("state", &PCG32::state)
72 |         .def_readwrite("inc", &PCG32::inc)
73 |         .def("__repr__", [](const PCG32 &rng) {
74 |             std::ostringstream text;
75 |             text << "PCG32[state=" << rng.state << ", inc=" << rng.inc << "]";
76 |             return text.str();
77 |         });
78 | }
79 | 


--------------------------------------------------------------------------------
/src/python/scalar_0d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_scalar_0d(py::module& m, py::module& s) {
 4 |     bind<Vector0m>(m, s, "Vector0m");  // no extra constructors are added to this one
 5 |     auto int_cls    = bind<Vector0i>(m, s, "Vector0i");
 6 |     auto uint_cls   = bind<Vector0u>(m, s, "Vector0u");
 7 |     auto float_cls  = bind<Vector0f>(m, s, "Vector0f");
 8 |     auto double_cls = bind<Vector0d>(m, s, "Vector0d");
 9 | 
10 |     // Vector0f <- Vector0d / Vector0u / Vector0i
11 |     float_cls.def(py::init<const Vector0d &>());
12 |     float_cls.def(py::init<const Vector0u &>());
13 |     float_cls.def(py::init<const Vector0i &>());
14 | 
15 |     // Vector0d <- Vector0f / Vector0u / Vector0i
16 |     double_cls.def(py::init<const Vector0f &>());
17 |     double_cls.def(py::init<const Vector0u &>());
18 |     double_cls.def(py::init<const Vector0i &>());
19 | 
20 |     // Vector0i <- Vector0f / Vector0d / Vector0u
21 |     int_cls.def(py::init<const Vector0f &>());
22 |     int_cls.def(py::init<const Vector0d &>());
23 |     int_cls.def(py::init<const Vector0u &>());
24 | 
25 |     // Vector0u <- Vector0f / Vector0d / Vector0i
26 |     uint_cls.def(py::init<const Vector0f &>());
27 |     uint_cls.def(py::init<const Vector0d &>());
28 |     uint_cls.def(py::init<const Vector0i &>());
29 | }
30 | 


--------------------------------------------------------------------------------
/src/python/scalar_1d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | #include <pybind11/functional.h>
 3 | 
 4 | void bind_scalar_1d(py::module& m, py::module& s) {
 5 |     bind<Vector1m>(m, s, "Vector1m");  // no extra constructors are added to this one
 6 |     auto int_cls    = bind<Vector1i>(m, s, "Vector1i");
 7 |     auto uint_cls   = bind<Vector1u>(m, s, "Vector1u");
 8 |     auto float_cls  = bind<Vector1f>(m, s, "Vector1f");
 9 |     auto double_cls = bind<Vector1d>(m, s, "Vector1d");
10 | 
11 |     // Vector1f <- Vector1d / Vector1u / Vector1i
12 |     float_cls.def(py::init<const Vector1d &>());
13 |     float_cls.def(py::init<const Vector1u &>());
14 |     float_cls.def(py::init<const Vector1i &>());
15 | 
16 |     // Vector1d <- Vector1f / Vector1u / Vector1i
17 |     double_cls.def(py::init<const Vector1f &>());
18 |     double_cls.def(py::init<const Vector1u &>());
19 |     double_cls.def(py::init<const Vector1i &>());
20 | 
21 |     // Vector1i <- Vector1f / Vector1d / Vector1u
22 |     int_cls.def(py::init<const Vector1f &>());
23 |     int_cls.def(py::init<const Vector1d &>());
24 |     int_cls.def(py::init<const Vector1u &>());
25 | 
26 |     // Vector1u <- Vector1f / Vector1d / Vector1i
27 |     uint_cls.def(py::init<const Vector1f &>());
28 |     uint_cls.def(py::init<const Vector1d &>());
29 |     uint_cls.def(py::init<const Vector1i &>());
30 | 
31 |     // Forward to enoki::binary_search with a Python callable as predicate
32 |     m.def("binary_search",
33 |         [](uint32_t lo, uint32_t hi, const std::function<bool(uint32_t)> &pred) {
34 |             return enoki::binary_search(lo, hi, pred);
35 |         },
36 |         "start"_a,
37 |         "end"_a,
38 |         "pred"_a);
39 | }
40 | 


--------------------------------------------------------------------------------
/src/python/scalar_2d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_scalar_2d(py::module& m, py::module& s) {
 4 |     bind<Vector2m>(m, s, "Vector2m");  // no extra constructors are added to this one
 5 |     auto int_cls    = bind<Vector2i>(m, s, "Vector2i");
 6 |     auto uint_cls   = bind<Vector2u>(m, s, "Vector2u");
 7 |     auto float_cls  = bind<Vector2f>(m, s, "Vector2f");
 8 |     auto double_cls = bind<Vector2d>(m, s, "Vector2d");
 9 | 
10 |     // Vector2f <- Vector2d / Vector2u / Vector2i
11 |     float_cls.def(py::init<const Vector2d &>());
12 |     float_cls.def(py::init<const Vector2u &>());
13 |     float_cls.def(py::init<const Vector2i &>());
14 | 
15 |     // Vector2d <- Vector2f / Vector2u / Vector2i
16 |     double_cls.def(py::init<const Vector2f &>());
17 |     double_cls.def(py::init<const Vector2u &>());
18 |     double_cls.def(py::init<const Vector2i &>());
19 | 
20 |     // Vector2i <- Vector2f / Vector2d / Vector2u
21 |     int_cls.def(py::init<const Vector2f &>());
22 |     int_cls.def(py::init<const Vector2d &>());
23 |     int_cls.def(py::init<const Vector2u &>());
24 | 
25 |     // Vector2u <- Vector2f / Vector2d / Vector2i
26 |     uint_cls.def(py::init<const Vector2f &>());
27 |     uint_cls.def(py::init<const Vector2d &>());
28 |     uint_cls.def(py::init<const Vector2i &>());
29 | }
30 | 


--------------------------------------------------------------------------------
/src/python/scalar_3d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | void bind_scalar_3d(py::module& m, py::module& s) {
 4 |     bind<Vector3m>(m, s, "Vector3m");  // no extra constructors are added to this one
 5 |     auto int_cls    = bind<Vector3i>(m, s, "Vector3i");
 6 |     auto uint_cls   = bind<Vector3u>(m, s, "Vector3u");
 7 |     auto float_cls  = bind<Vector3f>(m, s, "Vector3f");
 8 |     auto double_cls = bind<Vector3d>(m, s, "Vector3d");
 9 | 
10 |     // Vector3f <- Vector3d / Vector3u / Vector3i
11 |     float_cls.def(py::init<const Vector3d &>());
12 |     float_cls.def(py::init<const Vector3u &>());
13 |     float_cls.def(py::init<const Vector3i &>());
14 | 
15 |     // Vector3d <- Vector3f / Vector3u / Vector3i
16 |     double_cls.def(py::init<const Vector3f &>());
17 |     double_cls.def(py::init<const Vector3u &>());
18 |     double_cls.def(py::init<const Vector3i &>());
19 | 
20 |     // Vector3i <- Vector3f / Vector3d / Vector3u
21 |     int_cls.def(py::init<const Vector3f &>());
22 |     int_cls.def(py::init<const Vector3d &>());
23 |     int_cls.def(py::init<const Vector3u &>());
24 | 
25 |     // Vector3u <- Vector3f / Vector3d / Vector3i
26 |     uint_cls.def(py::init<const Vector3f &>());
27 |     uint_cls.def(py::init<const Vector3d &>());
28 |     uint_cls.def(py::init<const Vector3i &>());
29 | }
30 | 


--------------------------------------------------------------------------------
/src/python/scalar_4d.cpp:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | 
 4 | void bind_scalar_4d(py::module& m, py::module& s) {
 5 |     bind<Vector4m>(m, s, "Vector4m");  // no extra constructors are added to this one
 6 |     auto int_cls    = bind<Vector4i>(m, s, "Vector4i");
 7 |     auto uint_cls   = bind<Vector4u>(m, s, "Vector4u");
 8 |     auto float_cls  = bind<Vector4f>(m, s, "Vector4f");
 9 |     auto double_cls = bind<Vector4d>(m, s, "Vector4d");
10 | 
11 |     // Vector4f <- Vector4d / Vector4u / Vector4i
12 |     float_cls.def(py::init<const Vector4d &>());
13 |     float_cls.def(py::init<const Vector4u &>());
14 |     float_cls.def(py::init<const Vector4i &>());
15 | 
16 |     // Vector4d <- Vector4f / Vector4u / Vector4i
17 |     double_cls.def(py::init<const Vector4f &>());
18 |     double_cls.def(py::init<const Vector4u &>());
19 |     double_cls.def(py::init<const Vector4i &>());
20 | 
21 |     // Vector4i <- Vector4f / Vector4d / Vector4u
22 |     int_cls.def(py::init<const Vector4f &>());
23 |     int_cls.def(py::init<const Vector4d &>());
24 |     int_cls.def(py::init<const Vector4u &>());
25 | 
26 |     // Vector4u <- Vector4f / Vector4d / Vector4i
27 |     uint_cls.def(py::init<const Vector4f &>());
28 |     uint_cls.def(py::init<const Vector4d &>());
29 |     uint_cls.def(py::init<const Vector4i &>());
30 | }
31 | 


--------------------------------------------------------------------------------
/src/python/scalar_complex.cpp:
--------------------------------------------------------------------------------
 1 | #include "complex.h"
 2 | 
 3 | void bind_scalar_complex(py::module& m, py::module& s) {
 4 |     bind_complex<Complex2f>(m, s, "Complex2f");
 5 |     bind_complex<Complex24f>(m, s, "Complex24f");
 6 |     // Same pair of bindings again for the `d`-suffixed types
 7 |     bind_complex<Complex2d>(m, s, "Complex2d");
 8 |     bind_complex<Complex24d>(m, s, "Complex24d");
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/python/scalar_matrix.cpp:
--------------------------------------------------------------------------------
 1 | #include "matrix.h"
 2 | #include <enoki/transform.h>
 3 | 
 4 | void bind_scalar_matrix(py::module& m, py::module& s) {
 5 |     bind_matrix_mask<Matrix2m>(m, s, "Matrix2m");
 6 |     bind_matrix_mask<Matrix3m>(m, s, "Matrix3m");
 7 |     bind_matrix_mask<Matrix4m>(m, s, "Matrix4m");
 8 |     bind_matrix_mask<Matrix44m>(m, s, "Matrix44m");
 9 | 
10 |     bind_matrix<Matrix2f>(m, s, "Matrix2f");
11 |     bind_matrix<Matrix3f>(m, s, "Matrix3f");
12 |     bind_matrix<Matrix4f>(m, s, "Matrix4f");
13 |     bind_matrix<Matrix44f>(m, s, "Matrix44f");
14 | 
15 |     bind_matrix<Matrix2d>(m, s, "Matrix2d");
16 |     bind_matrix<Matrix3d>(m, s, "Matrix3d");
17 |     bind_matrix<Matrix4d>(m, s, "Matrix4d");
18 |     bind_matrix<Matrix44d>(m, s, "Matrix44d");
19 | 
20 |     bind_matrix_mask<Matrix41m>(m, s, "Matrix41m");
21 |     bind_matrix<Matrix41f>(m, s, "Matrix41f");
22 |     bind_matrix<Matrix41d>(m, s, "Matrix41d");
23 |     // Free functions; the lambda arguments avoid reusing the module's name `m`.
24 |     m.def("transform_decompose",
25 |           [](const Matrix4f &mtx) { return transform_decompose(mtx); });
26 |     m.def("transform_compose", [](const Matrix3f &mtx, const Quaternion<Float32> &quat,
27 |                                   const Array<Float32, 3> &vec) { return transform_compose(mtx, quat, vec); });
28 | }
29 | 


--------------------------------------------------------------------------------
/src/python/scalar_pcg32.cpp:
--------------------------------------------------------------------------------
1 | #include "random.h"
2 | 
3 | void bind_scalar_pcg32(py::module& m, py::module& s) {
4 |     bind_pcg32<PCG32<float, 1>>(m, s, "PCG32");  // instantiates PCG32<float, 1> and exposes it as "PCG32"
5 | }
6 | 


--------------------------------------------------------------------------------
/src/python/scalar_quat.cpp:
--------------------------------------------------------------------------------
1 | #include "quat.h"
2 | 
3 | void bind_scalar_quaternion(py::module& m, py::module& s) {
4 |     bind_quaternion<Quaternion4f>(m, s, "Quaternion4f");  // the returned class handle is not needed here
5 |     bind_quaternion<Quaternion4d>(m, s, "Quaternion4d");
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/call.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     tests/call.cpp -- tests vectorized function calls
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 | 
 10 |     All rights reserved. Use of this source code is governed by a BSD-style
 11 |     license that can be found in the LICENSE file.
 12 | */
 13 | 
 14 | #include "test.h"
 15 | #include <enoki/dynamic.h>
 16 | #include "ray.h"
 17 | #include <enoki/stl.h>
 18 | 
 19 | struct Test;  // forward declarations: the pointer-array aliases below come before the definitions
 20 | struct TestChild;
 21 | // Pointer arrays share the lane count of the integer array (Int32P::Size)
 22 | using Int32P = Array<int>;
 23 | using TestP = Array<const Test*, Int32P::Size>;
 24 | using ChildP = Array<const TestChild*, Int32P::Size>;
 25 | // DynamicArray counterparts of the pointer and integer arrays
 26 | using TestX  = DynamicArray<TestP>;
 27 | using Int32X = DynamicArray<Int32P>;
 28 | // Mask types passed as the trailing argument of the vectorized methods
 29 | using TestPMask = mask_t<TestP>;
 30 | using TestXMask = mask_t<TestX>;
 31 | // Types used by make_ray() and test03_call_with_structure
 32 | using FloatP    = Packet<float>;
 33 | using Vector3f  = Array<float, 3>;
 34 | using Vector3fP = Array<FloatP, 3>;
 35 | using Ray3fP    = Ray<Vector3fP>;
 36 | 
 37 | 
 38 | struct Test {
 39 |     ENOKI_CALL_SUPPORT_FRIEND()
 40 | 
 41 |     Test(int32_t value) : value(value) { }
 42 |     virtual ~Test() { }
 43 | 
 44 |     // Vectorized function: adds `value` to every lane (the mask argument is ignored)
 45 |     virtual Int32P func1(Int32P i, TestPMask /* unused */) const { return i + value; }
 46 |     virtual Int32X func1(Int32X i, TestXMask /* unused */) const { return i + value; }
 47 | 
 48 |     // Vectorized in-place update: adds `value` only to the lanes selected by `mask`
 49 |     virtual void func2(Int32P &i, TestPMask mask) const { i[mask] += value; }
 50 |     // Scalar predicate that takes no mask argument
 51 |     bool func3() const { return value == 20; }
 52 |     // Returns the pair (value, value + 1); the mask argument is ignored
 53 |     std::pair<Int32P, Int32P> func4(TestPMask) const { return std::make_pair(value, value+1); }
 54 |     // Returns a ray built from the constants (1, 1, 1) and (1, 2, 3); the mask is ignored
 55 |     Ray3fP make_ray(TestPMask) const { return Ray3fP(Vector3f(1, 1, 1), Vector3f(1, 2, 3));}
 56 | 
 57 | protected:
 58 |     int32_t value;
 59 | };
 60 | 
 61 | struct TestChild : public Test {
 62 |     TestChild() : Test(42) { }
 63 |     // Compares against the 42 that the constructor above passes to Test
 64 |     bool is_child() const { return value == 42; }
 65 | };
 66 | 
 67 | // Allow Enoki arrays containing pointers to transparently forward function
 68 | // calls (with the appropriate masks).
 69 | ENOKI_CALL_SUPPORT_BEGIN(Test)
 70 | ENOKI_CALL_SUPPORT_METHOD(func1)
 71 | ENOKI_CALL_SUPPORT_METHOD(func2)
 72 | ENOKI_CALL_SUPPORT_METHOD(func3)
 73 | ENOKI_CALL_SUPPORT_GETTER(get_value, value)  // the tests read the protected field `value` via get_value()
 74 | ENOKI_CALL_SUPPORT_METHOD(func4)
 75 | ENOKI_CALL_SUPPORT_METHOD(make_ray)
 76 | ENOKI_CALL_SUPPORT_END(Test)
 77 | // The TestChild section lists only is_child
 78 | ENOKI_CALL_SUPPORT_BEGIN(TestChild)
 79 | ENOKI_CALL_SUPPORT_METHOD(is_child)
 80 | ENOKI_CALL_SUPPORT_END(TestChild)
 81 | 
 82 | 
 83 | ENOKI_TEST(test01_call) {
 84 |     // One lane (index 2, or the last lane of a narrower array) points at `twenty`.
 85 |     const size_t special = std::min((size_t) 2, TestP::Size-1);
 86 |     Test *ten = new Test(10), *twenty = new Test(20);
 87 |     TestP ptrs(ten);
 88 |     ptrs.coeff(special) = twenty;
 89 | 
 90 |     // Expected func1 output: lane index plus the value of the object in that lane
 91 |     Int32P lanes = arange<Int32P>();
 92 |     Int32P want_sum = lanes + 10;
 93 |     if (special < Int32P::Size)
 94 |         want_sum.coeff(special) += 10;
 95 | 
 96 |     Int32P got = ptrs->func1(lanes);
 97 |     assert(got == want_sum);
 98 | 
 99 |     Int32P want_value = 10;
100 |     if (special < Int32P::Size)
101 |         want_value.coeff(special) += 10;
102 | 
103 |     assert(ptrs->get_value() == want_value);
104 | 
105 |     std::pair<Int32P, Int32P> pair_out = ptrs->func4();
106 |     assert(pair_out.first == want_value);
107 |     assert(pair_out.second == want_value+1);
108 | 
109 |     TestX ptrs_dyn;
110 |     Int32X lanes_dyn;
111 |     set_slices(ptrs_dyn, TestP::Size);
112 |     set_slices(lanes_dyn, TestP::Size);
113 |     packet(ptrs_dyn, 0) = ptrs;
114 |     packet(lanes_dyn, 0) = lanes;
115 |     Int32X got_dyn = ptrs_dyn->func1(lanes_dyn);
116 |     assert(packet(got_dyn, 0) == want_sum);
117 | 
118 |     ptrs->func2(lanes);
119 |     assert(lanes == want_sum);
120 | 
121 |     auto is_twenty = mask_t<TestP>(ptrs->func3());
122 |     assert(is_twenty == eq(ptrs, twenty));
123 | 
124 |     /* A null entry in the pointer array must not crash the call below */
125 |     ptrs.coeff(0) = nullptr;
126 |     ptrs->func3();
127 | 
128 |     delete ten;
129 |     delete twenty;
130 | }
131 | 
132 | 
133 | ENOKI_TEST(test02_reinterpret_pointer_array) {
134 |     using ChildMask = mask_t<ChildP>;
135 |     Test *plain = new Test(1);
136 |     Test *child = new TestChild();
137 | 
138 |     TestP ptrs(child);
139 |     ptrs[std::min((size_t) 2, TestP::Size-1)] = plain;
140 | 
141 |     auto as_children = reinterpret_array<ChildP>(ptrs);
142 |     // is_child() yields an Array of bools; it is converted to the mask type
143 |     // first so that the comparison against eq() is correct.
144 |     assert(all(ChildMask(as_children->is_child()) == eq(ptrs, child)));
145 | 
146 |     delete plain;
147 |     delete child;
148 | }
149 | 
150 | ENOKI_TEST(test03_call_with_structure) {
151 |     Test *obj = new Test(1);
152 |     TestP ptrs(obj);
153 |     Vector3fP at_one = ptrs->make_ray()(1);  // ray from make_ray(), evaluated at parameter 1
154 |     assert(all_nested(eq(at_one, Vector3f(2, 3, 4))));
155 |     delete obj;
156 | }
157 | 


--------------------------------------------------------------------------------
/tests/color.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.h"
 2 | #include <enoki/color.h>
 3 | 
 4 | ENOKI_TEST_FLOAT(test01_linear_to_srgb) {
 5 |     // Probes linear_to_srgb() against the double-precision piecewise
 6 |     // formula written out in the second lambda.
 7 |     test::probe_accuracy<T>(
 8 |         [](const T &x) -> T { return linear_to_srgb(x); },
 9 |         [](double x) {
10 |             double toe   = 12.92 * x;
11 |             double curve = 1.055 * std::pow(x, 1.0 / 2.4) - 0.055;
12 |             return select(x <= 0.0031308, toe, curve);
13 |         },
14 |         Value(0), Value(1), 60, false
15 |     );
16 | }
17 | 
18 | ENOKI_TEST_FLOAT(test02_srgb_to_linear) {
19 |     // Probes srgb_to_linear() against the double-precision piecewise
20 |     // formula written out in the second lambda.
21 |     test::probe_accuracy<T>(
22 |         [](const T &x) -> T { return srgb_to_linear(x); },
23 |         [](double x) {
24 |             double toe   = (1.0 / 12.92) * x;
25 |             double curve = std::pow((x + 0.055) * (1.0 / 1.055), 2.4);
26 |             return select(x <= 0.04045, toe, curve);
27 |         },
28 |         Value(0), Value(1), 60, false
29 |     );
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/tests/complex.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     tests/complex.cpp -- tests complex numbers and quaternions
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 | 
 10 |     All rights reserved. Use of this source code is governed by a BSD-style
 11 |     license that can be found in the LICENSE file.
 12 | */
 13 | 
 14 | #include "test.h"
 15 | #include <enoki/complex.h>
 16 | #include <enoki/quaternion.h>
 17 | #include <enoki/transform.h>
 18 | #include <enoki/dynamic.h>
 19 | 
 20 | using Cf = Complex<double>;  // NB: double precision despite the "f" suffix
 21 | using Qf = Quaternion<double>;  // likewise double precision
 22 | using V3 = Array<double, 3>;
 23 | 
 24 | ENOKI_TEST(test00_complex_str) {
 25 |     assert(to_string(Cf(1.0)) == "1 + 0i");  // a real-only value still prints "+ 0i"
 26 |     assert(to_string(Cf(1.0, 2.0)) == "1 + 2i");
 27 |     assert(to_string(conj(Cf(1.0, 2.0))) == "1 - 2i");  // negative part is printed as "- 2i"
 28 | }
 29 | 
 30 | ENOKI_TEST(test01_quat_str) {
 31 |     assert(to_string(Qf(2.0, 3.0, 4.0, 1.0)) == "1 + 2i + 3j + 4k");  // last argument is the real term, printed first
 32 |     assert(to_string(conj(Qf(2.0, 3.0, 4.0, 1.0))) == "1 - 2i - 3j - 4k");
 33 | }
 34 | 
 35 | ENOKI_TEST(test02_complex_mult) {
 36 |     assert(to_string(Cf(1.0, 2.0) * Cf(-4.0, 3.0)) == "-10 - 5i");  // (1 + 2i)(-4 + 3i)
 37 | }
 38 | 
 39 | ENOKI_TEST(test03_quat_mult) {
 40 |     assert(to_string(Qf(1.0, 2.0, -1.0, 3.0) * Qf(-4.0, 3.0, 5.0, -2.0)) == "-3 - 1i + 4j + 28k");
 41 | }
 42 | 
 43 | ENOKI_TEST(test04_complex_rcp) {
 44 |     auto z = Cf(2.0, 1.0);
 45 |     assert(to_string(rcp(z)) == "0.4 - 0.2i");
 46 |     assert(abs(z/z - Cf(1.0)) < 1e-5);  // z / z must be (numerically) one
 47 | }
 48 | 
 49 | ENOKI_TEST(test05_quat_rcp) {
 50 |     auto q = normalize(Qf(1.f, 2.f, 3.f, 4.f));
 51 |     auto q_inv = rcp(q);
 52 |     auto residual = q*q_inv - Qf(1.f);  // product with the reciprocal minus one
 53 |     assert(abs(residual) < 1e-5);
 54 |     assert(abs(q/q - Qf(1.f)) < 1e-5);
 55 | }
 56 | 
 57 | ENOKI_TEST(test06_complex_decomp) {
 58 |     assert(real(Cf(1, 2)) == 1);  // first constructor argument is the real part
 59 |     assert(imag(Cf(1, 2)) == 2);  // second one is the imaginary part
 60 | }
 61 | 
 62 | ENOKI_TEST(test07_quat_decomp) {
 63 |     assert(real(Qf(1, 2, 3, 4)) == 4);  // the real part is the fourth constructor argument
 64 |     assert(imag(Qf(1, 2, 3, 4)) == V3(1, 2, 3));  // the imaginary part is the first three
 65 | }
 66 | 
 67 | ENOKI_TEST(test08_complex_exp) {
 68 |     assert(abs(exp(Cf(1, 2)) - Cf(-1.1312, 2.47173)) < 1e-5);  // reference value given to about 6 significant digits
 69 | }
 70 | 
 71 | ENOKI_TEST(test09_quat_exp) {
 72 |     assert(abs(exp(Qf(2, 3, 4, 1)) - Qf(-0.78956, -1.18434, -1.57912, 1.69392)) < 1e-5);  // input is 1 + 2i + 3j + 4k
 73 | }
 74 | 
 75 | ENOKI_TEST(test10_complex_log) {
 76 |     assert(abs(log(Cf(1, 2)) - Cf(0.804719, 1.10715)) < 1e-5);  // reference value given to about 6 significant digits
 77 | }
 78 | 
 79 | ENOKI_TEST(test11_quat_log) {
 80 |     assert(abs(log(Qf(2, 3, 4, 1)) - Qf(0.51519, 0.772785, 1.03038, 1.7006)) < 1e-5);  // input is 1 + 2i + 3j + 4k
 81 | }
 82 | 
 83 | ENOKI_TEST(test12_complex_sqrt) {
 84 |     assert(abs(sqrt(Cf(1, 2)) - Cf(1.27202, 0.786151)) < 1e-6);  // note: 1e-6 here, tighter than the 1e-5 used by neighbouring tests
 85 | }
 86 | 
 87 | ENOKI_TEST(test13_quat_sqrt) {
 88 |     assert(abs(sqrt(Qf(2, 3, 4, 1)) - Qf(0.555675, 0.833512, 1.11135, 1.79961)) < 1e-5);  // input is 1 + 2i + 3j + 4k
 89 | }
 90 | 
 91 | ENOKI_TEST(test14_complex_sin_cos_tan) {
 92 |     assert(abs(sin(Cf(1, 2)) - Cf(3.16578, 1.9596)) < 1e-5);
 93 |     assert(abs(cos(Cf(1, 2)) - Cf(2.03272, -3.0519)) < 1e-5);
 94 |     assert(abs(tan(Cf(1, 2)) - Cf(0.0338128, 1.01479)) < 1e-5);
 95 |     auto sc = sincos(Cf(1, 2));  // checked against the same sin/cos reference values as above
 96 |     assert(abs(sc.first - Cf(3.16578, 1.9596)) < 1e-5);
 97 |     assert(abs(sc.second - Cf(2.03272, - 3.0519)) < 1e-5);
 98 | }
 99 | 
100 | ENOKI_TEST(test15_complex_sinh_cosh_tanh) {
101 |     assert(abs(sinh(Cf(1, 2)) - Cf(-0.489056, 1.40312)) < 1e-5);
102 |     assert(abs(cosh(Cf(1, 2)) - Cf(-0.642148, 1.06861)) < 1e-5);
103 |     assert(abs(tanh(Cf(1, 2)) - Cf(1.16674, -0.243458)) < 1e-5);
104 |     auto sc = sincosh(Cf(1, 2));  // checked against the same sinh/cosh reference values as above
105 |     assert(abs(sc.first - Cf(-0.489056, 1.40312)) < 1e-5);
106 |     assert(abs(sc.second - Cf(-0.642148, 1.06861)) < 1e-5);
107 | }
108 | 
109 | ENOKI_TEST(test16_complex_asin_acos_atan) {
110 |     assert(abs(asin(Cf(1, 2)) - Cf(0.427079, 1.52857)) < 1e-5);  // reference values given to about 6 significant digits
111 |     assert(abs(acos(Cf(1, 2)) - Cf(1.14372, -1.52857)) < 1e-5);
112 |     assert(abs(atan(Cf(1, 2)) - Cf(1.33897, 0.402359)) < 1e-5);
113 | }
114 | 
115 | ENOKI_TEST(test17_complex_asinh_acosh_atanh) {
116 |     assert(abs(asinh(Cf(1, 2)) - Cf(1.46935, 1.06344)) < 1e-5);  // reference values given to about 6 significant digits
117 |     assert(abs(acosh(Cf(1, 2)) - Cf(1.52857, 1.14372)) < 1e-5);
118 |     assert(abs(atanh(Cf(1, 2)) - Cf(0.173287, 1.1781)) < 1e-5);
119 | }
120 | 
121 | using FloatP = Packet<float>;  // the aliases from here on are float-based, unlike Cf/Qf/V3 above
122 | using FloatX = DynamicArray<FloatP>;
123 | using Quaternion4f = Quaternion<float>;
124 | using Quaternion4X = Quaternion<FloatX>;
125 | using Matrix4X  = Matrix<FloatX, 4>;
126 | using Matrix4f  = Matrix<float, 4>;
127 | using Matrix4fP = Matrix<FloatP, 4>;  // matrix type produced inside slerp_matrix()'s lambda
128 | using Vector3f  = Array<float, 3>;
129 | using Vector4f  = Array<float, 4>;
130 | 
131 | Matrix4X slerp_matrix(const Quaternion4X &x, const Quaternion4X &y, float t) {  // slerp(x, y, t) via vectorize(), returned as matrices
132 |     return vectorize([t](auto &&qa, auto &&qb) { return quat_to_matrix<Matrix4fP>(slerp(qa, qb, t)); }, x, y);
133 | }
134 | 
135 | Quaternion4X to_quat(const Matrix4X &m) {  // matrix_to_quat() applied through vectorize()
136 |     return vectorize([](auto &&mtx) { return matrix_to_quat(mtx); }, m);
137 | }
138 | 
139 | ENOKI_TEST(test18_complex_vectorize_scalar) {
140 |     /* Two unit quaternions to interpolate between */
141 |     Quaternion4f a = normalize(Quaternion4f(1, 2, 3, 4)),
142 |                  b = normalize(Quaternion4f(0, 0, 0, 1));
143 | 
144 |     /* Place each of them into a dynamic array holding a single slice */
145 |     Quaternion4X x, y;
146 |     set_slices(x, 1);
147 |     slice(x, 0) = a;
148 |     set_slices(y, 1);
149 |     slice(y, 0) = b;
150 | 
151 |     /* slerp -> matrix -> quaternion, then read the single slice back */
152 |     auto matrices    = slerp_matrix(x, y, 0.5f);
153 |     auto quaternions = to_quat(matrices);
154 |     Quaternion4f result = slice(quaternions, 0);
155 | 
156 |     /* For t = 0.5 the test expects the normalized sum of both inputs */
157 |     Quaternion4f ref = normalize(a+b);
158 |     assert(abs(result - ref) < 1e-5f);
159 | }
154 | 
155 | ENOKI_TEST(test19_rotation) {
156 |     /* Rotating 'input' must give the same vector along four different routes */
157 |     auto axis = normalize(Vector3f(1.f, 2.f, 3.f));
158 |     float angle = 0.5f;
159 |     Vector4f input(0.8f, 0.3f, 0.2f, 0.0f);
160 | 
161 |     /* Route 1: quaternion built directly from axis/angle */
162 |     auto q_direct = rotate<Quaternion4f>(axis, angle);
163 |     auto via_q_direct = Vector4f(q_direct * Quaternion4f(input) * conj(q_direct));
164 | 
165 |     /* Route 2: matrix built directly from axis/angle */
166 |     auto m_direct = rotate<Matrix4f>(axis, angle);
167 |     auto via_m_direct = m_direct * input;
168 | 
169 |     /* Route 3: matrix derived from the quaternion of route 1 */
170 |     auto m_from_q = quat_to_matrix<Matrix4f>(q_direct);
171 |     auto via_m_from_q = m_from_q * input;
172 | 
173 |     /* Route 4: quaternion derived from the matrix of route 2 */
174 |     auto q_from_m = matrix_to_quat(m_direct);
175 |     auto via_q_from_m = Vector4f(q_from_m * Quaternion4f(input) * conj(q_from_m));
176 | 
177 |     assert(norm(via_q_direct-via_m_direct) < 1e-6f);
178 |     assert(norm(via_q_direct-via_m_from_q) < 1e-6f);
179 |     assert(norm(via_q_direct-via_q_from_m) < 1e-6f);
180 | }
176 | 
177 | ENOKI_TEST(test20_sincos_arg) {
178 |     /* First pair of inputs: both components of the result are positive */
179 |     auto sc = sincos_arg_diff(Cf(-1.01264771f, 1.1261553f), Cf(-0.70017226f, 1.24072149f));
180 |     assert(abs(sc.first - 0.2168644f) < 1e-6f);
181 |     assert(abs(sc.second - 0.97620174f) < 1e-6f);
182 | 
183 |     /* Second pair of inputs: the first component is expected to be negative */
184 |     sc = sincos_arg_diff(Cf(-0.08012004f, 0.86251237f), Cf(-1.22284338f, 0.86829703f));
185 |     assert(abs(sc.first + 0.75831358f) < 1e-6f);
186 |     assert(abs(sc.second - 0.65188996f) < 1e-6f);
187 | }
186 | 


--------------------------------------------------------------------------------
/tests/conv.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     tests/conv.cpp -- tests value and mask conversion routines
 3 | 
 4 |     Enoki is a C++ template library that enables transparent vectorization
 5 |     of numerical kernels using SIMD instruction sets available on current
 6 |     processor architectures.
 7 | 
 8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
 9 | 
10 |     All rights reserved. Use of this source code is governed by a BSD-style
11 |     license that can be found in the LICENSE file.
12 | */
13 | 
14 | #if defined(__GNUG__)
15 | #  pragma GCC diagnostic ignored "-Wconversion"
16 | #endif
17 | 
18 | #include "test.h"
19 | 
20 | /* Round trip: arange<T>() is converted to the Value2-based array type and
21 |    back again; the result has to compare equal to the starting value. */
22 | template <typename T, typename Value2> void convtest() {
23 |     using Converted = replace_scalar_t<T, Value2>;
24 | 
25 |     auto source    = arange<T>();
26 |     auto converted = Converted(source);
27 |     auto restored  = T(converted);
28 | 
29 |     assert(source == restored);
30 | }
27 | 
28 | /* For every lane index: build a mask of T that selects only that lane,
29 |    convert it to the mask type of the Value2-based array, and verify via
30 |    select() + store_unaligned() that exactly that lane is set. */
31 | template <typename T, typename Value2> void masktest() {
32 |     using Value = typename T::Value;
33 |     using T2 = replace_scalar_t<T, Value2>;
34 | 
35 |     for (size_t lane = 0; lane < T::Size; ++lane) {
36 |         mask_t<T> src_mask = eq(arange<T>() - T(Value(lane)), T(0));
37 |         mask_t<T2> dst_mask(src_mask);
38 | 
39 |         /* 1 in the selected lane, 0 everywhere else */
40 |         T2 selected = select(dst_mask, T2(Value2(1)), T2(Value2(0)));
41 |         Value2 buffer[T::Size];
42 |         store_unaligned(buffer, selected);
43 | 
44 |         for (size_t k = 0; k < T::Size; ++k)
45 |             assert(buffer[k] == ((k == lane) ? Value2(1) : Value2(0)));
46 |     }
47 | }
41 | 
42 | /* Value round trips (see convtest) through each target scalar type */
43 | ENOKI_TEST_ALL(test01_conv_int32_t)  { convtest<T, int32_t>(); }
44 | ENOKI_TEST_ALL(test02_conv_uint32_t) { convtest<T, uint32_t>(); }
45 | ENOKI_TEST_ALL(test03_conv_int64_t)  { convtest<T, int64_t>(); }
46 | ENOKI_TEST_ALL(test04_conv_uint64_t) { convtest<T, uint64_t>(); }
47 | ENOKI_TEST_ALL(test05_conv_half)     { convtest<T, half>(); }
48 | ENOKI_TEST_ALL(test06_conv_float)    { convtest<T, float>(); }
49 | ENOKI_TEST_ALL(test07_conv_double)   { convtest<T, double>(); }
49 | 
50 | /* Mask conversions (see masktest) towards each target scalar type */
51 | ENOKI_TEST_ALL(test08_mask_int32_t)  { masktest<T, int32_t>(); }
52 | ENOKI_TEST_ALL(test09_mask_uint32_t) { masktest<T, uint32_t>(); }
53 | ENOKI_TEST_ALL(test10_mask_int64_t)  { masktest<T, int64_t>(); }
54 | ENOKI_TEST_ALL(test11_mask_uint64_t) { masktest<T, uint64_t>(); }
55 | ENOKI_TEST_ALL(test12_mask_float)    { masktest<T, float>(); }
56 | ENOKI_TEST_ALL(test13_mask_double)   { masktest<T, double>(); }
57 | ENOKI_TEST_ALL(test14_mask_half)     { masktest<T, half>(); }
57 | 
58 | ENOKI_TEST_ALL(test15_bool_conv) {
59 |     /* A mask converted to bool_array_t<T> and back must select the same lanes */
60 |     for (size_t lane = 0; lane < T::Size; ++lane) {
61 |         mask_t<T> original = eq(arange<T>() - T(Value(lane)), T(0));
62 | 
63 |         bool_array_t<T> as_bools(original);
64 |         mask_t<T> restored(as_bools);
65 | 
66 |         T expected = select(original, T(Value(1)), T(Value(0)));
67 |         T actual   = select(restored, T(Value(1)), T(Value(0)));
68 | 
69 |         assert(expected == actual);
70 |     }
71 | }
69 | 


--------------------------------------------------------------------------------
/tests/custom.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     tests/custom.cpp -- tests operations involving custom data structures
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 | 
 10 |     All rights reserved. Use of this source code is governed by a BSD-style
 11 |     license that can be found in the LICENSE file.
 12 | */
 13 | 
 14 | #include "test.h"
 15 | #include <enoki/array.h>
 16 | #include <enoki/dynamic.h>
 17 | 
 18 | /* Test structure with three members (o, d, i) that are registered with
 19 |    Enoki through ENOKI_STRUCT / ENOKI_STRUCT_SUPPORT. */
 20 | template <typename Value_> struct Custom {
 21 |     using Value = Value_;
 22 |     using FloatVector = Array<Value, 3>;
 23 |     using DoubleVector = float64_array_t<FloatVector>;
 24 |     using IntVector = int32_array_t<Value>;
 25 | 
 26 |     FloatVector o;
 27 |     DoubleVector d;
 28 |     IntVector i = 0;
 29 | 
 30 |     /* Member-wise comparison; stops at the first member that differs */
 31 |     template <typename T>
 32 |     bool operator==(const Custom<T> &other) const {
 33 |         if (!(other.o == o))
 34 |             return false;
 35 |         if (!(other.d == d))
 36 |             return false;
 37 |         return other.i == i;
 38 |     }
 39 | 
 40 |     /* Negation of operator== */
 41 |     template <typename T>
 42 |     bool operator!=(const Custom<T> &other) const {
 43 |         return !(*this == other);
 44 |     }
 45 | 
 46 |     ENOKI_STRUCT(Custom, o, d, i)
 47 | };
 48 | 
 49 | ENOKI_STRUCT_SUPPORT(Custom, o, d, i)
 42 | 
 43 | ENOKI_TEST(test01_mask_slice_custom) {
 44 |     using FloatP = Packet<float>;
 45 |     using Vector3f = Array<float, 3>;
 46 |     using Vector3d = Array<double, 3>;
 47 |     using Custom3f = Custom<float>;
 48 |     using Custom3fP = Custom<FloatP>;
 49 | 
 50 |     /* Destination starts out zeroed; the source carries the new values */
 51 |     Custom3fP dst = zero<Custom3fP>();
 52 |     Custom3fP src;
 53 |     src.o = Vector3f(1, 2, 3);
 54 |     src.d = Vector3d(4, 5, 6);
 55 | 
 56 |     /* Masked assignment that leaves lane 0 untouched */
 57 |     auto not_first = arange<FloatP>() > 0.f;
 58 |     masked(dst, not_first) = src;
 59 | 
 60 |     /* Lane 0 keeps its zeros, lane 1 (when present) holds the source values */
 61 |     assert((slice(dst, 0) == Custom3f(Vector3f(0, 0, 0), Vector3f(0, 0, 0), 0)));
 62 |     if (FloatP::Size > 1)
 63 |         assert((slice(dst, 1) == Custom3f(Vector3f(1, 2, 3), Vector3f(4, 5, 6), 0)));
 64 | }
 62 | 
 63 | ENOKI_TEST(test02_mask_slice_custom_scalar) {
 64 |     using Custom3f = Custom<float>;
 65 |     using Vector3f = Array<float, 3>;
 66 | 
 67 |     Custom3f x = zero<Custom3f>();
 68 |     Custom3f y(Vector3f(1, 2, 3), Vector3f(4, 5, 6), 0);
 69 | 
 70 |     /* Scalar masked assignment with an enabled mask */
 71 |     Custom3f z = zero<Custom3f>();
 72 |     masked(z, true) = y;
 73 | 
 74 |     /* Sanity check: source and zero-initialized value differ */
 75 |     assert(y != x);
 76 |     /* Check the assignment target itself; the previous 'y == y' comparison
 77 |        could never fail and left 'z' unverified */
 78 |     assert(z == y);
 79 | }
 75 | 
 76 | /* Empty tag type; only pointers to it are stored below */
 77 | struct Test { };
 78 | 
 79 | /* Structure combining a pointer-valued member with a mask member, both
 80 |    derived from T. The macros are invoked without a trailing ';', matching
 81 |    their use on 'Custom' and avoiding stray empty declarations. */
 82 | template <typename T> struct TrickyStruct {
 83 |     using Ptr = replace_scalar_t<T, Test *>;
 84 |     using Mask = mask_t<T>;
 85 | 
 86 |     Ptr ptr;
 87 |     Mask mask;
 88 | 
 89 |     ENOKI_STRUCT(TrickyStruct, ptr, mask)
 90 | };
 91 | 
 92 | ENOKI_STRUCT_SUPPORT(TrickyStruct, ptr, mask)
 89 | 
 90 | ENOKI_TEST(test03_tricky) {
 91 |     using FloatP = Packet<float>;
 92 |     using FloatX = DynamicArray<FloatP>;
 93 |     using Tricky = TrickyStruct<float>;
 94 |     using TrickyP = TrickyStruct<FloatP>;
 95 |     using TrickyX = TrickyStruct<FloatX>;
 96 | 
 97 |     /* Fill a packet-valued struct lane by lane through slice() ... */
 98 |     TrickyP packet;
 99 |     for (size_t lane = 0; lane < FloatP::Size; ++lane)
100 |         slice(packet, lane) = Tricky((Test *) (0xdeadbeef + lane), (lane & 1) != 0);
101 | 
102 |     /* ... and read every lane back through the members */
103 |     for (size_t lane = 0; lane < FloatP::Size; ++lane) {
104 |         assert(packet.mask.coeff(lane) == ((lane & 1) != 0));
105 |         assert(packet.ptr.coeff(lane) == (Test *) (0xdeadbeef + lane));
106 |     }
107 | 
108 |     /* Dynamic variant: slice_ptr() must expose the pointer member as Test** */
109 |     TrickyX dynamic;
110 |     set_slices(dynamic, 3);
111 |     dynamic.ptr = (Test *) 0x1337c0d3;
112 |     auto second = slice_ptr(dynamic, 1);
113 |     static_assert(std::is_same_v<decltype(second.ptr), Test**>);
114 | 
115 |     /* A write through the slice pointer is visible in the array */
116 |     *second.ptr = (Test *) 0xdeadbeef;
117 |     assert(slice(dynamic.ptr, 1) == (Test *) 0xdeadbeef);
118 | }
114 | 
115 | ENOKI_TEST(test04_gather_custom_struct) {
116 |     using FloatP = Packet<float>;
117 |     using UInt32P = Packet<uint32_t>;
118 |     using FloatX = DynamicArray<FloatP>;
119 |     using UInt32X = DynamicArray<UInt32P>;
120 |     using Custom3f = Custom<float>;
121 |     using Custom3fP = Custom<FloatP>;
122 |     using Custom3fX = Custom<FloatX>;
123 | 
124 |     /* 20 entries; every member is arange() plus a distinct offset */
125 |     Custom3fX data;
126 |     data.o.x() = arange<FloatX>(20) + 1.f;
127 |     data.o.y() = arange<FloatX>(20) + 100.f;
128 |     data.o.z() = arange<FloatX>(20) + 1000.f;
129 |     data.d.x() = arange<FloatX>(20) + 2.f;
130 |     data.d.y() = arange<FloatX>(20) + 200.f;
131 |     data.d.z() = arange<FloatX>(20) + 2000.f;
132 |     data.i = arange<UInt32X>(20) + 1234u;
133 | 
134 |     /* Packet gather starting at index 1: all offsets shift by one */
135 |     Custom3fP packet = gather<Custom3fP>(data, arange<UInt32P>() + 1u);
136 | 
137 |     assert(packet.o.x() == arange<FloatP>() + 2.f);
138 |     assert(packet.o.y() == arange<FloatP>() + 101.f);
139 |     assert(packet.o.z() == arange<FloatP>() + 1001.f);
140 | 
141 |     assert(packet.d.x() == arange<FloatP>() + 3.f);
142 |     assert(packet.d.y() == arange<FloatP>() + 201.f);
143 |     assert(packet.d.z() == arange<FloatP>() + 2001.f);
144 | 
145 |     assert(packet.i == arange<UInt32P>() + 1235u);
146 | 
147 |     /* Scalar gather of the entry at index 1 */
148 |     Custom3f single = gather<Custom3f>(data, 1u);
149 | 
150 |     assert(single.o.x() == 2.f);
151 |     assert(single.o.y() == 101.f);
152 |     assert(single.o.z() == 1001.f);
153 | 
154 |     assert(single.d.x() == 3.f);
155 |     assert(single.d.y() == 201.f);
156 |     assert(single.d.z() == 2001.f);
157 | 
158 |     assert(single.i == 1235u);
159 | }
157 | 


--------------------------------------------------------------------------------
/tests/explog.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.h"
 2 | 
 3 | ENOKI_TEST_FLOAT(test01_ldexp) {
 4 |     const Value inf = std::numeric_limits<Value>::infinity();
 5 |     const Value nan = std::numeric_limits<Value>::quiet_NaN();
 6 | 
 7 |     /* Small integer mantissas and exponents: must match std::ldexp exactly */
 8 |     for (int mant = -10; mant < 10; ++mant) {
 9 |         for (int expo = -10; expo < 10; ++expo) {
10 |             T expected = T(std::ldexp(Value(mant), expo));
11 |             T actual = ldexp(T(Value(mant)), T(Value(expo)));
12 |             assert(expected == actual);
13 |         }
14 |     }
15 | 
16 |     /* Infinities keep their sign, NaN stays NaN */
17 |     assert(T(ldexp(T(inf), T(Value(2)))) == T(inf));
18 |     assert(T(ldexp(T(-inf), T(Value(2)))) == T(-inf));
19 |     assert(all(enoki::isnan(ldexp(T(nan), T(Value(2))))));
20 | }
19 | 
20 | // The AVX512F version of frexp() follows slightly different conventions,
21 | // which is why the signed-zero checks below are skipped when has_avx512f
22 | // is set. frexp() is used by log(), where this is not a problem.
23 | ENOKI_TEST_FLOAT(test02_frexp) {
24 |     const Value inf = std::numeric_limits<Value>::infinity();
25 |     const Value nan = std::numeric_limits<Value>::quiet_NaN();
26 |     using IntArray = enoki::int_array_t<T>;
27 |     using Int = typename IntArray::Value;
28 | 
29 |     /* Nonzero integers in [-10, 10): compare against std::frexp */
30 |     for (int v = -10; v < 10; ++v) {
31 |         if (v == 0)
32 |             continue;
33 | 
34 |         int ref_exp;
35 |         Value ref_frac = std::frexp(Value(v), &ref_exp);
36 | 
37 |         T exp_arr, frac_arr;
38 |         std::tie(frac_arr, exp_arr) = frexp(T(Value(v)));
39 | 
40 |         /* The exponent returned by frexp() is one less than std::frexp's */
41 |         assert(T(Value(ref_exp)) == exp_arr + 1.f);
42 |         assert(T(ref_frac) == frac_arr);
43 |     }
44 | 
45 |     T expo, frac;
46 | 
47 |     /* +inf: exactly one of fraction/exponent is infinite, none is NaN */
48 |     std::tie(frac, expo) = frexp(T(inf));
49 |     assert((std::isinf(frac[0]) && !std::isinf(expo[0])) ||
50 |            (std::isinf(expo[0]) && !std::isinf(frac[0])));
51 |     assert(!std::isnan(frac[0]) && !std::isnan(expo[0]));
52 |     assert(frac[0] > 0);
53 | 
54 |     /* -inf: same as above, with a negative fraction */
55 |     std::tie(frac, expo) = frexp(T(-inf));
56 |     assert((std::isinf(frac[0]) && !std::isinf(expo[0])) ||
57 |            (std::isinf(expo[0]) && !std::isinf(frac[0])));
58 |     assert(!std::isnan(frac[0]) && !std::isnan(expo[0]));
59 |     assert(frac[0] < 0);
60 | 
61 |     if (!has_avx512f) {
62 |         /* Signed zeros must be preserved bit for bit in the fraction */
63 |         std::tie(frac, expo) = frexp(T(+0.f));
64 |         assert((reinterpret_array<IntArray>(frac) == IntArray(memcpy_cast<Int>(Value(+0.f)))));
65 | 
66 |         std::tie(frac, expo) = frexp(T(-0.f));
67 |         assert((reinterpret_array<IntArray>(frac) == IntArray(memcpy_cast<Int>(Value(-0.f)))));
68 |     }
69 | 
70 |     /* NaN must show up in at least one of the two outputs */
71 |     std::tie(frac, expo) = frexp(T(nan));
72 |     assert(std::isnan(frac[0]) || std::isnan(expo[0]));
73 | }
64 | 
65 | ENOKI_TEST_FLOAT(test03_exp) {
66 |     /* Bound handed to probe_accuracy; a larger one is used for AVX512ER */
67 | #if defined(ENOKI_X86_AVX512ER)
68 |     constexpr int bound = 27;
69 | #else
70 |     constexpr int bound = 3;
71 | #endif
72 | 
73 |     /* Accuracy of exp() relative to std::exp on [-20, 30] */
74 |     auto approx = [](const T &v) -> T { return exp(v); };
75 |     auto exact  = [](double v) { return std::exp(v); };
76 |     test::probe_accuracy<T>(approx, exact, Value(-20), Value(30), bound);
77 | 
78 |     /* An array of references must evaluate like the array it refers to */
79 |     Array<T, 4> values((Value) M_PI);
80 |     Array<T&, 4> refs(values);
81 |     assert(exp(values) == exp(refs));
82 | }
81 | 
82 | ENOKI_TEST_FLOAT(test04_log) {
83 |     /* Accuracy of log() relative to std::log on [0, 2e30] */
84 |     auto approx = [](const T &v) -> T { return log(v); };
85 |     auto exact  = [](double v) { return std::log(v); };
86 |     test::probe_accuracy<T>(approx, exact, Value(0), Value(2e30), 2);
87 | 
88 |     /* An array of references must evaluate like the array it refers to */
89 |     Array<T, 4> values((Value) M_PI);
90 |     Array<T&, 4> refs(values);
91 |     assert(log(values) == log(refs));
92 | }
94 | 
95 | ENOKI_TEST_FLOAT(test05_pow) {
96 |     /* pi^-2 compared against a precomputed constant (first lane only) */
97 |     T base     = T(Value(M_PI));
98 |     T exponent = T(Value(-2));
99 |     T expected = T(Value(0.101321183642338));
100 | 
101 |     assert(T(abs(pow(base, exponent) - expected))[0] < 1e-6f);
102 | }
99 | 


--------------------------------------------------------------------------------
/tests/histogram.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     tests/histogram.cpp -- Test which uses scatter_add() to build a histogram
 3 |     of a set of normally distributed pseudorandom samples
 4 | 
 5 |     Enoki is a C++ template library that enables transparent vectorization
 6 |     of numerical kernels using SIMD instruction sets available on current
 7 |     processor architectures.
 8 | 
 9 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
10 | 
11 |     All rights reserved. Use of this source code is governed by a BSD-style
12 |     license that can be found in the LICENSE file.
13 | */
14 | 
15 | #if defined(NDEBUG)
16 | #  undef NDEBUG
17 | #endif
18 | 
19 | #include <enoki/random.h>
20 | #include <enoki/special.h>
21 | 
22 | using namespace enoki;
23 | 
24 | int main(int /* argc */, char * /* argv */[]) {
25 |     using UInt32      = Packet<uint32_t>;
26 |     using UInt32Mask  = mask_t<UInt32>;
27 |     using RNG         = PCG32<UInt32>;
28 |     using Float32     = RNG::Float32;
29 |     using UInt64      = RNG::UInt64;
30 | 
31 |     /* Histogram layout: 31 bins covering the interval [-4, 4] */
32 |     const float min_value = -4;
33 |     const float max_value =  4;
34 |     const uint32_t bin_count = 31;
35 |     uint32_t bins[bin_count] { };
36 | 
37 |     /* Sixteen lanes in total, processed UInt32::Size lanes at a time */
38 |     for (size_t batch = 0; batch < 16 / UInt32::Size; ++batch) {
39 |         RNG rng(PCG32_DEFAULT_STATE, arange<UInt64>() + (batch * UInt32::Size));
40 | 
41 |         for (size_t n = 0; n < 1024 * 1024; ++n) {
42 |             /* Uniform variate */
43 |             Float32 u = rng.next_float32();
44 | 
45 |             /* Map it to a normally distributed sample via erfinv() */
46 |             Float32 sample = float(M_SQRT2) * erfinv(2.f*u - 1.f);
47 | 
48 |             /* Bin index (the order of the operations is kept as is, since
49 |                the exact bin counts are asserted at the end) */
50 |             UInt32 bin((sample - min_value) * float(bin_count) / (max_value - min_value));
51 | 
52 |             /* Only samples that land inside the histogram are counted */
53 |             UInt32Mask in_range = bin >= zero<UInt32>() && bin < bin_count;
54 | 
55 |             /* Add one to each selected bin */
56 |             scatter_add(bins, UInt32(1), bin, in_range);
57 |         }
58 |     }
59 | 
60 |     /* Print a bar per bin (one '*' per 50000 samples) and total the counts */
61 |     uint32_t total = 0;
62 |     for (uint32_t b = 0; b < bin_count; ++b) {
63 |         std::cout << "bin[" << b << "] = ";
64 |         for (uint32_t star = 0; star < bins[b] / 50000; ++star)
65 |             std::cout << "*";
66 |         std::cout << " " << bins[b] << std::endl;
67 |         total += bins[b];
68 |     }
69 | 
70 |     /* About 743 samples are expected to fall outside of the histogram */
71 | #if defined(__aarch64__)
72 |     assert(std::abs(int(16 * 1024 * 1024 - total) - 743) <= 200);
73 | #else
74 |     assert(std::abs(int(16 * 1024 * 1024 - total) - 743) <= 3);
75 | #endif
76 |     assert(bins[1] == 2558);
77 |     assert(bins[2] == 6380);
78 | 
79 |     return 0;
80 | }
77 | 


--------------------------------------------------------------------------------
/tests/hyperbolic.cpp:
--------------------------------------------------------------------------------
  1 | #include "test.h"
  2 | 
  3 | ENOKI_TEST_FLOAT(test01_sinh) {
  4 |     /* Accuracy of sinh() relative to std::sinh on [-10, 10] */
  5 |     auto approx = [](const T &v) -> T { return sinh(v); };
  6 |     auto exact  = [](double v) { return std::sinh(v); };
  7 |     test::probe_accuracy<T>(approx, exact, Value(-10), Value(10), 8);
  8 | 
  9 |     /* An array of references must evaluate like the array it refers to */
 10 |     Array<T, 4> values((Value) 1);
 11 |     Array<T&, 4> refs(values);
 12 |     assert(sinh(values) == sinh(refs));
 13 | }
 15 | 
 16 | ENOKI_TEST_FLOAT(test02_cosh) {
 17 |     /* Accuracy of cosh() relative to std::cosh on [-10, 10] */
 18 |     auto approx = [](const T &v) -> T { return cosh(v); };
 19 |     auto exact  = [](double v) { return std::cosh(v); };
 20 |     test::probe_accuracy<T>(approx, exact, Value(-10), Value(10), 8);
 21 | 
 22 |     /* An array of references must evaluate like the array it refers to */
 23 |     Array<T, 4> values((Value) 1);
 24 |     Array<T&, 4> refs(values);
 25 |     assert(cosh(values) == cosh(refs));
 26 | }
 28 | 
 29 | ENOKI_TEST_FLOAT(test03_sincosh_sin) {
 30 |     /* Accuracy of the first component of sincosh() relative to std::sinh */
 31 |     test::probe_accuracy<T>(
 32 |         [](const T &a) -> T { return sincosh(a).first; },
 33 |         [](double a) { return std::sinh(a); },
 34 |         Value(-10), Value(10),
 35 |         8
 36 |     );
 37 | 
 38 |     /* The unused locals 's' and 'c' of the previous version were dropped */
 39 |     Array<T, 4> x((Value) 1);
 40 |     Array<T&, 4> y(x);
 41 |     auto result = sincosh(y);
 42 | 
 43 |     /* sincosh() on an array of references vs. separate sinh()/cosh() calls:
 44 |        exact equality everywhere except on Windows, where a 1e-6 absolute
 45 |        tolerance is applied instead */
 46 | #if !defined(_WIN32)
 47 |     assert(result.first == sinh(y) && result.second == cosh(y));
 48 | #else
 49 |     assert(all_nested(abs(result.first - sinh(y)) < T(1e-6f)) &&
 50 |            all_nested(abs(result.second - cosh(y)) < T(1e-6f)));
 51 | #endif
 52 | }
 47 | 
 48 | ENOKI_TEST_FLOAT(test04_sincosh_cos) {
 49 |     /* Accuracy of the second component of sincosh() relative to std::cosh */
 50 |     auto approx = [](const T &v) -> T { return sincosh(v).second; };
 51 |     auto exact  = [](double v) { return std::cosh(v); };
 52 |     test::probe_accuracy<T>(approx, exact, Value(-10), Value(10), 8);
 53 | }
 56 | 
 57 | ENOKI_TEST_FLOAT(test05_tanh) {
 58 |     /* Accuracy of tanh() relative to std::tanh on [-10, 10] */
 59 |     auto approx = [](const T &v) -> T { return tanh(v); };
 60 |     auto exact  = [](double v) { return std::tanh(v); };
 61 |     test::probe_accuracy<T>(approx, exact, Value(-10), Value(10), 7);
 62 | 
 63 |     /* An array of references must evaluate like the array it refers to */
 64 |     Array<T, 4> values((Value) 1);
 65 |     Array<T&, 4> refs(values);
 66 |     assert(tanh(values) == tanh(refs));
 67 | }
 69 | 
 70 | ENOKI_TEST_FLOAT(test06_csch) {
 71 |     /* Accuracy of csch() relative to 1 / std::sinh on [-10, 10] */
 72 |     auto approx = [](const T &v) -> T { return csch(v); };
 73 |     auto exact  = [](double v) { return 1/std::sinh(v); };
 74 |     test::probe_accuracy<T>(approx, exact, Value(-10), Value(10), 8);
 75 | }
 78 | 
 79 | ENOKI_TEST_FLOAT(test07_sech) {
 80 |     /* Accuracy of sech() relative to 1 / std::cosh on [-10, 10] */
 81 |     auto approx = [](const T &v) -> T { return sech(v); };
 82 |     auto exact  = [](double v) { return 1/std::cosh(v); };
 83 |     test::probe_accuracy<T>(approx, exact, Value(-10), Value(10), 9);
 84 | }
 87 | 
 88 | ENOKI_TEST_FLOAT(test08_coth) {
 89 |     /* Accuracy of coth() relative to 1 / std::tanh on [-10, 10] */
 90 |     auto approx = [](const T &v) -> T { return coth(v); };
 91 |     auto exact  = [](double v) { return 1/std::tanh(v); };
 92 |     test::probe_accuracy<T>(approx, exact, Value(-10), Value(10), 8);
 93 | }
 96 | 
 97 | ENOKI_TEST_FLOAT(test09_asinh) {
 98 |     /* Accuracy of asinh() relative to std::asinh on [-30, 30] */
 99 |     auto approx = [](const T &v) -> T { return asinh(v); };
100 |     auto exact  = [](double v) { return std::asinh(v); };
101 |     test::probe_accuracy<T>(approx, exact, Value(-30), Value(30), 3);
102 | 
103 |     /* An array of references must evaluate like the array it refers to */
104 |     Array<T, 4> values((Value) 2);
105 |     Array<T&, 4> refs(values);
106 |     assert(asinh(values) == asinh(refs));
107 | }
108 | 
109 | ENOKI_TEST_FLOAT(test11_acosh) {
110 |     /// Skip for KNL, Clang 7 generates an unsupported SKX+ instruction :(
111 |     if (Size == 2 && has_avx512er)
112 |         return;
113 | 
114 |     /* Accuracy of acosh() relative to std::acosh on [1, 10] */
115 |     auto approx = [](const T &v) -> T { return acosh(v); };
116 |     auto exact  = [](double v) { return std::acosh(v); };
117 |     test::probe_accuracy<T>(approx, exact, Value(1), Value(10), 5);
118 | 
119 |     /* An array of references must evaluate like the array it refers to */
120 |     Array<T, 4> values((Value) 2);
121 |     Array<T&, 4> refs(values);
122 |     assert(acosh(values) == acosh(refs));
123 | }
122 | 
123 | ENOKI_TEST_FLOAT(test12_atanh) {
124 |     /* Accuracy of atanh() relative to std::atanh on [-0.999, 0.999] */
125 |     auto approx = [](const T &v) -> T { return atanh(v); };
126 |     auto exact  = [](double v) { return std::atanh(v); };
127 |     test::probe_accuracy<T>(approx, exact, Value(-1 + 0.001), Value(1 - 0.001), 3);
128 | 
129 |     /* An array of references must evaluate like the array it refers to */
130 |     Array<T, 4> values((Value) 0.5);
131 |     Array<T&, 4> refs(values);
132 |     assert(atanh(values) == atanh(refs));
133 | }
134 | 
135 | 


--------------------------------------------------------------------------------
/tests/idiv.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     tests/idiv.cpp -- tests integer division by constants
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 | 
 10 |     All rights reserved. Use of this source code is governed by a BSD-style
 11 |     license that can be found in the LICENSE file.
 12 | */
 13 | 
 14 | #include "test.h"
 15 | #include <random>
 16 | 
 17 | #define ITERATIONS 1000000
 18 | 
 19 | ENOKI_TEST(test01_idiv_u64) {
 20 |     /* divisor<uint64_t> vs. the built-in '/' operator */
 21 |     std::mt19937_64 rng;
 22 | 
 23 |     for (uint64_t small = 2; small < ITERATIONS; ++small) {
 24 |         /* The divisor is drawn first, then the dividend */
 25 |         uint64_t den = (uint64_t) rng();
 26 |         uint64_t num = (uint64_t) rng();
 27 |         uint64_t expected = num / den;
 28 | 
 29 |         /* Case 1: random divisor */
 30 |         divisor<uint64_t> by_random(den);
 31 |         uint64_t actual = by_random(num);
 32 |         if (actual != expected)
 33 |             std::cout << num << " / " << den << " = " << actual << " vs " << expected
 34 |                       << std::endl;
 35 |         assert(actual == expected);
 36 | 
 37 |         /* Case 2: the loop counter as divisor */
 38 |         divisor<uint64_t> by_counter(small);
 39 |         actual = by_counter(num);
 40 |         expected = num / small;
 41 |         if (actual != expected)
 42 |             std::cout << num << " / " << small << " = " << actual << " vs " << expected
 43 |                       << std::endl;
 44 |         assert(actual == expected);
 45 |     }
 46 | }
 41 | 
 42 | ENOKI_TEST(test02_idiv_u32) {
 43 |     /* divisor<uint32_t> vs. the built-in '/' operator */
 44 |     std::mt19937 rng;
 45 | 
 46 |     for (uint32_t small = 2; small < ITERATIONS; ++small) {
 47 |         /* The divisor is drawn first, then the dividend */
 48 |         uint32_t den = (uint32_t) rng();
 49 |         uint32_t num = (uint32_t) rng();
 50 |         uint32_t expected = num / den;
 51 | 
 52 |         /* Case 1: random divisor */
 53 |         divisor<uint32_t> by_random(den);
 54 |         uint32_t actual = by_random(num);
 55 |         if (actual != expected)
 56 |             std::cout << num << " / " << den << " = " << actual << " vs " << expected
 57 |                       << std::endl;
 58 |         assert(actual == expected);
 59 | 
 60 |         /* Case 2: the loop counter as divisor */
 61 |         divisor<uint32_t> by_counter(small);
 62 |         actual = by_counter(num);
 63 |         expected = num / small;
 64 |         if (actual != expected)
 65 |             std::cout << num << " / " << small << " = " << actual << " vs " << expected
 66 |                       << std::endl;
 67 |         assert(actual == expected);
 68 |     }
 69 | }
 64 | 
 65 | ENOKI_TEST(test03_idiv_s64) {
 66 |     /* divisor<int64_t> vs. the built-in '/' operator for a random divisor
 67 |        and for the loop counter with both signs */
 68 |     std::mt19937_64 mt;
 69 | 
 70 |     for (uint64_t i = 2; i < ITERATIONS; ++i) {
 71 |         int64_t x = (int64_t) mt(), y = (int64_t) mt(), z = y / x;
 72 | 
 73 |         /* Case 1: random divisor */
 74 |         divisor<int64_t> precomp(x);
 75 |         int64_t q = precomp(y);
 76 |         if (q != z)
 77 |             std::cout << y << " / " << x << " = " << q << " vs " << z
 78 |                       << std::endl;
 79 |         assert(q == z);
 80 | 
 81 |         /* Case 2: positive loop counter */
 82 |         divisor<int64_t> precomp2((int64_t) i);
 83 |         q = precomp2(y);
 84 |         z = y / (int64_t) i;
 85 |         if (q != z)
 86 |             std::cout << y << " / " << i << " = " << q << " vs " << z
 87 |                       << std::endl;
 88 |         assert(q == z);
 89 | 
 90 |         /* Case 3: negated loop counter. The diagnostic now prints the
 91 |            divisor that was actually used (-i) rather than i. */
 92 |         divisor<int64_t> precomp3(-(int64_t) i);
 93 |         q = precomp3(y);
 94 |         z = y / -(int64_t) i;
 95 |         if (q != z)
 96 |             std::cout << y << " / " << (-(int64_t) i) << " = " << q << " vs " << z
 97 |                       << std::endl;
 98 |         assert(q == z);
 99 |     }
100 | }
 95 | 
 96 | ENOKI_TEST(test03_idiv_s32) {
 97 |     /* divisor<int32_t> vs. the built-in '/' operator for a random divisor
 98 |        and for the loop counter with both signs */
 99 |     std::mt19937 mt;
100 | 
101 |     for (uint32_t i = 2; i < ITERATIONS; ++i) {
102 |         int32_t x = (int32_t) mt(), y = (int32_t) mt(), z = y / x;
103 | 
104 |         /* Case 1: random divisor */
105 |         divisor<int32_t> precomp(x);
106 |         int32_t q = precomp(y);
107 |         if (q != z)
108 |             std::cout << y << " / " << x << " = " << q << " vs " << z
109 |                       << std::endl;
110 |         assert(q == z);
111 | 
112 |         /* Case 2: positive loop counter */
113 |         divisor<int32_t> precomp2((int32_t) i);
114 |         q = precomp2(y);
115 |         z = y / (int32_t) i;
116 |         if (q != z)
117 |             std::cout << y << " / " << i << " = " << q << " vs " << z
118 |                       << std::endl;
119 |         assert(q == z);
120 | 
121 |         /* Case 3: negated loop counter. The diagnostic now prints the
122 |            divisor that was actually used (-i) rather than i. */
123 |         divisor<int32_t> precomp3(-(int32_t) i);
124 |         q = precomp3(y);
125 |         z = y / -(int32_t) i;
126 |         if (q != z)
127 |             std::cout << y << " / " << (-(int32_t) i) << " = " << q << " vs " << z
128 |                       << std::endl;
129 |         assert(q == z);
130 |     }
131 | }
126 | 
127 | ENOKI_TEST_INT(test04_idiv_vector) {
128 |     /* Array '/' and '%' by a scalar vs. the scalar operators (first lane) */
129 |     std::mt19937_64 rng;
130 |     for (Value small = 2; small < 1000; ++small) {
131 |         /* The random divisor is drawn first, then the dividend */
132 |         Value den = (Value) rng();
133 |         Value num = (Value) rng();
134 | 
135 |         assert((T(num) / den)[0] == num / den);
136 |         assert((T(num) / small)[0] == num / small);
137 |         assert((T(num) % den)[0] == num % den);
138 |         assert((T(num) % small)[0] == num % small);
139 |     }
140 | }
137 | 


--------------------------------------------------------------------------------
/tests/morton.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     tests/morton.cpp -- tests Morton/Z-order curve encoding and decoding
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 | 
 10 |     All rights reserved. Use of this source code is governed by a BSD-style
 11 |     license that can be found in the LICENSE file.
 12 | */
 13 | 
 14 | #include "test.h"
 15 | #include <enoki/morton.h>
 16 | 
 17 | ENOKI_TEST(test01_morton_u32_2d_scalar) {
 18 |     /* Encode/decode round trip: 2D coordinate, scalar uint32_t code */
 19 |     using Code  = uint32_t;
 20 |     using Coord = enoki::Array<Code, 2>;
 21 | 
 22 |     Coord input(123u, 456u);
 23 |     Code encoded = morton_encode(input);
 24 |     Coord decoded = morton_decode<Coord>(encoded);
 25 |     assert(input == decoded);
 26 | }
 27 | 
 28 | ENOKI_TEST(test02_morton_u32_2d_array) {
 29 |     /* Encode/decode round trip: 2D coordinate, array-valued uint32_t code */
 30 |     using Code  = enoki::Array<uint32_t>;
 31 |     using Coord = enoki::Array<Code, 2>;
 32 | 
 33 |     Coord input(123u, 456u);
 34 |     Code encoded = morton_encode(input);
 35 |     Coord decoded = morton_decode<Coord>(encoded);
 36 |     assert(input == decoded);
 37 | }
 38 | 
 39 | ENOKI_TEST(test03_morton_u32_3d_scalar) {
 40 |     /* Encode/decode round trip: 3D coordinate, scalar uint32_t code */
 41 |     using Code  = uint32_t;
 42 |     using Coord = enoki::Array<Code, 3>;
 43 | 
 44 |     Coord input(123u, 456u, 789u);
 45 |     Code encoded = morton_encode(input);
 46 |     Coord decoded = morton_decode<Coord>(encoded);
 47 |     assert(input == decoded);
 48 | }
 49 | 
 50 | ENOKI_TEST(test04_morton_u32_3d_array) {
 51 |     /* Encode/decode round trip: 3D coordinate, array-valued uint32_t code */
 52 |     using Code  = enoki::Array<uint32_t>;
 53 |     using Coord = enoki::Array<Code, 3>;
 54 | 
 55 |     Coord input(123u, 456u, 789u);
 56 |     Code encoded = morton_encode(input);
 57 |     Coord decoded = morton_decode<Coord>(encoded);
 58 |     assert(input == decoded);
 59 | }
 60 | 
 61 | ENOKI_TEST(test05_morton_u64_2d_scalar) {
 62 |     /* Encode/decode round trip: 2D coordinate, scalar uint64_t code */
 63 |     using Code  = uint64_t;
 64 |     using Coord = enoki::Array<Code, 2>;
 65 | 
 66 |     Coord input(123u, 456u);
 67 |     Code encoded = morton_encode(input);
 68 |     Coord decoded = morton_decode<Coord>(encoded);
 69 |     assert(input == decoded);
 70 | }
 71 | 
 72 | ENOKI_TEST(test06_morton_u64_2d_array) {
 73 |     /* Encode/decode round trip: 2D coordinate, array-valued uint64_t code */
 74 |     using Code  = enoki::Array<uint64_t>;
 75 |     using Coord = enoki::Array<Code, 2>;
 76 | 
 77 |     Coord input(123u, 456u);
 78 |     Code encoded = morton_encode(input);
 79 |     Coord decoded = morton_decode<Coord>(encoded);
 80 |     assert(input == decoded);
 81 | }
 82 | 
 83 | ENOKI_TEST(test07_morton_u64_3d_scalar) {
 84 |     /* Encode/decode round trip: 3D coordinate, scalar uint64_t code */
 85 |     using Code  = uint64_t;
 86 |     using Coord = enoki::Array<Code, 3>;
 87 | 
 88 |     Coord input(123u, 456u, 789u);
 89 |     Code encoded = morton_encode(input);
 90 |     Coord decoded = morton_decode<Coord>(encoded);
 91 |     assert(input == decoded);
 92 | }
 93 | 
 94 | ENOKI_TEST(test08_morton_u64_3d_array) {
 95 |     /* Encode/decode round trip: 3D coordinate, array-valued uint64_t code */
 96 |     using Code  = enoki::Array<uint64_t>;
 97 |     using Coord = enoki::Array<Code, 3>;
 98 | 
 99 |     Coord input(123u, 456u, 789u);
100 |     Code encoded = morton_encode(input);
101 |     Coord decoded = morton_decode<Coord>(encoded);
102 |     assert(input == decoded);
103 | }
104 | 


--------------------------------------------------------------------------------
/tests/nested.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     tests/nested.cpp -- tests nested arrays and other fancy scalar types
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 | 
 10 |     All rights reserved. Use of this source code is governed by a BSD-style
 11 |     license that can be found in the LICENSE file.
 12 | */
 13 | 
 14 | #include "test.h"
 15 | 
 16 | /* Arrays can be instantiated for all sorts of types, here std::string */
 17 | ENOKI_TEST(test01_string) {
 18 |     Array<std::string, 2> heads("Hello ", " How are ");
 19 |     Array<std::string, 2> tails("world!", "you?");
 20 | 
 21 |     /* Component access and printing */
 22 |     assert(heads.x() == "Hello ");
 23 |     assert(to_string(heads) == "[Hello ,  How are ]");
 24 | 
 25 |     /* '+' concatenates per component, hsum() joins the components */
 26 |     assert(to_string(heads + tails) == "[Hello world!,  How are you?]");
 27 |     assert(hsum(heads + tails) == "Hello world! How are you?");
 28 |     assert(hsum(heads + std::string("you!")) == "Hello you! How are you!");
 29 | }
 26 | 
 27 | ENOKI_TEST(test02_float_array) {
 28 |     /* Construction from a single value fills all components */
 29 |     Array<float, 4> ones(1.f);
 30 |     assert(to_string(ones) == "[1, 1, 1, 1]");
 31 | 
 32 |     /* Construction from one value per component */
 33 |     Array<float, 4> ramp(1.f, 2.f, 3.f, 4.f);
 34 |     assert(to_string(ramp) == "[1, 2, 3, 4]");
 35 |     assert(ramp.x() == 1.f && ramp.y() == 2.f && ramp.z() == 3.f && ramp.w() == 4.f);
 36 | 
 37 |     /* Copy construction */
 38 |     Array<float, 4> copy(ramp);
 39 |     assert(to_string(copy) == "[1, 2, 3, 4]");
 40 | 
 41 |     /* array + scalar */
 42 |     assert(to_string(copy + 1.f) == "[2, 3, 4, 5]");
 43 | 
 44 |     /* scalar + array */
 45 |     assert(to_string(1.f + copy) == "[2, 3, 4, 5]");
 46 | 
 47 |     /* array + array */
 48 |     assert(to_string(copy + copy) == "[2, 4, 6, 8]");
 49 | }
 50 | 
 51 | ENOKI_TEST(test03_floatref_array) {
 52 |     float shared = 1.f;
 53 |     Array<float, 4> storage(1.f, 2.f, 3.f, 4.f);
 54 | 
 55 |     /* All four components refer to the same float, so a write through one
 56 |        component shows up in every component */
 57 |     Array<float&, 4> same(shared, shared, shared, shared);
 58 |     assert(to_string(same) == "[1, 1, 1, 1]");
 59 |     same.x() = 2.f;
 60 |     assert(to_string(same) == "[2, 2, 2, 2]");
 61 | 
 62 |     /* Reference array on top of an existing value array */
 63 |     Array<float&, 4> view(storage);
 64 |     assert(to_string(view) == "[1, 2, 3, 4]");
 65 |     assert(to_string(same + view) == "[3, 4, 5, 6]");
 66 | 
 67 |     /* Reference array constructed from another reference array */
 68 |     Array<float&, 4> view2(view);
 69 | 
 70 |     /* Conversion back into a regular value array */
 71 |     Array<float, 4> values(view2);
 72 |     assert(to_string(values) == "[1, 2, 3, 4]");
 73 | 
 74 |     /* reference array + scalar */
 75 |     assert(to_string(view2 + 1.f) == "[2, 3, 4, 5]");
 76 | 
 77 |     /* scalar + reference array */
 78 |     assert(to_string(1.f + view2) == "[2, 3, 4, 5]");
 79 | 
 80 |     /* Mixed reference/value operands */
 81 |     assert(to_string(view2 + view2) == "[2, 4, 6, 8]");
 82 |     assert(to_string(values + view2) == "[2, 4, 6, 8]");
 83 |     assert(to_string(view2 + values) == "[2, 4, 6, 8]");
 84 | 
 85 |     /* Compound assignment: doubled, plus the earlier copy, plus one */
 86 |     view2 += view2;
 87 |     view2 += values;
 88 |     view2 += 1.f;
 89 | 
 90 |     assert(to_string(view2) == "[4, 7, 10, 13]");
 91 | }
 88 | 
 89 | ENOKI_TEST(test04_array_of_arrays) { /* Printing, arithmetic and type traits of a nested array */
 90 |     using Vector4f = Array<float, 4>;
 91 |     using Vector4fP = Array<Vector4f, 2>;
 92 | 
 93 |     Vector4f a(1, 2, 3, 4);
 94 |     Vector4f b(1, 1, 1, 1);
 95 |     Vector4fP c(a, b);
 96 | 
 97 |     assert(to_string(c)         == "[[1, 1],\n [2, 1],\n [3, 1],\n [4, 1]]"); /* each printed row pairs entry i of 'a' with entry i of 'b' */
 98 |     assert(to_string(c + c)     == "[[2, 2],\n [4, 2],\n [6, 2],\n [8, 2]]");
 99 |     assert(to_string(c + c.x()) == "[[2, 2],\n [4, 3],\n [6, 4],\n [8, 5]]"); /* c.x() is 'a'; it is added to both 'a' and 'b' */
100 |     assert(to_string(c + 1.f)   == "[[2, 2],\n [3, 2],\n [4, 2],\n [5, 2]]"); /* the scalar is added to every float */
101 |     assert(to_string(1.f + c)   == "[[2, 2],\n [3, 2],\n [4, 2],\n [5, 2]]");
102 | 
103 |     assert((std::is_same<value_t<Vector4fP>, Vector4f>::value)); /* value_t strips one level of nesting */
104 |     assert((std::is_same<scalar_t<Vector4fP>, float>::value)); /* scalar_t yields the innermost type */
105 | }
106 | 
107 | ENOKI_TEST(test05_mask_types) { /* Type traits applied to plain scalars and a 1-wide array */
108 |     assert((std::is_same<mask_t<bool>, bool>::value)); /* the mask type of bool is bool */
109 |     assert((std::is_same<value_t<float>, float>::value)); /* value_t of a non-array type is that type */
110 |     assert((std::is_same<value_t<Array<float, 1>>, float>::value));
111 | }
112 | 
113 | ENOKI_TEST(test06_nested_reductions) { /* Mask reductions over the outer dimension vs. over all entries */
114 |     using FloatP = Array<float, 16>;
115 |     using IntP = Array<int, 16>;
116 |     using Vector3fP = Array<FloatP, 3>;
117 | 
118 |     auto my_all = [](Vector3fP x) { return all(x > 4.f); }; /* non-nested variants: one result per lane (see expected strings below) */
119 |     auto my_none = [](Vector3fP x) { return none(x > 4.f); };
120 |     auto my_any = [](Vector3fP x) { return any(x > 4.f); };
121 |     auto my_count = [](Vector3fP x) { return count(x > 4.f); };
122 | 
123 |     auto my_all_nested = [](Vector3fP x) { return all_nested(x > 4.f); }; /* nested variants: a single result for the whole array */
124 |     auto my_none_nested = [](Vector3fP x) { return none_nested(x > 4.f); };
125 |     auto my_any_nested = [](Vector3fP x) { return any_nested(x > 4.f); };
126 |     auto my_count_nested = [](Vector3fP x) { return count_nested(x > 4.f); };
127 | 
128 |     auto data =
129 |         Vector3fP(arange<FloatP>() + 0.f, arange<FloatP>() + 1.f,
130 |                   arange<FloatP>() + 2.f); /* the expected strings below imply lane i holds (i, i + 1, i + 2) */
131 | 
132 |     auto to_string = [](auto value) { /* local helper; hides any outer to_string for the rest of this test */
133 |         std::ostringstream oss;
134 |         detail::print(oss, value, false, shape(value)); /* NOTE(review): meaning of the 'false' flag is defined by detail::print -- confirm there */
135 |         return oss.str();
136 |     };
137 | 
138 |     auto str = [&](auto x) { /* renders a mask as a string of 1s (set) and 0s (unset) */
139 |         return to_string(select(reinterpret_array<mask_t<IntP>>(x), IntP(1), IntP(0)));
140 |     };
141 | 
142 |     assert(str(my_all(data)) == "[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"); /* all three values exceed 4 from lane 5 on */
143 |     assert(str(my_none(data)) == "[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"); /* lanes 0..2 hold no value above 4 */
144 |     assert(str(my_any(data)) == "[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]");
145 |     assert(to_string(my_count(data)) == "[0, 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]");
146 |     assert(!my_all_nested(data));
147 |     assert(!my_none_nested(data));
148 |     assert(my_any_nested(data));
149 |     assert(my_count_nested(data) == 36); /* 11 + 12 + 13 entries above 4 in the three components */
150 | }
151 | 


--------------------------------------------------------------------------------
/tests/python/test.cpp:
--------------------------------------------------------------------------------
 1 | #include <enoki/python.h>
 2 | #include <enoki/complex.h>
 3 | #include <enoki/dynamic.h>
 4 | 
 5 | using namespace enoki;
 6 | 
 7 | Array<float, 4> a1() { /* Builds [1, 2, 3, 4], prints it to stdout and returns it */
 8 |     Array<float, 4> result(1, 2, 3, 4);
 9 |     std::cout << result << std::endl;
10 |     return result;
11 | }
12 | 
13 | Array<Packet<float>, 4> a2() { /* Packet-valued counterpart of a1(): built from (1, 2, 3, 4), printed, returned */
14 |     Array<Packet<float>, 4> result(1, 2, 3, 4);
15 |     std::cout << result << std::endl;
16 |     return result;
17 | }
18 | 
19 | void a3(Array<float, 4> value) { /* Prints the received array to stdout; returns nothing */
20 |     std::cout << value << std::endl;
21 | }
22 | 
23 | void a4(Array<Packet<float>, 4> value) { /* Prints the received packet array to stdout; returns nothing */
24 |     std::cout << value << std::endl;
25 | }
26 | 
27 | Complex<float> c1() { /* Builds Complex<float>(1, 2), prints it to stdout and returns it */
28 |     Complex<float> result(1, 2);
29 |     std::cout << result << std::endl;
30 |     return result;
31 | }
32 | 
33 | Complex<Packet<float>> c2() { /* Packet-valued counterpart of c1(): built from (1, 2), printed, returned */
34 |     Complex<Packet<float>> result(1, 2);
35 |     std::cout << result << std::endl;
36 |     return result;
37 | }
38 | 
39 | Complex<Array<Packet<float>, 4>> c2_b() { /* Complex number over a nested packet array, built from (1.f, 2.f), printed, returned */
40 |     Complex<Array<Packet<float>, 4>> result(1.f, 2.f);
41 |     std::cout << result << std::endl;
42 |     return result;
43 | }
44 | 
45 | 
46 | void c3(Complex<float> value) { /* Prints the received complex number to stdout; returns nothing */
47 |     std::cout << value << std::endl;
48 | }
49 | 
50 | void c4(Complex<Packet<float>> value) { /* Prints the received packet-valued complex number to stdout; returns nothing */
51 |     std::cout << value << std::endl;
52 | }
53 | 
54 | template <typename Float> Float atan(Float x) { /* Forwards to enoki::atan; the module below takes the address of atan<FloatP> */
55 |     return enoki::atan(x);
56 | }
57 | 
58 | PYBIND11_MODULE(test, m) { /* Python module 'test': exposes the helper functions defined above under their C++ names */
59 |     /* Real-valued arrays: a1/a2 return a value, a3/a4 accept one */
60 |     m.def("a1", &a1);
61 |     m.def("a2", &a2);
62 |     m.def("a3", &a3);
63 |     m.def("a4", &a4);
64 | 
65 |     /* Complex numbers: c1/c2/c2_b return a value, c3/c4 accept one */
66 |     m.def("c1", &c1);
67 |     m.def("c2", &c2);
68 |     m.def("c2_b", &c2_b);
69 |     m.def("c3", &c3);
70 |     m.def("c4", &c4);
71 | 
72 |     using FloatP = Packet<float>;
73 |     m.def("atan", enoki::vectorize_wrapper(&atan<FloatP>)); /* NOTE(review): presumably lets Python pass dynamically sized input to the packet kernel -- confirm in enoki/python.h */
74 | }
75 | 
76 | 


--------------------------------------------------------------------------------
/tests/python/test_pytorch.py:
--------------------------------------------------------------------------------
 1 | import enoki as ek
 2 | import numpy as np
 3 | import pytest
 4 | import torch
 5 | 
 6 | class EnokiAtan2(torch.autograd.Function):
 7 |     """PyTorch function example from the documentation."""
 8 |     @staticmethod
 9 |     def forward(ctx, arg1, arg2):  # returns atan2(arg1, arg2), computed by Enoki, as a torch tensor
10 |         ctx.in1 = ek.FloatD(arg1)  # inputs are wrapped as ek.FloatD and kept on ctx for backward()
11 |         ctx.in2 = ek.FloatD(arg2)
12 |         ek.set_requires_gradient(ctx.in1, arg1.requires_grad)  # mirror each tensor's requires_grad flag
13 |         ek.set_requires_gradient(ctx.in2, arg2.requires_grad)
14 |         ctx.out = ek.atan2(ctx.in1, ctx.in2)  # the output is kept as well; backward() seeds its gradient
15 |         out_torch = ctx.out.torch()
16 |         ek.cuda_malloc_trim()  # NOTE(review): presumably releases cached device memory -- confirm
17 |         return out_torch
18 | 
19 |     @staticmethod
20 |     def backward(ctx, grad_out):  # returns a 2-tuple: one gradient tensor (or None) per forward() input
21 |         ek.set_gradient(ctx.out, ek.FloatC(grad_out))  # the incoming gradient is passed as ek.FloatC
22 |         ek.FloatD.backward()
23 |         result = (ek.gradient(ctx.in1).torch()
24 |                   if ek.requires_gradient(ctx.in1) else None,  # None for an input that does not require a gradient
25 |                   ek.gradient(ctx.in2).torch()
26 |                   if ek.requires_gradient(ctx.in2) else None)
27 |         del ctx.out, ctx.in1, ctx.in2  # drop the Enoki arrays stored by forward()
28 |         ek.cuda_malloc_trim()
29 |         return result
30 | 
31 | 
32 | def test01_set_gradient():
33 |     a = ek.FloatD(42, 10)
34 |     ek.set_requires_gradient(a)
35 | 
36 |     with pytest.raises(TypeError):
37 |         grad = ek.FloatD(-1, 10)
38 |         ek.set_gradient(a, grad)
39 | 
40 |     grad = ek.FloatC(-1, 10)
41 |     ek.set_gradient(a, grad)
42 |     assert np.allclose(grad.numpy(), ek.gradient(a).numpy())
43 | 
44 |     # Note: if `backward` is not called here, test03 segfaults later.
45 |     # TODO: we should not need this, there's most likely some missing cleanup when `a` is destructed
46 |     ek.FloatD.backward()
47 |     del a, grad
48 | 
49 | 
50 | def test02_array_to_torch():
51 |     a = ek.FloatD(42, 10)
52 |     a_torch = a.torch()
53 |     assert isinstance(a_torch, torch.Tensor)
54 |     a_torch += 8
55 |     a_np = a_torch.cpu().numpy()
56 |     assert isinstance(a_np, np.ndarray)
57 |     assert np.allclose(a_np, 50)
58 | 
59 | 
60 | def test03_pytorch_function():  # gradients of atan2(y, x) at y = 1, x = 2, propagated through EnokiAtan2
61 |     enoki_atan2 = EnokiAtan2.apply
62 | 
63 |     y = torch.tensor(1.0, device='cuda')
64 |     x = torch.tensor(2.0, device='cuda')
65 |     y.requires_grad_()
66 |     x.requires_grad_()
67 | 
68 |     o = enoki_atan2(y, x)
69 |     o.backward()
70 |     assert np.allclose(y.grad.cpu(), 0.4)  # d/dy atan2(y, x) = x / (x^2 + y^2) = 2 / 5
71 |     assert np.allclose(x.grad.cpu(), -0.2)  # d/dx atan2(y, x) = -y / (x^2 + y^2) = -1 / 5
72 | 


--------------------------------------------------------------------------------
/tests/ray.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     tests/ray.h -- showcases how to extend Enoki vectorization to custom
 3 |     data types
 4 | 
 5 |     Enoki is a C++ template library that enables transparent vectorization
 6 |     of numerical kernels using SIMD instruction sets available on current
 7 |     processor architectures.
 8 | 
 9 |     Copyright (c) 2018 Wenzel Jakob <wenzel.jakob@epfl.ch>
10 | 
11 |     All rights reserved. Use of this source code is governed by a BSD-style
12 |     license that can be found in the LICENSE file.
13 | */
14 | 
15 | #include <enoki/array.h>
16 | 
17 | using namespace enoki;
18 | 
19 | /**
20 |  * Generic 3D ray class: can represent either a single ray, a static ray
21 |  * bundle, or a dynamic heap-allocated bundle of rays
22 |  */
23 | template <typename Vector_> struct Ray {
24 |     using Vector = Vector_;
25 |     using Value = value_t<Vector>; // type of the parameter 't' taken by operator()
26 | 
27 |     Vector o; // point returned by operator() for t = 0
28 |     Vector d; // offset added to 'o' per unit of 't'
29 | 
30 |     /// Compute the position o + t*d along the ray
31 |     Vector operator()(const Value &t) const { return o + t*d; }
32 | 
33 |     ENOKI_STRUCT(Ray, o, d) // NOTE(review): presumably generates the constructors/assignment for members o and d -- see the macro definition
34 | };
35 | 
36 | ENOKI_STRUCT_SUPPORT(Ray, o, d) // NOTE(review): presumably registers Ray with Enoki's struct machinery (e.g. for vectorize()) -- see the macro definition
37 | 
38 | 


--------------------------------------------------------------------------------
/tests/sh.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     tests/sh.cpp -- tests spherical harmonics evaluation code
 3 | 
 4 |     Enoki is a C++ template library that enables transparent vectorization
 5 |     of numerical kernels using SIMD instruction sets available on current
 6 |     processor architectures.
 7 | 
 8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
 9 | 
10 |     All rights reserved. Use of this source code is governed by a BSD-style
11 |     license that can be found in the LICENSE file.
12 | */
13 | 
14 | #include "test.h"
15 | #include <enoki/sh.h>
16 | 
17 | ENOKI_TEST(test00_sh) { /* Compares sh_eval() for orders 0..9 against precomputed reference coefficients */
18 |     using T = Array<float>;
19 | 
20 |     /* Reference values from Mathematica: (9 + 1)^2 = 100 coefficients */
21 |     const float ref[100] = {
22 |         0.2820947918f,   -0.2611690283f,    0.3917535424f,  -0.1305845141f,
23 |         0.1560783472f,   -0.4682350417f,    0.2928635963f,  -0.2341175208f,
24 |        -0.1170587604f,    0.02252796895f,   0.3310921732f,  -0.5409527810f,
25 |         0.06411572364f,  -0.2704763905f,   -0.2483191299f,   0.1239038292f,
26 |        -0.07663294720f,   0.05418767663f,   0.4730873479f,  -0.4301013494f,
27 |        -0.1926808176f,   -0.2150506747f,   -0.3548155109f,   0.2980322214f,
28 |        -0.02235127627f,   0.03401106316f,  -0.2037835426f,   0.08939333855f,
29 |         0.5098360937f,   -0.1642890433f,   -0.3717265730f,  -0.08214452164f,
30 |        -0.3823770702f,    0.4916633621f,   -0.05943686658f, -0.03669614710f,
31 |         0.01095484717f,   0.09832164158f,  -0.3751138475f,   0.1148149412f,
32 |         0.4035308753f,    0.1617920839f,   -0.4010373398f,   0.08089604195f,
33 |        -0.3026481565f,    0.6314821768f,   -0.1094082055f,  -0.1060838764f,
34 |         0.02912993451f,  -0.01914767451f,   0.03401803232f,  0.1978196393f,
35 |        -0.5458487747f,    0.1174915463f,    0.1714596820f,   0.4252852218f,
36 |        -0.2693765472f,    0.2126426109f,   -0.1285947615f,   0.6462035049f,
37 |        -0.1592058926f,   -0.2134369792f,    0.09045704049f, -0.001997419283f,
38 |         0.006375451838f, -0.06329912897f,   0.07377498039f,  0.3173673505f,
39 |        -0.6530654560f,    0.09087000611f,  -0.1162228838f,   0.5245254059f,
40 |        -0.02984068370f,   0.2622627030f,    0.08716716288f,  0.4997850336f,
41 |        -0.1904774247f,   -0.3424226677f,    0.1961743797f,  -0.006603146547f,
42 |        -0.009999592615f,  0.003740870431f,  0.02228152988f, -0.1464888644f,
43 |         0.1282747834f,    0.4253232566f,   -0.6393164269f,   0.03874458847f,
44 |        -0.3611982142f,    0.4202921001f,    0.2217097319f,   0.2101460500f,
45 |         0.2708986607f,    0.2130952366f,   -0.1864672912f,  -0.4589014084f,
46 |         0.3410943104f,   -0.01528121247f,  -0.03494751858f,  0.006246941013f
47 |     };
48 | 
49 |     using Vector3f = Array<T, 3>;
50 |     using Scalar = scalar_t<T>;
51 | 
52 |     Vector3f d = normalize(Vector3f(Scalar(1), Scalar(2), Scalar(3))); /* evaluation direction: (1, 2, 3) normalized */
53 | 
54 |     T out[100]; /* sized for the highest order tested below: (9 + 1)^2 entries */
55 |     for (size_t i = 0; i < 10; ++i) {
56 |         sh_eval(d, i, out);
57 |         for (size_t j = 0; j<(i+1)*(i+1); ++j) { /* only the first (i + 1)^2 outputs are compared for order i */
58 |             assert(std::abs(out[j].coeff(0) - ref[j]) < 5e-6f); /* only entry 0 of each packet is inspected */
59 |         }
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/tests/sphere.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     tests/sphere.cpp -- a simple sphere ray tracer
  3 | 
  4 |     Enoki is a C++ template library that enables transparent vectorization
  5 |     of numerical kernels using SIMD instruction sets available on current
  6 |     processor architectures.
  7 | 
  8 |     Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
  9 | 
 10 |     All rights reserved. Use of this source code is governed by a BSD-style
 11 |     license that can be found in the LICENSE file.
 12 | */
 13 | 
 14 | #if defined(_MSC_VER)
 15 | #  pragma warning(disable: 4723) /// potential divide by 0
 16 | #endif
 17 | 
 18 | #include <enoki/dynamic.h>
 19 | #include <fstream>
 20 | #include "ray.h"
 21 | #include <chrono>
 22 | 
 23 | // -----------------------------------------------------------------------
 24 | //! @{ \name Convenient type aliases
 25 | // -----------------------------------------------------------------------
 26 | 
 27 | /* Packets of floats and dynamic arrays built from such packets */
 28 | using FloatP    = Array<float>;
 29 | using FloatX    = DynamicArray<FloatP>;
 30 | 
 31 | /* 2D vectors and static/dynamic packets of 2D vectors */
 32 | using Vector2f  = Array<float, 2>;
 33 | using Vector2fP = Array<FloatP, 2>;
 34 | using Vector2fX = Array<FloatX, 2>;
 35 | 
 36 | /* 3D vectors and static/dynamic packets of 3D vectors */
 37 | using Vector3f  = Array<float, 3>;
 38 | using Vector3fP = Array<FloatP, 3>;
 39 | using Vector3fX = Array<FloatX, 3>;
 40 | 
 41 | /* Rays and static/dynamic packets of rays */
 42 | using Ray3f     = Ray<Vector3f>;
 43 | using Ray3fP    = Ray<Vector3fP>;
 44 | using Ray3fX    = Ray<Vector3fX>;
 45 | 
 46 | /* Aliases that derive a 3D vector / ray type sharing the value type of another array type T */
 47 | template <typename T> using vector3f_t = Array<value_t<T>, 3>;
 48 | template <typename T> using ray3f_t    = Ray<vector3f_t<T>>;
 49 | 
 50 | //! @}
 51 | // -----------------------------------------------------------------------
 52 | 
 53 | // -----------------------------------------------------------------------
 54 | //! @{ \name Computational kernels of the ray tracer
 55 | // -----------------------------------------------------------------------
 56 | 
 57 | /// "Sensor": build the ray(s) for the given X and Y coordinate(s) 'p'
 58 | template <typename Vector2f> ray3f_t<Vector2f> make_rays(const Vector2f &p) {
 59 |     using Vector3f = vector3f_t<Vector2f>; // 3D vector type with the same value type as the 2D input
 60 |     using Ray3f = ray3f_t<Vector2f>;
 61 | 
 62 |     return Ray3f(Vector3f(p.x(), p.y(), -1.f), // first argument: (x, y, -1)
 63 |                  Vector3f(0.f, 0.f, 1.f)); // second argument: constant vector (0, 0, 1)
 64 | }
 65 | 
 66 | /// "Shape": intersect against the unit sphere centered at the origin; yields a point on the ray, or 0 on a miss
 67 | template <typename Ray> ENOKI_INLINE typename Ray::Vector intersect_rays(const Ray &r) {
 68 |     /* Coefficients of the quadratic a*t^2 + b*t + c = 0 obtained from |o + t*d|^2 = 1 */
 69 |     auto a = dot(r.d, r.d);
 70 |     auto b = 2.f * dot(r.o, r.d);
 71 |     auto c = dot(r.o, r.o) - 1.f;
 72 | 
 73 |     /* Solve quadratic equation */
 74 |     auto discrim = b*b - 4.f*a*c;
 75 |     auto t = (-b + sqrt(discrim)) / (2.f * a); // uses the '+' root only; entries with discrim < 0 are discarded below
 76 | 
 77 |     return select(discrim >= 0.f, r(t), 0.f); // r(t) where a real solution exists, 0 elsewhere
 78 | }
 79 | 
 80 | /// "Shader": directional illumination -- 0.2 plus 90 times the clamped dot product of 'n' with (-1, -1, 2)
 81 | template <typename Vector3f> ENOKI_INLINE typename Vector3f::Value shade_hits(Vector3f n) {
 82 |     return 0.2f + max(dot(n, Vector3f(-1.f, -1.f, 2.f)), 0.f) * 90.f; // max(.., 0) clamps negative dot products to zero
 83 | }
 84 | 
 85 | /// All three kernels combined into one: make_rays -> intersect_rays -> shade_hits
 86 | template <typename Vector2> ENOKI_INLINE typename Vector2::Value combined(Vector2 n) {
 87 |     return shade_hits(intersect_rays(make_rays(n))); // 'n' is the 2D coordinate passed on to make_rays()
 88 | }
 89 | 
 90 | //! @}
 91 | // -----------------------------------------------------------------------
 92 | 
 93 | // -----------------------------------------------------------------------
 94 | //! @{ \name Wrappers which execute the above kernels for dynamic arrays
 95 | // -----------------------------------------------------------------------
 96 | 
 97 | Ray3fX make_rays_dynamic(const Vector2fX &p) { // dynamic-array front end for make_rays<Vector2fP>
 98 |     return vectorize([](auto &&p) { return make_rays<Vector2fP>(p); }, p); // NOTE(review): vectorize() presumably invokes the lambda once per packet of 'p' -- confirm in enoki/dynamic.h
 99 | }
100 | 
101 | Vector3fX intersect_rays_dynamic(const Ray3fX &r) { // dynamic-array front end for intersect_rays<Ray3fP>
102 |     return vectorize([](auto &&r) { return intersect_rays<Ray3fP>(r); }, r); // NOTE(review): vectorize() presumably invokes the lambda once per packet of 'r' -- confirm in enoki/dynamic.h
103 | }
104 | 
105 | FloatX shade_hits_dynamic(const Vector3fX &n) { // dynamic-array front end for shade_hits<Vector3fP>
106 |     return vectorize([](auto &&n) { return shade_hits<Vector3fP>(n); }, n); // NOTE(review): vectorize() presumably invokes the lambda once per packet of 'n' -- confirm in enoki/dynamic.h
107 | }
108 | 
109 | FloatX combined_dynamic(const Vector2fX &p) { // dynamic-array front end for combined<Vector2fP>
110 |     return vectorize([](auto &&p) { return combined<Vector2fP>(p); }, p); // NOTE(review): vectorize() presumably invokes the lambda once per packet of 'p' -- confirm in enoki/dynamic.h
111 | }
112 | 
113 | //! @}
114 | // -----------------------------------------------------------------------
115 | 
116 | auto clk() { return std::chrono::high_resolution_clock::now(); } // current time point of the high-resolution clock
117 | 
118 | template <typename T> float clkdiff(T a, T b) { // elapsed time from 'a' to 'b' in milliseconds
119 |     return std::chrono::duration<float>(b - a).count() * 1000; // duration<float> counts seconds; * 1000 converts to ms
120 | }
121 | 
122 | void write_image(const std::string &filename, const FloatX &image) { // writes 'image' as a plain-text PPM ("P3") file
123 |     std::ofstream os(filename); // NOTE(review): a failure to open the file is not checked
124 |     os << "P3\n1024 1024\n255\n"; // header hard-codes 1024 x 1024 pixels and a maximum value of 255, independent of 'image'
125 |     for (float v : image)
126 |         os << (int) v << " " << (int) v << " " << (int) v << "\n"; // the same truncated value is written for R, G and B
127 | }
128 | 
129 | int main(int /* argc */, char ** /* argv */) { // times both pipelines and writes one PPM image per run
130 |     auto idx = linspace<FloatX>(-1.2f, 1.2f, 1024);
131 |     auto grid = meshgrid(idx, idx); // NOTE(review): presumably all 1024 x 1024 (x, y) pairs, matching the size hard-coded in write_image() -- confirm
132 | 
133 |     /* benchmark1: three separate kernels with intermediate results stored in between */ {
134 |         auto time_start = clk();
135 |         Ray3fX    rays  = make_rays_dynamic(grid);
136 |         Vector3fX hits  = intersect_rays_dynamic(rays);
137 |         FloatX image = shade_hits_dynamic(hits);
138 |         auto time_end = clk();
139 |         std::cerr << "Separate kernels: " << clkdiff(time_start, time_end) << " ms " << std::endl;
140 |         write_image("sphere1.ppm", image); // written after time_end, so file output is not part of the timing
141 |     }
142 | 
143 |     /* benchmark2: the same computation as a single combined kernel */ {
144 |         auto time_start = clk();
145 |         FloatX image = combined_dynamic(grid);
146 |         auto time_end = clk();
147 |         std::cerr << "Combined kernels: " << clkdiff(time_start, time_end) << " ms " << std::endl;
148 |         write_image("sphere2.ppm", image);
149 |     }
150 | 
151 |     return 0;
152 | }
153 | 


--------------------------------------------------------------------------------
/tests/trig.cpp:
--------------------------------------------------------------------------------
  1 | #include "test.h"
  2 | 
  3 | ENOKI_TEST_FLOAT(test01_sin) { /* sin() against std::sin, plus a value-array vs. reference-array comparison */
  4 |     test::probe_accuracy<T>(
  5 |         [](const T &a) -> T { return sin(a); },
  6 |         [](double a) { return std::sin(a); },
  7 |         Value(-8192), Value(8192), /* NOTE(review): presumably the probed input range -- confirm in test.h */
  8 |         19 /* NOTE(review): presumably the tolerated error bound -- confirm in test.h */
  9 |     );
 10 | 
 11 |     Array<T, 4> x((Value) M_PI);
 12 |     Array<T&, 4> y(x); /* array of references built from x */
 13 |     assert(sin(x) == sin(y)); /* both forms must produce the same result */
 14 | }
 15 | 
 16 | ENOKI_TEST_FLOAT(test02_cos) { /* cos() against std::cos, plus a value-array vs. reference-array comparison */
 17 |     test::probe_accuracy<T>(
 18 |         [](const T &a) -> T { return cos(a); },
 19 |         [](double a) { return std::cos(a); },
 20 |         Value(-8192), Value(8192), /* NOTE(review): presumably the probed input range -- confirm in test.h */
 21 |         47 /* NOTE(review): presumably the tolerated error bound -- confirm in test.h */
 22 |     );
 23 | 
 24 |     Array<T, 4> x((Value) M_PI);
 25 |     Array<T&, 4> y(x); /* array of references built from x */
 26 |     assert(cos(x) == cos(y)); /* both forms must produce the same result */
 27 | }
 28 | 
 29 | ENOKI_TEST_FLOAT(test03_sincos_sin) { /* first element of sincos() against std::sin */
 30 |     test::probe_accuracy<T>(
 31 |         [](const T &a) -> T { return sincos(a).first; },
 32 |         [](double a) { return std::sin(a); },
 33 |         Value(-8192), Value(8192), /* same arguments as in test01_sin */
 34 |         19
 35 |     );
 36 | }
 37 | 
 38 | ENOKI_TEST_FLOAT(test04_sincos_cos) { /* second element of sincos() against std::cos, plus a reference-array check */
 39 |     test::probe_accuracy<T>(
 40 |         [](const T &a) -> T { return sincos(a).second; },
 41 |         [](double a) { return std::cos(a); },
 42 |         Value(-8192), Value(8192), /* same arguments as in test02_cos */
 43 |         47
 44 |     );
 45 | 
 46 |     Array<T, 4> x((Value) M_PI);
 47 |     Array<T&, 4> y(x); /* array of references built from x */
 48 |     auto result = sincos(y);
 49 |     assert(result.first == sin(y) && result.second == cos(y)); /* sincos() must agree with separate sin() and cos() calls */
 50 | }
 51 | 
 52 | ENOKI_TEST_FLOAT(test05_tan) { /* tan() against std::tan, plus a value-array vs. reference-array comparison */
 53 |     test::probe_accuracy<T>(
 54 |         [](const T &a) -> T { return tan(a); },
 55 |         [](double a) { return std::tan(a); },
 56 |         Value(-8192), Value(8192), /* NOTE(review): presumably the probed input range -- confirm in test.h */
 57 |         30 /* NOTE(review): presumably the tolerated error bound -- confirm in test.h */
 58 |     );
 59 | 
 60 |     Array<T, 4> x((Value) M_PI);
 61 |     Array<T&, 4> y(x); /* array of references built from x */
 62 |     assert(tan(x) == tan(y)); /* both forms must produce the same result */
 63 | }
 64 | 
 65 | ENOKI_TEST_FLOAT(test06_cot) { /* cot() against 1 / std::tan, plus a value-array vs. reference-array comparison */
 66 |     test::probe_accuracy<T>(
 67 |         [](const T &a) -> T { return cot(a); },
 68 |         [](double a) { return 1.0 / std::tan(a); },
 69 |         Value(-8192), Value(8192), /* NOTE(review): presumably the probed input range -- confirm in test.h */
 70 |         47 /* NOTE(review): presumably the tolerated error bound -- confirm in test.h */
 71 |     );
 72 | 
 73 |     Array<T, 4> x((Value) M_PI);
 74 |     Array<T&, 4> y(x); /* array of references built from x */
 75 |     assert(cot(x) == cot(y)); /* exercises cot() itself; this line previously re-tested tan() */
 76 | }
 77 | 
 78 | ENOKI_TEST_FLOAT(test07_asin) { /* asin() against std::asin, plus a value-array vs. reference-array comparison */
 79 |     test::probe_accuracy<T>(
 80 |         [](const T &a) -> T { return asin(a); },
 81 |         [](double a) { return std::asin(a); },
 82 |         Value(-1), Value(1), /* NOTE(review): presumably the probed input range -- confirm in test.h */
 83 |         61 /* NOTE(review): presumably the tolerated error bound -- confirm in test.h */
 84 |     );
 85 | 
 86 |     Array<T, 4> x((Value) 0.5);
 87 |     Array<T&, 4> y(x); /* array of references built from x */
 88 |     assert(asin(x) == asin(y)); /* both forms must produce the same result */
 89 | }
 90 | 
 91 | ENOKI_TEST_FLOAT(test08_acos) { /* acos() against std::acos, plus a value-array vs. reference-array comparison */
 92 |     test::probe_accuracy<T>(
 93 |         [](const T &a) -> T { return acos(a); },
 94 |         [](double a) { return std::acos(a); },
 95 |         Value(-1), Value(1), /* NOTE(review): presumably the probed input range -- confirm in test.h */
 96 |         4 /* NOTE(review): presumably the tolerated error bound -- confirm in test.h */
 97 |     );
 98 | 
 99 |     Array<T, 4> x((Value) 0.5);
100 |     Array<T&, 4> y(x); /* array of references built from x */
101 |     assert(acos(x) == acos(y)); /* both forms must produce the same result */
102 | }
103 | 
104 | ENOKI_TEST_FLOAT(test09_atan) { /* atan() against std::atan, plus a value-array vs. reference-array comparison */
105 |     test::probe_accuracy<T>(
106 |         [](const T &a) -> T { return atan(a); },
107 |         [](double a) { return std::atan(a); },
108 |         Value(-1), Value(1), /* NOTE(review): presumably the probed input range -- confirm in test.h */
109 |         12 /* NOTE(review): presumably the tolerated error bound -- confirm in test.h */
110 |     );
111 | 
112 |     Array<T, 4> x((Value) 0.5);
113 |     Array<T&, 4> y(x); /* array of references built from x */
114 |     assert(atan(x) == atan(y)); /* both forms must produce the same result */
115 | }
116 | 
117 | ENOKI_TEST_FLOAT(test10_atan2) { /* atan2() against std::atan2 on a 101 x 101 grid over [-1, 1]^2 */
118 |     for (int ix = 0; ix <= 100; ++ix) {
119 |         for (int iy = 0; iy <= 100; ++iy) {
120 |             Value x = Value(ix) / Value(100) * 2 - 1; /* maps 0..100 to -1..1 */
121 |             Value y = Value(iy) / Value(100) * 2 - 1;
122 |             T atan2_ = T(atan2(T(y), T(x)))[0]; /* entry 0 of the result is used to build atan2_ */
123 |             Value atan2_ref = std::atan2(y, x);
124 |             if (x == 0 || y == 0) /* points on either axis are evaluated above but excluded from the comparison */
125 |                 continue;
126 |             assert(std::abs(atan2_[0] - atan2_ref) < 3.58e-6f); /* absolute error bound */
127 |         }
128 |     }
129 | }
130 | 
131 | ENOKI_TEST_FLOAT(test11_csc_sec_cot) { /* csc/sec/cot at 1 against reciprocals of std::sin/cos/tan; entry 0 only */
132 |     assert(std::abs(T(csc(T(1.f)) - 1 / std::sin(1.f))[0]) < 1e-6f); /* csc(x) = 1 / sin(x) */
133 |     assert(std::abs(T(sec(T(1.f)) - 1 / std::cos(1.f))[0]) < 1e-6f); /* sec(x) = 1 / cos(x) */
134 |     assert(std::abs(T(cot(T(1.f)) - 1 / std::tan(1.f))[0]) < 1e-6f); /* cot(x) = 1 / tan(x) */
135 | }
136 | 
137 | ENOKI_TEST_FLOAT(test12_safe_math) { /* safe_asin/safe_acos/safe_sqrt with arguments inside and outside the usual domain */
138 | #if defined(_MSC_VER)
139 |     // MSVC codegen issue :-| NOTE(review): the print below appears to be the workaround -- confirm before removing
140 |     std::cout << abs(safe_asin(T(Value(-10))) - Value(-M_PI / 2)) << std::endl;
141 | #endif
142 |     assert(all(abs(safe_asin(T(Value(-10))) - Value(-M_PI / 2)) < 1e-6f)); /* -10 is expected to give -pi/2 */
143 |     assert(all(abs(safe_asin(T(Value( 10))) - Value( M_PI / 2)) < 1e-6f)); /* 10 is expected to give pi/2 */
144 |     assert(all(abs(safe_acos(T(Value(-10))) - Value(M_PI)) < 1e-6f)); /* -10 is expected to give pi */
145 |     assert(all(abs(safe_acos(T(Value( 10))) - Value(0)) < 1e-6f)); /* 10 is expected to give 0 */
146 |     assert(all(abs(safe_sqrt(T(Value(4)))   - Value(2)) < 1e-6f)); /* in-domain input: the ordinary square root */
147 |     assert(all(abs(safe_sqrt(T(Value(-1)))  - Value(0)) < 1e-6f)); /* a negative input is expected to give 0 */
148 | }
149 | 


--------------------------------------------------------------------------------