├── .gitattributes
├── .gitignore
├── .gitmodules
├── docs
    ├── README.md
    ├── index.md
    └── DoxygenLayout.xml
├── include
    └── tue
    │   ├── unused.hpp
    │   ├── detail_
    │       ├── simd_support.hpp
    │       ├── is_floating_point_simd_component.hpp
    │       ├── simd_specializations.hpp
    │       ├── is_sized_bool.hpp
    │       ├── is_integral_simd_component.hpp
    │       ├── is_arithmetic_simd_component.hpp
    │       ├── is_simd_component.hpp
    │       ├── is_vec_component.hpp
    │       ├── simd
    │       │   ├── sse2
    │       │   │   ├── bool64x2.sse2.hpp
    │       │   │   ├── bool16x8.sse2.hpp
    │       │   │   ├── bool8x16.sse2.hpp
    │       │   │   ├── uint64x2.sse2.hpp
    │       │   │   ├── uint16x8.sse2.hpp
    │       │   │   ├── uint32x4.sse2.hpp
    │       │   │   ├── int64x2.sse2.hpp
    │       │   │   ├── uint8x16.sse2.hpp
    │       │   │   ├── int16x8.sse2.hpp
    │       │   │   └── int32x4.sse2.hpp
    │       │   └── sse
    │       │   │   └── bool32x4.sse.hpp
    │       └── matmult.hpp
    │   ├── nocopy_cast.hpp
    │   └── sized_bool.hpp
├── tests
    ├── tue.tests.hpp
    ├── unused.tests.cpp
    ├── nocopy_cast.tests.cpp
    ├── vec.tests.cpp
    ├── math.tests.cpp
    ├── sized_bool.tests.cpp
    ├── quat.tests.cpp
    └── transform.tests.cpp
├── LICENSE_1_0.txt
├── CMakeLists.txt
└── README.md


/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | docs/html
3 | *.user
4 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/mon"]
2 | 	path = lib/mon
3 | 	url = https://github.com/Cincinesh/mon.git
4 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | Documentation
 2 | -------------
 3 | Tuesday documentation can be generated with
 4 | [Doxygen](https://www.doxygen.nl/) using the `Doxyfile` at the
 5 | root of this project. A copy is also published at:
 6 | 
 7 | http://cincinesh.github.io/tue/master/docs/index.html
 8 | 
 9 | License
10 | -------
11 | Copyright Jo Bates 2015.
12 | 
13 | Distributed under the Boost Software License, Version 1.0.
14 | 
15 | See accompanying file [LICENSE_1_0.txt](LICENSE_1_0.txt) or copy at
16 | http://www.boost.org/LICENSE_1_0.txt.
17 | 
18 | Bug Reporting
19 | -------------
20 | Please report any bugs, typos, or suggestions to
21 | https://github.com/Cincinesh/tue/issues.
22 | 


--------------------------------------------------------------------------------
/include/tue/unused.hpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #pragma once
10 | 
11 | namespace tue
12 | {
13 |     /*!
14 |      * \defgroup  unused_hpp <tue/unused.hpp>
15 |      *
16 |      * \brief     Provides the `unused()` function template.
17 |      *
18 |      * @{
19 |      */
20 | 
21 |     /*!
22 |      * \brief        Accepts any number of arguments by forwarding reference
23 |      *               and does nothing with them, so that otherwise-unused
24 |      *               variables count as used. No argument is copied or moved.
25 |      *
26 |      * \tparam Args  The deduced types of the ignored arguments.
27 |      */
28 |     template<typename... Args>
29 |     inline void unused(Args&&... /*ignored*/) noexcept {}
30 | 
31 |     /*!@}*/
32 | }
33 | 


--------------------------------------------------------------------------------
/tests/tue.tests.hpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #pragma once
10 | 
11 | #include <mon/test_case.hpp>
12 | 
13 | #include <cmath>
14 | #include <limits>
15 | 
16 | #ifdef _MSC_VER
17 | #define CONST_OR_CONSTEXPR const
18 | #else
19 | #define CONST_OR_CONSTEXPR constexpr
20 | #endif
21 | 
22 | namespace
23 | {
24 |     // Approximate comparison used by the tests. Succeeds when `actual` equals
25 |     // `expected` exactly, when it differs by less than 0.03% of `expected`,
26 |     // or whenever `expected` is +/-infinity (whatever `actual` is).
27 |     template<typename T>
28 |     bool nearly_equal(T actual, T expected) noexcept
29 |     {
30 |         if (actual == expected)
31 |         {
32 |             return true;
33 |         }
34 | 
35 |         const auto tolerance = std::abs(expected * 0.0003f);
36 |         if (std::abs(actual - expected) < tolerance)
37 |         {
38 |             return true;
39 |         }
40 | 
41 |         return std::abs(expected) == std::numeric_limits<T>::infinity();
42 |     }
43 | }
44 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd_support.hpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #pragma once
10 | 
11 | /*!
12 |  * \addtogroup  simd_hpp
13 |  * @{
14 |  */
15 | #if defined(__SSE__) \
16 |     || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) \
17 |     || defined(_M_X64)
18 | /*!
19 |  * \brief Defined when SSE intrinsics are usable, i.e. when `__SSE__` is
20 |  *        defined, `_M_IX86_FP` is at least 1, or `_M_X64` is defined.
21 |  */
22 | #define TUE_SSE
23 | #endif
24 | 
25 | #if defined(__SSE2__) \
26 |     || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) \
27 |     || defined(_M_X64)
28 | /*!
29 |  * \brief Defined when SSE2 intrinsics are usable, i.e. when `__SSE2__` is
30 |  *        defined, `_M_IX86_FP` is at least 2, or `_M_X64` is defined.
31 |  */
32 | #define TUE_SSE2
33 | #endif
34 | 
35 | /*!@}*/
36 | 


--------------------------------------------------------------------------------
/include/tue/detail_/is_floating_point_simd_component.hpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #pragma once
10 | 
11 | #include <type_traits>
12 | 
13 | namespace tue
14 | {
15 |     // Trait reporting whether `T` is a floating-point SIMD component type.
16 |     // The primary template yields `false`; only `float` and `double` are
17 |     // specialized to yield `true`.
18 |     template<typename T>
19 |     struct is_floating_point_simd_component : public std::false_type
20 |     {
21 |         using std::false_type::integral_constant;
22 |     };
23 | 
24 |     template<>
25 |     struct is_floating_point_simd_component<float> : public std::true_type
26 |     {
27 |         using std::true_type::integral_constant;
28 |     };
29 | 
30 |     template<>
31 |     struct is_floating_point_simd_component<double> : public std::true_type
32 |     {
33 |         using std::true_type::integral_constant;
34 |     };
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/unused.tests.cpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #include <tue/unused.hpp>
10 | #include "tue.tests.hpp"
11 | 
12 | #include <utility>
13 | 
14 | namespace
15 | {
16 |     using namespace tue;
17 | 
18 |     // Probe type: copy- or move-constructing from an instance flags that
19 |     // *source* instance through its mutable `was_copied` member.
20 |     struct test_struct
21 |     {
22 |         mutable bool was_copied;
23 | 
24 |         test_struct() noexcept : was_copied{false} {}
25 | 
26 |         test_struct(const test_struct& source) noexcept : was_copied{false}
27 |         {
28 |             source.was_copied = true;
29 |         }
30 | 
31 |         test_struct(test_struct&& source) noexcept : was_copied{false}
32 |         {
33 |             source.was_copied = true;
34 |         }
35 |     };
36 | 
37 |     // unused() is handed an lvalue, an rvalue and a const lvalue; none of
38 |     // them may end up copied or moved from.
39 |     TEST_CASE(unused)
40 |     {
41 |         test_struct x;
42 |         test_struct y;
43 |         const test_struct z;
44 |         tue::unused(x, std::move(y), z);
45 |         test_assert(x.was_copied == false);
46 |         test_assert(y.was_copied == false);
47 |         test_assert(z.was_copied == false);
48 |     }
49 | }
50 | 


--------------------------------------------------------------------------------
/LICENSE_1_0.txt:
--------------------------------------------------------------------------------
 1 | Boost Software License - Version 1.0 - August 17th, 2003
 2 | 
 3 | Permission is hereby granted, free of charge, to any person or organization
 4 | obtaining a copy of the software and accompanying documentation covered by
 5 | this license (the "Software") to use, reproduce, display, distribute,
 6 | execute, and transmit the Software, and to prepare derivative works of the
 7 | Software, and to permit third-parties to whom the Software is furnished to
 8 | do so, all subject to the following:
 9 | 
10 | The copyright notices in the Software and this entire statement, including
11 | the above license grant, this restriction and the following disclaimer,
12 | must be included in all copies of the Software, in whole or in part, and
13 | all derivative works of the Software, unless such copies or derivative
14 | works are solely in the form of machine-executable object code generated by
15 | a source language processor.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd_specializations.hpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #pragma once
10 | 
11 | #include "../simd.hpp"
12 | 
13 | // SSE
14 | #ifdef TUE_SSE
15 | #include <cstdint>
16 | #include <cstring>
16 | 
17 | namespace tue
18 | {
19 |     namespace detail_
20 |     {
21 |         /*
22 |          * Returns the `float` whose object representation equals the bits
23 |          * of `x`.
24 |          *
25 |          * The bytes are copied with `std::memcpy`: reading the integer
26 |          * through a `const float&` (as a `reinterpret_cast` would) violates
27 |          * the strict-aliasing rule and is undefined behavior.
28 |          */
29 |         inline float binary_float(std::uint32_t x) noexcept
30 |         {
31 |             static_assert(
32 |                 sizeof(float) == sizeof(std::uint32_t),
33 |                 "binary_float requires a 32-bit float");
34 |             float result;
35 |             std::memcpy(&result, &x, sizeof(result));
36 |             return result;
37 |         }
38 |     }
39 | }
27 | 
28 | #include "simd/sse/bool32x4.sse.hpp"
29 | #include "simd/sse/float32x4.sse.hpp"
30 | 
31 | #ifdef TUE_SSE2
32 | 
33 | namespace tue
34 | {
35 |     namespace detail_
36 |     {
37 |         /*
38 |          * Returns the `double` whose object representation equals the bits
39 |          * of `x`.
40 |          *
41 |          * The bytes are copied with `std::memcpy`: reading the integer
42 |          * through a `const double&` (as a `reinterpret_cast` would) violates
43 |          * the strict-aliasing rule and is undefined behavior.
44 |          */
45 |         inline double binary_double(std::uint64_t x) noexcept
46 |         {
47 |             static_assert(
48 |                 sizeof(double) == sizeof(std::uint64_t),
49 |                 "binary_double requires a 64-bit double");
50 |             double result;
51 |             std::memcpy(&result, &x, sizeof(result));
52 |             return result;
53 |         }
54 |     }
55 | }
43 | 
44 | #include "simd/sse2/bool8x16.sse2.hpp"
45 | #include "simd/sse2/bool16x8.sse2.hpp"
46 | #include "simd/sse2/bool64x2.sse2.hpp"
47 | #include "simd/sse2/float64x2.sse2.hpp"
48 | #include "simd/sse2/int8x16.sse2.hpp"
49 | #include "simd/sse2/int16x8.sse2.hpp"
50 | #include "simd/sse2/int32x4.sse2.hpp"
51 | #include "simd/sse2/int64x2.sse2.hpp"
52 | #include "simd/sse2/uint8x16.sse2.hpp"
53 | #include "simd/sse2/uint16x8.sse2.hpp"
54 | #include "simd/sse2/uint32x4.sse2.hpp"
55 | #include "simd/sse2/uint64x2.sse2.hpp"
56 | 
57 | #endif
58 | #endif
59 | 


--------------------------------------------------------------------------------
/include/tue/detail_/is_sized_bool.hpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #pragma once
10 | 
11 | #include <cstdint>
12 | #include <type_traits>
13 | 
14 | namespace tue
15 | {
16 |     // Declarations of the fixed-width boolean enums recognized below.
17 |     enum bool8 : std::uint8_t;
18 |     enum bool16 : std::uint16_t;
19 |     enum bool32 : std::uint32_t;
20 |     enum bool64 : std::uint64_t;
21 | 
22 |     // Trait that is `true` only for `bool8`, `bool16`, `bool32` and `bool64`.
23 |     template<typename T>
24 |     struct is_sized_bool : public std::false_type
25 |     {
26 |         using std::false_type::integral_constant;
27 |     };
28 | 
29 |     template<>
30 |     struct is_sized_bool<bool8> : public std::true_type
31 |     {
32 |         using std::true_type::integral_constant;
33 |     };
34 | 
35 |     template<>
36 |     struct is_sized_bool<bool16> : public std::true_type
37 |     {
38 |         using std::true_type::integral_constant;
39 |     };
40 | 
41 |     template<>
42 |     struct is_sized_bool<bool32> : public std::true_type
43 |     {
44 |         using std::true_type::integral_constant;
45 |     };
46 | 
47 |     template<>
48 |     struct is_sized_bool<bool64> : public std::true_type
49 |     {
50 |         using std::true_type::integral_constant;
51 |     };
52 | }
53 | 


--------------------------------------------------------------------------------
/tests/nocopy_cast.tests.cpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #include <tue/nocopy_cast.hpp>
10 | #include "tue.tests.hpp"
11 | 
12 | #include <tue/unused.hpp>
13 | 
14 | namespace
15 | {
16 |     using namespace tue;
17 | 
18 |     // Source type: copy-constructing from an instance flags that instance
19 |     // through its mutable `was_copied` member.
20 |     struct A
21 |     {
22 |         mutable bool was_copied;
23 | 
24 |         constexpr A() noexcept : was_copied{false} {}
25 | 
26 |         A(const A& source) noexcept : was_copied{false}
27 |         {
28 |             source.was_copied = true;
29 |         }
30 |     };
31 | 
32 |     // Target type: not default-constructible, only explicitly from an `A`.
33 |     struct B
34 |     {
35 |         B() = delete;
36 | 
37 |         constexpr explicit B(const A&) noexcept {}
38 |     };
39 | 
40 |     // Casting to the argument's own type must hand back the same object.
41 |     TEST_CASE(nocopy_cast_same_type)
42 |     {
43 |         const A a1;
44 |         const A& a2 = nocopy_cast<A>(a1);
45 |         test_assert(&a2 == &a1);
46 |         test_assert(!a1.was_copied);
47 | 
48 |         CONST_OR_CONSTEXPR A a3;
49 |         CONST_OR_CONSTEXPR B b1(a3);
50 |         CONST_OR_CONSTEXPR B b2 = nocopy_cast<B>(b1);
51 |         unused(b2);
52 |     }
53 | 
54 |     // Casting to a different type must not copy the source object.
55 |     TEST_CASE(nocopy_cast_different_type)
56 |     {
57 |         CONST_OR_CONSTEXPR A a;
58 |         CONST_OR_CONSTEXPR B b = nocopy_cast<B>(a);
59 |         test_assert(!a.was_copied);
60 |         unused(b);
61 |     }
62 | }
63 | 


--------------------------------------------------------------------------------
/tests/vec.tests.cpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #include <tue/vec.hpp>
10 | #include "tue.tests.hpp"
11 | 
12 | #include <cstdint>
13 | 
14 | namespace
15 | {
16 |     using namespace tue;
17 |     // NOTE(review): the case is named is_simd_component but every assertion targets is_vec_component - confirm intent.
18 |     TEST_CASE(is_simd_component)
19 |     {
20 |         test_assert(is_vec_component<bool>::value == false); // plain bool is expected to be rejected
21 |         test_assert(is_vec_component<float>::value == true);
22 |         test_assert(is_vec_component<double>::value == true);
23 |         test_assert(is_vec_component<std::int8_t>::value == true);
24 |         test_assert(is_vec_component<std::int16_t>::value == true);
25 |         test_assert(is_vec_component<std::int32_t>::value == true);
26 |         test_assert(is_vec_component<std::int64_t>::value == true);
27 |         test_assert(is_vec_component<std::uint8_t>::value == true);
28 |         test_assert(is_vec_component<std::uint16_t>::value == true);
29 |         test_assert(is_vec_component<std::uint32_t>::value == true);
30 |         test_assert(is_vec_component<std::uint64_t>::value == true);
31 |         test_assert(is_vec_component<bool8>::value == true); // the sized bools are expected to be accepted
32 |         test_assert(is_vec_component<bool16>::value == true);
33 |         test_assert(is_vec_component<bool32>::value == true);
34 |         test_assert(is_vec_component<bool64>::value == true);
35 |         test_assert((is_vec_component<simd<float, 4>>::value == true)); // presumably parenthesized because of the comma
36 |     }
37 | }
38 | 


--------------------------------------------------------------------------------
/include/tue/nocopy_cast.hpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #pragma once
10 | 
11 | #include <type_traits>
12 | 
13 | /*!
14 |  * \defgroup  nocopy_cast_hpp <tue/nocopy_cast.hpp>
15 |  *
16 |  * \brief     The `nocopy_cast<T>()` function template.
17 |  */
18 | namespace tue
19 | {
20 |     namespace detail_
21 |     {
22 |         // Chosen when `T` and `U` are the same type: hands `x` back by
23 |         // const reference.
24 |         template<typename T, typename U>
25 |         inline constexpr auto nocopy_cast(const U& x) noexcept
26 |             -> std::enable_if_t<std::is_same<T, U>::value, const T&>
27 |         {
28 |             return x;
29 |         }
30 | 
31 |         // Chosen when `T` and `U` differ: converts with `static_cast<T>`.
32 |         template<typename T, typename U>
33 |         inline constexpr auto nocopy_cast(const U& x) noexcept
34 |             -> std::enable_if_t<!std::is_same<T, U>::value, T>
35 |         {
36 |             return static_cast<T>(x);
37 |         }
38 |     }
39 | 
40 |     /*!
41 |      * \addtogroup  nocopy_cast_hpp
42 |      * @{
43 |      */
44 | 
45 |     /*!
46 |      * \brief     Converts `x` to `T`, skipping the copy when `x` already
47 |      *            has that type.
48 |      *
49 |      * \tparam T  The requested type.
50 |      * \tparam U  The type of `x`.
51 |      *
52 |      * \param x   The value to convert.
53 |      *
54 |      * \return    `x` itself, as a const reference, when `U` is `T`;
55 |      *            otherwise the value `static_cast<T>(x)`.
56 |      */
57 |     template<typename T, typename U>
58 |     inline constexpr auto nocopy_cast(const U& x) noexcept
59 |         -> std::conditional_t<std::is_same<T, U>::value, const T&, T>
60 |     {
61 |         return tue::detail_::nocopy_cast<T, U>(x);
62 |     }
63 | 
64 |     // Deleted catch-all: arguments that bind better to `U&&` than to
65 |     // `const U&` (temporaries, for instance) are rejected at compile time.
66 |     template<typename T, typename U>
67 |     void nocopy_cast(U&& x) = delete;
68 | 
69 |     /*!@}*/
70 | }
71 | 


--------------------------------------------------------------------------------
/include/tue/detail_/is_integral_simd_component.hpp:
--------------------------------------------------------------------------------
 1 | //                Copyright Jo Bates 2015.
 2 | // Distributed under the Boost Software License, Version 1.0.
 3 | //    (See accompanying file LICENSE_1_0.txt or copy at
 4 | //          http://www.boost.org/LICENSE_1_0.txt)
 5 | //
 6 | //     Please report any bugs, typos, or suggestions to
 7 | //         https://github.com/Cincinesh/tue/issues
 8 | 
 9 | #pragma once
10 | 
11 | #include <cstdint>
12 | #include <type_traits>
13 | 
14 | namespace tue
15 | {
16 |     // Trait that is `true` only for the fixed-width integer types
17 |     // `std::int8_t`..`std::int64_t` and `std::uint8_t`..`std::uint64_t`.
18 |     template<typename T>
19 |     struct is_integral_simd_component : public std::false_type
20 |     {
21 |         using std::false_type::integral_constant;
22 |     };
23 | 
24 |     template<>
25 |     struct is_integral_simd_component<std::int8_t> : public std::true_type
26 |     {
27 |         using std::true_type::integral_constant;
28 |     };
29 | 
30 |     template<>
31 |     struct is_integral_simd_component<std::int16_t> : public std::true_type
32 |     {
33 |         using std::true_type::integral_constant;
34 |     };
35 | 
36 |     template<>
37 |     struct is_integral_simd_component<std::int32_t> : public std::true_type
38 |     {
39 |         using std::true_type::integral_constant;
40 |     };
41 | 
42 |     template<>
43 |     struct is_integral_simd_component<std::int64_t> : public std::true_type
44 |     {
45 |         using std::true_type::integral_constant;
46 |     };
47 | 
48 |     template<>
49 |     struct is_integral_simd_component<std::uint8_t> : public std::true_type
50 |     {
51 |         using std::true_type::integral_constant;
52 |     };
53 | 
54 |     template<>
55 |     struct is_integral_simd_component<std::uint16_t> : public std::true_type
56 |     {
57 |         using std::true_type::integral_constant;
58 |     };
59 | 
60 |     template<>
61 |     struct is_integral_simd_component<std::uint32_t> : public std::true_type
62 |     {
63 |         using std::true_type::integral_constant;
64 |     };
65 | 
66 |     template<>
67 |     struct is_integral_simd_component<std::uint64_t> : public std::true_type
68 |     {
69 |         using std::true_type::integral_constant;
70 |     };
71 | }
72 | 


--------------------------------------------------------------------------------
/include/tue/detail_/is_arithmetic_simd_component.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <cstdint>
 12 | #include <type_traits>
 13 | 
 14 | namespace tue
 15 | {
 16 |     // Trait that is `true` only for `float`, `double` and the fixed-width
 17 |     // integer types `std::int8_t`..`std::int64_t`, `std::uint8_t`..`std::uint64_t`.
 18 |     template<typename T>
 19 |     struct is_arithmetic_simd_component : public std::false_type
 20 |     {
 21 |         using std::false_type::integral_constant;
 22 |     };
 23 | 
 24 |     template<>
 25 |     struct is_arithmetic_simd_component<float> : public std::true_type
 26 |     {
 27 |         using std::true_type::integral_constant;
 28 |     };
 29 | 
 30 |     template<>
 31 |     struct is_arithmetic_simd_component<double> : public std::true_type
 32 |     {
 33 |         using std::true_type::integral_constant;
 34 |     };
 35 | 
 36 |     template<>
 37 |     struct is_arithmetic_simd_component<std::int8_t> : public std::true_type
 38 |     {
 39 |         using std::true_type::integral_constant;
 40 |     };
 41 | 
 42 |     template<>
 43 |     struct is_arithmetic_simd_component<std::int16_t> : public std::true_type
 44 |     {
 45 |         using std::true_type::integral_constant;
 46 |     };
 47 | 
 48 |     template<>
 49 |     struct is_arithmetic_simd_component<std::int32_t> : public std::true_type
 50 |     {
 51 |         using std::true_type::integral_constant;
 52 |     };
 53 | 
 54 |     template<>
 55 |     struct is_arithmetic_simd_component<std::int64_t> : public std::true_type
 56 |     {
 57 |         using std::true_type::integral_constant;
 58 |     };
 59 | 
 60 |     template<>
 61 |     struct is_arithmetic_simd_component<std::uint8_t> : public std::true_type
 62 |     {
 63 |         using std::true_type::integral_constant;
 64 |     };
 65 | 
 66 |     template<>
 67 |     struct is_arithmetic_simd_component<std::uint16_t> : public std::true_type
 68 |     {
 69 |         using std::true_type::integral_constant;
 70 |     };
 71 | 
 72 |     template<>
 73 |     struct is_arithmetic_simd_component<std::uint32_t> : public std::true_type
 74 |     {
 75 |         using std::true_type::integral_constant;
 76 |     };
 77 | 
 78 |     template<>
 79 |     struct is_arithmetic_simd_component<std::uint64_t> : public std::true_type
 80 |     {
 81 |         using std::true_type::integral_constant;
 82 |     };
 83 | }
 84 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.3)
  2 | project(tue)
  3 | enable_testing()
  4 | 
  5 | set(CMAKE_CXX_STANDARD 14)
  6 | set(CMAKE_CXX_STANDARD_REQUIRED ON) # headers use C++14 (e.g. std::enable_if_t); do not fall back to an older standard
  7 | if(MSVC)
  8 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
  9 | endif()
 10 | 
 11 | include_directories(
 12 |     lib/mon/include
 13 |     include)
 14 | 
 15 | # mon
 16 | set(MON_SOURCES
 17 |     lib/mon/include/mon/test_case.hpp
 18 |     lib/mon/include/mon/test_failure.hpp
 19 |     lib/mon/include/mon/test_runner.hpp
 20 |     lib/mon/src/main.cpp
 21 |     lib/mon/src/mon.test_case.cpp
 22 |     lib/mon/src/mon.test_failure.cpp
 23 |     lib/mon/src/mon.test_runner.cpp)
 24 | 
 25 | # tue
 26 | set(TUE_SOURCES
 27 |     include/tue/detail_/is_arithmetic_simd_component.hpp
 28 |     include/tue/detail_/is_floating_point_simd_component.hpp
 29 |     include/tue/detail_/is_integral_simd_component.hpp
 30 |     include/tue/detail_/is_simd_component.hpp
 31 |     include/tue/detail_/is_sized_bool.hpp
 32 |     include/tue/detail_/is_vec_component.hpp
 33 |     include/tue/detail_/mat2xR.hpp
 34 |     include/tue/detail_/mat3xR.hpp
 35 |     include/tue/detail_/mat4xR.hpp
 36 |     include/tue/detail_/matmult.hpp
 37 |     include/tue/detail_/simd2.hpp
 38 |     include/tue/detail_/simdN.hpp
 39 |     include/tue/detail_/simd_specializations.hpp
 40 |     include/tue/detail_/simd_support.hpp
 41 |     include/tue/detail_/simd/sse/bool32x4.sse.hpp
 42 |     include/tue/detail_/simd/sse/float32x4.sse.hpp
 43 |     include/tue/detail_/simd/sse2/bool8x16.sse2.hpp
 44 |     include/tue/detail_/simd/sse2/bool16x8.sse2.hpp
 45 |     include/tue/detail_/simd/sse2/bool64x2.sse2.hpp
 46 |     include/tue/detail_/simd/sse2/float64x2.sse2.hpp
 47 |     include/tue/detail_/simd/sse2/int8x16.sse2.hpp
 48 |     include/tue/detail_/simd/sse2/int16x8.sse2.hpp
 49 |     include/tue/detail_/simd/sse2/int32x4.sse2.hpp
 50 |     include/tue/detail_/simd/sse2/int64x2.sse2.hpp
 51 |     include/tue/detail_/simd/sse2/uint8x16.sse2.hpp
 52 |     include/tue/detail_/simd/sse2/uint16x8.sse2.hpp
 53 |     include/tue/detail_/simd/sse2/uint32x4.sse2.hpp
 54 |     include/tue/detail_/simd/sse2/uint64x2.sse2.hpp
 55 |     include/tue/detail_/vec2.hpp
 56 |     include/tue/detail_/vec3.hpp
 57 |     include/tue/detail_/vec4.hpp
 58 |     include/tue/mat.hpp
 59 |     include/tue/math.hpp
 60 |     include/tue/nocopy_cast.hpp
 61 |     include/tue/quat.hpp
 62 |     include/tue/simd.hpp
 63 |     include/tue/sized_bool.hpp
 64 |     include/tue/transform.hpp
 65 |     include/tue/unused.hpp
 66 |     include/tue/vec.hpp
 67 |     docs/DoxygenLayout.xml
 68 |     docs/index.md
 69 |     Doxyfile
 70 |     LICENSE_1_0.txt
 71 |     README.md)
 72 | 
 73 | # tue.tests
 74 | set(TUE_TEST_SOURCES
 75 |     tests/mat2xR.tests.cpp
 76 |     tests/mat3xR.tests.cpp
 77 |     tests/mat4xR.tests.cpp
 78 |     tests/matmult.tests.cpp
 79 |     tests/math.tests.cpp
 80 |     tests/nocopy_cast.tests.cpp
 81 |     tests/quat.tests.cpp
 82 |     tests/simd.tests.cpp
 83 |     tests/sized_bool.tests.cpp
 84 |     tests/transform.tests.cpp
 85 |     tests/tue.tests.hpp
 86 |     tests/unused.tests.cpp
 87 |     tests/vec.tests.cpp
 88 |     tests/vec2.tests.cpp
 89 |     tests/vec3.tests.cpp
 90 |     tests/vec4.tests.cpp)
 91 | 
 92 | add_executable(
 93 |     tue.tests
 94 |     ${MON_SOURCES}
 95 |     ${TUE_SOURCES}
 96 |     ${TUE_TEST_SOURCES})
 97 | 
 98 | add_test(
 99 |     tue.tests
100 |     tue.tests)
101 | 
102 | # check
103 | add_custom_target(
104 |     check
105 |     COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
106 |     DEPENDS tue.tests)
107 | 


--------------------------------------------------------------------------------
/include/tue/detail_/is_simd_component.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <cstdint>
 12 | #include <type_traits>
 13 | 
 14 | namespace tue // declares the type trait is_simd_component<T>
 15 | {
 16 |     enum bool8 : std::uint8_t; // opaque declarations of the sized boolean enums (no enumerators here);
 17 |     enum bool16 : std::uint16_t; // they let this header name the types without their definitions
 18 |     enum bool32 : std::uint32_t; // (presumably defined in <tue/sized_bool.hpp> - confirm)
 19 |     enum bool64 : std::uint64_t;
 20 | 
 21 |     template<typename T>
 22 |     struct is_simd_component // primary template: any T without a specialization below has value == false
 23 |     :
 24 |         public std::integral_constant<bool, false>
 25 |     {
 26 |         using std::integral_constant<bool, false>::integral_constant; // using-declaration naming the base class's constructors
 27 |     };
 28 | 
 29 |     template<>
 30 |     struct is_simd_component<float> // value == true for float
 31 |     :
 32 |         public std::integral_constant<bool, true>
 33 |     {
 34 |         using std::integral_constant<bool, true>::integral_constant;
 35 |     };
 36 | 
 37 |     template<>
 38 |     struct is_simd_component<double> // value == true for double
 39 |     :
 40 |         public std::integral_constant<bool, true>
 41 |     {
 42 |         using std::integral_constant<bool, true>::integral_constant;
 43 |     };
 44 | 
 45 |     template<>
 46 |     struct is_simd_component<std::int8_t> // value == true for the fixed-width signed integers (8/16/32/64 bit)
 47 |     :
 48 |         public std::integral_constant<bool, true>
 49 |     {
 50 |         using std::integral_constant<bool, true>::integral_constant;
 51 |     };
 52 | 
 53 |     template<>
 54 |     struct is_simd_component<std::int16_t>
 55 |     :
 56 |         public std::integral_constant<bool, true>
 57 |     {
 58 |         using std::integral_constant<bool, true>::integral_constant;
 59 |     };
 60 | 
 61 |     template<>
 62 |     struct is_simd_component<std::int32_t>
 63 |     :
 64 |         public std::integral_constant<bool, true>
 65 |     {
 66 |         using std::integral_constant<bool, true>::integral_constant;
 67 |     };
 68 | 
 69 |     template<>
 70 |     struct is_simd_component<std::int64_t>
 71 |     :
 72 |         public std::integral_constant<bool, true>
 73 |     {
 74 |         using std::integral_constant<bool, true>::integral_constant;
 75 |     };
 76 | 
 77 |     template<>
 78 |     struct is_simd_component<std::uint8_t> // value == true for the fixed-width unsigned integers (8/16/32/64 bit)
 79 |     :
 80 |         public std::integral_constant<bool, true>
 81 |     {
 82 |         using std::integral_constant<bool, true>::integral_constant;
 83 |     };
 84 | 
 85 |     template<>
 86 |     struct is_simd_component<std::uint16_t>
 87 |     :
 88 |         public std::integral_constant<bool, true>
 89 |     {
 90 |         using std::integral_constant<bool, true>::integral_constant;
 91 |     };
 92 | 
 93 |     template<>
 94 |     struct is_simd_component<std::uint32_t>
 95 |     :
 96 |         public std::integral_constant<bool, true>
 97 |     {
 98 |         using std::integral_constant<bool, true>::integral_constant;
 99 |     };
100 | 
101 |     template<>
102 |     struct is_simd_component<std::uint64_t>
103 |     :
104 |         public std::integral_constant<bool, true>
105 |     {
106 |         using std::integral_constant<bool, true>::integral_constant;
107 |     };
108 | 
109 |     template<>
110 |     struct is_simd_component<bool8> // value == true for the sized boolean enums declared at the top
111 |     :
112 |         public std::integral_constant<bool, true>
113 |     {
114 |         using std::integral_constant<bool, true>::integral_constant;
115 |     };
116 | 
117 |     template<>
118 |     struct is_simd_component<bool16>
119 |     :
120 |         public std::integral_constant<bool, true>
121 |     {
122 |         using std::integral_constant<bool, true>::integral_constant;
123 |     };
124 | 
125 |     template<>
126 |     struct is_simd_component<bool32>
127 |     :
128 |         public std::integral_constant<bool, true>
129 |     {
130 |         using std::integral_constant<bool, true>::integral_constant;
131 |     };
132 | 
133 |     template<>
134 |     struct is_simd_component<bool64>
135 |     :
136 |         public std::integral_constant<bool, true>
137 |     {
138 |         using std::integral_constant<bool, true>::integral_constant;
139 |     };
140 | } // namespace tue
141 | 


--------------------------------------------------------------------------------
/include/tue/detail_/is_vec_component.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <cstdint>
 12 | #include <type_traits>
 13 | 
 14 | namespace tue // declares the type trait is_vec_component<T>
 15 | {
 16 |     enum bool8 : std::uint8_t; // opaque declarations of the sized boolean enums (no enumerators here);
 17 |     enum bool16 : std::uint16_t; // they let this header name the types without their definitions
 18 |     enum bool32 : std::uint32_t; // (presumably defined in <tue/sized_bool.hpp> - confirm)
 19 |     enum bool64 : std::uint64_t;
 20 | 
 21 |     template<typename T, int N>
 22 |     class simd; // forward declaration only; needed to write the partial specialization at the bottom
 23 | 
 24 |     template<typename T>
 25 |     struct is_vec_component // primary template: any T without a specialization below has value == false
 26 |     :
 27 |         public std::integral_constant<bool, false>
 28 |     {
 29 |         using std::integral_constant<bool, false>::integral_constant; // using-declaration naming the base class's constructors
 30 |     };
 31 | 
 32 |     template<>
 33 |     struct is_vec_component<float> // value == true for float
 34 |     :
 35 |         public std::integral_constant<bool, true>
 36 |     {
 37 |         using std::integral_constant<bool, true>::integral_constant;
 38 |     };
 39 | 
 40 |     template<>
 41 |     struct is_vec_component<double> // value == true for double
 42 |     :
 43 |         public std::integral_constant<bool, true>
 44 |     {
 45 |         using std::integral_constant<bool, true>::integral_constant;
 46 |     };
 47 | 
 48 |     template<>
 49 |     struct is_vec_component<std::int8_t> // value == true for the fixed-width signed integers (8/16/32/64 bit)
 50 |     :
 51 |         public std::integral_constant<bool, true>
 52 |     {
 53 |         using std::integral_constant<bool, true>::integral_constant;
 54 |     };
 55 | 
 56 |     template<>
 57 |     struct is_vec_component<std::int16_t>
 58 |     :
 59 |         public std::integral_constant<bool, true>
 60 |     {
 61 |         using std::integral_constant<bool, true>::integral_constant;
 62 |     };
 63 | 
 64 |     template<>
 65 |     struct is_vec_component<std::int32_t>
 66 |     :
 67 |         public std::integral_constant<bool, true>
 68 |     {
 69 |         using std::integral_constant<bool, true>::integral_constant;
 70 |     };
 71 | 
 72 |     template<>
 73 |     struct is_vec_component<std::int64_t>
 74 |     :
 75 |         public std::integral_constant<bool, true>
 76 |     {
 77 |         using std::integral_constant<bool, true>::integral_constant;
 78 |     };
 79 | 
 80 |     template<>
 81 |     struct is_vec_component<std::uint8_t> // value == true for the fixed-width unsigned integers (8/16/32/64 bit)
 82 |     :
 83 |         public std::integral_constant<bool, true>
 84 |     {
 85 |         using std::integral_constant<bool, true>::integral_constant;
 86 |     };
 87 | 
 88 |     template<>
 89 |     struct is_vec_component<std::uint16_t>
 90 |     :
 91 |         public std::integral_constant<bool, true>
 92 |     {
 93 |         using std::integral_constant<bool, true>::integral_constant;
 94 |     };
 95 | 
 96 |     template<>
 97 |     struct is_vec_component<std::uint32_t>
 98 |     :
 99 |         public std::integral_constant<bool, true>
100 |     {
101 |         using std::integral_constant<bool, true>::integral_constant;
102 |     };
103 | 
104 |     template<>
105 |     struct is_vec_component<std::uint64_t>
106 |     :
107 |         public std::integral_constant<bool, true>
108 |     {
109 |         using std::integral_constant<bool, true>::integral_constant;
110 |     };
111 | 
112 |     template<>
113 |     struct is_vec_component<bool8> // value == true for the sized boolean enums declared at the top
114 |     :
115 |         public std::integral_constant<bool, true>
116 |     {
117 |         using std::integral_constant<bool, true>::integral_constant;
118 |     };
119 | 
120 |     template<>
121 |     struct is_vec_component<bool16>
122 |     :
123 |         public std::integral_constant<bool, true>
124 |     {
125 |         using std::integral_constant<bool, true>::integral_constant;
126 |     };
127 | 
128 |     template<>
129 |     struct is_vec_component<bool32>
130 |     :
131 |         public std::integral_constant<bool, true>
132 |     {
133 |         using std::integral_constant<bool, true>::integral_constant;
134 |     };
135 | 
136 |     template<>
137 |     struct is_vec_component<bool64>
138 |     :
139 |         public std::integral_constant<bool, true>
140 |     {
141 |         using std::integral_constant<bool, true>::integral_constant;
142 |     };
143 | 
144 |     template<typename T, int N>
145 |     struct is_vec_component<simd<T, N>> // partial specialization: value == true for every simd<T, N>; T and N are not constrained here
146 |     :
147 |         public std::integral_constant<bool, true>
148 |     {
149 |         using std::integral_constant<bool, true>::integral_constant;
150 |     };
151 | } // namespace tue
152 | 


--------------------------------------------------------------------------------
/tests/math.tests.cpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #include <tue/math.hpp>
 10 | #include "tue.tests.hpp"
 11 | 
 12 | #include <cmath>
 13 | 
 14 | namespace // unit tests for the scalar overloads of the tue::math functions
 15 | {
 16 |     using namespace tue; // lets the tests write math::..., true32, bool16, etc. unqualified
 17 | 
 18 |     TEST_CASE(sin) // math::sin(1.2) is compared against std::sin(1.2) via nearly_equal
 19 |     {
 20 |         test_assert(nearly_equal(math::sin(1.2), std::sin(1.2)));
 21 |     }
 22 | 
 23 |     TEST_CASE(cos) // math::cos(1.2) is compared against std::cos(1.2) via nearly_equal
 24 |     {
 25 |         test_assert(nearly_equal(math::cos(1.2), std::cos(1.2)));
 26 |     }
 27 | 
 28 |     TEST_CASE(sincos) // sincos writes its two results through the 2nd and 3rd arguments
 29 |     {
 30 |         double s, c; // left uninitialized; the test expects sincos to assign both
 31 |         math::sincos(1.2, s, c);
 32 |         test_assert(nearly_equal(s, std::sin(1.2)));
 33 |         test_assert(nearly_equal(c, std::cos(1.2)));
 34 |     }
 35 | 
 36 |     TEST_CASE(exp) // math::exp(1.2) is compared against std::exp(1.2) via nearly_equal
 37 |     {
 38 |         test_assert(nearly_equal(math::exp(1.2), std::exp(1.2)));
 39 |     }
 40 | 
 41 |     TEST_CASE(log) // math::log(1.2) is compared against std::log(1.2) via nearly_equal
 42 |     {
 43 |         test_assert(nearly_equal(math::log(1.2), std::log(1.2)));
 44 |     }
 45 | 
 46 |     TEST_CASE(abs) // exact comparisons for double, int and unsigned operands
 47 |     {
 48 |         test_assert(math::abs(1.2) == 1.2);
 49 |         test_assert(math::abs(-1.2) == 1.2);
 50 |         test_assert(math::abs(12) == 12);
 51 |         test_assert(math::abs(-12) == 12);
 52 |         test_assert(math::abs(12u) == 12u); // an unsigned operand is expected to come back unchanged
 53 |     }
 54 | 
 55 |     TEST_CASE(pow) // math::pow(1.2, 3.4) is compared against std::pow via nearly_equal
 56 |     {
 57 |         test_assert(nearly_equal(math::pow(1.2, 3.4), std::pow(1.2, 3.4)));
 58 |     }
 59 | 
 60 |     TEST_CASE(recip) // expected value is the reciprocal 1 / x
 61 |     {
 62 |         test_assert(nearly_equal(math::recip(1.2), 1 / 1.2));
 63 |     }
 64 | 
 65 |     TEST_CASE(sqrt) // math::sqrt(1.2) is compared against std::sqrt(1.2) via nearly_equal
 66 |     {
 67 |         test_assert(nearly_equal(math::sqrt(1.2), std::sqrt(1.2)));
 68 |     }
 69 | 
 70 |     TEST_CASE(rsqrt) // expected value is the reciprocal square root 1 / sqrt(x)
 71 |     {
 72 |         test_assert(nearly_equal(math::rsqrt(1.2), 1 / std::sqrt(1.2)));
 73 |     }
 74 | 
 75 |     TEST_CASE(min) // checked for double and int operands, smaller value in either position
 76 |     {
 77 |         test_assert(math::min(1.2, 3.4) == 1.2);
 78 |         test_assert(math::min(1.2, -3.4) == -3.4);
 79 | 
 80 |         test_assert(math::min(12, 34) == 12);
 81 |         test_assert(math::min(12, -34) == -34);
 82 |     }
 83 | 
 84 |     TEST_CASE(max) // checked for double and int operands, larger value in either position
 85 |     {
 86 |         test_assert(math::max(1.2, 3.4) == 3.4);
 87 |         test_assert(math::max(1.2, -3.4) == 1.2);
 88 | 
 89 |         test_assert(math::max(12, 34) == 34);
 90 |         test_assert(math::max(12, -34) == 12);
 91 |     }
 92 | 
 93 |     TEST_CASE(mask) // expects the value back for a true condition and zero for a false one
 94 |     {
 95 |         test_assert(math::mask(true64, 1.2) == 1.2); // 64-bit condition paired with a double
 96 |         test_assert(math::mask(false64, 1.2) == 0.0);
 97 | 
 98 |         test_assert(math::mask(true32, 1) == 1); // 32-bit condition paired with an int
 99 |         test_assert(math::mask(false32, 1) == 0);
100 | 
101 |         const auto t = math::mask( // 16-bit condition paired with a bool16 value
102 |             true16, static_cast<bool16>(1));
103 |         const auto f = math::mask(
104 |             false16, static_cast<bool16>(1));
105 |         test_assert(t == static_cast<bool16>(1));
106 |         test_assert(f == static_cast<bool16>(0));
107 |     }
108 | 
109 |     TEST_CASE(select) // expects the 2nd argument for a true condition, the 3rd for a false one
110 |     {
111 |         test_assert(math::select(true64, 1.2, 3.4) == 1.2); // 64-bit condition paired with doubles
112 |         test_assert(math::select(false64, 1.2, 3.4) == 3.4);
113 | 
114 |         test_assert(math::select(true32, 1, 2) == 1); // 32-bit condition paired with ints
115 |         test_assert(math::select(false32, 1, 2) == 2);
116 | 
117 |         const auto t = math::select( // 16-bit condition paired with bool16 values
118 |             true16, static_cast<bool16>(1), static_cast<bool16>(2));
119 |         const auto f = math::select(
120 |             false16, static_cast<bool16>(1), static_cast<bool16>(2));
121 |         test_assert(t == static_cast<bool16>(1));
122 |         test_assert(f == static_cast<bool16>(2));
123 |     }
124 | 
125 |     TEST_CASE(less) // int comparisons are expected to produce true32 / false32
126 |     {
127 |         test_assert(math::less(1, 2) == true32);
128 |         test_assert(math::less(2, 2) == false32);
129 |         test_assert(math::less(3, 2) == false32);
130 |     }
131 | 
132 |     TEST_CASE(less_equal) // covers the less-than, equal and greater-than cases
133 |     {
134 |         test_assert(math::less_equal(1, 2) == true32);
135 |         test_assert(math::less_equal(2, 2) == true32);
136 |         test_assert(math::less_equal(3, 2) == false32);
137 |     }
138 | 
139 |     TEST_CASE(greater) // covers the less-than, equal and greater-than cases
140 |     {
141 |         test_assert(math::greater(1, 2) == false32);
142 |         test_assert(math::greater(2, 2) == false32);
143 |         test_assert(math::greater(3, 2) == true32);
144 |     }
145 | 
146 |     TEST_CASE(greater_equal) // covers the less-than, equal and greater-than cases
147 |     {
148 |         test_assert(math::greater_equal(1, 2) == false32);
149 |         test_assert(math::greater_equal(2, 2) == true32);
150 |         test_assert(math::greater_equal(3, 2) == true32);
151 |     }
152 | 
153 |     TEST_CASE(equal) // checked for int operands and for bool32 operands
154 |     {
155 |         test_assert(math::equal(1, 2) == false32);
156 |         test_assert(math::equal(2, 2) == true32);
157 |         test_assert(math::equal(3, 2) == false32);
158 | 
159 |         test_assert(math::equal(true32, true32) == true32); // all four bool32 combinations
160 |         test_assert(math::equal(true32, false32) == false32);
161 |         test_assert(math::equal(false32, true32) == false32);
162 |         test_assert(math::equal(false32, false32) == true32);
163 |     }
164 | 
165 |     TEST_CASE(not_equal) // checked for int operands and for bool32 operands
166 |     {
167 |         test_assert(math::not_equal(1, 2) == true32);
168 |         test_assert(math::not_equal(2, 2) == false32);
169 |         test_assert(math::not_equal(3, 2) == true32);
170 | 
171 |         test_assert(math::not_equal(true32, true32) == false32); // all four bool32 combinations
172 |         test_assert(math::not_equal(true32, false32) == true32);
173 |         test_assert(math::not_equal(false32, true32) == true32);
174 |         test_assert(math::not_equal(false32, false32) == false32);
175 |     }
176 | } // anonymous namespace
177 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
  1 | The Tuesday C++ Vector Math and SIMD Library
  2 | ============================================
  3 | The Tuesday C++ Vector Math and SIMD Library is a library of template classes
  4 | and math functions with a focus on physics and graphics applications. It
  5 | provides data types commonly used in games and other simulations such as
  6 | vectors, quaternions, and matrices, SIMD intrinsic wrapper classes completely
  7 | separate from (but compatible with) the other types, operator overloads for
  8 | combining and manipulating all these types, as well as some other common
  9 | mathematical functions. It was written to match the style of the C++ Standard
 10 | Library and uses modern C++ features (i.e., C++14) extensively.
 11 | 
 12 | Major Features
 13 | --------------
 14 | Tuesday provides the following unique features over other similar libraries such
 15 | as [GLM](https://github.com/g-truc/glm):
 16 | 
 17 | - The dimensions of vector and matrix types are template parameters,
 18 |   unlike GLM where, e.g., `tvec2`, `tvec3`, and `tvec4` are separate types. By
 19 |   making the dimensions template parameters, it's possible to write one template
 20 |   function that can operate on and/or produce vectors or matrices of multiple
 21 |   dimensions. For example, the transformation matrix generation functions
 22 |   (`translation_mat`, `rotation_mat`, etc.) can produce matrices of multiple
 23 |   sizes so long as they meet the minimum requirements of each transformation and
 24 |   are, at the largest, 4x4.
 25 | 
 26 | - It makes heavy use of `decltype` in return types. This makes it possible for
 27 |   composite types to behave much more like their component types when it comes
 28 |   to things like implicit type conversions. For example, `fvec3 + dvec3` results
 29 |   in a `dvec3` just as `float + double` results in a `double`.
 30 | 
 31 | - It uses `constexpr` whenever possible which, as it turns out, is often.
 32 | 
 33 | - SIMD types are completely separate from vector types. This may seem
 34 |   counter-intuitive, but SIMD vectors aren't very efficient when used as
 35 |   traditional 3D vectors. The fourth component of an SIMD vector would often go
 36 |   to waste, and functions where multiple components interact (such as the
 37 |   `length` function, `dot` product, or `cross` product) would be horribly
 38 |   inefficient with SIMD intrinsics. Instead, SIMD instructions should be used to
 39 |   perform the same logic on multiple vectors in parallel. Tuesday is designed
 40 |   for this use case. For example, `vec3<float32x4> v` could be thought of as 4
 41 |   parallel 3D vectors (4 x-values, followed by 4 y-values, and finally 4
 42 |   z-values). Given a second such `w`, `math::dot(v, w)` would then compute a
 43 |   single `float32x4` containing the dot products of the 4 pairs of parallel
 44 |   vectors without any inefficient component shuffling. See
 45 |   [this answer](http://stackoverflow.com/a/11620369/1195206) to a naive question
 46 |   I asked on Stack Overflow a few years back for some more rationale.
 47 | 
 48 | - The SIMD system supports a huge number of types. You can create 2, 4, 8, 16,
 49 |   32, and 64-component vectors of all the major arithmetic types (`float`,
 50 |   `double`, `int8_t`, `int16_t`, `int32_t`, `int64_t`, `uint8_t`, `uint16_t`,
 51 |   `uint32_t`, and `uint64_t`) along with sized boolean types (`bool8`, `bool16`,
 52 |   `bool32`, and `bool64`). If SIMD-intrinsic acceleration isn't available for a
 53 |   particular type, there's a standard C++-compliant fallback. If a vector has
 54 |   too many components for acceleration, but a smaller vector with the same
 55 |   component type can be accelerated, then the larger vector is simply the
 56 |   composite of two smaller vectors. For example, if `float32x4` is accelerated
 57 |   but `float32x8` isn't, then `float32x8` will at least be partially accelerated
 58 |   in that it's made of two `float32x4`s.
 59 | 
 60 | Requirements
 61 | ------------
 62 | Tuesday requires Visual Studio 2015 or a fully C++14 compliant compiler such as
 63 | GCC 5 or Clang 3.4.
 64 | 
 65 | Usage
 66 | -----
 67 | Tuesday is a header-only library. Simply make sure the `include` directory in
 68 | the root of this project is on your include path. For GCC and Clang, you might
 69 | have to provide the compiler option `-std=c++14` or higher as well.
 70 | 
 71 | Here's a small usage example:
 72 | ~~~{.cpp}
 73 | #include <tue/mat.hpp>
 74 | #include <tue/quat.hpp>
 75 | #include <tue/simd.hpp>
 76 | #include <tue/transform.hpp>
 77 | #include <tue/vec.hpp>
 78 | 
 79 | using namespace tue;
 80 | 
 81 | void UpdatePose(
 82 |     fvec3& translation,
 83 |     fquat& rotation,
 84 |     fmat3x4& matrix,
 85 |     const fvec3& linearVelocity,
 86 |     const fvec3& angularVelocity,
 87 |     float deltaTime)
 88 | {
 89 |     translation += linearVelocity * deltaTime;
 90 |     rotation *= transform::rotation_quat(angularVelocity * deltaTime);
 91 |     matrix = transform::rotation_mat<float, 4, 4>(rotation)
 92 |         * transform::translation_mat<float, 3, 4>(translation);
 93 | }
 94 | 
 95 | void SimdUpdatePoses(
 96 |     vec3<float32x4>& translations,
 97 |     quat<float32x4>& rotations,
 98 |     mat3x4<float32x4>& matrices,
 99 |     const vec3<float32x4>& linearVelocities,
100 |     const vec3<float32x4>& angularVelocities,
101 |     float deltaTime)
102 | {
103 |     const float32x4 deltaTimes(deltaTime);
104 |     translations += linearVelocities * deltaTimes;
105 |     rotations *= transform::rotation_quat(angularVelocities * deltaTimes);
106 |     matrices = transform::rotation_mat<float32x4, 4, 4>(rotations)
107 |         * transform::translation_mat<float32x4, 3, 4>(translations);
108 | }
109 | ~~~
110 | 
111 | License
112 | -------
113 | Copyright Jo Bates 2015.
114 | 
115 | Distributed under the Boost Software License, Version 1.0.
116 | 
117 | See accompanying file [LICENSE_1_0.txt](LICENSE_1_0.txt) or copy at
118 | http://www.boost.org/LICENSE_1_0.txt.
119 | 
120 | Bug Reporting
121 | -------------
122 | Please report any bugs, typos, or suggestions to
123 | https://github.com/Cincinesh/tue/issues.
124 | 


--------------------------------------------------------------------------------
/tests/sized_bool.tests.cpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #include <tue/sized_bool.hpp>
 10 | #include "tue.tests.hpp"
 11 | 
 12 | #include <type_traits>
 13 | 
 14 | namespace // unit tests for the sized boolean types bool8/16/32/64 and their operators
 15 | {
 16 |     using namespace tue; // lets the tests write bool8, true32, sized_bool_t, etc. unqualified
 17 | 
 18 |     TEST_CASE(bool8) // 1-byte type; true8 is expected to have all bits set, false8 to be zero
 19 |     {
 20 |         test_assert(sizeof(bool8) == 1);
 21 |         test_assert(sizeof(true8) == 1);
 22 |         test_assert(sizeof(false8) == 1);
 23 |         test_assert(true8 == static_cast<bool8>(~0LL)); // ~0LL is all ones before the cast to bool8
 24 |         test_assert(false8 == static_cast<bool8>(0LL));
 25 |     }
 26 | 
 27 |     TEST_CASE(bool16) // 2-byte type; same checks as bool8
 28 |     {
 29 |         test_assert(sizeof(bool16) == 2);
 30 |         test_assert(sizeof(true16) == 2);
 31 |         test_assert(sizeof(false16) == 2);
 32 |         test_assert(true16 == static_cast<bool16>(~0LL));
 33 |         test_assert(false16 == static_cast<bool16>(0LL));
 34 |     }
 35 | 
 36 |     TEST_CASE(bool32) // 4-byte type; same checks as bool8
 37 |     {
 38 |         test_assert(sizeof(bool32) == 4);
 39 |         test_assert(sizeof(true32) == 4);
 40 |         test_assert(sizeof(false32) == 4);
 41 |         test_assert(true32 == static_cast<bool32>(~0LL));
 42 |         test_assert(false32 == static_cast<bool32>(0LL));
 43 |     }
 44 | 
 45 |     TEST_CASE(bool64) // 8-byte type; same checks as bool8
 46 |     {
 47 |         test_assert(sizeof(bool64) == 8);
 48 |         test_assert(sizeof(true64) == 8);
 49 |         test_assert(sizeof(false64) == 8);
 50 |         test_assert(true64 == static_cast<bool64>(~0LL));
 51 |         test_assert(false64 == static_cast<bool64>(0LL));
 52 |     }
 53 | 
 54 |     TEST_CASE(implicit_cast_to_bool) // sized bool constants are used directly as conditions
 55 |     {
 56 |         if (!true8 || !true16 || !true32 || !true64 // fails if any trueN converts to false ...
 57 |             || false8 || false16 || false32 || false64) // ... or any falseN converts to true
 58 |         {
 59 |             test_fail(
 60 |                 "Sized bool types didn't implicitly cast to bool as expected");
 61 |         }
 62 |     }
 63 | 
 64 |     TEST_CASE(sized_bool_t) // sized_bool_t<N> is expected to name the sized bool that is N bytes wide
 65 |     {
 66 |         test_assert((std::is_same<sized_bool_t<1>, bool8>::value)); // extra parentheses keep the template comma inside one argument
 67 |         test_assert((std::is_same<sized_bool_t<2>, bool16>::value));
 68 |         test_assert((std::is_same<sized_bool_t<4>, bool32>::value));
 69 |         test_assert((std::is_same<sized_bool_t<8>, bool64>::value));
 70 |     }
 71 | 
 72 |     TEST_CASE(is_sized_bool) // trait is expected to be true for the four sized bools only
 73 |     {
 74 |         test_assert(is_sized_bool<bool>::value == false); // plain bool is not a sized bool
 75 |         test_assert(is_sized_bool<bool8>::value == true);
 76 |         test_assert(is_sized_bool<bool16>::value == true);
 77 |         test_assert(is_sized_bool<bool32>::value == true);
 78 |         test_assert(is_sized_bool<bool64>::value == true);
 79 |     }
 80 | 
 81 |     TEST_CASE(bitwise_not_operator) // ~ is expected to flip true32 <-> false32
 82 |     {
 83 |         constexpr auto nt = ~true32; // constexpr: the operator must be usable in a constant expression
 84 |         constexpr auto nf = ~false32;
 85 |         test_assert(nt == false32);
 86 |         test_assert(nf == true32);
 87 |     }
 88 | 
 89 |     TEST_CASE(bitwise_and_operator) // AND truth table over true32/false32, evaluated as constant expressions
 90 |     {
 91 |         constexpr auto tt =  true32 &  true32;
 92 |         constexpr auto tf =  true32 & false32;
 93 |         constexpr auto ft = false32 &  true32;
 94 |         constexpr auto ff = false32 & false32;
 95 |         test_assert(tt == true32);
 96 |         test_assert(tf == false32);
 97 |         test_assert(ft == false32);
 98 |         test_assert(ff == false32);
 99 |     }
100 | 
101 |     TEST_CASE(bitwise_or_operator) // OR truth table over true32/false32, evaluated as constant expressions
102 |     {
103 |         constexpr auto tt =  true32 |  true32;
104 |         constexpr auto tf =  true32 | false32;
105 |         constexpr auto ft = false32 |  true32;
106 |         constexpr auto ff = false32 | false32;
107 |         test_assert(tt == true32);
108 |         test_assert(tf == true32);
109 |         test_assert(ft == true32);
110 |         test_assert(ff == false32);
111 |     }
112 | 
113 |     TEST_CASE(bitwise_xor_operator) // XOR truth table over true32/false32, evaluated as constant expressions
114 |     {
115 |         constexpr auto tt =  true32 ^  true32;
116 |         constexpr auto tf =  true32 ^ false32;
117 |         constexpr auto ft = false32 ^  true32;
118 |         constexpr auto ff = false32 ^ false32;
119 |         test_assert(tt == false32);
120 |         test_assert(tf == true32);
121 |         test_assert(ft == true32);
122 |         test_assert(ff == false32);
123 |     }
124 | 
125 |     TEST_CASE(bitwise_and_assignment_operator) // &= must update the left operand and return a reference to it
126 |     {
127 |         auto tt = true32;
128 |         test_assert(&(tt &= true32) == &tt); // address comparison: the result refers to tt itself
129 |         test_assert(tt == true32);
130 | 
131 |         auto tf = true32;
132 |         test_assert(&(tf &= false32) == &tf);
133 |         test_assert(tf == false32);
134 | 
135 |         auto ft = false32;
136 |         test_assert(&(ft &= true32) == &ft);
137 |         test_assert(ft == false32);
138 | 
139 |         auto ff = false32;
140 |         test_assert(&(ff &= false32) == &ff);
141 |         test_assert(ff == false32);
142 |     }
143 | 
144 |     TEST_CASE(bitwise_or_assignment_operator) // |= must update the left operand and return a reference to it
145 |     {
146 |         auto tt = true32;
147 |         test_assert(&(tt |= true32) == &tt); // address comparison: the result refers to tt itself
148 |         test_assert(tt == true32);
149 | 
150 |         auto tf = true32;
151 |         test_assert(&(tf |= false32) == &tf);
152 |         test_assert(tf == true32);
153 | 
154 |         auto ft = false32;
155 |         test_assert(&(ft |= true32) == &ft);
156 |         test_assert(ft == true32);
157 | 
158 |         auto ff = false32;
159 |         test_assert(&(ff |= false32) == &ff);
160 |         test_assert(ff == false32);
161 |     }
162 | 
163 |     TEST_CASE(bitwise_xor_assignment_operator) // ^= must update the left operand and return a reference to it
164 |     {
165 |         auto tt = true32;
166 |         test_assert(&(tt ^= true32) == &tt); // address comparison: the result refers to tt itself
167 |         test_assert(tt == false32);
168 | 
169 |         auto tf = true32;
170 |         test_assert(&(tf ^= false32) == &tf);
171 |         test_assert(tf == true32);
172 | 
173 |         auto ft = false32;
174 |         test_assert(&(ft ^= true32) == &ft);
175 |         test_assert(ft == true32);
176 | 
177 |         auto ff = false32;
178 |         test_assert(&(ff ^= false32) == &ff);
179 |         test_assert(ff == false32);
180 |     }
181 | } // anonymous namespace
182 | 


--------------------------------------------------------------------------------
/docs/DoxygenLayout.xml:
--------------------------------------------------------------------------------
  1 | <doxygenlayout version="1.0">
  2 |   <!-- Generated by doxygen 1.8.10 -->
  3 |   <!-- Navigation index tabs for HTML output -->
  4 |   <navindex>
  5 |     <tab type="mainpage" visible="yes" title=""/>
  6 |     <tab type="pages" visible="yes" title="" intro=""/>
  7 |     <tab type="modules" visible="yes" title="" intro=""/>
  8 |     <tab type="namespaces" visible="yes" title="">
  9 |       <tab type="namespacelist" visible="yes" title="" intro=""/>
 10 |       <tab type="namespacemembers" visible="yes" title="" intro=""/>
 11 |     </tab>
 12 |     <tab type="classes" visible="yes" title="">
 13 |       <tab type="classlist" visible="yes" title="" intro=""/>
 14 |       <tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/> 
 15 |       <tab type="hierarchy" visible="yes" title="" intro=""/>
 16 |       <tab type="classmembers" visible="yes" title="" intro=""/>
 17 |     </tab>
 18 |     <tab type="files" visible="yes" title="">
 19 |       <tab type="filelist" visible="yes" title="" intro=""/>
 20 |       <tab type="globals" visible="yes" title="" intro=""/>
 21 |     </tab>
 22 |     <tab type="examples" visible="yes" title="" intro=""/>  
 23 |   </navindex>
 24 | 
 25 |   <!-- Layout definition for a class page -->
 26 |   <class>
 27 |     <briefdescription visible="yes"/>
 28 |     <includes visible="$SHOW_INCLUDE_FILES"/>
 29 |     <inheritancegraph visible="$CLASS_GRAPH"/>
 30 |     <collaborationgraph visible="$COLLABORATION_GRAPH"/>
 31 |     <memberdecl>
 32 |       <friends title=""/>
 33 |       <nestedclasses visible="yes" title=""/>
 34 |       <publictypes title=""/>
 35 |       <protectedtypes title=""/>
 36 |       <privatetypes title=""/>
 37 |       <publicstaticattributes title=""/>
 38 |       <protectedstaticattributes title=""/>
 39 |       <privatestaticattributes title=""/>
 40 |       <publicattributes title=""/>
 41 |       <protectedattributes title=""/>
 42 |       <privateattributes title=""/>
 43 |       <membergroups visible="yes"/>
 44 |       <publicmethods title=""/>
 45 |       <publicstaticmethods title=""/>
 46 |       <protectedmethods title=""/>
 47 |       <protectedstaticmethods title=""/>
 48 |       <privatemethods title=""/>
 49 |       <privatestaticmethods title=""/>
 50 |       <related title="" subtitle=""/>
 51 |     </memberdecl>
 52 |     <detaileddescription title=""/>
 53 |     <memberdef>
 54 |       <inlineclasses title=""/>
 55 |       <typedefs title=""/>
 56 |       <enums title=""/>
 57 |       <variables title=""/>
 58 |       <constructors title=""/>
 59 |       <functions title=""/>
 60 |       <related title=""/>
 61 |     </memberdef>
 62 |     <allmemberslink visible="yes"/>
 63 |     <usedfiles visible="$SHOW_USED_FILES"/>
 64 |     <authorsection visible="yes"/>
 65 |   </class>
 66 | 
 67 |   <!-- Layout definition for a namespace page -->
 68 |   <namespace>
 69 |     <briefdescription visible="yes"/>
 70 |     <memberdecl>
 71 |       <nestednamespaces visible="yes" title=""/>
 72 |       <constantgroups visible="yes" title=""/>
 73 |       <classes visible="yes" title=""/>
 74 |       <typedefs title=""/>
 75 |       <enums title=""/>
 76 |       <functions title=""/>
 77 |       <variables title=""/>
 78 |       <membergroups visible="yes"/>
 79 |     </memberdecl>
 80 |     <detaileddescription title=""/>
 81 |     <memberdef>
 82 |       <inlineclasses title=""/>
 83 |       <typedefs title=""/>
 84 |       <enums title=""/>
 85 |       <functions title=""/>
 86 |       <variables title=""/>
 87 |     </memberdef>
 88 |     <authorsection visible="yes"/>
 89 |   </namespace>
 90 | 
 91 |   <!-- Layout definition for a file page -->
 92 |   <file>
 93 |     <briefdescription visible="yes"/>
 94 |     <includes visible="$SHOW_INCLUDE_FILES"/>
 95 |     <includegraph visible="$INCLUDE_GRAPH"/>
 96 |     <includedbygraph visible="$INCLUDED_BY_GRAPH"/>
 97 |     <sourcelink visible="yes"/>
 98 |     <memberdecl>
 99 |       <classes visible="yes" title=""/>
100 |       <namespaces visible="yes" title=""/>
101 |       <constantgroups visible="yes" title=""/>
102 |       <defines title=""/>
103 |       <typedefs title=""/>
104 |       <enums title=""/>
105 |       <functions title=""/>
106 |       <variables title=""/>
107 |       <membergroups visible="yes"/>
108 |     </memberdecl>
109 |     <detaileddescription title=""/>
110 |     <memberdef>
111 |       <inlineclasses title=""/>
112 |       <defines title=""/>
113 |       <typedefs title=""/>
114 |       <enums title=""/>
115 |       <functions title=""/>
116 |       <variables title=""/>
117 |     </memberdef>
118 |     <authorsection/>
119 |   </file>
120 | 
121 |   <!-- Layout definition for a group page -->
122 |   <group>
123 |     <detaileddescription title=""/>
124 |     <groupgraph visible="$GROUP_GRAPHS"/>
125 |     <memberdecl>
126 |       <nestedgroups visible="yes" title=""/>
127 |       <dirs visible="yes" title=""/>
128 |       <files visible="yes" title=""/>
129 |       <namespaces visible="yes" title=""/>
130 |       <classes visible="yes" title=""/>
131 |       <defines title=""/>
132 |       <typedefs title=""/>
133 |       <enums title=""/>
134 |       <enumvalues title=""/>
135 |       <functions title=""/>
136 |       <variables title=""/>
137 |       <signals title=""/>
138 |       <publicslots title=""/>
139 |       <protectedslots title=""/>
140 |       <privateslots title=""/>
141 |       <events title=""/>
142 |       <properties title=""/>
143 |       <friends title=""/>
144 |       <membergroups visible="yes"/>
145 |     </memberdecl>
146 |     <memberdef>
147 |       <pagedocs/>
148 |       <inlineclasses title=""/>
149 |       <defines title=""/>
150 |       <typedefs title=""/>
151 |       <enums title=""/>
152 |       <enumvalues title=""/>
153 |       <functions title=""/>
154 |       <variables title=""/>
155 |       <signals title=""/>
156 |       <publicslots title=""/>
157 |       <protectedslots title=""/>
158 |       <privateslots title=""/>
159 |       <events title=""/>
160 |       <properties title=""/>
161 |       <friends title=""/>
162 |     </memberdef>
163 |     <authorsection visible="yes"/>
164 |   </group>
165 | 
166 |   <!-- Layout definition for a directory page -->
167 |   <directory>
168 |     <briefdescription visible="yes"/>
169 |     <directorygraph visible="yes"/>
170 |     <memberdecl>
171 |       <dirs visible="yes"/>
172 |       <files visible="yes"/>
173 |     </memberdecl>
174 |     <detaileddescription title=""/>
175 |   </directory>
176 | </doxygenlayout>
177 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | The Tuesday C++ Vector Math and SIMD Library
  2 | ============================================
  3 | The Tuesday C++ Vector Math and SIMD Library is a library of template classes
  4 | and math functions with a focus on physics and graphics applications. It
  5 | provides data types commonly used in games and other simulations such as
  6 | vectors, quaternions, and matrices, SIMD intrinsic wrapper classes completely
  7 | separate from (but compatible with) the other types, operator overloads for
  8 | combining and manipulating all these types, as well as some other common
  9 | mathematical functions. It was written to match the style of the C++ Standard
 10 | Library and uses modern C++ features (i.e., C++14) extensively.
 11 | 
 12 | Major Features
 13 | --------------
 14 | Tuesday provides the following unique features over other similar libraries such
 15 | as [GLM](https://github.com/g-truc/glm):
 16 | 
 17 | - The dimensions of vector and matrix types are template parameters,
 18 |   unlike GLM where, e.g., `tvec2`, `tvec3`, and `tvec4` are separate types. By
 19 |   making the dimensions template parameters, it's possible to write one template
 20 |   function that can operate on and/or produce vectors or matrices of multiple
 21 |   dimensions. For example, the transformation matrix generation functions
 22 |   (`translation_mat`, `rotation_mat`, etc.) can produce matrices of multiple
 23 |   sizes so long as they meet the minimum requirements of each transformation and
 24 |   are, at the largest, 4x4.
 25 | 
 26 | - It makes heavy use of `decltype` in return types. This makes it possible for
 27 |   composite types to behave much more like their component types when it comes
 28 |   to things like implicit type conversions. For example, `fvec3 + dvec3` results
 29 |   in a `dvec3` just as `float + double` results in a `double`.
 30 | 
 31 | - It uses `constexpr` whenever possible which, as it turns out, is often.
 32 | 
 33 | - SIMD types are completely separate from vector types. This may seem
 34 |   counter-intuitive, but SIMD vectors aren't very efficient when used as
 35 |   traditional 3D vectors. The fourth component of an SIMD vector would often go
 36 |   to waste, and functions where multiple components interact (such as the
 37 |   `length` function, `dot` product, or `cross` product) would be horribly
 38 |   inefficient with SIMD intrinsics. Instead, SIMD instructions should be used to
 39 |   perform the same logic on multiple vectors in parallel. Tuesday is designed
 40 |   for this use case. For example, `vec3<float32x4> v` could be thought of as 4
 41 |   parallel 3D vectors (4 x-values, followed by 4 y-values, and finally 4
 42 |   z-values). Something like `math::dot(v, v)` would then compute a single
 43 |   `float32x4` containing the dot products of those 4 parallel vectors without
 44 |   any inefficient component shuffling. See
 45 |   [this answer](http://stackoverflow.com/a/11620369/1195206) to a naive question
 46 |   I asked on Stack Overflow a few years back for some more rationale.
 47 | 
 48 | - The SIMD system supports a huge number of types. You can create 2, 4, 8, 16,
 49 |   32, and 64-component vectors of all the major arithmetic types (`float`,
 50 |   `double`, `int8_t`, `int16_t`, `int32_t`, `int64_t`, `uint8_t`, `uint16_t`,
 51 |   `uint32_t`, and `uint64_t`) along with sized boolean types (`bool8`, `bool16`,
 52 |   `bool32`, and `bool64`). If SIMD-intrinsic acceleration isn't available for a
 53 |   particular type, there's a standard C++-compliant fallback. If a vector has
 54 |   too many components for acceleration, but a smaller vector with the same
 55 |   component type can be accelerated, then the larger vector is simply the
 56 |   composite of two smaller vectors. For example, if `float32x4` is accelerated
 57 |   but `float32x8` isn't, then `float32x8` will at least be partially accelerated
 58 |   in that it's made of two `float32x4`s.
 59 | 
 60 | Requirements
 61 | ------------
 62 | Tuesday requires Visual Studio 2015 or a fully C++14 compliant compiler such as
 63 | GCC 5 or Clang 3.4.
 64 | 
 65 | Usage
 66 | -----
 67 | Tuesday is a header-only library. Simply make sure the `include` directory in
 68 | the root of this project is on your include path. For GCC and Clang, you might
 69 | have to provide the compiler option `-std=c++14` or higher as well.
 70 | 
 71 | Here's a small usage example:
 72 | ~~~{.cpp}
 73 | #include <tue/mat.hpp>
 74 | #include <tue/quat.hpp>
 75 | #include <tue/simd.hpp>
 76 | #include <tue/transform.hpp>
 77 | #include <tue/vec.hpp>
 78 | 
 79 | using namespace tue;
 80 | 
 81 | void UpdatePose(
 82 |     fvec3& translation,
 83 |     fquat& rotation,
 84 |     fmat3x4& matrix,
 85 |     const fvec3& linearVelocity,
 86 |     const fvec3& angularVelocity,
 87 |     float deltaTime)
 88 | {
 89 |     translation += linearVelocity * deltaTime;
 90 |     rotation *= transform::rotation_quat(angularVelocity * deltaTime);
 91 |     matrix = transform::rotation_mat<float, 4, 4>(rotation)
 92 |         * transform::translation_mat<float, 3, 4>(translation);
 93 | }
 94 | 
 95 | void SimdUpdatePoses(
 96 |     vec3<float32x4>& translations,
 97 |     quat<float32x4>& rotations,
 98 |     mat3x4<float32x4>& matrices,
 99 |     const vec3<float32x4>& linearVelocities,
100 |     const vec3<float32x4>& angularVelocities,
101 |     float deltaTime)
102 | {
103 |     const float32x4 deltaTimes(deltaTime);
104 |     translations += linearVelocities * deltaTimes;
105 |     rotations *= transform::rotation_quat(angularVelocities * deltaTimes);
106 |     matrices = transform::rotation_mat<float32x4, 4, 4>(rotations)
107 |         * transform::translation_mat<float32x4, 3, 4>(translations);
108 | }
109 | ~~~
110 | 
111 | Documentation
112 | -------------
113 | Tuesday documentation can be generated with
114 | [Doxygen](http://www.stack.nl/~dimitri/doxygen/) using the `Doxyfile` at the
115 | root of this project. A copy is also published at:
116 | 
117 | http://cincinesh.github.io/tue/master/docs/index.html
118 | 
119 | Testing
120 | -------
121 | Here are some tips for running the unit tests:
122 | - This repository uses git submodules. After cloning, make sure to use
123 |   `git submodule init` and `git submodule update`.
124 | - This project uses a fairly simple [CMake](http://www.cmake.org/)
125 |   configuration. Use CMake to generate IDE project files or build scripts and
126 |   simply build the `check` target to run the unit tests.
127 | 
128 | License
129 | -------
130 | Copyright Jo Bates 2015.
131 | 
132 | Distributed under the Boost Software License, Version 1.0.
133 | 
134 | See accompanying file [LICENSE_1_0.txt](LICENSE_1_0.txt) or copy at
135 | http://www.boost.org/LICENSE_1_0.txt.
136 | 
137 | Bug Reporting
138 | -------------
139 | Please report any bugs, typos, or suggestions to
140 | https://github.com/Cincinesh/tue/issues.
141 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/bool64x2.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <type_traits>
 14 | 
 15 | #include "../../../simd.hpp"
 16 | #include "../../../sized_bool.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     template<> // SSE2-backed specialization of simd<bool64, 2>, stored in a single __m128i.
 21 |     class alignas(tue::detail_::alignof_simd<bool64, 2>())
 22 |     simd<bool64, 2>
 23 |     {
 24 |         __m128i underlying_;
 25 |         // Conversion helper: casts each of the two components of s to bool64 one at a time.
 26 |     private:
 27 |         template<typename U>
 28 |         static bool64x2 explicit_cast(const simd<U, 2>& s) noexcept
 29 |         {
 30 |             return {
 31 |                 bool64(s.data()[0]),
 32 |                 bool64(s.data()[1]),
 33 |             };
 34 |         }
 35 | 
 36 |     public:
 37 |         using component_type = bool64;
 38 |         // Number of components held by this vector type.
 39 |         static constexpr int component_count = 2;
 40 |         // Compile-time flag; this intrinsic-backed specialization sets it to true.
 41 |         static constexpr bool is_accelerated = true;
 42 |         // Defaulted; performs no explicit initialization of underlying_.
 43 |         simd() noexcept = default;
 44 |         // Broadcasts x into both 64-bit lanes.
 45 |         explicit simd(bool64 x) noexcept
 46 |         :
 47 |             underlying_(_mm_set1_epi64x(x))
 48 |         {
 49 |         }
 50 |         // _mm_set_epi64x takes the high lane first, so (y, x) puts x in the low lane and y in the high lane.
 51 |         template<int M = 2, typename = std::enable_if_t<M == 2>>
 52 |         inline simd(
 53 |             bool64 x, bool64 y) noexcept
 54 |         :
 55 |             underlying_(_mm_set_epi64x(y, x))
 56 |         {
 57 |         }
 58 |         // The 4-, 8- and 16-argument constructors are declared only; their enable_if condition is false for the default M = 2.
 59 |         template<int M = 2, typename = std::enable_if_t<M == 4>>
 60 |         inline simd(
 61 |             bool64 x, bool64 y, bool64 z, bool64 w) noexcept;
 62 | 
 63 |         template<int M = 2, typename = std::enable_if_t<M == 8>>
 64 |         inline simd(
 65 |             bool64 s0, bool64 s1, bool64 s2, bool64 s3,
 66 |             bool64 s4, bool64 s5, bool64 s6, bool64 s7) noexcept;
 67 | 
 68 |         template<int M = 2, typename = std::enable_if_t<M == 16>>
 69 |         inline simd(
 70 |             bool64  s0, bool64  s1, bool64  s2, bool64  s3,
 71 |             bool64  s4, bool64  s5, bool64  s6, bool64  s7,
 72 |             bool64  s8, bool64  s9, bool64 s10, bool64 s11,
 73 |             bool64 s12, bool64 s13, bool64 s14, bool64 s15) noexcept;
 74 |         // Explicit conversion from any other 2-component simd, delegated to explicit_cast.
 75 |         template<typename U>
 76 |         explicit simd(const simd<U, 2>& s) noexcept
 77 |         {
 78 |             *this = explicit_cast(s);
 79 |         }
 80 |         // Implicit wrap of a raw __m128d; the cast reinterprets the bits without converting values.
 81 |         simd(__m128d underlying) noexcept
 82 |         :
 83 |             underlying_(_mm_castpd_si128(underlying))
 84 |         {
 85 |         }
 86 |         // Implicit view of the same bits as a __m128d.
 87 |         operator __m128d() const noexcept
 88 |         {
 89 |             return _mm_castsi128_pd(underlying_);
 90 |         }
 91 |         // Implicit wrap of a raw __m128i, stored as-is.
 92 |         simd(__m128i underlying) noexcept
 93 |         :
 94 |             underlying_(underlying)
 95 |         {
 96 |         }
 97 |         // Implicit access to the underlying __m128i.
 98 |         operator __m128i() const noexcept
 99 |         {
100 |             return underlying_;
101 |         }
102 |         // Returns a vector with every bit cleared.
103 |         static bool64x2 zero() noexcept
104 |         {
105 |             return _mm_setzero_si128();
106 |         }
107 |         // Aligned load: _mm_load_si128 requires data to be 16-byte aligned.
108 |         static bool64x2 load(const bool64* data) noexcept
109 |         {
110 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
111 |         }
112 |         // Unaligned load: no alignment requirement on data.
113 |         static bool64x2 loadu(const bool64* data) noexcept
114 |         {
115 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
116 |         }
117 |         // Aligned store: _mm_store_si128 requires data to be 16-byte aligned.
118 |         void store(bool64* data) const noexcept
119 |         {
120 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
121 |         }
122 |         // Unaligned store: no alignment requirement on data.
123 |         void storeu(bool64* data) const noexcept
124 |         {
125 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
126 |         }
127 |         // Read-only pointer to the components, obtained by viewing the register's storage as bool64.
128 |         const bool64* data() const noexcept
129 |         {
130 |             return reinterpret_cast<const bool64*>(&underlying_);
131 |         }
132 |         // Mutable pointer to the components, obtained the same way.
133 |         bool64* data() noexcept
134 |         {
135 |             return reinterpret_cast<bool64*>(&underlying_);
136 |         }
137 |     };
138 | }
139 | 
140 | namespace tue
141 | {
142 |     namespace detail_
143 |     {
144 |         inline bool64x2 bitwise_not_operator_s(
145 |             const bool64x2& s) noexcept
146 |         {   // XORs s with a vector broadcast from true64 (presumably all bits set; confirm in sized_bool.hpp).
147 |             return _mm_xor_si128(s, bool64x2(true64));
148 |         }
149 | 
150 |         inline bool64x2 bitwise_and_operator_ss(
151 |             const bool64x2& lhs, const bool64x2& rhs) noexcept
152 |         {   // Bitwise AND of lhs and rhs over the whole 128-bit register.
153 |             return _mm_and_si128(lhs, rhs);
154 |         }
155 | 
156 |         inline bool64x2 bitwise_or_operator_ss(
157 |             const bool64x2& lhs, const bool64x2& rhs) noexcept
158 |         {   // Bitwise OR of lhs and rhs over the whole 128-bit register.
159 |             return _mm_or_si128(lhs, rhs);
160 |         }
161 | 
162 |         inline bool64x2 bitwise_xor_operator_ss(
163 |             const bool64x2& lhs, const bool64x2& rhs) noexcept
164 |         {   // Bitwise XOR of lhs and rhs over the whole 128-bit register.
165 |             return _mm_xor_si128(lhs, rhs);
166 |         }
167 | 
168 |         inline bool64x2& bitwise_and_assignment_operator_ss(
169 |             bool64x2& lhs, const bool64x2& rhs) noexcept
170 |         {   // ANDs rhs into lhs in place and returns a reference to lhs.
171 |             return lhs = _mm_and_si128(lhs, rhs);
172 |         }
173 | 
174 |         inline bool64x2& bitwise_or_assignment_operator_ss(
175 |             bool64x2& lhs, const bool64x2& rhs) noexcept
176 |         {   // ORs rhs into lhs in place and returns a reference to lhs.
177 |             return lhs = _mm_or_si128(lhs, rhs);
178 |         }
179 | 
180 |         inline bool64x2& bitwise_xor_assignment_operator_ss(
181 |             bool64x2& lhs, const bool64x2& rhs) noexcept
182 |         {   // XORs rhs into lhs in place and returns a reference to lhs.
183 |             return lhs = _mm_xor_si128(lhs, rhs);
184 |         }
185 | 
186 |         inline bool equality_operator_ss( // Whole-vector equality; lhs is read-only, so it is taken by const reference.
187 |             const bool64x2& lhs, const bool64x2& rhs) noexcept
188 |         {   // cmpeq_epi8 marks each equal byte, movemask packs one bit per byte; 0xFFFF means all 16 bytes match.
189 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
190 |         }
191 | 
192 |         inline bool inequality_operator_ss( // Whole-vector inequality; lhs is read-only, so it is taken by const reference.
193 |             const bool64x2& lhs, const bool64x2& rhs) noexcept
194 |         {   // True when at least one of the 16 bytes differs (byte-equality mask is not all ones).
195 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF;
196 |         }
197 | 
198 |         inline bool64x2 mask_ss(
199 |             const bool64x2& conditions,
200 |             const bool64x2& values) noexcept
201 |         {   // Keeps the bits of values where conditions has bits set and clears the rest.
202 |             return _mm_and_si128(conditions, values);
203 |         }
204 | 
205 |         inline bool64x2 select_sss(
206 |             const bool64x2& conditions,
207 |             const bool64x2& values,
208 |             const bool64x2& otherwise) noexcept
209 |         {   // Bitwise blend: bits of values where conditions is set, bits of otherwise elsewhere.
210 |             const __m128i chosen = _mm_and_si128(conditions, values);
211 |             const __m128i fallback = _mm_andnot_si128(conditions, otherwise);
212 |             return _mm_or_si128(chosen, fallback);
213 |         }
214 | 
215 |         inline bool64x2 equal_ss(
216 |             const bool64x2& lhs, const bool64x2& rhs) noexcept
217 |         {   // Compares 32-bit halves, then ANDs each half's result with its partner's so a 64-bit lane is set only if both halves match.
218 |             const __m128i eq32 = _mm_cmpeq_epi32(lhs, rhs);
219 |             return _mm_and_si128(
220 |                 _mm_shuffle_epi32(eq32, _MM_SHUFFLE(3, 3, 1, 1)),
221 |                 _mm_shuffle_epi32(eq32, _MM_SHUFFLE(2, 2, 0, 0)));
222 |         }
223 | 
224 |         inline bool64x2 not_equal_ss(
225 |             const bool64x2& lhs, const bool64x2& rhs) noexcept
226 |         {   // Per-component inequality: the equal_ss mask XOR-ed with a true64 broadcast (presumably all bits set).
227 |             return _mm_xor_si128(equal_ss(lhs, rhs), bool64x2(true64));
228 |         }
229 |     }
230 | }
231 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse/bool32x4.sse.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <xmmintrin.h>
 12 | 
 13 | #include <type_traits>
 14 | 
 15 | #include "../../../simd.hpp"
 16 | #include "../../../sized_bool.hpp"
 17 | 
 18 | #ifdef TUE_SSE2
 19 | #include <emmintrin.h>
 20 | #endif
 21 | 
 22 | namespace tue
 23 | {
 24 |     template<> // SSE-backed specialization of simd<bool32, 4>, stored in a single __m128.
 25 |     class alignas(tue::detail_::alignof_simd<bool32, 4>())
 26 |     simd<bool32, 4>
 27 |     {
 28 |         __m128 underlying_;
 29 |         // Conversion helper: casts each of the four components of s to bool32 one at a time.
 30 |     private:
 31 |         template<typename U>
 32 |         static bool32x4 explicit_cast(const simd<U, 4>& s) noexcept
 33 |         {
 34 |             return {
 35 |                 bool32(s.data()[0]),
 36 |                 bool32(s.data()[1]),
 37 |                 bool32(s.data()[2]),
 38 |                 bool32(s.data()[3]),
 39 |             };
 40 |         }
 41 | 
 42 |     public:
 43 |         using component_type = bool32;
 44 |         // Number of components held by this vector type.
 45 |         static constexpr int component_count = 4;
 46 |         // Compile-time flag; this intrinsic-backed specialization sets it to true.
 47 |         static constexpr bool is_accelerated = true;
 48 |         // Defaulted; performs no explicit initialization of underlying_.
 49 |         simd() noexcept = default;
 50 |         // Broadcasts x into all four lanes; binary_float presumably reinterprets the bool32 bits as a float (confirm in simd.hpp).
 51 |         explicit simd(bool32 x) noexcept
 52 |         :
 53 |             underlying_(_mm_set_ps1(tue::detail_::binary_float(x)))
 54 |         {
 55 |         }
 56 |         // The 2-, 8- and 16-argument constructors are declared only; their enable_if condition is false for the default M = 4.
 57 |         template<int M = 4, typename = std::enable_if_t<M == 2>>
 58 |         inline simd(
 59 |             bool32 x, bool32 y) noexcept;
 60 |         // _mm_setr_ps takes its arguments lowest lane first, so x lands in lane 0 and w in lane 3.
 61 |         template<int M = 4, typename = std::enable_if_t<M == 4>>
 62 |         inline simd(
 63 |             bool32 x, bool32 y, bool32 z, bool32 w) noexcept
 64 |         :
 65 |             underlying_(_mm_setr_ps(
 66 |                 tue::detail_::binary_float(x),
 67 |                 tue::detail_::binary_float(y),
 68 |                 tue::detail_::binary_float(z),
 69 |                 tue::detail_::binary_float(w)))
 70 |         {
 71 |         }
 72 | 
 73 |         template<int M = 4, typename = std::enable_if_t<M == 8>>
 74 |         inline simd(
 75 |             bool32 s0, bool32 s1, bool32 s2, bool32 s3,
 76 |             bool32 s4, bool32 s5, bool32 s6, bool32 s7) noexcept;
 77 | 
 78 |         template<int M = 4, typename = std::enable_if_t<M == 16>>
 79 |         inline simd(
 80 |             bool32  s0, bool32  s1, bool32  s2, bool32  s3,
 81 |             bool32  s4, bool32  s5, bool32  s6, bool32  s7,
 82 |             bool32  s8, bool32  s9, bool32 s10, bool32 s11,
 83 |             bool32 s12, bool32 s13, bool32 s14, bool32 s15) noexcept;
 84 |         // Explicit conversion from any other 4-component simd, delegated to explicit_cast.
 85 |         template<typename U>
 86 |         explicit simd(const simd<U, 4>& s) noexcept
 87 |         {
 88 |             *this = explicit_cast(s);
 89 |         }
 90 |         // Implicit wrap of a raw __m128, stored as-is.
 91 |         simd(__m128 underlying) noexcept
 92 |         :
 93 |             underlying_(underlying)
 94 |         {
 95 |         }
 96 |         // Implicit access to the underlying __m128.
 97 |         operator __m128() const noexcept
 98 |         {
 99 |             return underlying_;
100 |         }
101 |         // The __m128i conversions below exist only when TUE_SSE2 is defined; the casts reinterpret bits without converting values.
102 | #ifdef TUE_SSE2
103 |         simd(__m128i underlying) noexcept
104 |         :
105 |             underlying_(_mm_castsi128_ps(underlying))
106 |         {
107 |         }
108 | 
109 |         operator __m128i() const noexcept
110 |         {
111 |             return _mm_castps_si128(underlying_);
112 |         }
113 | #endif
114 |         // Returns a vector with every bit cleared.
115 |         static bool32x4 zero() noexcept
116 |         {
117 |             return _mm_setzero_ps();
118 |         }
119 |         // Aligned load: _mm_load_ps requires data to be 16-byte aligned.
120 |         static bool32x4 load(const bool32* data) noexcept
121 |         {
122 |             return _mm_load_ps(reinterpret_cast<const float*>(data));
123 |         }
124 |         // Unaligned load: no alignment requirement on data.
125 |         static bool32x4 loadu(const bool32* data) noexcept
126 |         {
127 |             return _mm_loadu_ps(reinterpret_cast<const float*>(data));
128 |         }
129 |         // Aligned store: _mm_store_ps requires data to be 16-byte aligned.
130 |         void store(bool32* data) const noexcept
131 |         {
132 |             _mm_store_ps(reinterpret_cast<float*>(data), underlying_);
133 |         }
134 |         // Unaligned store: no alignment requirement on data.
135 |         void storeu(bool32* data) const noexcept
136 |         {
137 |             _mm_storeu_ps(reinterpret_cast<float*>(data), underlying_);
138 |         }
139 |         // Read-only pointer to the components, obtained by viewing the register's storage as bool32.
140 |         const bool32* data() const noexcept
141 |         {
142 |             return reinterpret_cast<const bool32*>(&underlying_);
143 |         }
144 |         // Mutable pointer to the components, obtained the same way.
145 |         bool32* data() noexcept
146 |         {
147 |             return reinterpret_cast<bool32*>(&underlying_);
148 |         }
149 |     };
150 | }
151 | 
152 | namespace tue
153 | {
154 |     namespace detail_
155 |     {
156 |         inline bool32x4 bitwise_not_operator_s(
157 |             const bool32x4& s) noexcept
158 |         {   // XORs s with a vector broadcast from true32 (presumably all bits set; confirm in sized_bool.hpp).
159 |             return _mm_xor_ps(s, bool32x4(true32));
160 |         }
161 | 
162 |         inline bool32x4 bitwise_and_operator_ss(
163 |             const bool32x4& lhs, const bool32x4& rhs) noexcept
164 |         {   // Bitwise AND of lhs and rhs over the whole 128-bit register.
165 |             return _mm_and_ps(lhs, rhs);
166 |         }
167 | 
168 |         inline bool32x4 bitwise_or_operator_ss(
169 |             const bool32x4& lhs, const bool32x4& rhs) noexcept
170 |         {   // Bitwise OR of lhs and rhs over the whole 128-bit register.
171 |             return _mm_or_ps(lhs, rhs);
172 |         }
173 | 
174 |         inline bool32x4 bitwise_xor_operator_ss(
175 |             const bool32x4& lhs, const bool32x4& rhs) noexcept
176 |         {   // Bitwise XOR of lhs and rhs over the whole 128-bit register.
177 |             return _mm_xor_ps(lhs, rhs);
178 |         }
179 | 
180 |         inline bool32x4& bitwise_and_assignment_operator_ss(
181 |             bool32x4& lhs, const bool32x4& rhs) noexcept
182 |         {   // ANDs rhs into lhs in place and returns a reference to lhs.
183 |             return lhs = _mm_and_ps(lhs, rhs);
184 |         }
185 | 
186 |         inline bool32x4& bitwise_or_assignment_operator_ss(
187 |             bool32x4& lhs, const bool32x4& rhs) noexcept
188 |         {   // ORs rhs into lhs in place and returns a reference to lhs.
189 |             return lhs = _mm_or_ps(lhs, rhs);
190 |         }
191 | 
192 |         inline bool32x4& bitwise_xor_assignment_operator_ss(
193 |             bool32x4& lhs, const bool32x4& rhs) noexcept
194 |         {   // XORs rhs into lhs in place and returns a reference to lhs.
195 |             return lhs = _mm_xor_ps(lhs, rhs);
196 |         }
197 | 
198 | #ifdef TUE_SSE2
199 |         inline bool equality_operator_ss( // Whole-vector equality (SSE2 only); lhs is read-only, so it is taken by const reference.
200 |             const bool32x4& lhs, const bool32x4& rhs) noexcept
201 |         {   // cmpeq_epi8 marks each equal byte, movemask packs one bit per byte; 0xFFFF means all 16 bytes match.
202 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
203 |         }
204 | 
205 |         inline bool inequality_operator_ss( // Whole-vector inequality (SSE2 only); lhs is read-only, so it is taken by const reference.
206 |             const bool32x4& lhs, const bool32x4& rhs) noexcept
207 |         {   // True when at least one of the 16 bytes differs (byte-equality mask is not all ones).
208 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF;
209 |         }
210 | #endif
211 | 
212 |         inline bool32x4 mask_ss(
213 |             const bool32x4& conditions,
214 |             const bool32x4& values) noexcept
215 |         {   // Keeps the bits of values where conditions has bits set and clears the rest.
216 |             return _mm_and_ps(conditions, values);
217 |         }
218 | 
219 |         inline bool32x4 select_sss(
220 |             const bool32x4& conditions,
221 |             const bool32x4& values,
222 |             const bool32x4& otherwise) noexcept
223 |         {   // Bitwise blend: bits of values where conditions is set, bits of otherwise elsewhere.
224 |             const __m128 chosen = _mm_and_ps(conditions, values);
225 |             const __m128 fallback = _mm_andnot_ps(conditions, otherwise);
226 |             return _mm_or_ps(chosen, fallback);
227 |         }
228 | 
229 | #ifdef TUE_SSE2
230 |         inline bool32x4 equal_ss(
231 |             const bool32x4& lhs, const bool32x4& rhs) noexcept
232 |         {   // Per-component equality: each 32-bit lane becomes all ones when equal, zero otherwise.
233 |             return _mm_cmpeq_epi32(lhs, rhs);
234 |         }
235 | 
236 |         inline bool32x4 not_equal_ss(
237 |             const bool32x4& lhs, const bool32x4& rhs) noexcept
238 |         {   // Per-component inequality: the 32-bit equality mask XOR-ed with a true32 broadcast (presumably all bits set).
239 |             return _mm_xor_si128(_mm_cmpeq_epi32(lhs, rhs), bool32x4(true32));
240 |         }
241 | #endif
242 |     }
243 | }
244 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/bool16x8.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <type_traits>
 14 | 
 15 | #include "../../../simd.hpp"
 16 | #include "../../../sized_bool.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     template<> // SSE2-backed specialization of simd<bool16, 8>, stored in a single __m128i.
 21 |     class alignas(tue::detail_::alignof_simd<bool16, 8>())
 22 |     simd<bool16, 8>
 23 |     {
 24 |         __m128i underlying_;
 25 |         // Generic conversion helper: casts each of the eight components of s to bool16 one at a time.
 26 |     private:
 27 |         template<typename U>
 28 |         static bool16x8 explicit_cast(const simd<U, 8>& s) noexcept
 29 |         {
 30 |             return {
 31 |                 bool16(s.data()[0]),
 32 |                 bool16(s.data()[1]),
 33 |                 bool16(s.data()[2]),
 34 |                 bool16(s.data()[3]),
 35 |                 bool16(s.data()[4]),
 36 |                 bool16(s.data()[5]),
 37 |                 bool16(s.data()[6]),
 38 |                 bool16(s.data()[7]),
 39 |             };
 40 |         }
 41 |         // Non-template overloads for int16x8 and uint16x8; declared here, defined after those types are included.
 42 |         inline static bool16x8 explicit_cast(const int16x8& s) noexcept;
 43 | 
 44 |         inline static bool16x8 explicit_cast(const uint16x8& s) noexcept;
 45 | 
 46 |     public:
 47 |         using component_type = bool16;
 48 |         // Number of components held by this vector type.
 49 |         static constexpr int component_count = 8;
 50 |         // Compile-time flag; this intrinsic-backed specialization sets it to true.
 51 |         static constexpr bool is_accelerated = true;
 52 |         // Defaulted; performs no explicit initialization of underlying_.
 53 |         simd() noexcept = default;
 54 |         // Broadcasts x into all eight 16-bit lanes.
 55 |         explicit simd(bool16 x) noexcept
 56 |         :
 57 |             underlying_(_mm_set1_epi16(x))
 58 |         {
 59 |         }
 60 |         // The 2-, 4- and 16-argument constructors are declared only; their enable_if condition is false for the default M = 8.
 61 |         template<int M = 8, typename = std::enable_if_t<M == 2>>
 62 |         inline simd(
 63 |             bool16 x, bool16 y) noexcept;
 64 | 
 65 |         template<int M = 8, typename = std::enable_if_t<M == 4>>
 66 |         inline simd(
 67 |             bool16 x, bool16 y, bool16 z, bool16 w) noexcept;
 68 |         // _mm_setr_epi16 takes its arguments lowest lane first, so s0 lands in lane 0 and s7 in lane 7.
 69 |         template<int M = 8, typename = std::enable_if_t<M == 8>>
 70 |         inline simd(
 71 |             bool16 s0, bool16 s1, bool16 s2, bool16 s3,
 72 |             bool16 s4, bool16 s5, bool16 s6, bool16 s7) noexcept
 73 |         :
 74 |             underlying_(_mm_setr_epi16(
 75 |                 s0, s1, s2, s3, s4, s5, s6, s7))
 76 |         {
 77 |         }
 78 | 
 79 |         template<int M = 8, typename = std::enable_if_t<M == 16>>
 80 |         inline simd(
 81 |             bool16  s0, bool16  s1, bool16  s2, bool16  s3,
 82 |             bool16  s4, bool16  s5, bool16  s6, bool16  s7,
 83 |             bool16  s8, bool16  s9, bool16 s10, bool16 s11,
 84 |             bool16 s12, bool16 s13, bool16 s14, bool16 s15) noexcept;
 85 |         // Explicit conversion from any other 8-component simd, delegated to the matching explicit_cast overload.
 86 |         template<typename U>
 87 |         explicit simd(const simd<U, 8>& s) noexcept
 88 |         {
 89 |             *this = explicit_cast(s);
 90 |         }
 91 |         // Implicit wrap of a raw __m128i, stored as-is.
 92 |         simd(__m128i underlying) noexcept
 93 |         :
 94 |             underlying_(underlying)
 95 |         {
 96 |         }
 97 |         // Implicit access to the underlying __m128i.
 98 |         operator __m128i() const noexcept
 99 |         {
100 |             return underlying_;
101 |         }
102 |         // Returns a vector with every bit cleared.
103 |         static bool16x8 zero() noexcept
104 |         {
105 |             return _mm_setzero_si128();
106 |         }
107 |         // Aligned load: _mm_load_si128 requires data to be 16-byte aligned.
108 |         static bool16x8 load(const bool16* data) noexcept
109 |         {
110 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
111 |         }
112 |         // Unaligned load: no alignment requirement on data.
113 |         static bool16x8 loadu(const bool16* data) noexcept
114 |         {
115 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
116 |         }
117 |         // Aligned store: _mm_store_si128 requires data to be 16-byte aligned.
118 |         void store(bool16* data) const noexcept
119 |         {
120 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
121 |         }
122 |         // Unaligned store: no alignment requirement on data.
123 |         void storeu(bool16* data) const noexcept
124 |         {
125 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
126 |         }
127 |         // Read-only pointer to the components, obtained by viewing the register's storage as bool16.
128 |         const bool16* data() const noexcept
129 |         {
130 |             return reinterpret_cast<const bool16*>(&underlying_);
131 |         }
132 |         // Mutable pointer to the components, obtained the same way.
133 |         bool16* data() noexcept
134 |         {
135 |             return reinterpret_cast<bool16*>(&underlying_);
136 |         }
137 |     };
138 | }
139 | 
140 | #include "int16x8.sse2.hpp"
141 | #include "uint16x8.sse2.hpp"
142 | 
143 | namespace tue
144 | {
145 |     inline bool16x8 bool16x8::explicit_cast(const int16x8& s) noexcept
146 |     {   // Converts s to __m128i and wraps it with no per-lane work. NOTE(review): nonzero lanes are presumably not normalized to true16; confirm intended.
147 |         return __m128i(s);
148 |     }
149 | 
150 |     inline bool16x8 bool16x8::explicit_cast(const uint16x8& s) noexcept
151 |     {   // Converts s to __m128i and wraps it with no per-lane work. NOTE(review): nonzero lanes are presumably not normalized to true16; confirm intended.
152 |         return __m128i(s);
153 |     }
154 | 
155 |     namespace detail_
156 |     {
157 |         inline bool16x8 bitwise_not_operator_s(
158 |             const bool16x8& s) noexcept
159 |         {   // XORs s with a vector broadcast from true16 (presumably all bits set; confirm in sized_bool.hpp).
160 |             return _mm_xor_si128(s, bool16x8(true16));
161 |         }
162 | 
163 |         inline bool16x8 bitwise_and_operator_ss(
164 |             const bool16x8& lhs, const bool16x8& rhs) noexcept
165 |         {   // Bitwise AND of lhs and rhs over the whole 128-bit register.
166 |             return _mm_and_si128(lhs, rhs);
167 |         }
168 | 
169 |         inline bool16x8 bitwise_or_operator_ss(
170 |             const bool16x8& lhs, const bool16x8& rhs) noexcept
171 |         {   // Bitwise OR of lhs and rhs over the whole 128-bit register.
172 |             return _mm_or_si128(lhs, rhs);
173 |         }
174 | 
175 |         inline bool16x8 bitwise_xor_operator_ss(
176 |             const bool16x8& lhs, const bool16x8& rhs) noexcept
177 |         {   // Bitwise XOR of lhs and rhs over the whole 128-bit register.
178 |             return _mm_xor_si128(lhs, rhs);
179 |         }
180 | 
181 |         inline bool16x8& bitwise_and_assignment_operator_ss(
182 |             bool16x8& lhs, const bool16x8& rhs) noexcept
183 |         {   // ANDs rhs into lhs in place and returns a reference to lhs.
184 |             return lhs = _mm_and_si128(lhs, rhs);
185 |         }
186 | 
187 |         inline bool16x8& bitwise_or_assignment_operator_ss(
188 |             bool16x8& lhs, const bool16x8& rhs) noexcept
189 |         {   // ORs rhs into lhs in place and returns a reference to lhs.
190 |             return lhs = _mm_or_si128(lhs, rhs);
191 |         }
192 | 
193 |         inline bool16x8& bitwise_xor_assignment_operator_ss(
194 |             bool16x8& lhs, const bool16x8& rhs) noexcept
195 |         {   // XORs rhs into lhs in place and returns a reference to lhs.
196 |             return lhs = _mm_xor_si128(lhs, rhs);
197 |         }
198 | 
199 |         inline bool equality_operator_ss(
200 |             bool16x8& lhs, const bool16x8& rhs) noexcept
201 |         {
202 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
203 |         }
204 | 
205 |         inline bool inequality_operator_ss(
206 |             bool16x8& lhs, const bool16x8& rhs) noexcept
207 |         {
208 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF;
209 |         }
210 | 
211 |         inline bool16x8 mask_ss(
212 |             const bool16x8& conditions,
213 |             const bool16x8& values) noexcept
214 |         {
215 |             return _mm_and_si128(conditions, values);
216 |         }
217 | 
218 |         inline bool16x8 select_sss(
219 |             const bool16x8& conditions,
220 |             const bool16x8& values,
221 |             const bool16x8& otherwise) noexcept
222 |         {
223 |             return _mm_or_si128(
224 |                 _mm_and_si128(conditions, values),
225 |                 _mm_andnot_si128(conditions, otherwise));
226 |         }
227 | 
228 |         inline bool16x8 equal_ss(
229 |             const bool16x8& lhs, const bool16x8& rhs) noexcept
230 |         {
231 |             return _mm_cmpeq_epi16(lhs, rhs);
232 |         }
233 | 
234 |         inline bool16x8 not_equal_ss(
235 |             const bool16x8& lhs, const bool16x8& rhs) noexcept
236 |         {
237 |             return _mm_xor_si128(_mm_cmpeq_epi16(lhs, rhs), bool16x8(true16));
238 |         }
239 |     }
240 | }
241 | 


--------------------------------------------------------------------------------
/include/tue/sized_bool.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <cstddef>
 12 | #include <cstdint>
 13 | #include <type_traits>
 14 | 
 15 | namespace tue
 16 | {
 17 |     /*!
 18 |      * \defgroup  sized_bool_hpp <tue/sized_bool.hpp>
 19 |      *
 20 |      * \brief     The sized boolean types and their associated utility structs
 21 |      *            and functions.
 22 |      * @{
 23 |      */
 24 | 
 25 |     /*!
 26 |      * \brief  An 8-bit boolean value.
 27 |      */
 28 |     enum bool8 : std::uint8_t
 29 |     {
 30 |         /*!
 31 |          * \brief  An 8-bit boolean value with all 8 bits set to 1 (true).
 32 |          */
 33 |         true8 = UINT8_C(0xFF),
 34 | 
 35 |         /*!
 36 |          * \brief  An 8-bit boolean value with all 8 bits set to 0 (false).
 37 |          */
 38 |         false8 = UINT8_C(0x00),
 39 |     };
 40 | 
 41 |     /*!
 42 |      * \brief  A 16-bit boolean value.
 43 |      */
 44 |     enum bool16 : std::uint16_t
 45 |     {
 46 |         /*!
 47 |          * \brief  A 16-bit boolean value with all 16 bits set to 1 (true).
 48 |          */
 49 |         true16 = UINT16_C(0xFFFF),
 50 | 
 51 |         /*!
 52 |          * \brief  A 16-bit boolean value with all 16 bits set to 0 (false).
 53 |          */
 54 |         false16 = UINT16_C(0x0000),
 55 |     };
 56 | 
 57 |     /*!
 58 |      * \brief  A 32-bit boolean value.
 59 |      */
 60 |     enum bool32 : std::uint32_t
 61 |     {
 62 |         /*!
 63 |          * \brief  A 32-bit boolean value with all 32 bits set to 1 (true).
 64 |          */
 65 |         true32 = UINT32_C(0xFFFFFFFF),
 66 | 
 67 |         /*!
 68 |          * \brief  A 32-bit boolean value with all 32 bits set to 0 (false).
 69 |          */
 70 |         false32 = UINT32_C(0x00000000),
 71 |     };
 72 | 
 73 |     /*!
 74 |      * \brief  A 64-bit boolean value.
 75 |      */
 76 |     enum bool64 : std::uint64_t
 77 |     {
 78 |         /*!
 79 |          * \brief  A 64-bit boolean value with all 64 bits set to 1 (true).
 80 |          */
 81 |         true64 = UINT64_C(0xFFFFFFFFFFFFFFFF),
 82 | 
 83 |         /*!
 84 |          * \brief  A 64-bit boolean value with all 64 bits set to 0 (false).
 85 |          */
 86 |         false64 = UINT64_C(0x0000000000000000),
 87 |     };
 88 | 
 89 |     /**/ // Lookup used by sized_bool_t: size in bytes -> sized boolean type.
 90 |     namespace detail_
 91 |     {
 92 |         template<std::size_t Size> struct sized_bool_utils; // declared only
 93 |         template<> struct sized_bool_utils<1> { using type = bool8; };
 94 |         template<> struct sized_bool_utils<2> { using type = bool16; };
 95 |         template<> struct sized_bool_utils<4> { using type = bool32; };
 96 |         template<> struct sized_bool_utils<8> { using type = bool64; };
 97 |     }
 98 | 
 99 |     /*!
100 |      * \brief        A type alias for the sized boolean type (`bool8`, `bool16`,
101 |      *               `bool32`, or `bool64`) with the given size.
102 |      *
103 |      * \tparam Size  The desired sized boolean type size (in bytes).
104 |      */
105 |     template<std::size_t Size>
106 |     using sized_bool_t = typename detail_::sized_bool_utils<Size>::type;
107 | 
108 |     /*!
109 |      * \brief     Checks if a type is a sized boolean type.
110 |      * \details   Extends `std::integral_constant<bool, true>` for the following
111 |      *            types and `std::integral_constant<bool, false>` otherwise:
112 |      *            - `tue::bool8`
113 |      *            - `tue::bool16`
114 |      *            - `tue::bool32`
115 |      *            - `tue::bool64`
116 |      *
117 |      * \tparam T  The type to check.
118 |      */
119 |     template<typename T>
120 |     struct is_sized_bool; // forward declaration; no definition in this block
121 | 
122 |     /*!@}*/
123 | }
124 | 
125 | #include "detail_/is_sized_bool.hpp"
126 | 
127 | namespace tue
128 | {
129 |     /*!
130 |      * \addtogroup  sized_bool_hpp
131 |      * @{
132 |      */
133 | 
134 |     /*!
135 |      * \brief     Computes the bitwise NOT of `b`.
136 |      *
137 |      * \tparam T  The type of parameter `b`.
138 |      *
139 |      * \param b   A sized boolean value.
140 |      *
141 |      * \return    The bitwise NOT of `b`.
142 |      */
143 |     template<typename T>
144 |     inline constexpr std::enable_if_t<is_sized_bool<T>::value, T>
145 |     operator~(T b) noexcept
146 |     {
147 |         using U = std::underlying_type_t<T>;
148 |         return T(U(~U(b))); // ~ promotes narrow U to int; narrow back to U
149 |     }
150 | 
151 |     /*!
152 |      * \brief      Computes the bitwise AND of `lhs` and `rhs`.
153 |      *
154 |      * \tparam T   The type of parameters `lhs` and `rhs`.
155 |      *
156 |      * \param lhs  The left-hand side operand.
157 |      * \param rhs  The right-hand side operand.
158 |      *
159 |      * \return     The bitwise AND of `lhs` and `rhs`.
160 |      */
161 |     template<typename T>
162 |     inline constexpr std::enable_if_t<is_sized_bool<T>::value, T>
163 |     operator&(T lhs, T rhs) noexcept
164 |     {
165 |         using U = std::underlying_type_t<T>;
166 |         return T(U(lhs) & U(rhs));
167 |     }
168 | 
169 |     /*!
170 |      * \brief      Computes the bitwise OR of `lhs` and `rhs`.
171 |      *
172 |      * \tparam T   The type of parameters `lhs` and `rhs`.
173 |      *
174 |      * \param lhs  The left-hand side operand.
175 |      * \param rhs  The right-hand side operand.
176 |      *
177 |      * \return     The bitwise OR of `lhs` and `rhs`.
178 |      */
179 |     template<typename T>
180 |     inline constexpr std::enable_if_t<is_sized_bool<T>::value, T>
181 |     operator|(T lhs, T rhs) noexcept
182 |     {
183 |         using U = std::underlying_type_t<T>;
184 |         return T(U(lhs) | U(rhs));
185 |     }
186 | 
187 |     /*!
188 |      * \brief      Computes the bitwise XOR of `lhs` and `rhs`.
189 |      *
190 |      * \tparam T   The type of parameters `lhs` and `rhs`.
191 |      *
192 |      * \param lhs  The left-hand side operand.
193 |      * \param rhs  The right-hand side operand.
194 |      *
195 |      * \return     The bitwise XOR of `lhs` and `rhs`.
196 |      */
197 |     template<typename T>
198 |     inline constexpr std::enable_if_t<is_sized_bool<T>::value, T>
199 |     operator^(T lhs, T rhs) noexcept
200 |     {
201 |         using U = std::underlying_type_t<T>;
202 |         return T(U(lhs) ^ U(rhs));
203 |     }
204 | 
205 |     /*!
206 |      * \brief      Bitwise ANDs `lhs` with `rhs`.
207 |      *
208 |      * \tparam T   The type of parameters `lhs` and `rhs`.
209 |      *
210 |      * \param lhs  The left-hand side operand.
211 |      * \param rhs  The right-hand side operand.
212 |      *
213 |      * \return     A reference to `lhs`.
214 |      */
215 |     template<typename T>
216 |     inline std::enable_if_t<is_sized_bool<T>::value, T&>
217 |     operator&=(T& lhs, T rhs) noexcept
218 |     {
219 |         return lhs = lhs & rhs;
220 |     }
221 | 
222 |     /*!
223 |      * \brief      Bitwise ORs `lhs` with `rhs`.
224 |      *
225 |      * \tparam T   The type of parameters `lhs` and `rhs`.
226 |      *
227 |      * \param lhs  The left-hand side operand.
228 |      * \param rhs  The right-hand side operand.
229 |      *
230 |      * \return     A reference to `lhs`.
231 |      */
232 |     template<typename T>
233 |     inline std::enable_if_t<is_sized_bool<T>::value, T&>
234 |     operator|=(T& lhs, T rhs) noexcept
235 |     {
236 |         return lhs = lhs | rhs;
237 |     }
238 | 
239 |     /*!
240 |      * \brief      Bitwise XORs `lhs` with `rhs`.
241 |      *
242 |      * \tparam T   The type of parameters `lhs` and `rhs`.
243 |      *
244 |      * \param lhs  The left-hand side operand.
245 |      * \param rhs  The right-hand side operand.
246 |      *
247 |      * \return     A reference to `lhs`.
248 |      */
249 |     template<typename T>
250 |     inline std::enable_if_t<is_sized_bool<T>::value, T&>
251 |     operator^=(T& lhs, T rhs) noexcept
252 |     {
253 |         return lhs = lhs ^ rhs;
254 |     }
255 | }
256 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/bool8x16.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <type_traits>
 14 | 
 15 | #include "../../../simd.hpp"
 16 | #include "../../../sized_bool.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     template<>
 21 |     class alignas(tue::detail_::alignof_simd<bool8, 16>())
 22 |     simd<bool8, 16>
 23 |     {
 24 |         __m128i underlying_; // all 16 lanes are kept in one 128-bit register
 25 | 
 26 |     private:
 27 |         template<typename U>
 28 |         static bool8x16 explicit_cast(const simd<U, 16>& s) noexcept
 29 |         { // generic path: converts each of the 16 components with bool8(...)
 30 |             return {
 31 |                 bool8(s.data()[0]),
 32 |                 bool8(s.data()[1]),
 33 |                 bool8(s.data()[2]),
 34 |                 bool8(s.data()[3]),
 35 |                 bool8(s.data()[4]),
 36 |                 bool8(s.data()[5]),
 37 |                 bool8(s.data()[6]),
 38 |                 bool8(s.data()[7]),
 39 |                 bool8(s.data()[8]),
 40 |                 bool8(s.data()[9]),
 41 |                 bool8(s.data()[10]),
 42 |                 bool8(s.data()[11]),
 43 |                 bool8(s.data()[12]),
 44 |                 bool8(s.data()[13]),
 45 |                 bool8(s.data()[14]),
 46 |                 bool8(s.data()[15]),
 47 |             };
 48 |         }
 49 |         // The int8x16 / uint8x16 overloads are defined after the class body.
 50 |         inline static bool8x16 explicit_cast(const int8x16& s) noexcept;
 51 | 
 52 |         inline static bool8x16 explicit_cast(const uint8x16& s) noexcept;
 53 | 
 54 |     public:
 55 |         using component_type = bool8;
 56 | 
 57 |         static constexpr int component_count = 16;
 58 | 
 59 |         static constexpr bool is_accelerated = true;
 60 | 
 61 |         simd() noexcept = default;
 62 | 
 63 |         explicit simd(bool8 x) noexcept
 64 |         :
 65 |             underlying_(_mm_set1_epi8(x))
 66 |         {
 67 |         }
 68 |         // M defaults to 16, so the M == 2/4/8 overloads below are disabled.
 69 |         template<int M = 16, typename = std::enable_if_t<M == 2>>
 70 |         inline simd(
 71 |             bool8 x, bool8 y) noexcept;
 72 | 
 73 |         template<int M = 16, typename = std::enable_if_t<M == 4>>
 74 |         inline simd(
 75 |             bool8 x, bool8 y, bool8 z, bool8 w) noexcept;
 76 | 
 77 |         template<int M = 16, typename = std::enable_if_t<M == 8>>
 78 |         inline simd(
 79 |             bool8 s0, bool8 s1, bool8 s2, bool8 s3,
 80 |             bool8 s4, bool8 s5, bool8 s6, bool8 s7) noexcept;
 81 | 
 82 |         template<int M = 16, typename = std::enable_if_t<M == 16>>
 83 |         inline simd(
 84 |             bool8  s0, bool8  s1, bool8  s2, bool8  s3,
 85 |             bool8  s4, bool8  s5, bool8  s6, bool8  s7,
 86 |             bool8  s8, bool8  s9, bool8 s10, bool8 s11,
 87 |             bool8 s12, bool8 s13, bool8 s14, bool8 s15) noexcept
 88 |         :
 89 |             underlying_(_mm_setr_epi8(
 90 |                 s0, s1,  s2,  s3,  s4,  s5,  s6,  s7,
 91 |                 s8, s9, s10, s11, s12, s13, s14, s15))
 92 |         {
 93 |         }
 94 |         // Converting constructor: assigns the result of explicit_cast(s).
 95 |         template<typename U>
 96 |         explicit simd(const simd<U, 16>& s) noexcept
 97 |         {
 98 |             *this = explicit_cast(s);
 99 |         }
100 |         // Implicit wrapping and unwrapping of the raw __m128i register.
101 |         simd(__m128i underlying) noexcept
102 |         :
103 |             underlying_(underlying)
104 |         {
105 |         }
106 | 
107 |         operator __m128i() const noexcept
108 |         {
109 |             return underlying_;
110 |         }
111 | 
112 |         static bool8x16 zero() noexcept
113 |         {
114 |             return _mm_setzero_si128();
115 |         }
116 |         // load/store use the aligned intrinsics; loadu/storeu the unaligned.
117 |         static bool8x16 load(const bool8* data) noexcept
118 |         {
119 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
120 |         }
121 | 
122 |         static bool8x16 loadu(const bool8* data) noexcept
123 |         {
124 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
125 |         }
126 | 
127 |         void store(bool8* data) const noexcept
128 |         {
129 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
130 |         }
131 | 
132 |         void storeu(bool8* data) const noexcept
133 |         {
134 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
135 |         }
136 |         // Lane access: views the register's storage as an array of bool8.
137 |         const bool8* data() const noexcept
138 |         {
139 |             return reinterpret_cast<const bool8*>(&underlying_);
140 |         }
141 | 
142 |         bool8* data() noexcept
143 |         {
144 |             return reinterpret_cast<bool8*>(&underlying_);
145 |         }
146 |     };
147 | }
148 | 
149 | #include "int8x16.sse2.hpp"
150 | #include "uint8x16.sse2.hpp"
151 | 
152 | namespace tue
153 | {
154 |     inline bool8x16 bool8x16::explicit_cast(const int8x16& s) noexcept
155 |     {
156 |         return __m128i(s); // explicit conversion of s to the raw register
157 |     }
158 | 
159 |     inline bool8x16 bool8x16::explicit_cast(const uint8x16& s) noexcept
160 |     {
161 |         return __m128i(s); // explicit conversion of s to the raw register
162 |     }
163 | 
164 |     namespace detail_
165 |     {
166 |         inline bool8x16 bitwise_not_operator_s(
167 |             const bool8x16& s) noexcept
168 |         {
169 |             return _mm_xor_si128(s, bool8x16(true8)); // XOR with all ones
170 |         }
171 | 
172 |         inline bool8x16 bitwise_and_operator_ss(
173 |             const bool8x16& lhs, const bool8x16& rhs) noexcept
174 |         {
175 |             return _mm_and_si128(lhs, rhs);
176 |         }
177 | 
178 |         inline bool8x16 bitwise_or_operator_ss(
179 |             const bool8x16& lhs, const bool8x16& rhs) noexcept
180 |         {
181 |             return _mm_or_si128(lhs, rhs);
182 |         }
183 | 
184 |         inline bool8x16 bitwise_xor_operator_ss(
185 |             const bool8x16& lhs, const bool8x16& rhs) noexcept
186 |         {
187 |             return _mm_xor_si128(lhs, rhs);
188 |         }
189 | 
190 |         inline bool8x16& bitwise_and_assignment_operator_ss(
191 |             bool8x16& lhs, const bool8x16& rhs) noexcept
192 |         {
193 |             return lhs = _mm_and_si128(lhs, rhs);
194 |         }
195 | 
196 |         inline bool8x16& bitwise_or_assignment_operator_ss(
197 |             bool8x16& lhs, const bool8x16& rhs) noexcept
198 |         {
199 |             return lhs = _mm_or_si128(lhs, rhs);
200 |         }
201 | 
202 |         inline bool8x16& bitwise_xor_assignment_operator_ss(
203 |             bool8x16& lhs, const bool8x16& rhs) noexcept
204 |         {
205 |             return lhs = _mm_xor_si128(lhs, rhs);
206 |         }
207 |         // True iff all 16 bytes compare equal (byte-compare mask == 0xFFFF).
208 |         inline bool equality_operator_ss(
209 |             const bool8x16& lhs, const bool8x16& rhs) noexcept
210 |         {
211 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
212 |         }
213 |         // True iff at least one byte differs (byte-compare mask != 0xFFFF).
214 |         inline bool inequality_operator_ss(
215 |             const bool8x16& lhs, const bool8x16& rhs) noexcept
216 |         {
217 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF;
218 |         }
219 |         // Bits of `values` survive only where `conditions` has a 1 bit.
220 |         inline bool8x16 mask_ss(
221 |             const bool8x16& conditions,
222 |             const bool8x16& values) noexcept
223 |         {
224 |             return _mm_and_si128(conditions, values);
225 |         }
226 |         // Bitwise blend: `values` where `conditions` is 1, else `otherwise`.
227 |         inline bool8x16 select_sss(
228 |             const bool8x16& conditions,
229 |             const bool8x16& values,
230 |             const bool8x16& otherwise) noexcept
231 |         {
232 |             return _mm_or_si128(
233 |                 _mm_and_si128(conditions, values),
234 |                 _mm_andnot_si128(conditions, otherwise));
235 |         }
236 |         // Per 8-bit lane: all ones where the lanes are equal, zero otherwise.
237 |         inline bool8x16 equal_ss(
238 |             const bool8x16& lhs, const bool8x16& rhs) noexcept
239 |         {
240 |             return _mm_cmpeq_epi8(lhs, rhs);
241 |         }
242 |         // Per 8-bit lane: bitwise complement of the equality mask.
243 |         inline bool8x16 not_equal_ss(
244 |             const bool8x16& lhs, const bool8x16& rhs) noexcept
245 |         {
246 |             return _mm_xor_si128(_mm_cmpeq_epi8(lhs, rhs), bool8x16(true8));
247 |         }
248 |     }
249 | }
250 | 


--------------------------------------------------------------------------------
/include/tue/detail_/matmult.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <utility>
 12 | 
 13 | #include "../mat.hpp"
 14 | #include "../vec.hpp"
 15 | 
 16 | namespace tue
 17 | {
 18 |     namespace detail_
 19 |     { // matmult_component_mv: sum over k of lhs[k][j] * rhs[k]
 20 |         template<typename T, typename U, int R>
 21 |         inline constexpr decltype(std::declval<T>() * std::declval<U>())
 22 |         matmult_component_mv(
 23 |             const mat<T, 2, R>& lhs, const vec<U, 2>& rhs, int j) noexcept
 24 |         {
 25 |             return lhs[0][j] * rhs[0]
 26 |                  + lhs[1][j] * rhs[1];
 27 |         }
 28 | 
 29 |         template<typename T, typename U, int R>
 30 |         inline constexpr decltype(std::declval<T>() * std::declval<U>())
 31 |         matmult_component_mv(
 32 |             const mat<T, 3, R>& lhs, const vec<U, 3>& rhs, int j) noexcept
 33 |         {
 34 |             return lhs[0][j] * rhs[0]
 35 |                  + lhs[1][j] * rhs[1]
 36 |                  + lhs[2][j] * rhs[2];
 37 |         }
 38 | 
 39 |         template<typename T, typename U, int R>
 40 |         inline constexpr decltype(std::declval<T>() * std::declval<U>())
 41 |         matmult_component_mv(
 42 |             const mat<T, 4, R>& lhs, const vec<U, 4>& rhs, int j) noexcept
 43 |         {
 44 |             return lhs[0][j] * rhs[0]
 45 |                  + lhs[1][j] * rhs[1]
 46 |                  + lhs[2][j] * rhs[2]
 47 |                  + lhs[3][j] * rhs[3];
 48 |         }
 49 |         // matmult_component_mm: sum over k of lhs[k][j] * rhs[i][k]
 50 |         template<typename T, typename U, int C, int R>
 51 |         inline constexpr decltype(std::declval<T>() * std::declval<U>())
 52 |         matmult_component_mm(
 53 |             const mat<T, 2, R>& lhs, const mat<U, C, 2>& rhs,
 54 |             int i, int j) noexcept
 55 |         {
 56 |             return lhs[0][j] * rhs[i][0]
 57 |                  + lhs[1][j] * rhs[i][1];
 58 |         }
 59 | 
 60 |         template<typename T, typename U, int C, int R>
 61 |         inline constexpr decltype(std::declval<T>() * std::declval<U>())
 62 |         matmult_component_mm(
 63 |             const mat<T, 3, R>& lhs, const mat<U, C, 3>& rhs,
 64 |             int i, int j) noexcept
 65 |         {
 66 |             return lhs[0][j] * rhs[i][0]
 67 |                  + lhs[1][j] * rhs[i][1]
 68 |                  + lhs[2][j] * rhs[i][2];
 69 |         }
 70 | 
 71 |         template<typename T, typename U, int C, int R>
 72 |         inline constexpr decltype(std::declval<T>() * std::declval<U>())
 73 |         matmult_component_mm(
 74 |             const mat<T, 4, R>& lhs, const mat<U, C, 4>& rhs,
 75 |             int i, int j) noexcept
 76 |         {
 77 |             return lhs[0][j] * rhs[i][0]
 78 |                  + lhs[1][j] * rhs[i][1]
 79 |                  + lhs[2][j] * rhs[i][2]
 80 |                  + lhs[3][j] * rhs[i][3];
 81 |         }
 82 |         // matmult_column_mm: the matmult_component_mm(.., i, j) values for each j
 83 |         template<typename T, typename U, int C, int N>
 84 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 2>
 85 |         matmult_column_mm(
 86 |             const mat<T, N, 2>& lhs, const mat<U, C, N>& rhs, int i) noexcept
 87 |         {
 88 |             return {
 89 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 0),
 90 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 1),
 91 |             };
 92 |         }
 93 | 
 94 |         template<typename T, typename U, int C, int N>
 95 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 3>
 96 |         matmult_column_mm(
 97 |             const mat<T, N, 3>& lhs, const mat<U, C, N>& rhs, int i) noexcept
 98 |         {
 99 |             return {
100 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 0),
101 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 1),
102 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 2),
103 |             };
104 |         }
105 | 
106 |         template<typename T, typename U, int C, int N>
107 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 4>
108 |         matmult_column_mm(
109 |             const mat<T, N, 4>& lhs, const mat<U, C, N>& rhs, int i) noexcept
110 |         {
111 |             return {
112 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 0),
113 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 1),
114 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 2),
115 |                 tue::detail_::matmult_component_mm(lhs, rhs, i, 3),
116 |             };
117 |         }
118 |         // vec * mat: component k of the result is dot_vv(lhs, rhs[k])
119 |         template<typename T, typename U, int N>
120 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 2>
121 |         multiplication_operator_vm(
122 |             const vec<T, N>& lhs, const mat<U, 2, N>& rhs) noexcept
123 |         {
124 |             return {
125 |                 tue::detail_::dot_vv(lhs, rhs[0]),
126 |                 tue::detail_::dot_vv(lhs, rhs[1]),
127 |             };
128 |         }
129 | 
130 |         template<typename T, typename U, int N>
131 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 3>
132 |         multiplication_operator_vm(
133 |             const vec<T, N>& lhs, const mat<U, 3, N>& rhs) noexcept
134 |         {
135 |             return {
136 |                 tue::detail_::dot_vv(lhs, rhs[0]),
137 |                 tue::detail_::dot_vv(lhs, rhs[1]),
138 |                 tue::detail_::dot_vv(lhs, rhs[2]),
139 |             };
140 |         }
141 | 
142 |         template<typename T, typename U, int N>
143 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 4>
144 |         multiplication_operator_vm(
145 |             const vec<T, N>& lhs, const mat<U, 4, N>& rhs) noexcept
146 |         {
147 |             return {
148 |                 tue::detail_::dot_vv(lhs, rhs[0]),
149 |                 tue::detail_::dot_vv(lhs, rhs[1]),
150 |                 tue::detail_::dot_vv(lhs, rhs[2]),
151 |                 tue::detail_::dot_vv(lhs, rhs[3]),
152 |             };
153 |         }
154 |         // mat * vec: component j of the result is matmult_component_mv(.., j)
155 |         template<typename T, typename U, int N>
156 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 2>
157 |         multiplication_operator_mv(
158 |             const mat<T, N, 2>& lhs, const vec<U, N>& rhs) noexcept
159 |         {
160 |             return {
161 |                 tue::detail_::matmult_component_mv(lhs, rhs, 0),
162 |                 tue::detail_::matmult_component_mv(lhs, rhs, 1),
163 |             };
164 |         }
165 | 
166 |         template<typename T, typename U, int N>
167 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 3>
168 |         multiplication_operator_mv(
169 |             const mat<T, N, 3>& lhs, const vec<U, N>& rhs) noexcept
170 |         {
171 |             return {
172 |                 tue::detail_::matmult_component_mv(lhs, rhs, 0),
173 |                 tue::detail_::matmult_component_mv(lhs, rhs, 1),
174 |                 tue::detail_::matmult_component_mv(lhs, rhs, 2),
175 |             };
176 |         }
177 | 
178 |         template<typename T, typename U, int N>
179 |         inline constexpr vec<decltype(std::declval<T>() * std::declval<U>()), 4>
180 |         multiplication_operator_mv(
181 |             const mat<T, N, 4>& lhs, const vec<U, N>& rhs) noexcept
182 |         {
183 |             return {
184 |                 tue::detail_::matmult_component_mv(lhs, rhs, 0),
185 |                 tue::detail_::matmult_component_mv(lhs, rhs, 1),
186 |                 tue::detail_::matmult_component_mv(lhs, rhs, 2),
187 |                 tue::detail_::matmult_component_mv(lhs, rhs, 3),
188 |             };
189 |         }
190 |         // mat * mat: element i of the result is matmult_column_mm(lhs, rhs, i)
191 |         template<typename T, typename U, int N, int R>
192 |         inline constexpr mat<decltype(
193 |             std::declval<T>() * std::declval<U>()), 2, R>
194 |         multiplication_operator_mm(
195 |             const mat<T, N, R>& lhs, const mat<U, 2, N>& rhs) noexcept
196 |         {
197 |             return {
198 |                 tue::detail_::matmult_column_mm(lhs, rhs, 0),
199 |                 tue::detail_::matmult_column_mm(lhs, rhs, 1),
200 |             };
201 |         }
202 | 
203 |         template<typename T, typename U, int N, int R>
204 |         inline constexpr mat<decltype(
205 |             std::declval<T>() * std::declval<U>()), 3, R>
206 |         multiplication_operator_mm(
207 |             const mat<T, N, R>& lhs, const mat<U, 3, N>& rhs) noexcept
208 |         {
209 |             return {
210 |                 tue::detail_::matmult_column_mm(lhs, rhs, 0),
211 |                 tue::detail_::matmult_column_mm(lhs, rhs, 1),
212 |                 tue::detail_::matmult_column_mm(lhs, rhs, 2),
213 |             };
214 |         }
215 | 
216 |         template<typename T, typename U, int N, int R>
217 |         inline constexpr mat<decltype(
218 |             std::declval<T>() * std::declval<U>()), 4, R>
219 |         multiplication_operator_mm(
220 |             const mat<T, N, R>& lhs, const mat<U, 4, N>& rhs) noexcept
221 |         {
222 |             return {
223 |                 tue::detail_::matmult_column_mm(lhs, rhs, 0),
224 |                 tue::detail_::matmult_column_mm(lhs, rhs, 1),
225 |                 tue::detail_::matmult_column_mm(lhs, rhs, 2),
226 |                 tue::detail_::matmult_column_mm(lhs, rhs, 3),
227 |             };
228 |         }
229 |     }
230 | }
231 | 


--------------------------------------------------------------------------------
/tests/quat.tests.cpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #include <tue/quat.hpp>
 10 | #include "tue.tests.hpp"
 11 | 
 12 | #include <tue/unused.hpp>
 13 | #include <tue/vec.hpp>
 14 | 
 15 | namespace
 16 | {
 17 |     using namespace tue;
 18 | 
 19 |     TEST_CASE(size) // a quat must be exactly as large as four components
 20 |     {
 21 |         test_assert(sizeof(quat<short>) == sizeof(short[4]));
 22 |         test_assert(sizeof(fquat) == sizeof(float[4]));
 23 |         test_assert(sizeof(dquat) == sizeof(double[4]));
 24 |     }
 25 | 
 26 |     TEST_CASE(alignment) // quat alignment vs. a four-component array
 27 |     {
 28 |         test_assert(alignof(quat<short>) == alignof(short[4]));
 29 |         test_assert(alignof(fquat) == alignof(float[4]));
 30 |         test_assert(alignof(dquat) <= alignof(double[4])); // NOTE(review): <= here, == above; confirm intended
 31 |     }
 32 | 
 33 |     TEST_CASE(component_type) // component_type must be the element type
 34 |     {
 35 |         test_assert((
 36 |             std::is_same<typename quat<short>::component_type, short>::value));
 37 |         test_assert((
 38 |             std::is_same<typename fquat::component_type, float>::value));
 39 |         test_assert((
 40 |             std::is_same<typename dquat::component_type, double>::value));
 41 |     }
 42 | 
 43 |     TEST_CASE(component_count) // component_count is 4 and usable in constexpr
 44 |     {
 45 |         constexpr auto qs = quat<short>::component_count;
 46 |         constexpr auto fq = fquat::component_count;
 47 |         constexpr auto dq = dquat::component_count;
 48 |         test_assert(qs == 4);
 49 |         test_assert(fq == 4);
 50 |         test_assert(dq == 4);
 51 |     }
 52 | 
 53 |     TEST_CASE(default_constructor) // no assertions: only constructs a dquat
 54 |     {
 55 |         dquat q;
 56 |         unused(q); // presumably silences the unused-variable warning
 57 |     }
 58 | 
 59 |     TEST_CASE(individual_components_constructor) // four scalars fill q[0]..q[3] in order
 60 |     {
 61 |         CONST_OR_CONSTEXPR dquat q = { 1.2, 3.4, 5.6, 7.8 };
 62 |         test_assert(q[0] == 1.2);
 63 |         test_assert(q[1] == 3.4);
 64 |         test_assert(q[2] == 5.6);
 65 |         test_assert(q[3] == 7.8);
 66 |     }
 67 | 
 68 |     TEST_CASE(extend_vec3_constructor) // three-component list fills q[0]..q[2]; the scalar fills q[3]
 69 |     {
 70 |         CONST_OR_CONSTEXPR dquat q = { { 1.2, 3.4, 5.6 }, 7.8 };
 71 |         test_assert(q[0] == 1.2);
 72 |         test_assert(q[1] == 3.4);
 73 |         test_assert(q[2] == 5.6);
 74 |         test_assert(q[3] == 7.8);
 75 |     }
 76 | 
 77 |     TEST_CASE(vec4_constructor) // components are taken from a dvec4 in order
 78 |     {
 79 |         CONST_OR_CONSTEXPR dvec4 v(1.2, 3.4, 5.6, 7.8);
 80 |         CONST_OR_CONSTEXPR dquat q(v);
 81 |         test_assert(q[0] == 1.2);
 82 |         test_assert(q[1] == 3.4);
 83 |         test_assert(q[2] == 5.6);
 84 |         test_assert(q[3] == 7.8);
 85 |     }
 86 | 
 87 |     TEST_CASE(explicit_conversion_constructor)
 88 |     { // Direct-initialises an fquat from a dquat; expectations are float literals.
 89 |         CONST_OR_CONSTEXPR dquat dq(1.2, 3.4, 5.6, 7.8);
 90 |         CONST_OR_CONSTEXPR fquat fq(dq);
 91 |         test_assert(fq[0] == 1.2f);
 92 |         test_assert(fq[1] == 3.4f);
 93 |         test_assert(fq[2] == 5.6f);
 94 |         test_assert(fq[3] == 7.8f);
 95 |     }
 96 | 
 97 |     TEST_CASE(implicit_conversion_operator)
 98 |     { // Copy-initialises a dquat from an fquat, i.e. relies on an implicit conversion.
 99 |         CONST_OR_CONSTEXPR fquat fq(1.2f, 3.4f, 5.6f, 7.8f);
100 |         CONST_OR_CONSTEXPR dquat dq = fq;
101 |         test_assert(dq[0] == 1.2f); // compared against the float literal, not 1.2
102 |         test_assert(dq[1] == 3.4f);
103 |         test_assert(dq[2] == 5.6f);
104 |         test_assert(dq[3] == 7.8f);
105 |     }
106 | 
107 |     TEST_CASE(identity)
108 |     { // dquat::identity() must equal (0, 0, 0, 1).
109 |         CONST_OR_CONSTEXPR auto q = dquat::identity();
110 |         test_assert(q == dquat(0.0, 0.0, 0.0, 1.0));
111 |     }
112 | 
113 |     TEST_CASE(subscript_operator)
114 |     { // Exercises operator[] on a CONST_OR_CONSTEXPR, a const and a mutable dquat.
115 |         CONST_OR_CONSTEXPR dquat ceq(1.2, 3.4, 5.6, 7.8);
116 |         CONST_OR_CONSTEXPR auto ceq0 = ceq[0];
117 |         CONST_OR_CONSTEXPR auto ceq1 = ceq[1];
118 |         CONST_OR_CONSTEXPR auto ceq2 = ceq[2];
119 |         CONST_OR_CONSTEXPR auto ceq3 = ceq[3];
120 |         test_assert(ceq0 == 1.2);
121 |         test_assert(ceq1 == 3.4);
122 |         test_assert(ceq2 == 5.6);
123 |         test_assert(ceq3 == 7.8);
124 |         // Const copy: each element reference must alias data() + index.
125 |         const auto cq = ceq;
126 |         const auto& cq0 = cq[0];
127 |         const auto& cq1 = cq[1];
128 |         const auto& cq2 = cq[2];
129 |         const auto& cq3 = cq[3];
130 |         test_assert(&cq0 == cq.data() + 0);
131 |         test_assert(&cq1 == cq.data() + 1);
132 |         test_assert(&cq2 == cq.data() + 2);
133 |         test_assert(&cq3 == cq.data() + 3);
134 |         // Mutable copy: same aliasing check through non-const references.
135 |         auto q = ceq;
136 |         auto& q0 = q[0];
137 |         auto& q1 = q[1];
138 |         auto& q2 = q[2];
139 |         auto& q3 = q[3];
140 |         test_assert(&q0 == q.data() + 0);
141 |         test_assert(&q1 == q.data() + 1);
142 |         test_assert(&q2 == q.data() + 2);
143 |         test_assert(&q3 == q.data() + 3);
144 |     }
145 | 
146 |     TEST_CASE(data)
147 |     { // data() must return the address of the quat object itself.
148 |         const dquat dq(1.2, 3.4, 5.6, 7.8);
149 |         test_assert(dq.data() == static_cast<const void*>(&dq));
150 |         // Same check on a default-constructed, non-const object.
151 |         dquat q;
152 |         test_assert(q.data() == static_cast<void*>(&q));
153 |     }
154 | 
155 |     TEST_CASE(x_y_z_w)
156 |     { // x(), y(), z(), w() must return the 1st..4th constructor arguments.
157 |         CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8);
158 |         CONST_OR_CONSTEXPR auto x = q.x();
159 |         CONST_OR_CONSTEXPR auto y = q.y();
160 |         CONST_OR_CONSTEXPR auto z = q.z();
161 |         CONST_OR_CONSTEXPR auto w = q.w();
162 |         test_assert(x == 1.2);
163 |         test_assert(y == 3.4);
164 |         test_assert(z == 5.6);
165 |         test_assert(w == 7.8);
166 |     }
167 | 
168 |     TEST_CASE(xyz)
169 |     { // xyz() must compare equal to a dvec3 of the first three components.
170 |         CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8);
171 |         CONST_OR_CONSTEXPR auto xyz = q.xyz();
172 |         test_assert(xyz == dvec3(1.2, 3.4, 5.6));
173 |     }
174 | 
175 |     TEST_CASE(xyzw)
176 |     { // xyzw() must compare equal to a dvec4 of all four components.
177 |         CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8);
178 |         CONST_OR_CONSTEXPR auto xyzw = q.xyzw();
179 |         test_assert(xyzw == dvec4(1.2, 3.4, 5.6, 7.8));
180 |     }
181 | 
182 |     TEST_CASE(set_x_y_z_w)
183 |     { // After set_x/y/z/w, operator[] must return the new values at indices 0..3.
184 |         dquat q(1.2, 3.4, 5.6, 7.8);
185 |         q.set_x(9.10);
186 |         q.set_y(11.12);
187 |         q.set_z(13.14);
188 |         q.set_w(15.16);
189 |         test_assert(q[0] == 9.10);
190 |         test_assert(q[1] == 11.12);
191 |         test_assert(q[2] == 13.14);
192 |         test_assert(q[3] == 15.16);
193 |     }
194 | 
195 |     TEST_CASE(set_xyz)
196 |     { // set_xyz must replace indices 0..2 and leave index 3 untouched.
197 |         dquat q(1.2, 3.4, 5.6, 7.8);
198 |         // Three-scalar overload: index 3 must still be 7.8 afterwards.
199 |         q.set_xyz(9.10, 11.12, 13.14);
200 |         test_assert(q[0] == 9.10);
201 |         test_assert(q[1] == 11.12);
202 |         test_assert(q[2] == 13.14);
203 |         test_assert(q[3] == 7.8);
204 |         // dvec3 overload: same expectation.
205 |         q.set_xyz(dvec3(15.16, 17.18, 19.20));
206 |         test_assert(q[0] == 15.16);
207 |         test_assert(q[1] == 17.18);
208 |         test_assert(q[2] == 19.20);
209 |         test_assert(q[3] == 7.8);
210 |     }
211 | 
212 |     TEST_CASE(set_xyzw)
213 |     { // set_xyzw must replace all four components, for each of its three overloads.
214 |         dquat q(1.2, 3.4, 5.6, 7.8);
215 |         // Four-scalar overload.
216 |         q.set_xyzw(9.10, 11.12, 13.14, 15.16);
217 |         test_assert(q[0] == 9.10);
218 |         test_assert(q[1] == 11.12);
219 |         test_assert(q[2] == 13.14);
220 |         test_assert(q[3] == 15.16);
221 |         // dvec3 + scalar overload.
222 |         q.set_xyzw(dvec3(17.18, 19.20, 21.22), 23.24);
223 |         test_assert(q[0] == 17.18);
224 |         test_assert(q[1] == 19.20);
225 |         test_assert(q[2] == 21.22);
226 |         test_assert(q[3] == 23.24);
227 |         // dvec4 overload.
228 |         q.set_xyzw(dvec4(25.26, 27.28, 29.30, 31.32));
229 |         test_assert(q[0] == 25.26);
230 |         test_assert(q[1] == 27.28);
231 |         test_assert(q[2] == 29.30);
232 |         test_assert(q[3] == 31.32);
233 |     }
234 | 
235 |     TEST_CASE(v)
236 |     { // v() must compare equal to a dvec3 of the first three components.
237 |         CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8);
238 |         CONST_OR_CONSTEXPR auto v = q.v();
239 |         test_assert(v == dvec3(1.2, 3.4, 5.6));
240 |     }
241 | 
242 |     TEST_CASE(s)
243 |     { // s() must return the fourth component.
244 |         CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8);
245 |         CONST_OR_CONSTEXPR auto s = q.s();
246 |         test_assert(s == 7.8);
247 |     }
248 | 
249 |     TEST_CASE(set_v)
250 |     { // set_v must replace indices 0..2 and leave index 3 untouched.
251 |         dquat q(1.2, 3.4, 5.6, 7.8);
252 |         // Three-scalar overload: index 3 must still be 7.8 afterwards.
253 |         q.set_v(9.10, 11.12, 13.14);
254 |         test_assert(q[0] == 9.10);
255 |         test_assert(q[1] == 11.12);
256 |         test_assert(q[2] == 13.14);
257 |         test_assert(q[3] == 7.8);
258 |         // dvec3 overload: same expectation.
259 |         q.set_v(dvec3(15.16, 17.18, 19.20));
260 |         test_assert(q[0] == 15.16);
261 |         test_assert(q[1] == 17.18);
262 |         test_assert(q[2] == 19.20);
263 |         test_assert(q[3] == 7.8);
264 |     }
265 | 
266 |     TEST_CASE(set_s)
267 |     { // set_s must change index 3 only; indices 0..2 keep their initial values.
268 |         dquat q(1.2, 3.4, 5.6, 7.8);
269 |         q.set_s(9.10);
270 |         test_assert(q[0] == 1.2);
271 |         test_assert(q[1] == 3.4);
272 |         test_assert(q[2] == 5.6);
273 |         test_assert(q[3] == 9.10);
274 |     }
275 | 
276 |     TEST_CASE(multiplication_assignment_operator)
277 |     { // operator*= with a dquat on the left and an fquat on the right.
278 |         const dquat q1(1.2, 3.4, 5.6, 7.8);
279 |         const fquat q2(9.10f, 11.12f, 13.14f, 15.16f);
280 |         // Must return a reference to its left operand and match q1 * q2.
281 |         auto q3 = q1;
282 |         test_assert(&(q3 *= q2) == &q3);
283 |         test_assert(q3 == q1 * q2);
284 |     }
285 | 
286 |     TEST_CASE(multiplication_operator)
287 |     { // Checks dquat * fquat against the product expanded into vector/scalar parts.
288 |         CONST_OR_CONSTEXPR dquat q1(1.2, 3.4, 5.6, 7.8);
289 |         CONST_OR_CONSTEXPR fquat q2(9.10f, 11.12f, 13.14f, 15.16f);
290 |         CONST_OR_CONSTEXPR auto q3 = q1 * q2;
291 |         // Vector part: exact comparison with s2*v1 + s1*v2 + cross(v2, v1).
292 |         test_assert(
293 |             q3.v() ==
294 |             q2.s()*q1.v() + q1.s()*q2.v() + math::cross(q2.v(), q1.v()));
295 |         // Scalar part: s2*s1 - dot(v2, v1), compared with nearly_equal instead of ==.
296 |         test_assert(nearly_equal(
297 |             q3.s(),
298 |             q2.s()*q1.s() - math::dot(q2.v(), q1.v())));
299 |     }
300 | 
301 |     TEST_CASE(vec_multiplication_operator)
302 |     { // v * q must equal the vector part of q * (v, 0) * (-q.v(), q.s()).
303 |         CONST_OR_CONSTEXPR dvec3 v1(1.2, 3.4, 5.6);
304 |         CONST_OR_CONSTEXPR fquat q(7.8f, 9.10f, 11.12f, 13.14f);
305 |         CONST_OR_CONSTEXPR auto v2 = v1 * q;
306 |         test_assert(v2 == (q * dquat(v1, 0.0) * dquat(-q.v(), q.s())).v());
307 |     }
308 | 
309 |     TEST_CASE(equality_operator)
310 |     { // q2 matches q1; q3..q6 each zero one component (index 3, 2, 1, 0 in turn).
311 |         CONST_OR_CONSTEXPR fquat q1(1.2f, 3.4f, 5.6f, 7.8f);
312 |         CONST_OR_CONSTEXPR dquat q2(1.2f, 3.4f, 5.6f, 7.8f);
313 |         CONST_OR_CONSTEXPR dquat q3(1.2f, 3.4f, 5.6f, 0.0f);
314 |         CONST_OR_CONSTEXPR dquat q4(1.2f, 3.4f, 0.0f, 7.8f);
315 |         CONST_OR_CONSTEXPR dquat q5(1.2f, 0.0f, 5.6f, 7.8f);
316 |         CONST_OR_CONSTEXPR dquat q6(0.0f, 3.4f, 5.6f, 7.8f);
317 |         CONST_OR_CONSTEXPR auto result1 = (q1 == q2);
318 |         CONST_OR_CONSTEXPR auto result2 = (q1 == q3);
319 |         CONST_OR_CONSTEXPR auto result3 = (q1 == q4);
320 |         CONST_OR_CONSTEXPR auto result4 = (q1 == q5);
321 |         CONST_OR_CONSTEXPR auto result5 = (q1 == q6);
322 |         test_assert(result1 == true);
323 |         test_assert(result2 == false);
324 |         test_assert(result3 == false);
325 |         test_assert(result4 == false);
326 |         test_assert(result5 == false);
327 |     }
328 | 
329 |     TEST_CASE(inequality_operator)
330 |     { // Same fixtures as the equality test, with every expected result inverted.
331 |         CONST_OR_CONSTEXPR fquat q1(1.2f, 3.4f, 5.6f, 7.8f);
332 |         CONST_OR_CONSTEXPR dquat q2(1.2f, 3.4f, 5.6f, 7.8f);
333 |         CONST_OR_CONSTEXPR dquat q3(1.2f, 3.4f, 5.6f, 0.0f);
334 |         CONST_OR_CONSTEXPR dquat q4(1.2f, 3.4f, 0.0f, 7.8f);
335 |         CONST_OR_CONSTEXPR dquat q5(1.2f, 0.0f, 5.6f, 7.8f);
336 |         CONST_OR_CONSTEXPR dquat q6(0.0f, 3.4f, 5.6f, 7.8f);
337 |         CONST_OR_CONSTEXPR auto result1 = (q1 != q2);
338 |         CONST_OR_CONSTEXPR auto result2 = (q1 != q3);
339 |         CONST_OR_CONSTEXPR auto result3 = (q1 != q4);
340 |         CONST_OR_CONSTEXPR auto result4 = (q1 != q5);
341 |         CONST_OR_CONSTEXPR auto result5 = (q1 != q6);
342 |         test_assert(result1 == false);
343 |         test_assert(result2 == true);
344 |         test_assert(result3 == true);
345 |         test_assert(result4 == true);
346 |         test_assert(result5 == true);
347 |     }
348 | 
349 |     TEST_CASE(normalize)
350 |     { // Expected value comes from normalizing the raw components, not q itself.
351 |         const auto q = math::normalize(dquat(1.2, 3.4, 5.6, 7.8));
352 |         test_assert(q == dquat(math::normalize(dvec4(1.2, 3.4, 5.6, 7.8))));
353 |     }
354 | 
355 |     TEST_CASE(conjugate)
356 |     { // conjugate must negate the first three components and keep the fourth.
357 |         CONST_OR_CONSTEXPR auto q = math::conjugate(dquat(1.2, 3.4, 5.6, 7.8));
358 |         test_assert(q == dquat(-1.2, -3.4, -5.6, 7.8));
359 |     }
360 | }
361 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/uint64x2.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <cstdint>
 14 | #include <type_traits>
 15 | 
 16 | #include "../../../simd.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     template<>
 21 |     class alignas(tue::detail_::alignof_simd<std::uint64_t, 2>())
 22 |     simd<std::uint64_t, 2>
 23 |     { // SSE2 specialisation: two std::uint64_t lanes held in a single __m128i.
 24 |         __m128i underlying_;
 25 |         // Casting helpers used by the explicit converting constructor below.
 26 |     private:
 27 |         template<typename U>
 28 |         static uint64x2 explicit_cast(const simd<U, 2>& s) noexcept
 29 |         {
 30 |             return {
 31 |                 std::uint64_t(s.data()[0]),
 32 |                 std::uint64_t(s.data()[1]),
 33 |             };
 34 |         }
 35 |         // The bool64x2 / int64x2 overloads are only declared here.
 36 |         inline static uint64x2 explicit_cast(const bool64x2& s) noexcept;
 37 | 
 38 |         inline static uint64x2 explicit_cast(const int64x2& s) noexcept;
 39 | 
 40 |     public:
 41 |         using component_type = std::uint64_t;
 42 | 
 43 |         static constexpr int component_count = 2;
 44 | 
 45 |         static constexpr bool is_accelerated = true;
 46 | 
 47 |         simd() noexcept = default;
 48 |         // Broadcasts x to both lanes.
 49 |         explicit simd(std::uint64_t x) noexcept
 50 |         :
 51 |             underlying_(_mm_set1_epi64x(x))
 52 |         {
 53 |         }
 54 |         // _mm_set_epi64x takes the high lane first, so (y, x) puts x in lane 0.
 55 |         template<int M = 2, typename = std::enable_if_t<M == 2>>
 56 |         inline simd(
 57 |             std::uint64_t x, std::uint64_t y) noexcept
 58 |         :
 59 |             underlying_(_mm_set_epi64x(y, x))
 60 |         {
 61 |         }
 62 |         // Enabled only when M == 4, i.e. never for this 2-lane type.
 63 |         template<int M = 2, typename = std::enable_if_t<M == 4>>
 64 |         inline simd(
 65 |             std::uint64_t x, std::uint64_t y,
 66 |             std::uint64_t z, std::uint64_t w) noexcept;
 67 |         // Likewise disabled here: requires M == 8.
 68 |         template<int M = 2, typename = std::enable_if_t<M == 8>>
 69 |         inline simd(
 70 |             std::uint64_t s0, std::uint64_t s1,
 71 |             std::uint64_t s2, std::uint64_t s3,
 72 |             std::uint64_t s4, std::uint64_t s5,
 73 |             std::uint64_t s6, std::uint64_t s7) noexcept;
 74 |         // Likewise disabled here: requires M == 16.
 75 |         template<int M = 2, typename = std::enable_if_t<M == 16>>
 76 |         inline simd(
 77 |             std::uint64_t  s0, std::uint64_t  s1,
 78 |             std::uint64_t  s2, std::uint64_t  s3,
 79 |             std::uint64_t  s4, std::uint64_t  s5,
 80 |             std::uint64_t  s6, std::uint64_t  s7,
 81 |             std::uint64_t  s8, std::uint64_t  s9,
 82 |             std::uint64_t s10, std::uint64_t s11,
 83 |             std::uint64_t s12, std::uint64_t s13,
 84 |             std::uint64_t s14, std::uint64_t s15) noexcept;
 85 |         // Explicit conversion from any other 2-lane simd via explicit_cast.
 86 |         template<typename U>
 87 |         explicit simd(const simd<U, 2>& s) noexcept
 88 |         {
 89 |             *this = explicit_cast(s);
 90 |         }
 91 |         // Implicit wrap of a raw __m128i.
 92 |         simd(__m128i underlying) noexcept
 93 |         :
 94 |             underlying_(underlying)
 95 |         {
 96 |         }
 97 |         // Implicit unwrap to the raw __m128i.
 98 |         operator __m128i() const noexcept
 99 |         {
100 |             return underlying_;
101 |         }
102 |         // All bits zero.
103 |         static uint64x2 zero() noexcept
104 |         {
105 |             return _mm_setzero_si128();
106 |         }
107 |         // Aligned load: _mm_load_si128 requires data to be 16-byte aligned.
108 |         static uint64x2 load(const std::uint64_t* data) noexcept
109 |         {
110 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
111 |         }
112 |         // Unaligned load.
113 |         static uint64x2 loadu(const std::uint64_t* data) noexcept
114 |         {
115 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
116 |         }
117 |         // Aligned store: _mm_store_si128 requires data to be 16-byte aligned.
118 |         void store(std::uint64_t* data) const noexcept
119 |         {
120 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
121 |         }
122 |         // Unaligned store.
123 |         void storeu(std::uint64_t* data) const noexcept
124 |         {
125 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
126 |         }
127 |         // Views the register's own storage as std::uint64_t elements.
128 |         const std::uint64_t* data() const noexcept
129 |         {
130 |             return reinterpret_cast<const std::uint64_t*>(&underlying_);
131 |         }
132 | 
133 |         std::uint64_t* data() noexcept
134 |         {
135 |             return reinterpret_cast<std::uint64_t*>(&underlying_);
136 |         }
137 |     };
138 | }
139 | 
140 | #include "bool64x2.sse2.hpp"
141 | #include "int64x2.sse2.hpp"
142 | 
143 | namespace tue
144 | { // Out-of-class explicit_cast definitions, then the detail_ helpers for uint64x2.
145 |     inline uint64x2 uint64x2::explicit_cast(const bool64x2& s) noexcept
146 |     {
147 |         return __m128i(s);
148 |     }
149 | 
150 |     inline uint64x2 uint64x2::explicit_cast(const int64x2& s) noexcept
151 |     {
152 |         return __m128i(s);
153 |     }
154 | 
155 |     namespace detail_
156 |     { // Increment/decrement add or subtract 1 in both 64-bit lanes.
157 |         inline uint64x2& pre_increment_operator_s(uint64x2& s) noexcept
158 |         {
159 |             return s = _mm_add_epi64(s, uint64x2(1));
160 |         }
161 |         // Returns the value s held before the increment.
162 |         inline uint64x2 post_increment_operator_s(uint64x2& s) noexcept
163 |         {
164 |             const auto result = s;
165 |             s = _mm_add_epi64(s, uint64x2(1));
166 |             return result;
167 |         }
168 | 
169 |         inline uint64x2& pre_decrement_operator_s(uint64x2& s) noexcept
170 |         {
171 |             return s = _mm_sub_epi64(s, uint64x2(1));
172 |         }
173 |         // Returns the value s held before the decrement.
174 |         inline uint64x2 post_decrement_operator_s(uint64x2& s) noexcept
175 |         {
176 |             const auto result = s;
177 |             s = _mm_sub_epi64(s, uint64x2(1));
178 |             return result;
179 |         }
180 |         // XOR with an all-ones value flips every bit.
181 |         inline uint64x2 bitwise_not_operator_s(const uint64x2& s) noexcept
182 |         {
183 |             return _mm_xor_si128(s, uint64x2(~0ull));
184 |         }
185 |         // Lane-wise 64-bit addition.
186 |         inline uint64x2 addition_operator_ss(
187 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
188 |         {
189 |             return _mm_add_epi64(lhs, rhs);
190 |         }
191 |         // Lane-wise 64-bit subtraction.
192 |         inline uint64x2 subtraction_operator_ss(
193 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
194 |         {
195 |             return _mm_sub_epi64(lhs, rhs);
196 |         }
197 |         // Multiplication, division and modulo are commented-out TODO stubs.
198 |         /*inline uint64x2 multiplication_operator_ss(
199 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
200 |         {
201 |             // TODO
202 |         }
203 | 
204 |         inline uint64x2 division_operator_ss(
205 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
206 |         {
207 |             // TODO
208 |         }
209 | 
210 |         inline uint64x2 modulo_operator_ss(
211 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
212 |         {
213 |             // TODO
214 |         }*/
215 |         // Bitwise AND / OR / XOR over the whole 128-bit register.
216 |         inline uint64x2 bitwise_and_operator_ss(
217 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
218 |         {
219 |             return _mm_and_si128(lhs, rhs);
220 |         }
221 | 
222 |         inline uint64x2 bitwise_or_operator_ss(
223 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
224 |         {
225 |             return _mm_or_si128(lhs, rhs);
226 |         }
227 | 
228 |         inline uint64x2 bitwise_xor_operator_ss(
229 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
230 |         {
231 |             return _mm_xor_si128(lhs, rhs);
232 |         }
233 |         // Shifts both lanes left by rhs bits, shifting in zeros.
234 |         inline uint64x2 bitwise_shift_left_operator_si(
235 |             const uint64x2& lhs, int rhs) noexcept
236 |         {
237 |             return _mm_slli_epi64(lhs, rhs);
238 |         }
239 |         // Logical (zero-filling) right shift of both lanes by rhs bits.
240 |         inline uint64x2 bitwise_shift_right_operator_si(
241 |             const uint64x2& lhs, int rhs) noexcept
242 |         {
243 |             return _mm_srli_epi64(lhs, rhs);
244 |         }
245 |         // Compound forms: same intrinsics, result stored back into lhs.
246 |         inline uint64x2& addition_assignment_operator_ss(
247 |             uint64x2& lhs, const uint64x2& rhs) noexcept
248 |         {
249 |             return lhs = _mm_add_epi64(lhs, rhs);
250 |         }
251 | 
252 |         inline uint64x2& subtraction_assignment_operator_ss(
253 |             uint64x2& lhs, const uint64x2& rhs) noexcept
254 |         {
255 |             return lhs = _mm_sub_epi64(lhs, rhs);
256 |         }
257 |         // The matching compound multiply/divide/modulo are TODO stubs as well.
258 |         /*inline uint64x2& multiplication_assignment_operator_ss(
259 |             uint64x2& lhs, const uint64x2& rhs) noexcept
260 |         {
261 |             // TODO
262 |         }
263 | 
264 |         inline uint64x2& division_assignment_operator_ss(
265 |             uint64x2& lhs, const uint64x2& rhs) noexcept
266 |         {
267 |             // TODO
268 |         }
269 | 
270 |         inline uint64x2& modulo_assignment_operator_ss(
271 |             uint64x2& lhs, const uint64x2& rhs) noexcept
272 |         {
273 |             // TODO
274 |         }*/
275 | 
276 |         inline uint64x2& bitwise_and_assignment_operator_ss(
277 |             uint64x2& lhs, const uint64x2& rhs) noexcept
278 |         {
279 |             return lhs = _mm_and_si128(lhs, rhs);
280 |         }
281 | 
282 |         inline uint64x2& bitwise_or_assignment_operator_ss(
283 |             uint64x2& lhs, const uint64x2& rhs) noexcept
284 |         {
285 |             return lhs = _mm_or_si128(lhs, rhs);
286 |         }
287 | 
288 |         inline uint64x2& bitwise_xor_assignment_operator_ss(
289 |             uint64x2& lhs, const uint64x2& rhs) noexcept
290 |         {
291 |             return lhs = _mm_xor_si128(lhs, rhs);
292 |         }
293 | 
294 |         inline uint64x2& bitwise_shift_left_assignment_operator_si(
295 |             uint64x2& lhs, int rhs) noexcept
296 |         {
297 |             return lhs = _mm_slli_epi64(lhs, rhs);
298 |         }
299 | 
300 |         inline uint64x2& bitwise_shift_right_assignment_operator_si(
301 |             uint64x2& lhs, int rhs) noexcept
302 |         {
303 |             return lhs = _mm_srli_epi64(lhs, rhs);
304 |         }
305 |         // True only if all 16 bytes compare equal, i.e. both lanes match.
306 |         inline bool equality_operator_ss(
307 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
308 |         {
309 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
310 |         }
311 |         // True if at least one byte differs.
312 |         inline bool inequality_operator_ss(
313 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
314 |         {
315 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF;
316 |         }
317 |         // The lanes are unsigned, so abs returns its argument unchanged.
318 |         inline uint64x2 abs_s(const uint64x2& s) noexcept
319 |         {
320 |             return s;
321 |         }
322 |         // min/max are commented-out TODO stubs.
323 |         /*inline uint64x2 min_ss(
324 |             const uint64x2& s1, const uint64x2& s2) noexcept
325 |         {
326 |             // TODO
327 |         }
328 | 
329 |         inline uint64x2 max_ss(
330 |             const uint64x2& s1, const uint64x2& s2) noexcept
331 |         {
332 |             // TODO
333 |         }*/
334 |         // Keeps the bits of values where conditions has bits set; zero elsewhere.
335 |         inline uint64x2 mask_ss(
336 |             const bool64x2& conditions,
337 |             const uint64x2& values) noexcept
338 |         {
339 |             return _mm_and_si128(conditions, values);
340 |         }
341 |         // Bitwise blend: values where conditions bits are set, otherwise elsewhere.
342 |         inline uint64x2 select_sss(
343 |             const bool64x2& conditions,
344 |             const uint64x2& values,
345 |             const uint64x2& otherwise) noexcept
346 |         {
347 |             return _mm_or_si128(
348 |                 _mm_and_si128(conditions, values),
349 |                 _mm_andnot_si128(conditions, otherwise));
350 |         }
351 |         // Ordered lane-wise comparisons are commented-out TODO stubs.
352 |         /*inline bool64x2 less_ss(
353 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
354 |         {
355 |             // TODO
356 |         }
357 | 
358 |         inline bool64x2 less_equal_ss(
359 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
360 |         {
361 |             // TODO
362 |         }
363 | 
364 |         inline bool64x2 greater_ss(
365 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
366 |         {
367 |             // TODO
368 |         }
369 | 
370 |         inline bool64x2 greater_equal_ss(
371 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
372 |         {
373 |             // TODO
374 |         }*/
375 |         // Compares 32-bit halves, then ANDs each lane's high and low results.
376 |         inline bool64x2 equal_ss(
377 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
378 |         {
379 |             const auto cmp = _mm_cmpeq_epi32(lhs, rhs);
380 |             const auto hi = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(3, 3, 1, 1));
381 |             const auto lo = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 2, 0, 0));
382 |             return _mm_and_si128(hi, lo);
383 |         }
384 |         // Inverts every bit of the equal_ss mask.
385 |         inline bool64x2 not_equal_ss(
386 |             const uint64x2& lhs, const uint64x2& rhs) noexcept
387 |         {
388 |             return _mm_xor_si128(equal_ss(lhs, rhs), uint64x2(~0ull));
389 |         }
390 |     }
391 | }
392 | 


--------------------------------------------------------------------------------
/tests/transform.tests.cpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #include <tue/transform.hpp>
 10 | #include "tue.tests.hpp"
 11 | 
 12 | #include <tue/math.hpp>
 13 | #include <tue/quat.hpp>
 14 | #include <tue/vec.hpp>
 15 | 
 16 | namespace
 17 | {
 18 |     using namespace tue;
 19 | 
 20 |     TEST_CASE(axis_angle_from_rotation_vec)
 21 |     { // axis_angle(rotation vector): xyz() is the normalized input, a() its length.
 22 |         const auto aa1 = transform::axis_angle(1.2, 3.4, 5.6);
 23 |         test_assert(aa1.xyz() == math::normalize(dvec3(1.2, 3.4, 5.6)));
 24 |         test_assert(aa1.a() == math::length(dvec3(1.2, 3.4, 5.6)));
 25 |         // Zero input: the expected axis is dvec3::z_axis() with angle 0.
 26 |         const auto aa2 = transform::axis_angle(0.0, 0.0, 0.0);
 27 |         test_assert(aa2.xyz() == dvec3::z_axis());
 28 |         test_assert(aa2.a() == 0.0);
 29 |         // The dvec3 overload must agree with the three-scalar overload...
 30 |         const auto aa3 = transform::axis_angle(dvec3(1.2, 3.4, 5.6));
 31 |         test_assert(aa3 == aa1);
 32 |         // ...including for the zero vector.
 33 |         const auto aa4 = transform::axis_angle(dvec3(0.0, 0.0, 0.0));
 34 |         test_assert(aa4 == aa2);
 35 |     }
 36 | 
 37 |     TEST_CASE(rotation_vec_from_axis_angle)
 38 |     { // rotation_vec(x, y, z, angle) must equal (x, y, z) * angle.
 39 |         CONST_OR_CONSTEXPR auto rv1 =
 40 |             transform::rotation_vec(1.2, 3.4, 5.6, 7.8);
 41 |         test_assert(rv1 == dvec3(1.2, 3.4, 5.6) * 7.8);
 42 |         // dvec3 + scalar overload.
 43 |         CONST_OR_CONSTEXPR auto rv2 =
 44 |             transform::rotation_vec(dvec3(1.2, 3.4, 5.6), 7.8);
 45 |         test_assert(rv2 == rv1);
 46 |         // dvec4 overload carrying the same four values.
 47 |         CONST_OR_CONSTEXPR auto rv3 =
 48 |             transform::rotation_vec(dvec4(1.2, 3.4, 5.6, 7.8));
 49 |         test_assert(rv3 == rv1);
 50 |     }
 51 | 
 52 |     TEST_CASE(rotation_quat_from_axis_angle)
 53 |     { // Expected quat: v() = axis * sin(angle/2), s() = cos(angle/2).
 54 |         const auto rq1 = transform::rotation_quat(dvec3(1.2, 3.4, 5.6), 7.8);
 55 |         test_assert(rq1.v() == dvec3(1.2, 3.4, 5.6) * math::sin(7.8/2));
 56 |         test_assert(rq1.s() == math::cos(7.8/2));
 57 |         // Four-scalar overload must give the same quat.
 58 |         const auto rq2 = transform::rotation_quat(1.2, 3.4, 5.6, 7.8);
 59 |         test_assert(rq2 == rq1);
 60 |         // dvec4 overload must give the same quat.
 61 |         const auto rq3 = transform::rotation_quat(dvec4(1.2, 3.4, 5.6, 7.8));
 62 |         test_assert(rq3 == rq1);
 63 |     }
 64 | 
 65 |     TEST_CASE(rotation_quat_from_rotation_vec)
 66 |     { // Three-scalar rotation_quat must match going through axis_angle first.
 67 |         const auto rq1 = transform::rotation_quat(1.2, 3.4, 5.6);
 68 |         test_assert(rq1 ==
 69 |             transform::rotation_quat(transform::axis_angle(1.2, 3.4, 5.6)));
 70 |         // Zero rotation vector must give the identity quat.
 71 |         const auto rq2 = transform::rotation_quat(0.0, 0.0, 0.0);
 72 |         test_assert(rq2 == dquat::identity());
 73 |         // The dvec3 overload must agree with the three-scalar overload...
 74 |         const auto rq3 = transform::rotation_quat(dvec3(1.2, 3.4, 5.6));
 75 |         test_assert(rq3 == rq1);
 76 |         // ...including for the zero vector.
 77 |         const auto rq4 = transform::rotation_quat(dvec3(0.0, 0.0, 0.0));
 78 |         test_assert(rq4 == rq2);
 79 |     }
 80 | 
 81 |     TEST_CASE(translation_mat_2d)
 82 |     { // Default result: four dvec4s, offsets in the last component of m1[0] and m1[1].
 83 |         CONST_OR_CONSTEXPR auto m1 =
 84 |             transform::translation_mat(1.2, 3.4);
 85 |         test_assert(m1[0] == dvec4(1.0, 0.0, 0.0, 1.2));
 86 |         test_assert(m1[1] == dvec4(0.0, 1.0, 0.0, 3.4));
 87 |         test_assert(m1[2] == dvec4(0.0, 0.0, 1.0, 0.0));
 88 |         test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0));
 89 |         // <double, 2, 3> variant: two dvec3s, offsets again in the last component.
 90 |         CONST_OR_CONSTEXPR auto m2 =
 91 |             transform::translation_mat<double, 2, 3>(1.2, 3.4);
 92 |         test_assert(m2[0] == dvec3(1.0, 0.0, 1.2));
 93 |         test_assert(m2[1] == dvec3(0.0, 1.0, 3.4));
 94 |         // dvec2 overloads must agree with the scalar overloads.
 95 |         CONST_OR_CONSTEXPR auto m3 =
 96 |             transform::translation_mat(dvec2(1.2, 3.4));
 97 |         test_assert(m3 == m1);
 98 | 
 99 |         CONST_OR_CONSTEXPR auto m4 =
100 |             transform::translation_mat<double, 2, 3>(dvec2(1.2, 3.4));
101 |         test_assert(m4 == m2);
102 |     }
103 | 
104 |     TEST_CASE(translation_mat_3d)
105 |     { // Default result: four dvec4s, offsets in the last component of m1[0..2].
106 |         CONST_OR_CONSTEXPR auto m1 =
107 |             transform::translation_mat(1.2, 3.4, 5.6);
108 |         test_assert(m1[0] == dvec4(1.0, 0.0, 0.0, 1.2));
109 |         test_assert(m1[1] == dvec4(0.0, 1.0, 0.0, 3.4));
110 |         test_assert(m1[2] == dvec4(0.0, 0.0, 1.0, 5.6));
111 |         test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0));
112 |         // <double, 3, 4> variant must match m1 converted to dmat3x4.
113 |         CONST_OR_CONSTEXPR auto m2 =
114 |             transform::translation_mat<double, 3, 4>(1.2, 3.4, 5.6);
115 |         test_assert(m2 == dmat3x4(m1));
116 |         // dvec3 overloads must agree with the scalar overloads.
117 |         CONST_OR_CONSTEXPR auto m3 =
118 |             transform::translation_mat(dvec3(1.2, 3.4, 5.6));
119 |         test_assert(m3 == m1);
120 | 
121 |         CONST_OR_CONSTEXPR auto m4 =
122 |             transform::translation_mat<double, 3, 4>(dvec3(1.2, 3.4, 5.6));
123 |         test_assert(m4 == m2);
124 |     }
125 | 
126 |     TEST_CASE(rotation_mat_2d)
127 |     { // Single-angle rotation: cos/-sin/sin/cos in the top-left 2x2, identity elsewhere.
128 |         const auto m1 = transform::rotation_mat(1.2);
129 |         test_assert(m1[0] == dvec4(math::cos(1.2), -math::sin(1.2), 0.0, 0.0));
130 |         test_assert(m1[1] == dvec4(math::sin(1.2),  math::cos(1.2), 0.0, 0.0));
131 |         test_assert(m1[2] == dvec4(          0.0 ,            0.0 , 1.0, 0.0));
132 |         test_assert(m1[3] == dvec4(          0.0 ,            0.0 , 0.0, 1.0));
133 |         // <double, 2, 2> variant must match m1 converted to dmat2x2.
134 |         const auto m2 = transform::rotation_mat<double, 2, 2>(1.2);
135 |         test_assert(m2 == dmat2x2(m1));
136 |     }
137 | 
138 |     TEST_CASE(rotation_mat_from_axis_angle)
139 |     { // s and c are filled by math::sincos(7.8, ...) (presumably sin and cos of 7.8).
140 |         double s, c;
141 |         math::sincos(7.8, s, c);
142 |         // Default result: first three dvec4s checked entry by entry with nearly_equal.
143 |         const auto m1 = transform::rotation_mat(1.2, 3.4, 5.6, 7.8);
144 |         test_assert(nearly_equal(m1[0][0], 1.2*1.2*(1-c) + c));
145 |         test_assert(nearly_equal(m1[0][1], 1.2*3.4*(1-c) - 5.6*s));
146 |         test_assert(nearly_equal(m1[0][2], 1.2*5.6*(1-c) + 3.4*s));
147 |         test_assert(nearly_equal(m1[0][3], 0.0));
148 |         test_assert(nearly_equal(m1[1][0], 1.2*3.4*(1-c) + 5.6*s));
149 |         test_assert(nearly_equal(m1[1][1], 3.4*3.4*(1-c) + c));
150 |         test_assert(nearly_equal(m1[1][2], 3.4*5.6*(1-c) - 1.2*s));
151 |         test_assert(nearly_equal(m1[1][3], 0.0));
152 |         test_assert(nearly_equal(m1[2][0], 1.2*5.6*(1-c) - 3.4*s));
153 |         test_assert(nearly_equal(m1[2][1], 3.4*5.6*(1-c) + 1.2*s));
154 |         test_assert(nearly_equal(m1[2][2], 5.6*5.6*(1-c) + c));
155 |         test_assert(nearly_equal(m1[2][3], 0.0));
156 |         test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0));
157 |         // <double, 3, 3> variant must match m1 converted to dmat3x3.
158 |         const auto m2 = transform::rotation_mat<double, 3, 3>(
159 |             1.2, 3.4, 5.6, 7.8);
160 |         test_assert(m2 == dmat3x3(m1));
161 |         // dvec3 axis + scalar angle overloads.
162 |         const auto m3 = transform::rotation_mat(
163 |             dvec3(1.2, 3.4, 5.6), 7.8);
164 |         test_assert(m3 == m1);
165 | 
166 |         const auto m4 = transform::rotation_mat<double, 3, 3>(
167 |             dvec3(1.2, 3.4, 5.6), 7.8);
168 |         test_assert(m4 == m2);
169 |         // dvec4 overloads carrying the same four values.
170 |         const auto m5 = transform::rotation_mat(
171 |             dvec4(1.2, 3.4, 5.6, 7.8));
172 |         test_assert(m5 == m1);
173 | 
174 |         const auto m6 = transform::rotation_mat<double, 3, 3>(
175 |             dvec4(1.2, 3.4, 5.6, 7.8));
176 |         test_assert(m6 == m2);
177 |     }
178 | 
179 |     TEST_CASE(rotation_mat_from_rotation_vec)
180 |     { // Three-scalar rotation_mat must match going through axis_angle first.
181 |         const auto m1 = transform::rotation_mat(
182 |             1.2, 3.4, 5.6);
183 |         test_assert(m1 ==
184 |             transform::rotation_mat(transform::axis_angle(1.2, 3.4, 5.6)));
185 |         // <double, 3, 3> variant must match m1 converted to dmat3x3.
186 |         const auto m2 = transform::rotation_mat<double, 3, 3>(
187 |             1.2, 3.4, 5.6);
188 |         test_assert(m2 == dmat3x3(m1));
189 |         // dvec3 overloads must agree with the scalar overloads.
190 |         const auto m3 = transform::rotation_mat(
191 |             dvec3(1.2, 3.4, 5.6));
192 |         test_assert(m3 == m1);
193 | 
194 |         const auto m4 = transform::rotation_mat<double, 3, 3>(
195 |             dvec3(1.2, 3.4, 5.6));
196 |         test_assert(m4 == m2);
197 |     }
198 | 
199 |     TEST_CASE(rotation_mat_from_rotation_quat)
200 |     { // Entries are compared exactly (==) against closed-form terms in x, y, z, w.
201 |         CONST_OR_CONSTEXPR auto x = 1.2, y = 3.4, z = 5.6, w = 7.8;
202 |         CONST_OR_CONSTEXPR auto m1 =
203 |             transform::rotation_mat(dquat(x, y, z, w));
204 |         test_assert(m1[0][0] == 1 - 2*y*y - 2*z*z);
205 |         test_assert(m1[0][1] == 2*x*y - 2*z*w);
206 |         test_assert(m1[0][2] == 2*x*z + 2*y*w);
207 |         test_assert(m1[0][3] == 0.0);
208 |         test_assert(m1[1][0] == 2*x*y + 2*z*w);
209 |         test_assert(m1[1][1] == 1 - 2*x*x - 2*z*z);
210 |         test_assert(m1[1][2] == 2*y*z - 2*x*w);
211 |         test_assert(m1[1][3] == 0.0);
212 |         test_assert(m1[2][0] == 2*x*z - 2*y*w);
213 |         test_assert(m1[2][1] == 2*y*z + 2*x*w);
214 |         test_assert(m1[2][2] == 1 - 2*x*x - 2*y*y);
215 |         test_assert(m1[2][3] == 0.0);
216 |         test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0));
217 |         // <double, 3, 3> variant must match m1 converted to dmat3x3.
218 |         CONST_OR_CONSTEXPR auto m2 =
219 |             transform::rotation_mat<double, 3, 3>(dquat(x, y, z, w));
220 |         test_assert(m2 == dmat3x3(m1));
221 |     }
222 | 
223 |     TEST_CASE(scale_mat_2d)
224 |     {
225 |         // Scaling x by 1.2 and y by 3.4 must leave the z and w axes alone.
226 |         CONST_OR_CONSTEXPR auto m1 = transform::scale_mat(1.2, 3.4);
227 |         test_assert(m1[0] == dvec4(1.2, 0.0, 0.0, 0.0));
228 |         test_assert(m1[1] == dvec4(0.0, 3.4, 0.0, 0.0));
229 |         test_assert(m1[2] == dvec4(0.0, 0.0, 1.0, 0.0));
230 |         test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0));
231 |         // Compare against m1 (not m2 itself) so the check can actually fail.
232 |         CONST_OR_CONSTEXPR auto m2 =
233 |             transform::scale_mat<double, 2, 2>(1.2, 3.4);
234 |         test_assert(m2 == dmat2x2(m1));
235 |         // The dvec2 overloads must agree with the scalar overloads.
236 |         CONST_OR_CONSTEXPR auto m3 =
237 |             transform::scale_mat(dvec2(1.2, 3.4));
238 |         test_assert(m3 == m1);
239 | 
240 |         CONST_OR_CONSTEXPR auto m4 =
241 |             transform::scale_mat<double, 2, 2>(dvec2(1.2, 3.4));
242 |         test_assert(m4 == m2);
243 |     }
244 | 
245 |     TEST_CASE(scale_mat_3d)
246 |     {
247 |         // Per-axis factors land on the diagonal; the w axis stays at 1.
248 |         CONST_OR_CONSTEXPR auto s4 = transform::scale_mat(1.2, 3.4, 5.6);
249 |         test_assert(s4[0] == dvec4(1.2, 0.0, 0.0, 0.0));
250 |         test_assert(s4[1] == dvec4(0.0, 3.4, 0.0, 0.0));
251 |         test_assert(s4[2] == dvec4(0.0, 0.0, 5.6, 0.0));
252 |         test_assert(s4[3] == dvec4(0.0, 0.0, 0.0, 1.0));
253 |         // An explicit 3x3 request must equal the converted matrix above.
254 |         CONST_OR_CONSTEXPR auto s3 =
255 |             transform::scale_mat<double, 3, 3>(1.2, 3.4, 5.6);
256 |         test_assert(s3 == dmat3x3(s4));
257 |         // The dvec3 overloads must agree with the scalar overloads.
258 |         CONST_OR_CONSTEXPR auto s4_from_vec =
259 |             transform::scale_mat(dvec3(1.2, 3.4, 5.6));
260 |         test_assert(s4_from_vec == s4);
261 | 
262 |         CONST_OR_CONSTEXPR auto s3_from_vec =
263 |             transform::scale_mat<double, 3, 3>(dvec3(1.2, 3.4, 5.6));
264 |         test_assert(s3_from_vec == s3);
265 |     }
266 | 
267 |     TEST_CASE(perspective_mat)
268 |     {   // Pins all 16 entries for the arguments (1.2, 3.4, 5.6, 7.8).
269 |         const auto m1 = transform::perspective_mat(1.2, 3.4, 5.6, 7.8);
270 |         test_assert(nearly_equal(
271 |             m1[0][0], math::cos(1.2/2) / math::sin(1.2/2) / 3.4));
272 |         test_assert(m1[0][1] == 0.0);
273 |         test_assert(m1[0][2] == 0.0);
274 |         test_assert(m1[0][3] == 0.0);
275 |         test_assert(m1[1][0] == 0.0);
276 |         test_assert(nearly_equal(
277 |             m1[1][1], math::cos(1.2/2) / math::sin(1.2/2)));
278 |         test_assert(m1[1][2] == 0.0);
279 |         test_assert(m1[1][3] == 0.0);
280 |         test_assert(m1[2][0] == 0.0);
281 |         test_assert(m1[2][1] == 0.0);
282 |         test_assert(m1[2][2] ==     (5.6+7.8) / (5.6-7.8));
283 |         test_assert(m1[2][3] == 2 * (5.6*7.8) / (5.6-7.8));
284 |         test_assert(m1[3][0] == 0.0);
285 |         test_assert(m1[3][1] == 0.0);
286 |         test_assert(m1[3][2] == -1.0);
287 |         test_assert(m1[3][3] == 0.0);
288 |         // Spelling out <double, 4, 4> must produce the same matrix.
289 |         const auto m2 = transform::perspective_mat<double, 4, 4>(
290 |             1.2, 3.4, 5.6, 7.8);
291 |         test_assert(m2 == m1);
292 |     }
293 | 
294 |     TEST_CASE(ortho_mat)
295 |     {   // Pins all 16 entries for the arguments (1.2, 3.4, 5.6, 7.8).
296 |         CONST_OR_CONSTEXPR auto m1 = transform::ortho_mat(1.2, 3.4, 5.6, 7.8);
297 |         test_assert(m1[0][0] == 2 / 1.2);
298 |         test_assert(m1[0][1] == 0.0);
299 |         test_assert(m1[0][2] == 0.0);
300 |         test_assert(m1[0][3] == 0.0);
301 |         test_assert(m1[1][0] == 0.0);
302 |         test_assert(m1[1][1] == 2 / 3.4);
303 |         test_assert(m1[1][2] == 0.0);
304 |         test_assert(m1[1][3] == 0.0);
305 |         test_assert(m1[2][0] == 0.0);
306 |         test_assert(m1[2][1] == 0.0);
307 |         test_assert(m1[2][2] ==        2  / (5.6-7.8));
308 |         test_assert(m1[2][3] == (5.6+7.8) / (5.6-7.8));
309 |         test_assert(m1[3][0] == 0.0);
310 |         test_assert(m1[3][1] == 0.0);
311 |         test_assert(m1[3][2] == 0.0);
312 |         test_assert(m1[3][3] == 1.0);
313 |         // Compare with m1 (not m2 itself) so the 3x4 check can really fail.
314 |         const auto m2 = transform::ortho_mat<double, 3, 4>(1.2, 3.4, 5.6, 7.8);
315 |         test_assert(m2 == dmat3x4(m1));
316 |     }
317 | }
318 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/uint16x8.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <cstdint>
 14 | #include <type_traits>
 15 | 
 16 | #include "../../../simd.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     template<>
 21 |     class alignas(tue::detail_::alignof_simd<std::uint16_t, 8>())
 22 |     simd<std::uint16_t, 8>
 23 |     {
 24 |         __m128i underlying_; // eight 16-bit lanes held in one SSE2 register
 25 |         // Casting helpers; this generic one converts lane by lane via data().
 26 |     private:
 27 |         template<typename U>
 28 |         static uint16x8 explicit_cast(const simd<U, 8>& s) noexcept
 29 |         {
 30 |             return {
 31 |                 std::uint16_t(s.data()[0]),
 32 |                 std::uint16_t(s.data()[1]),
 33 |                 std::uint16_t(s.data()[2]),
 34 |                 std::uint16_t(s.data()[3]),
 35 |                 std::uint16_t(s.data()[4]),
 36 |                 std::uint16_t(s.data()[5]),
 37 |                 std::uint16_t(s.data()[6]),
 38 |                 std::uint16_t(s.data()[7]),
 39 |             };
 40 |         }
 41 |         // Overloads for bool16x8/int16x8 are defined further down this header.
 42 |         inline static uint16x8 explicit_cast(const bool16x8& s) noexcept;
 43 | 
 44 |         inline static uint16x8 explicit_cast(const int16x8& s) noexcept;
 45 | 
 46 |     public:
 47 |         using component_type = std::uint16_t;
 48 | 
 49 |         static constexpr int component_count = 8;
 50 | 
 51 |         static constexpr bool is_accelerated = true;
 52 |         // Defaulted: default-initialization leaves the lanes indeterminate.
 53 |         simd() noexcept = default;
 54 |         // Broadcasts x into all eight lanes.
 55 |         explicit simd(std::uint16_t x) noexcept
 56 |         :
 57 |             underlying_(_mm_set1_epi16(x))
 58 |         {
 59 |         }
 60 |         // Disabled here by enable_if (M defaults to 8); only declared.
 61 |         template<int M = 8, typename = std::enable_if_t<M == 2>>
 62 |         inline simd(
 63 |             std::uint16_t x, std::uint16_t y) noexcept;
 64 |         // Disabled here by enable_if (M defaults to 8); only declared.
 65 |         template<int M = 8, typename = std::enable_if_t<M == 4>>
 66 |         inline simd(
 67 |             std::uint16_t x, std::uint16_t y,
 68 |             std::uint16_t z, std::uint16_t w) noexcept;
 69 |         // Enabled form: s0 goes to lane 0, s7 to lane 7 (_mm_setr_ ordering).
 70 |         template<int M = 8, typename = std::enable_if_t<M == 8>>
 71 |         inline simd(
 72 |             std::uint16_t s0, std::uint16_t s1,
 73 |             std::uint16_t s2, std::uint16_t s3,
 74 |             std::uint16_t s4, std::uint16_t s5,
 75 |             std::uint16_t s6, std::uint16_t s7) noexcept
 76 |         :
 77 |             underlying_(_mm_setr_epi16(
 78 |                 s0, s1, s2, s3, s4, s5, s6, s7))
 79 |         {
 80 |         }
 81 |         // Disabled here by enable_if (M defaults to 8); only declared.
 82 |         template<int M = 8, typename = std::enable_if_t<M == 16>>
 83 |         inline simd(
 84 |             std::uint16_t  s0, std::uint16_t  s1,
 85 |             std::uint16_t  s2, std::uint16_t  s3,
 86 |             std::uint16_t  s4, std::uint16_t  s5,
 87 |             std::uint16_t  s6, std::uint16_t  s7,
 88 |             std::uint16_t  s8, std::uint16_t  s9,
 89 |             std::uint16_t s10, std::uint16_t s11,
 90 |             std::uint16_t s12, std::uint16_t s13,
 91 |             std::uint16_t s14, std::uint16_t s15) noexcept;
 92 |         // Converts from another 8-lane simd via the explicit_cast overloads.
 93 |         template<typename U>
 94 |         explicit simd(const simd<U, 8>& s) noexcept
 95 |         {
 96 |             *this = explicit_cast(s);
 97 |         }
 98 |         // Implicit wrap of a raw register, so intrinsic results convert freely.
 99 |         simd(__m128i underlying) noexcept
100 |         :
101 |             underlying_(underlying)
102 |         {
103 |         }
104 |         // Implicit unwrap, so values can be passed straight to intrinsics.
105 |         operator __m128i() const noexcept
106 |         {
107 |             return underlying_;
108 |         }
109 |         // Returns a vector with every lane set to zero.
110 |         static uint16x8 zero() noexcept
111 |         {
112 |             return _mm_setzero_si128();
113 |         }
114 |         // Aligned load; _mm_load_si128 requires data to be 16-byte aligned.
115 |         static uint16x8 load(const std::uint16_t* data) noexcept
116 |         {
117 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
118 |         }
119 |         // Unaligned load; data may have any alignment.
120 |         static uint16x8 loadu(const std::uint16_t* data) noexcept
121 |         {
122 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
123 |         }
124 |         // Aligned store; _mm_store_si128 requires 16-byte aligned data.
125 |         void store(std::uint16_t* data) const noexcept
126 |         {
127 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
128 |         }
129 |         // Unaligned store; data may have any alignment.
130 |         void storeu(std::uint16_t* data) const noexcept
131 |         {
132 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
133 |         }
134 |         // Views the register's storage as an array of eight uint16_t lanes.
135 |         const std::uint16_t* data() const noexcept
136 |         {
137 |             return reinterpret_cast<const std::uint16_t*>(&underlying_);
138 |         }
139 |         // Mutable counterpart of the accessor above.
140 |         std::uint16_t* data() noexcept
141 |         {
142 |             return reinterpret_cast<std::uint16_t*>(&underlying_);
143 |         }
144 |     };
145 | }
146 | 
147 | #include "bool16x8.sse2.hpp"
148 | #include "int16x8.sse2.hpp"
149 | 
150 | namespace tue
151 | {
152 |     inline uint16x8 uint16x8::explicit_cast(const bool16x8& s) noexcept
153 |     {
154 |         return __m128i(s); // rewraps s's __m128i value; no per-lane work
155 |     }
156 |     // Same approach for int16x8: the __m128i value of s is rewrapped as-is.
157 |     inline uint16x8 uint16x8::explicit_cast(const int16x8& s) noexcept
158 |     {
159 |         return __m128i(s);
160 |     }
161 | 
162 |     namespace detail_
163 |     {
164 |         inline uint16x8& pre_increment_operator_s(uint16x8& s) noexcept
165 |         {
166 |             return s = _mm_add_epi16(s, uint16x8(1)); // +1 per lane, wrapping
167 |         }
168 |         // Post-increment: bumps s but returns the value it held before.
169 |         inline uint16x8 post_increment_operator_s(uint16x8& s) noexcept
170 |         {
171 |             const auto result = s;
172 |             s = _mm_add_epi16(s, uint16x8(1));
173 |             return result;
174 |         }
175 |         // Subtracts 1 from every lane (wrapping) and returns s.
176 |         inline uint16x8& pre_decrement_operator_s(uint16x8& s) noexcept
177 |         {
178 |             return s = _mm_sub_epi16(s, uint16x8(1));
179 |         }
180 |         // Post-decrement: returns the value s held before the subtraction.
181 |         inline uint16x8 post_decrement_operator_s(uint16x8& s) noexcept
182 |         {
183 |             const auto result = s;
184 |             s = _mm_sub_epi16(s, uint16x8(1));
185 |             return result;
186 |         }
187 |         // Flips every bit by XOR-ing with an all-ones vector.
188 |         inline uint16x8 bitwise_not_operator_s(const uint16x8& s) noexcept
189 |         {
190 |             return _mm_xor_si128(s, uint16x8(0xFFFF));
191 |         }
192 |         // Lane-wise add; wraps on overflow (non-saturating intrinsic).
193 |         inline uint16x8 addition_operator_ss(
194 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
195 |         {
196 |             return _mm_add_epi16(lhs, rhs);
197 |         }
198 |         // Lane-wise subtract; wraps on underflow (non-saturating intrinsic).
199 |         inline uint16x8 subtraction_operator_ss(
200 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
201 |         {
202 |             return _mm_sub_epi16(lhs, rhs);
203 |         }
204 |         // Commented out below: not implemented yet (bodies are only TODO).
205 |         /*inline uint16x8 multiplication_operator_ss(
206 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
207 |         {
208 |             // TODO
209 |         }
210 | 
211 |         inline uint16x8 division_operator_ss(
212 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
213 |         {
214 |             // TODO
215 |         }
216 | 
217 |         inline uint16x8 modulo_operator_ss(
218 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
219 |         {
220 |             // TODO
221 |         }*/
222 |         // Bitwise AND over the whole 128-bit register.
223 |         inline uint16x8 bitwise_and_operator_ss(
224 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
225 |         {
226 |             return _mm_and_si128(lhs, rhs);
227 |         }
228 |         // Bitwise OR over the whole 128-bit register.
229 |         inline uint16x8 bitwise_or_operator_ss(
230 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
231 |         {
232 |             return _mm_or_si128(lhs, rhs);
233 |         }
234 |         // Bitwise XOR over the whole 128-bit register.
235 |         inline uint16x8 bitwise_xor_operator_ss(
236 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
237 |         {
238 |             return _mm_xor_si128(lhs, rhs);
239 |         }
240 |         // Shifts every lane left by the same count rhs, filling with zeros.
241 |         inline uint16x8 bitwise_shift_left_operator_si(
242 |             const uint16x8& lhs, int rhs) noexcept
243 |         {
244 |             return _mm_slli_epi16(lhs, rhs);
245 |         }
246 |         // Logical (zero-filling) right shift of every lane by rhs bits.
247 |         inline uint16x8 bitwise_shift_right_operator_si(
248 |             const uint16x8& lhs, int rhs) noexcept
249 |         {
250 |             return _mm_srli_epi16(lhs, rhs);
251 |         }
252 |         // Compound-assignment forms: same intrinsics, result stored into lhs.
253 |         inline uint16x8& addition_assignment_operator_ss(
254 |             uint16x8& lhs, const uint16x8& rhs) noexcept
255 |         {
256 |             return lhs = _mm_add_epi16(lhs, rhs);
257 |         }
258 | 
259 |         inline uint16x8& subtraction_assignment_operator_ss(
260 |             uint16x8& lhs, const uint16x8& rhs) noexcept
261 |         {
262 |             return lhs = _mm_sub_epi16(lhs, rhs);
263 |         }
264 |         // Commented out below: not implemented yet (bodies are only TODO).
265 |         /*inline uint16x8& multiplication_assignment_operator_ss(
266 |             uint16x8& lhs, const uint16x8& rhs) noexcept
267 |         {
268 |             // TODO
269 |         }
270 | 
271 |         inline uint16x8& division_assignment_operator_ss(
272 |             uint16x8& lhs, const uint16x8& rhs) noexcept
273 |         {
274 |             // TODO
275 |         }
276 | 
277 |         inline uint16x8& modulo_assignment_operator_ss(
278 |             uint16x8& lhs, const uint16x8& rhs) noexcept
279 |         {
280 |             // TODO
281 |         }*/
282 | 
283 |         inline uint16x8& bitwise_and_assignment_operator_ss(
284 |             uint16x8& lhs, const uint16x8& rhs) noexcept
285 |         {
286 |             return lhs = _mm_and_si128(lhs, rhs);
287 |         }
288 | 
289 |         inline uint16x8& bitwise_or_assignment_operator_ss(
290 |             uint16x8& lhs, const uint16x8& rhs) noexcept
291 |         {
292 |             return lhs = _mm_or_si128(lhs, rhs);
293 |         }
294 | 
295 |         inline uint16x8& bitwise_xor_assignment_operator_ss(
296 |             uint16x8& lhs, const uint16x8& rhs) noexcept
297 |         {
298 |             return lhs = _mm_xor_si128(lhs, rhs);
299 |         }
300 | 
301 |         inline uint16x8& bitwise_shift_left_assignment_operator_si(
302 |             uint16x8& lhs, int rhs) noexcept
303 |         {
304 |             return lhs = _mm_slli_epi16(lhs, rhs);
305 |         }
306 | 
307 |         inline uint16x8& bitwise_shift_right_assignment_operator_si(
308 |             uint16x8& lhs, int rhs) noexcept
309 |         {
310 |             return lhs = _mm_srli_epi16(lhs, rhs);
311 |         }
312 |         // Whole-vector equality: true only when all 16 bytes compare equal.
313 |         inline bool equality_operator_ss(
314 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
315 |         {
316 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
317 |         }
318 |         // Whole-vector inequality: true when at least one byte differs.
319 |         inline bool inequality_operator_ss(
320 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
321 |         {
322 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF;
323 |         }
324 |         // Returns s unchanged (the lanes are unsigned).
325 |         inline uint16x8 abs_s(const uint16x8& s) noexcept
326 |         {
327 |             return s;
328 |         }
329 |         // Commented out below: not implemented yet (bodies are only TODO).
330 |         /*inline uint16x8 min_ss(
331 |             const uint16x8& s1, const uint16x8& s2) noexcept
332 |         {
333 |             // TODO
334 |         }
335 | 
336 |         inline uint16x8 max_ss(
337 |             const uint16x8& s1, const uint16x8& s2) noexcept
338 |         {
339 |             // TODO
340 |         }*/
341 |         // ANDs values with conditions, zeroing bits where the mask is clear.
342 |         inline uint16x8 mask_ss(
343 |             const bool16x8& conditions,
344 |             const uint16x8& values) noexcept
345 |         {
346 |             return _mm_and_si128(conditions, values);
347 |         }
348 |         // Bitwise blend: values where conditions' bits are set, else otherwise.
349 |         inline uint16x8 select_sss(
350 |             const bool16x8& conditions,
351 |             const uint16x8& values,
352 |             const uint16x8& otherwise) noexcept
353 |         {
354 |             return _mm_or_si128(
355 |                 _mm_and_si128(conditions, values),
356 |                 _mm_andnot_si128(conditions, otherwise));
357 |         }
358 |         // Commented out below: not implemented yet (bodies are only TODO).
359 |         /*inline bool16x8 less_ss(
360 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
361 |         {
362 |             // TODO
363 |         }
364 | 
365 |         inline bool16x8 less_equal_ss(
366 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
367 |         {
368 |             // TODO
369 |         }
370 | 
371 |         inline bool16x8 greater_ss(
372 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
373 |         {
374 |             // TODO
375 |         }
376 | 
377 |         inline bool16x8 greater_equal_ss(
378 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
379 |         {
380 |             // TODO
381 |         }*/
382 |         // Per-lane compare: equal lanes become all-ones, other lanes zero.
383 |         inline bool16x8 equal_ss(
384 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
385 |         {
386 |             return _mm_cmpeq_epi16(lhs, rhs);
387 |         }
388 |         // Per-lane inequality: the bitwise complement of the equality mask.
389 |         inline bool16x8 not_equal_ss(
390 |             const uint16x8& lhs, const uint16x8& rhs) noexcept
391 |         {
392 |             return _mm_xor_si128(_mm_cmpeq_epi16(lhs, rhs), uint16x8(0xFFFF));
393 |         }
394 |     }
395 | }
396 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/uint32x4.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <cstdint>
 14 | #include <type_traits>
 15 | 
 16 | #include "../../../simd.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     template<>
 21 |     class alignas(tue::detail_::alignof_simd<std::uint32_t, 4>())
 22 |     simd<std::uint32_t, 4>
 23 |     {
 24 |         __m128i underlying_; // four 32-bit lanes held in one SSE2 register
 25 |         // Casting helpers; this generic one converts lane by lane via data().
 26 |     private:
 27 |         template<typename U>
 28 |         static uint32x4 explicit_cast(const simd<U, 4>& s) noexcept
 29 |         {
 30 |             return {
 31 |                 std::uint32_t(s.data()[0]),
 32 |                 std::uint32_t(s.data()[1]),
 33 |                 std::uint32_t(s.data()[2]),
 34 |                 std::uint32_t(s.data()[3]),
 35 |             };
 36 |         }
 37 |         // The overloads below are defined further down this header.
 38 |         inline static uint32x4 explicit_cast(const bool32x4& s) noexcept;
 39 | 
 40 |         inline static uint32x4 explicit_cast(const float32x4& s) noexcept;
 41 | 
 42 |         inline static uint32x4 explicit_cast(const int32x4& s) noexcept;
 43 | 
 44 |     public:
 45 |         using component_type = std::uint32_t;
 46 | 
 47 |         static constexpr int component_count = 4;
 48 | 
 49 |         static constexpr bool is_accelerated = true;
 50 |         // Defaulted: default-initialization leaves the lanes indeterminate.
 51 |         simd() noexcept = default;
 52 |         // Broadcasts x into all four lanes.
 53 |         explicit simd(std::uint32_t x) noexcept
 54 |         :
 55 |             underlying_(_mm_set1_epi32(x))
 56 |         {
 57 |         }
 58 |         // Disabled here by enable_if (M defaults to 4); only declared.
 59 |         template<int M = 4, typename = std::enable_if_t<M == 2>>
 60 |         inline simd(
 61 |             std::uint32_t x, std::uint32_t y) noexcept;
 62 |         // Enabled form: x goes to lane 0, w to lane 3 (_mm_setr_ ordering).
 63 |         template<int M = 4, typename = std::enable_if_t<M == 4>>
 64 |         simd(
 65 |             std::uint32_t x, std::uint32_t y,
 66 |             std::uint32_t z, std::uint32_t w) noexcept
 67 |         :
 68 |             underlying_(_mm_setr_epi32(x, y, z, w))
 69 |         {
 70 |         }
 71 |         // Disabled here by enable_if (M defaults to 4); only declared.
 72 |         template<int M = 4, typename = std::enable_if_t<M == 8>>
 73 |         inline simd(
 74 |             std::uint32_t s0, std::uint32_t s1,
 75 |             std::uint32_t s2, std::uint32_t s3,
 76 |             std::uint32_t s4, std::uint32_t s5,
 77 |             std::uint32_t s6, std::uint32_t s7) noexcept;
 78 |         // Disabled here by enable_if (M defaults to 4); only declared.
 79 |         template<int M = 4, typename = std::enable_if_t<M == 16>>
 80 |         inline simd(
 81 |             std::uint32_t  s0, std::uint32_t  s1,
 82 |             std::uint32_t  s2, std::uint32_t  s3,
 83 |             std::uint32_t  s4, std::uint32_t  s5,
 84 |             std::uint32_t  s6, std::uint32_t  s7,
 85 |             std::uint32_t  s8, std::uint32_t  s9,
 86 |             std::uint32_t s10, std::uint32_t s11,
 87 |             std::uint32_t s12, std::uint32_t s13,
 88 |             std::uint32_t s14, std::uint32_t s15) noexcept;
 89 |         // Converts from another 4-lane simd via the explicit_cast overloads.
 90 |         template<typename U>
 91 |         explicit simd(const simd<U, 4>& s) noexcept
 92 |         {
 93 |             *this = explicit_cast(s);
 94 |         }
 95 |         // Implicit wrap of a raw register, so intrinsic results convert freely.
 96 |         simd(__m128i underlying) noexcept
 97 |         :
 98 |             underlying_(underlying)
 99 |         {
100 |         }
101 |         // Implicit unwrap, so values can be passed straight to intrinsics.
102 |         operator __m128i() const noexcept
103 |         {
104 |             return underlying_;
105 |         }
106 |         // Returns a vector with every lane set to zero.
107 |         static uint32x4 zero() noexcept
108 |         {
109 |             return _mm_setzero_si128();
110 |         }
111 |         // Aligned load; _mm_load_si128 requires data to be 16-byte aligned.
112 |         static uint32x4 load(const std::uint32_t* data) noexcept
113 |         {
114 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
115 |         }
116 |         // Unaligned load; data may have any alignment.
117 |         static uint32x4 loadu(const std::uint32_t* data) noexcept
118 |         {
119 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
120 |         }
121 |         // Aligned store; _mm_store_si128 requires 16-byte aligned data.
122 |         void store(std::uint32_t* data) const noexcept
123 |         {
124 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
125 |         }
126 |         // Unaligned store; data may have any alignment.
127 |         void storeu(std::uint32_t* data) const noexcept
128 |         {
129 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
130 |         }
131 |         // Views the register's storage as an array of four uint32_t lanes.
132 |         const std::uint32_t* data() const noexcept
133 |         {
134 |             return reinterpret_cast<const std::uint32_t*>(&underlying_);
135 |         }
136 |         // Mutable counterpart of the accessor above.
137 |         std::uint32_t* data() noexcept
138 |         {
139 |             return reinterpret_cast<std::uint32_t*>(&underlying_);
140 |         }
141 |     };
142 | }
143 | 
144 | #include "../sse/bool32x4.sse.hpp"
145 | #include "../sse/float32x4.sse.hpp"
146 | #include "int32x4.sse2.hpp"
147 | 
148 | namespace tue
149 | {
150 |     inline uint32x4 uint32x4::explicit_cast(const bool32x4& s) noexcept
151 |     {
152 |         return __m128i(s); // rewraps s's __m128i value; no per-lane work
153 |     }
154 |     // Truncating convert, so lanes match what a scalar float cast yields.
155 |     inline uint32x4 uint32x4::explicit_cast(const float32x4& s) noexcept
156 |     {
157 |         return _mm_cvttps_epi32(s); // signed convert: valid below 2^31 only
158 |     }
159 |     // Same approach for int32x4: the __m128i value of s is rewrapped as-is.
160 |     inline uint32x4 uint32x4::explicit_cast(const int32x4& s) noexcept
161 |     {
162 |         return __m128i(s);
163 |     }
164 | 
165 |     namespace detail_
166 |     {
167 |         inline uint32x4& pre_increment_operator_s(uint32x4& s) noexcept
168 |         {
169 |             return s = _mm_add_epi32(s, uint32x4(1));
170 |         }
171 | 
172 |         inline uint32x4 post_increment_operator_s(uint32x4& s) noexcept
173 |         {
174 |             const auto result = s;
175 |             s = _mm_add_epi32(s, uint32x4(1));
176 |             return result;
177 |         }
178 | 
179 |         inline uint32x4& pre_decrement_operator_s(uint32x4& s) noexcept
180 |         {
181 |             return s = _mm_sub_epi32(s, uint32x4(1));
182 |         }
183 | 
184 |         inline uint32x4 post_decrement_operator_s(uint32x4& s) noexcept
185 |         {
186 |             const auto result = s;
187 |             s = _mm_sub_epi32(s, uint32x4(1));
188 |             return result;
189 |         }
190 | 
191 |         inline uint32x4 bitwise_not_operator_s(const uint32x4& s) noexcept
192 |         {
193 |             return _mm_xor_si128(s, uint32x4(0xFFFFFFFF));
194 |         }
195 | 
196 |         inline uint32x4 addition_operator_ss(
197 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
198 |         {
199 |             return _mm_add_epi32(lhs, rhs);
200 |         }
201 | 
202 |         inline uint32x4 subtraction_operator_ss(
203 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
204 |         {
205 |             return _mm_sub_epi32(lhs, rhs);
206 |         }
207 | 
208 |         /*inline uint32x4 multiplication_operator_ss(
209 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
210 |         {
211 |             // TODO
212 |         }
213 | 
214 |         inline uint32x4 division_operator_ss(
215 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
216 |         {
217 |             // TODO
218 |         }
219 | 
220 |         inline uint32x4 modulo_operator_ss(
221 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
222 |         {
223 |             // TODO
224 |         }*/
225 | 
226 |         inline uint32x4 bitwise_and_operator_ss(
227 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
228 |         {
229 |             return _mm_and_si128(lhs, rhs);
230 |         }
231 | 
232 |         inline uint32x4 bitwise_or_operator_ss(
233 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
234 |         {
235 |             return _mm_or_si128(lhs, rhs);
236 |         }
237 | 
238 |         inline uint32x4 bitwise_xor_operator_ss(
239 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
240 |         {
241 |             return _mm_xor_si128(lhs, rhs);
242 |         }
243 | 
244 |         inline uint32x4 bitwise_shift_left_operator_si(
245 |             const uint32x4& lhs, int rhs) noexcept
246 |         {
247 |             return _mm_slli_epi32(lhs, rhs);
248 |         }
249 | 
250 |         inline uint32x4 bitwise_shift_right_operator_si(
251 |             const uint32x4& lhs, int rhs) noexcept
252 |         {
253 |             return _mm_srli_epi32(lhs, rhs);
254 |         }
255 | 
256 |         inline uint32x4& addition_assignment_operator_ss(
257 |             uint32x4& lhs, const uint32x4& rhs) noexcept
258 |         {
259 |             return lhs = _mm_add_epi32(lhs, rhs);
260 |         }
261 | 
262 |         inline uint32x4& subtraction_assignment_operator_ss(
263 |             uint32x4& lhs, const uint32x4& rhs) noexcept
264 |         {
265 |             return lhs = _mm_sub_epi32(lhs, rhs);
266 |         }
267 | 
268 |         /*inline uint32x4& multiplication_assignment_operator_ss(
269 |             uint32x4& lhs, const uint32x4& rhs) noexcept
270 |         {
271 |             // TODO
272 |         }
273 | 
274 |         inline uint32x4& division_assignment_operator_ss(
275 |             uint32x4& lhs, const uint32x4& rhs) noexcept
276 |         {
277 |             // TODO
278 |         }
279 | 
280 |         inline uint32x4& modulo_assignment_operator_ss(
281 |             uint32x4& lhs, const uint32x4& rhs) noexcept
282 |         {
283 |             // TODO
284 |         }*/
285 | 
286 |         inline uint32x4& bitwise_and_assignment_operator_ss(
287 |             uint32x4& lhs, const uint32x4& rhs) noexcept
288 |         {
289 |             return lhs = _mm_and_si128(lhs, rhs);
290 |         }
291 | 
292 |         inline uint32x4& bitwise_or_assignment_operator_ss(
293 |             uint32x4& lhs, const uint32x4& rhs) noexcept
294 |         {
295 |             return lhs = _mm_or_si128(lhs, rhs);
296 |         }
297 | 
298 |         inline uint32x4& bitwise_xor_assignment_operator_ss(
299 |             uint32x4& lhs, const uint32x4& rhs) noexcept
300 |         {
301 |             return lhs = _mm_xor_si128(lhs, rhs);
302 |         }
303 | 
304 |         inline uint32x4& bitwise_shift_left_assignment_operator_si(
305 |             uint32x4& lhs, int rhs) noexcept
306 |         {
307 |             return lhs = _mm_slli_epi32(lhs, rhs);
308 |         }
309 | 
310 |         inline uint32x4& bitwise_shift_right_assignment_operator_si(
311 |             uint32x4& lhs, int rhs) noexcept
312 |         {
313 |             return lhs = _mm_srli_epi32(lhs, rhs);
314 |         }
315 | 
316 |         inline bool equality_operator_ss(
317 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
318 |         {
319 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
320 |         }
321 | 
322 |         inline bool inequality_operator_ss(
323 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
324 |         {
325 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF;
326 |         }
327 | 
328 |         inline uint32x4 abs_s(const uint32x4& s) noexcept
329 |         {
330 |             return s;
331 |         }
332 | 
333 |         /*inline uint32x4 min_ss(
334 |             const uint32x4& s1, const uint32x4& s2) noexcept
335 |         {
336 |             // TODO
337 |         }
338 | 
339 |         inline uint32x4 max_ss(
340 |             const uint32x4& s1, const uint32x4& s2) noexcept
341 |         {
342 |             // TODO
343 |         }*/
344 | 
345 |         inline uint32x4 mask_ss(
346 |             const bool32x4& conditions,
347 |             const uint32x4& values) noexcept
348 |         {
349 |             return _mm_and_si128(conditions, values);
350 |         }
351 | 
352 |         inline uint32x4 select_sss(
353 |             const bool32x4& conditions,
354 |             const uint32x4& values,
355 |             const uint32x4& otherwise) noexcept
356 |         {
357 |             return _mm_or_si128(
358 |                 _mm_and_si128(conditions, values),
359 |                 _mm_andnot_si128(conditions, otherwise));
360 |         }
361 | 
362 |         /*inline bool32x4 less_ss(
363 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
364 |         {
365 |             // TODO
366 |         }
367 | 
368 |         inline bool32x4 less_equal_ss(
369 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
370 |         {
371 |             // TODO
372 |         }
373 | 
374 |         inline bool32x4 greater_ss(
375 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
376 |         {
377 |             // TODO
378 |         }
379 | 
380 |         inline bool32x4 greater_equal_ss(
381 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
382 |         {
383 |             // TODO
384 |         }*/
385 | 
386 |         // Lane-wise equality: a result lane is all-ones where the operands
387 |         // match and all-zeros where they differ.
388 |         inline bool32x4 equal_ss(
389 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
390 |         { return _mm_cmpeq_epi32(rhs, lhs); }
391 | 
392 |         inline bool32x4 not_equal_ss(
393 |             const uint32x4& lhs, const uint32x4& rhs) noexcept
394 |         {
395 |             const __m128i same = _mm_cmpeq_epi32(lhs, rhs);  // == mask
396 |             return _mm_xor_si128(same, uint32x4(0xFFFFFFFF));  // inverted
397 |         }
398 |     }
399 | }
400 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/int64x2.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <cstdint>
 14 | #include <type_traits>
 15 | 
 16 | #include "../../../simd.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     // SSE2-backed specialization of simd: two std::int64_t lanes stored in a
 21 |     // single __m128i, implicitly convertible to and from that register type.
 22 |     template<>
 23 |     class alignas(tue::detail_::alignof_simd<std::int64_t, 2>())
 24 |     simd<std::int64_t, 2>
 25 |     {
 26 |         __m128i underlying_;
 27 | 
 28 |     private:
 29 |         // Generic conversion: casts each lane of `s` to std::int64_t in turn.
 30 |         template<typename U>
 31 |         static int64x2 explicit_cast(const simd<U, 2>& s) noexcept
 32 |         {
 33 |             const auto lanes = s.data();
 34 |             return { std::int64_t(lanes[0]), std::int64_t(lanes[1]) };
 35 |         }
 36 | 
 37 |         inline static int64x2 explicit_cast(const bool64x2& s) noexcept;
 38 | 
 39 |         inline static int64x2 explicit_cast(const uint64x2& s) noexcept;
 40 | 
 41 |     public:
 42 |         using component_type = std::int64_t;
 43 |         static constexpr int component_count = 2;
 44 |         static constexpr bool is_accelerated = true;
 45 | 
 46 |         simd() noexcept = default;
 47 | 
 48 |         // Broadcast: both lanes are set to `x`.
 49 |         explicit simd(std::int64_t x) noexcept
 50 |             : underlying_(_mm_set1_epi64x(x))
 51 |         {
 52 |         }
 53 | 
 54 |         // `x` fills the low 64 bits and `y` the high 64 bits of the register.
 55 |         template<int M = 2, typename = std::enable_if_t<M == 2>>
 56 |         inline simd(std::int64_t x, std::int64_t y) noexcept
 57 |             : underlying_(_mm_set_epi64x(y, x))
 58 |         {
 59 |         }
 60 | 
 61 |         // Overloads for 4, 8 and 16 lanes. M can only take its default of 2,
 62 |         // so these enable_if conditions are false and the overloads never take
 63 |         // part in overload resolution for this two-lane type.
 64 |         template<int M = 2, typename = std::enable_if_t<M == 4>>
 65 |         inline simd(std::int64_t x, std::int64_t y,
 66 |                     std::int64_t z, std::int64_t w) noexcept;
 67 | 
 68 |         template<int M = 2, typename = std::enable_if_t<M == 8>>
 69 |         inline simd(std::int64_t s0, std::int64_t s1,
 70 |                     std::int64_t s2, std::int64_t s3,
 71 |                     std::int64_t s4, std::int64_t s5,
 72 |                     std::int64_t s6, std::int64_t s7) noexcept;
 73 | 
 74 |         template<int M = 2, typename = std::enable_if_t<M == 16>>
 75 |         inline simd(std::int64_t  s0, std::int64_t  s1,
 76 |                     std::int64_t  s2, std::int64_t  s3,
 77 |                     std::int64_t  s4, std::int64_t  s5,
 78 |                     std::int64_t  s6, std::int64_t  s7,
 79 |                     std::int64_t  s8, std::int64_t  s9,
 80 |                     std::int64_t s10, std::int64_t s11,
 81 |                     std::int64_t s12, std::int64_t s13,
 82 |                     std::int64_t s14, std::int64_t s15) noexcept;
 83 | 
 84 |         template<typename U>
 85 |         explicit simd(const simd<U, 2>& s) noexcept
 86 |             : underlying_(explicit_cast(s))
 87 |         {
 88 |         }
 89 | 
 90 |         // Implicit wrap of a raw SSE2 integer register.
 91 |         simd(__m128i underlying) noexcept
 92 |             : underlying_(underlying)
 93 |         {
 94 |         }
 95 | 
 96 |         operator __m128i() const noexcept
 97 |         {
 98 |             return underlying_;
 99 |         }
100 | 
101 |         static int64x2 zero() noexcept
102 |         {
103 |             return _mm_setzero_si128();
104 |         }
105 | 
106 |         // Aligned load: `data` must be 16-byte aligned (loadu lifts this).
107 |         static int64x2 load(const std::int64_t* data) noexcept
108 |         {
109 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
110 |         }
111 | 
112 |         static int64x2 loadu(const std::int64_t* data) noexcept
113 |         {
114 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
115 |         }
116 | 
117 |         // Aligned store: `data` must be 16-byte aligned (storeu lifts this).
118 |         void store(std::int64_t* data) const noexcept
119 |         {
120 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
121 |         }
122 | 
123 |         void storeu(std::int64_t* data) const noexcept
124 |         {
125 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
126 |         }
127 | 
128 |         const std::int64_t* data() const noexcept
129 |         {
130 |             return reinterpret_cast<const std::int64_t*>(&underlying_);
131 |         }
132 | 
133 |         std::int64_t* data() noexcept
134 |         {
135 |             return reinterpret_cast<std::int64_t*>(&underlying_);
136 |         }
137 |     };
138 | }
139 | 
140 | #include "bool64x2.sse2.hpp"
141 | #include "uint64x2.sse2.hpp"
142 | 
143 | namespace tue
144 | {
145 |     // bool64x2 -> int64x2: the mask's register bits are reused unchanged.
146 |     inline int64x2 int64x2::explicit_cast(const bool64x2& s) noexcept
147 |     {
148 |         return __m128i(s);
149 |     }
150 | 
151 |     // uint64x2 -> int64x2: the register bits are reused unchanged.
152 |     inline int64x2 int64x2::explicit_cast(const uint64x2& s) noexcept
153 |     {
154 |         return __m128i(s);
155 |     }
156 | 
157 |     namespace detail_
158 |     {
159 |         // +s: returns the operand unchanged.
160 |         inline int64x2 unary_plus_operator_s(const int64x2& s) noexcept
161 |         {
162 |             return s;
163 |         }
164 | 
165 |         // ++s: adds 1 to both lanes in place and returns a reference to s.
166 |         inline int64x2& pre_increment_operator_s(int64x2& s) noexcept
167 |         {
168 |             return s = _mm_add_epi64(s, int64x2(1));
169 |         }
170 | 
171 |         // s++: adds 1 to both lanes in place and returns the previous value.
172 |         inline int64x2 post_increment_operator_s(int64x2& s) noexcept
173 |         {
174 |             const auto result = s;
175 |             s = _mm_add_epi64(s, int64x2(1));
176 |             return result;
177 |         }
178 | 
179 |         // -s: lane-wise negation, computed as 0 - s.
180 |         inline int64x2 unary_minus_operator_s(const int64x2& s) noexcept
181 |         {
182 |             return _mm_sub_epi64(_mm_setzero_si128(), s);
183 |         }
184 | 
185 |         // --s: subtracts 1 from both lanes in place; returns a reference to s.
186 |         inline int64x2& pre_decrement_operator_s(int64x2& s) noexcept
187 |         {
188 |             return s = _mm_sub_epi64(s, int64x2(1));
189 |         }
190 | 
191 |         // s--: subtracts 1 from both lanes in place; returns the previous value.
192 |         inline int64x2 post_decrement_operator_s(int64x2& s) noexcept
193 |         {
194 |             const auto result = s;
195 |             s = _mm_sub_epi64(s, int64x2(1));
196 |             return result;
197 |         }
198 | 
199 |         // ~s: flips every bit by XOR-ing with an all-ones register.
200 |         inline int64x2 bitwise_not_operator_s(const int64x2& s) noexcept
201 |         {
202 |             return _mm_xor_si128(s, int64x2(~0ull));
203 |         }
204 | 
205 |         // lhs + rhs, lane by lane.
206 |         inline int64x2 addition_operator_ss(
207 |             const int64x2& lhs, const int64x2& rhs) noexcept
208 |         {
209 |             return _mm_add_epi64(lhs, rhs);
210 |         }
211 | 
212 |         // lhs - rhs, lane by lane.
213 |         inline int64x2 subtraction_operator_ss(
214 |             const int64x2& lhs, const int64x2& rhs) noexcept
215 |         {
216 |             return _mm_sub_epi64(lhs, rhs);
217 |         }
218 | 
219 |         /*inline int64x2 multiplication_operator_ss(
220 |             const int64x2& lhs, const int64x2& rhs) noexcept
221 |         {
222 |             // TODO
223 |         }
224 | 
225 |         inline int64x2 division_operator_ss(
226 |             const int64x2& lhs, const int64x2& rhs) noexcept
227 |         {
228 |             // TODO
229 |         }
230 | 
231 |         inline int64x2 modulo_operator_ss(
232 |             const int64x2& lhs, const int64x2& rhs) noexcept
233 |         {
234 |             // TODO
235 |         }*/
236 | 
237 |         // lhs & rhs over the whole 128-bit register.
238 |         inline int64x2 bitwise_and_operator_ss(
239 |             const int64x2& lhs, const int64x2& rhs) noexcept
240 |         {
241 |             return _mm_and_si128(lhs, rhs);
242 |         }
243 | 
244 |         // lhs | rhs over the whole 128-bit register.
245 |         inline int64x2 bitwise_or_operator_ss(
246 |             const int64x2& lhs, const int64x2& rhs) noexcept
247 |         {
248 |             return _mm_or_si128(lhs, rhs);
249 |         }
250 | 
251 |         // lhs ^ rhs over the whole 128-bit register.
252 |         inline int64x2 bitwise_xor_operator_ss(
253 |             const int64x2& lhs, const int64x2& rhs) noexcept
254 |         {
255 |             return _mm_xor_si128(lhs, rhs);
256 |         }
257 | 
258 |         // lhs << rhs: shifts both lanes left by `rhs` bits, shifting in zeros.
259 |         inline int64x2 bitwise_shift_left_operator_si(
260 |             const int64x2& lhs, int rhs) noexcept
261 |         {
262 |             return _mm_slli_epi64(lhs, rhs);
263 |         }
264 | 
265 |         // lhs >> rhs: arithmetic (sign-propagating) right shift of both lanes.
266 |         // SSE2 has no 64-bit arithmetic shift, and _mm_srli_epi64 alone shifts
267 |         // in zeros, which would turn negative lanes positive. It is therefore
268 |         // emulated as asr(x, n) == lsr(x ^ sign, n) ^ sign, where `sign` is
269 |         // all-ones for negative lanes and zero for the others.
270 |         inline int64x2 bitwise_shift_right_operator_si(
271 |             const int64x2& lhs, int rhs) noexcept
272 |         {
273 |             const __m128i sign = _mm_shuffle_epi32(
274 |                 _mm_srai_epi32(lhs, 31), _MM_SHUFFLE(3, 3, 1, 1));
275 |             return _mm_xor_si128(
276 |                 _mm_srli_epi64(_mm_xor_si128(lhs, sign), rhs), sign);
277 |         }
278 | 
279 |         // lhs += rhs, lane by lane; returns a reference to lhs.
280 |         inline int64x2& addition_assignment_operator_ss(
281 |             int64x2& lhs, const int64x2& rhs) noexcept
282 |         {
283 |             return lhs = _mm_add_epi64(lhs, rhs);
284 |         }
285 | 
286 |         // lhs -= rhs, lane by lane; returns a reference to lhs.
287 |         inline int64x2& subtraction_assignment_operator_ss(
288 |             int64x2& lhs, const int64x2& rhs) noexcept
289 |         {
290 |             return lhs = _mm_sub_epi64(lhs, rhs);
291 |         }
292 | 
293 |         /*inline int64x2& multiplication_assignment_operator_ss(
294 |             int64x2& lhs, const int64x2& rhs) noexcept
295 |         {
296 |             // TODO
297 |         }
298 | 
299 |         inline int64x2& division_assignment_operator_ss(
300 |             int64x2& lhs, const int64x2& rhs) noexcept
301 |         {
302 |             // TODO
303 |         }
304 | 
305 |         inline int64x2& modulo_assignment_operator_ss(
306 |             int64x2& lhs, const int64x2& rhs) noexcept
307 |         {
308 |             // TODO
309 |         }*/
310 | 
311 |         // lhs &= rhs; returns a reference to lhs.
312 |         inline int64x2& bitwise_and_assignment_operator_ss(
313 |             int64x2& lhs, const int64x2& rhs) noexcept
314 |         {
315 |             return lhs = _mm_and_si128(lhs, rhs);
316 |         }
317 | 
318 |         // lhs |= rhs; returns a reference to lhs.
319 |         inline int64x2& bitwise_or_assignment_operator_ss(
320 |             int64x2& lhs, const int64x2& rhs) noexcept
321 |         {
322 |             return lhs = _mm_or_si128(lhs, rhs);
323 |         }
324 | 
325 |         // lhs ^= rhs; returns a reference to lhs.
326 |         inline int64x2& bitwise_xor_assignment_operator_ss(
327 |             int64x2& lhs, const int64x2& rhs) noexcept
328 |         {
329 |             return lhs = _mm_xor_si128(lhs, rhs);
330 |         }
331 | 
332 |         // lhs <<= rhs; returns a reference to lhs.
333 |         inline int64x2& bitwise_shift_left_assignment_operator_si(
334 |             int64x2& lhs, int rhs) noexcept
335 |         {
336 |             return lhs = _mm_slli_epi64(lhs, rhs);
337 |         }
338 | 
339 |         // lhs >>= rhs; returns a reference to lhs. Delegates to the value form
340 |         // so both share the same sign-propagating shift.
341 |         inline int64x2& bitwise_shift_right_assignment_operator_si(
342 |             int64x2& lhs, int rhs) noexcept
343 |         {
344 |             return lhs = bitwise_shift_right_operator_si(lhs, rhs);
345 |         }
346 | 
347 |         // Whole-register ==: true only when all 16 bytes compare equal.
348 |         inline bool equality_operator_ss(
349 |             const int64x2& lhs, const int64x2& rhs) noexcept
350 |         {
351 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
352 |         }
353 | 
354 |         // Whole-register !=: true when at least one byte differs.
355 |         inline bool inequality_operator_ss(
356 |             const int64x2& lhs, const int64x2& rhs) noexcept
357 |         {
358 |             return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF;
359 |         }
360 | 
361 |         // |s| per lane. The sign of a 64-bit lane sits in its upper 32 bits, so
362 |         // the 32-bit `< 0` compare is broadcast from the upper half to build a
363 |         // per-lane mask that picks -s for negative lanes and s otherwise.
364 |         inline int64x2 abs_s(const int64x2& s) noexcept
365 |         {
366 |             const auto nmask = _mm_shuffle_epi32(
367 |                 _mm_cmplt_epi32(s, _mm_setzero_si128()),
368 |                 _MM_SHUFFLE(3, 3, 1, 1));
369 |             return _mm_or_si128(
370 |                 _mm_and_si128(nmask, unary_minus_operator_s(s)),
371 |                 _mm_andnot_si128(nmask, s));
372 |         }
373 | 
374 |         /*inline int64x2 min_ss(
375 |             const int64x2& s1, const int64x2& s2) noexcept
376 |         {
377 |             // TODO
378 |         }
379 | 
380 |         inline int64x2 max_ss(
381 |             const int64x2& s1, const int64x2& s2) noexcept
382 |         {
383 |             // TODO
384 |         }*/
385 | 
386 |         // ANDs `values` with the mask, zeroing bits the mask does not set.
387 |         inline int64x2 mask_ss(
388 |             const bool64x2& conditions,
389 |             const int64x2& values) noexcept
390 |         {
391 |             return _mm_and_si128(conditions, values);
392 |         }
393 | 
394 |         // Bitwise blend: `values` where the mask is set, `otherwise` elsewhere.
395 |         inline int64x2 select_sss(
396 |             const bool64x2& conditions,
397 |             const int64x2& values,
398 |             const int64x2& otherwise) noexcept
399 |         {
400 |             return _mm_or_si128(
401 |                 _mm_and_si128(conditions, values),
402 |                 _mm_andnot_si128(conditions, otherwise));
403 |         }
404 | 
405 |         /*inline bool64x2 less_ss(
406 |             const int64x2& lhs, const int64x2& rhs) noexcept
407 |         {
408 |             // TODO
409 |         }
410 | 
411 |         inline bool64x2 less_equal_ss(
412 |             const int64x2& lhs, const int64x2& rhs) noexcept
413 |         {
414 |             // TODO
415 |         }
416 | 
417 |         inline bool64x2 greater_ss(
418 |             const int64x2& lhs, const int64x2& rhs) noexcept
419 |         {
420 |             // TODO
421 |         }
422 | 
423 |         inline bool64x2 greater_equal_ss(
424 |             const int64x2& lhs, const int64x2& rhs) noexcept
425 |         {
426 |             // TODO
427 |         }*/
428 | 
429 |         // Lane-wise ==. The compare works on 32-bit halves, so a 64-bit lane is
430 |         // reported equal (all-ones) only when both of its halves compared equal.
431 |         inline bool64x2 equal_ss(
432 |             const int64x2& lhs, const int64x2& rhs) noexcept
433 |         {
434 |             const auto cmp = _mm_cmpeq_epi32(lhs, rhs);
435 |             const auto hi = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(3, 3, 1, 1));
436 |             const auto lo = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 2, 0, 0));
437 |             return _mm_and_si128(hi, lo);
438 |         }
439 | 
440 |         // Lane-wise !=, computed as the bitwise complement of equal_ss.
441 |         inline bool64x2 not_equal_ss(
442 |             const int64x2& lhs, const int64x2& rhs) noexcept
443 |         {
444 |             return _mm_xor_si128(equal_ss(lhs, rhs), int64x2(~0ull));
445 |         }
446 |     }
447 | }
448 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/uint8x16.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <cstdint>
 14 | #include <type_traits>
 15 | 
 16 | #include "../../../simd.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     // SSE2-backed specialization of simd: sixteen std::uint8_t lanes stored in
 21 |     // a single __m128i, implicitly convertible to and from that register type.
 22 |     template<>
 23 |     class alignas(tue::detail_::alignof_simd<std::uint8_t, 16>())
 24 |     simd<std::uint8_t, 16>
 25 |     {
 26 |         __m128i underlying_;
 27 | 
 28 |     private:
 29 |         // Generic conversion: casts each lane of `s` to std::uint8_t in turn.
 30 |         template<typename U>
 31 |         static uint8x16 explicit_cast(const simd<U, 16>& s) noexcept
 32 |         {
 33 |             const auto lanes = s.data();
 34 |             return {
 35 |                 std::uint8_t(lanes[0]),  std::uint8_t(lanes[1]),
 36 |                 std::uint8_t(lanes[2]),  std::uint8_t(lanes[3]),
 37 |                 std::uint8_t(lanes[4]),  std::uint8_t(lanes[5]),
 38 |                 std::uint8_t(lanes[6]),  std::uint8_t(lanes[7]),
 39 |                 std::uint8_t(lanes[8]),  std::uint8_t(lanes[9]),
 40 |                 std::uint8_t(lanes[10]), std::uint8_t(lanes[11]),
 41 |                 std::uint8_t(lanes[12]), std::uint8_t(lanes[13]),
 42 |                 std::uint8_t(lanes[14]), std::uint8_t(lanes[15]),
 43 |             };
 44 |         }
 45 | 
 46 |         inline static uint8x16 explicit_cast(const bool8x16& s) noexcept;
 47 | 
 48 |         inline static uint8x16 explicit_cast(const int8x16& s) noexcept;
 49 | 
 50 |     public:
 51 |         using component_type = std::uint8_t;
 52 |         static constexpr int component_count = 16;
 53 |         static constexpr bool is_accelerated = true;
 54 | 
 55 |         simd() noexcept = default;
 56 | 
 57 |         // Broadcast: every lane is set to `x`.
 58 |         explicit simd(std::uint8_t x) noexcept
 59 |             : underlying_(_mm_set1_epi8(x))
 60 |         {
 61 |         }
 62 | 
 63 |         // Overloads for 2, 4 and 8 lanes. M can only take its default of 16,
 64 |         // so these enable_if conditions are false and the overloads never take
 65 |         // part in overload resolution for this sixteen-lane type.
 66 |         template<int M = 16, typename = std::enable_if_t<M == 2>>
 67 |         inline simd(std::uint8_t x, std::uint8_t y) noexcept;
 68 | 
 69 |         template<int M = 16, typename = std::enable_if_t<M == 4>>
 70 |         inline simd(std::uint8_t x, std::uint8_t y,
 71 |                     std::uint8_t z, std::uint8_t w) noexcept;
 72 | 
 73 |         template<int M = 16, typename = std::enable_if_t<M == 8>>
 74 |         inline simd(std::uint8_t s0, std::uint8_t s1,
 75 |                     std::uint8_t s2, std::uint8_t s3,
 76 |                     std::uint8_t s4, std::uint8_t s5,
 77 |                     std::uint8_t s6, std::uint8_t s7) noexcept;
 78 | 
 79 |         // Lane-wise constructor: s0 becomes the lowest byte, s15 the highest.
 80 |         template<int M = 16, typename = std::enable_if_t<M == 16>>
 81 |         inline simd(std::uint8_t  s0, std::uint8_t  s1,
 82 |                     std::uint8_t  s2, std::uint8_t  s3,
 83 |                     std::uint8_t  s4, std::uint8_t  s5,
 84 |                     std::uint8_t  s6, std::uint8_t  s7,
 85 |                     std::uint8_t  s8, std::uint8_t  s9,
 86 |                     std::uint8_t s10, std::uint8_t s11,
 87 |                     std::uint8_t s12, std::uint8_t s13,
 88 |                     std::uint8_t s14, std::uint8_t s15) noexcept
 89 |             : underlying_(_mm_setr_epi8(
 90 |                 s0, s1,  s2,  s3,  s4,  s5,  s6,  s7,
 91 |                 s8, s9, s10, s11, s12, s13, s14, s15))
 92 |         {
 93 |         }
 94 | 
 95 |         // Converts from another sixteen-lane simd through explicit_cast.
 96 |         template<typename U>
 97 |         explicit simd(const simd<U, 16>& s) noexcept
 98 |             : underlying_(explicit_cast(s))
 99 |         {
100 |         }
101 | 
102 |         // Implicit wrap of a raw SSE2 integer register.
103 |         simd(__m128i underlying) noexcept
104 |             : underlying_(underlying)
105 |         {
106 |         }
107 | 
108 |         // Implicit unwrap, so a uint8x16 can be handed directly to intrinsics.
109 |         operator __m128i() const noexcept
110 |         {
111 |             return underlying_;
112 |         }
113 | 
114 |         static uint8x16 zero() noexcept
115 |         {
116 |             return _mm_setzero_si128();
117 |         }
118 | 
119 |         // Aligned load: `data` must be 16-byte aligned.
120 |         static uint8x16 load(const std::uint8_t* data) noexcept
121 |         {
122 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
123 |         }
124 | 
125 |         // Unaligned load: `data` may have any alignment.
126 |         static uint8x16 loadu(const std::uint8_t* data) noexcept
127 |         {
128 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
129 |         }
130 | 
131 |         // Aligned store: `data` must be 16-byte aligned.
132 |         void store(std::uint8_t* data) const noexcept
133 |         {
134 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
135 |         }
136 | 
137 |         // Unaligned store: `data` may have any alignment.
138 |         void storeu(std::uint8_t* data) const noexcept
139 |         {
140 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
141 |         }
142 | 
143 |         // Pointer to the lanes, viewing the register's own storage.
144 |         const std::uint8_t* data() const noexcept
145 |         {
146 |             return reinterpret_cast<const std::uint8_t*>(&underlying_);
147 |         }
148 | 
149 |         std::uint8_t* data() noexcept
150 |         {
151 |             return reinterpret_cast<std::uint8_t*>(&underlying_);
152 |         }
153 |     };
154 | }
155 | 
156 | #include "bool8x16.sse2.hpp"
157 | #include "int8x16.sse2.hpp"
158 | 
159 | namespace tue
160 | {
161 |     // bool8x16 -> uint8x16: the mask's register bits are reused unchanged.
162 |     inline uint8x16 uint8x16::explicit_cast(const bool8x16& s) noexcept
163 |     {
164 |         return static_cast<__m128i>(s);
165 |     }
166 | 
167 |     // int8x16 -> uint8x16: the register bits are reused unchanged.
168 |     inline uint8x16 uint8x16::explicit_cast(const int8x16& s) noexcept
169 |     {
170 |         return static_cast<__m128i>(s);
171 |     }
172 | 
173 |     namespace detail_
174 |     {
175 |         // ++s: adds 1 to every lane in place and returns a reference to s.
176 |         inline uint8x16& pre_increment_operator_s(uint8x16& s) noexcept
177 |         {
178 |             s = _mm_add_epi8(s, uint8x16(1));
179 |             return s;
180 |         }
181 | 
182 |         // s++: adds 1 to every lane in place and returns the previous value.
183 |         inline uint8x16 post_increment_operator_s(uint8x16& s) noexcept
184 |         {
185 |             const uint8x16 previous = s;
186 |             s = _mm_add_epi8(s, uint8x16(1));
187 |             return previous;
188 |         }
189 | 
190 |         // --s: subtracts 1 from every lane in place; returns a reference to s.
191 |         inline uint8x16& pre_decrement_operator_s(uint8x16& s) noexcept
192 |         {
193 |             s = _mm_sub_epi8(s, uint8x16(1));
194 |             return s;
195 |         }
196 | 
197 |         // s--: subtracts 1 from every lane in place; returns the previous value.
198 |         inline uint8x16 post_decrement_operator_s(uint8x16& s) noexcept
199 |         {
200 |             const uint8x16 previous = s;
201 |             s = _mm_sub_epi8(s, uint8x16(1));
202 |             return previous;
203 |         }
204 | 
205 |         // ~s: flips every bit by XOR-ing with an all-ones register.
206 |         inline uint8x16 bitwise_not_operator_s(const uint8x16& s) noexcept
207 |         {
208 |             const __m128i all_ones = uint8x16(0xFF);
209 |             return _mm_xor_si128(s, all_ones);
210 |         }
211 | 
212 |         // lhs + rhs, lane by lane.
213 |         inline uint8x16 addition_operator_ss(
214 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
215 |         {
216 |             const __m128i sum = _mm_add_epi8(lhs, rhs);
217 |             return sum;
218 |         }
219 | 
220 |         // lhs - rhs, lane by lane.
221 |         inline uint8x16 subtraction_operator_ss(
222 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
223 |         {
224 |             const __m128i difference = _mm_sub_epi8(lhs, rhs);
225 |             return difference;
226 |         }
227 | 
228 |         /*inline uint8x16 multiplication_operator_ss(
229 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
230 |         {
231 |             // TODO
232 |         }
233 | 
234 |         inline uint8x16 division_operator_ss(
235 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
236 |         {
237 |             // TODO
238 |         }
239 | 
240 |         inline uint8x16 modulo_operator_ss(
241 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
242 |         {
243 |             // TODO
244 |         }*/
245 | 
246 |         // lhs & rhs over the whole 128-bit register.
247 |         inline uint8x16 bitwise_and_operator_ss(
248 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
249 |         {
250 |             const __m128i both = _mm_and_si128(lhs, rhs);
251 |             return both;
252 |         }
253 | 
254 |         // lhs | rhs over the whole 128-bit register.
255 |         inline uint8x16 bitwise_or_operator_ss(
256 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
257 |         {
258 |             const __m128i either = _mm_or_si128(lhs, rhs);
259 |             return either;
260 |         }
261 | 
262 |         // lhs ^ rhs over the whole 128-bit register.
263 |         inline uint8x16 bitwise_xor_operator_ss(
264 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
265 |         {
266 |             const __m128i differing = _mm_xor_si128(lhs, rhs);
267 |             return differing;
268 |         }
269 | 
270 |         /*inline uint8x16 bitwise_shift_left_operator_si(
271 |             const uint8x16& lhs, int rhs) noexcept
272 |         {
273 |             // TODO
274 |         }
275 | 
276 |         inline uint8x16 bitwise_shift_right_operator_si(
277 |             const uint8x16& lhs, int rhs) noexcept
278 |         {
279 |             // TODO
280 |         }*/
281 | 
282 |         // lhs += rhs, lane by lane; returns a reference to lhs.
283 |         inline uint8x16& addition_assignment_operator_ss(
284 |             uint8x16& lhs, const uint8x16& rhs) noexcept
285 |         {
286 |             lhs = _mm_add_epi8(lhs, rhs);
287 |             return lhs;
288 |         }
289 | 
290 |         // lhs -= rhs, lane by lane; returns a reference to lhs.
291 |         inline uint8x16& subtraction_assignment_operator_ss(
292 |             uint8x16& lhs, const uint8x16& rhs) noexcept
293 |         {
294 |             lhs = _mm_sub_epi8(lhs, rhs);
295 |             return lhs;
296 |         }
297 | 
298 |         /*inline uint8x16& multiplication_assignment_operator_ss(
299 |             uint8x16& lhs, const uint8x16& rhs) noexcept
300 |         {
301 |             // TODO
302 |         }
303 | 
304 |         inline uint8x16& division_assignment_operator_ss(
305 |             uint8x16& lhs, const uint8x16& rhs) noexcept
306 |         {
307 |             // TODO
308 |         }
309 | 
310 |         inline uint8x16& modulo_assignment_operator_ss(
311 |             uint8x16& lhs, const uint8x16& rhs) noexcept
312 |         {
313 |             // TODO
314 |         }*/
315 | 
316 |         // lhs &= rhs; returns a reference to lhs.
317 |         inline uint8x16& bitwise_and_assignment_operator_ss(
318 |             uint8x16& lhs, const uint8x16& rhs) noexcept
319 |         {
320 |             lhs = _mm_and_si128(lhs, rhs);
321 |             return lhs;
322 |         }
323 | 
324 |         // lhs |= rhs; returns a reference to lhs.
325 |         inline uint8x16& bitwise_or_assignment_operator_ss(
326 |             uint8x16& lhs, const uint8x16& rhs) noexcept
327 |         {
328 |             lhs = _mm_or_si128(lhs, rhs);
329 |             return lhs;
330 |         }
331 | 
332 |         // lhs ^= rhs; returns a reference to lhs.
333 |         inline uint8x16& bitwise_xor_assignment_operator_ss(
334 |             uint8x16& lhs, const uint8x16& rhs) noexcept
335 |         {
336 |             lhs = _mm_xor_si128(lhs, rhs);
337 |             return lhs;
338 |         }
339 | 
340 |         /*inline uint8x16& bitwise_shift_left_assignment_operator_si(
341 |             uint8x16& lhs, int rhs) noexcept
342 |         {
343 |             // TODO
344 |         }
345 | 
346 |         inline uint8x16& bitwise_shift_right_assignment_operator_si(
347 |             uint8x16& lhs, int rhs) noexcept
348 |         {
349 |             // TODO
350 |         }*/
351 | 
352 |         // Whole-register ==: true only when all 16 lanes compare equal.
353 |         inline bool equality_operator_ss(
354 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
355 |         {
356 |             const int equal_bits = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
357 |             return equal_bits == 0xFFFF;
358 |         }
359 | 
360 |         // Whole-register !=: true when at least one lane differs.
361 |         inline bool inequality_operator_ss(
362 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
363 |         {
364 |             const int equal_bits = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
365 |             return equal_bits != 0xFFFF;
366 |         }
367 | 
368 |         // Unsigned lanes are never negative, so abs returns the operand as is.
369 |         inline uint8x16 abs_s(const uint8x16& s) noexcept
370 |         {
371 |             return s;
372 |         }
373 | 
374 |         // Lane-wise unsigned minimum.
375 |         inline uint8x16 min_ss(
376 |             const uint8x16& s1, const uint8x16& s2) noexcept
377 |         {
378 |             const __m128i smaller = _mm_min_epu8(s1, s2);
379 |             return smaller;
380 |         }
381 | 
382 |         // Lane-wise unsigned maximum.
383 |         inline uint8x16 max_ss(
384 |             const uint8x16& s1, const uint8x16& s2) noexcept
385 |         {
386 |             const __m128i larger = _mm_max_epu8(s1, s2);
387 |             return larger;
388 |         }
389 | 
390 |         // ANDs `values` with the mask, zeroing bits the mask does not set.
391 |         inline uint8x16 mask_ss(
392 |             const bool8x16& conditions, const uint8x16& values) noexcept
393 |         {
394 |             return _mm_and_si128(values, conditions);
395 |         }
396 | 
397 |         // Bitwise blend: takes `values` where the mask bits are set and
398 |         // `otherwise` where they are clear.
399 |         inline uint8x16 select_sss(
400 |             const bool8x16& conditions,
401 |             const uint8x16& values, const uint8x16& otherwise) noexcept
402 |         {
403 |             const __m128i kept = _mm_and_si128(conditions, values);
404 |             return _mm_or_si128(kept, _mm_andnot_si128(conditions, otherwise));
405 |         }
406 | 
407 |         /*inline bool8x16 less_ss(
408 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
409 |         {
410 |             // TODO
411 |         }
412 | 
413 |         inline bool8x16 less_equal_ss(
414 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
415 |         {
416 |             // TODO
417 |         }
418 | 
419 |         inline bool8x16 greater_ss(
420 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
421 |         {
422 |             // TODO
423 |         }
424 | 
425 |         inline bool8x16 greater_equal_ss(
426 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
427 |         {
428 |             // TODO
429 |         }*/
430 | 
431 |         // Lane-wise ==: a result lane is all-ones where the operands match.
432 |         inline bool8x16 equal_ss(
433 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
434 |         {
435 |             return _mm_cmpeq_epi8(rhs, lhs);
436 |         }
437 | 
438 |         // Lane-wise !=, computed as the bitwise complement of the == mask.
439 |         inline bool8x16 not_equal_ss(
440 |             const uint8x16& lhs, const uint8x16& rhs) noexcept
441 |         {
442 |             const __m128i same = _mm_cmpeq_epi8(lhs, rhs);
443 |             return _mm_xor_si128(same, uint8x16(0xFF));
444 |         }
445 |     }
446 | }
447 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/int16x8.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <cstdint>
 14 | #include <type_traits>
 15 | 
 16 | #include "../../../simd.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     template<>
 21 |     class alignas(tue::detail_::alignof_simd<std::int16_t, 8>())
 22 |     simd<std::int16_t, 8>
 23 |     {
 24 |         __m128i underlying_;
 25 | 
 26 |     private:
 27 |         template<typename U>
 28 |         static int16x8 explicit_cast(const simd<U, 8>& s) noexcept
 29 |         {
 30 |             return {
 31 |                 std::int16_t(s.data()[0]),
 32 |                 std::int16_t(s.data()[1]),
 33 |                 std::int16_t(s.data()[2]),
 34 |                 std::int16_t(s.data()[3]),
 35 |                 std::int16_t(s.data()[4]),
 36 |                 std::int16_t(s.data()[5]),
 37 |                 std::int16_t(s.data()[6]),
 38 |                 std::int16_t(s.data()[7]),
 39 |             };
 40 |         }
 41 | 
 42 |         inline static int16x8 explicit_cast(const bool16x8& s) noexcept;
 43 | 
 44 |         inline static int16x8 explicit_cast(const uint16x8& s) noexcept;
 45 | 
 46 |     public:
 47 |         using component_type = std::int16_t;
 48 | 
 49 |         static constexpr int component_count = 8;
 50 | 
 51 |         static constexpr bool is_accelerated = true;
 52 | 
 53 |         simd() noexcept = default;  // Defaulted default constructor.
 54 | 
 55 |         explicit simd(std::int16_t x) noexcept  // Broadcast: every lane is set to x.
 56 |         :
 57 |             underlying_(_mm_set1_epi16(x))
 58 |         {
 59 |         }
 60 | 
 61 |         template<int M = 8, typename = std::enable_if_t<M == 2>>  // 2-argument form; the enable_if condition is false for the default M = 8.
 62 |         inline simd(
 63 |             std::int16_t x, std::int16_t y) noexcept;
 64 | 
 65 |         template<int M = 8, typename = std::enable_if_t<M == 4>>  // 4-argument form; likewise false for the default M = 8.
 66 |         inline simd(
 67 |             std::int16_t x, std::int16_t y,
 68 |             std::int16_t z, std::int16_t w) noexcept;
 69 | 
 70 |         template<int M = 8, typename = std::enable_if_t<M == 8>>  // 8-argument form; the one whose condition holds by default.
 71 |         inline simd(
 72 |             std::int16_t s0, std::int16_t s1,
 73 |             std::int16_t s2, std::int16_t s3,
 74 |             std::int16_t s4, std::int16_t s5,
 75 |             std::int16_t s6, std::int16_t s7) noexcept
 76 |         :
 77 |             underlying_(_mm_setr_epi16(  // "setr" variant: lanes are passed in the order s0..s7.
 78 |                 s0, s1, s2, s3, s4, s5, s6, s7))
 79 |         {
 80 |         }
 81 | 
 82 |         template<int M = 8, typename = std::enable_if_t<M == 16>>  // 16-argument form; false for the default M = 8.
 83 |         inline simd(
 84 |             std::int16_t  s0, std::int16_t  s1,
 85 |             std::int16_t  s2, std::int16_t  s3,
 86 |             std::int16_t  s4, std::int16_t  s5,
 87 |             std::int16_t  s6, std::int16_t  s7,
 88 |             std::int16_t  s8, std::int16_t  s9,
 89 |             std::int16_t s10, std::int16_t s11,
 90 |             std::int16_t s12, std::int16_t s13,
 91 |             std::int16_t s14, std::int16_t s15) noexcept;
 92 | 
 93 |         template<typename U>  // Explicit conversion from another 8-lane simd, delegating to explicit_cast.
 94 |         explicit simd(const simd<U, 8>& s) noexcept
 95 |         {
 96 |             *this = explicit_cast(s);
 97 |         }
 98 | 
 99 |         simd(__m128i underlying) noexcept  // Implicit wrap of a raw __m128i value.
100 |         :
101 |             underlying_(underlying)
102 |         {
103 |         }
104 | 
105 |         operator __m128i() const noexcept
106 |         {   // Implicitly exposes the wrapped register so intrinsics accept this type.
107 |             return this->underlying_;
108 |         }
109 | 
110 |         static int16x8 zero() noexcept
111 |         {   // All eight lanes cleared to 0.
112 |             return int16x8(_mm_setzero_si128());
113 |         }
114 | 
115 |         static int16x8 load(const std::int16_t* data) noexcept
116 |         {   // Aligned 128-bit load; data must meet _mm_load_si128's alignment requirement.
117 |             return int16x8(_mm_load_si128(reinterpret_cast<const __m128i*>(data)));
118 |         }
119 | 
120 |         static int16x8 loadu(const std::int16_t* data) noexcept
121 |         {   // Unaligned 128-bit load of eight lanes starting at data.
122 |             return int16x8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(data)));
123 |         }
124 | 
125 |         void store(std::int16_t* data) const noexcept
126 |         {   // Aligned 128-bit store; data must meet _mm_store_si128's alignment requirement.
127 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), this->underlying_);
128 |         }
129 | 
130 |         void storeu(std::int16_t* data) const noexcept
131 |         {   // Unaligned 128-bit store of all eight lanes to data.
132 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), this->underlying_);
133 |         }
134 | 
135 |         const std::int16_t* data() const noexcept
136 |         {   // Read-only view of the register's storage as int16_t lanes.
137 |             return reinterpret_cast<const std::int16_t*>(&this->underlying_);
138 |         }
139 | 
140 |         std::int16_t* data() noexcept
141 |         {   // Mutable view of the register's storage as int16_t lanes.
142 |             return reinterpret_cast<std::int16_t*>(&this->underlying_);
143 |         }
144 |     };
145 | }
146 | 
147 | #include "bool16x8.sse2.hpp"
148 | #include "uint16x8.sse2.hpp"
149 | 
150 | namespace tue
151 | {
152 |     inline int16x8 int16x8::explicit_cast(const bool16x8& s) noexcept
153 |     {   // Reuses the mask's register contents unchanged as int16 lanes.
154 |         return int16x8(__m128i(s));
155 |     }
156 | 
157 |     inline int16x8 int16x8::explicit_cast(const uint16x8& s) noexcept
158 |     {   // Reuses the unsigned vector's register contents unchanged as int16 lanes.
159 |         return int16x8(__m128i(s));
160 |     }
161 | 
162 |     namespace detail_
163 |     {
164 |         inline int16x8 unary_plus_operator_s(const int16x8& s) noexcept
165 |         {   // Unary plus is the identity: hand back a copy of s.
166 |             return int16x8(s);
167 |         }
168 | 
169 |         inline int16x8& pre_increment_operator_s(int16x8& s) noexcept
170 |         {   // Adds 1 to every lane of s in place and returns s.
171 |             return s = int16x8(_mm_add_epi16(__m128i(s), _mm_set1_epi16(1)));
172 |         }
173 | 
174 |         inline int16x8 post_increment_operator_s(int16x8& s) noexcept
175 |         {   // Adds 1 to every lane of s and returns the value s held beforehand.
176 |             const int16x8 previous = s;
177 |             s = int16x8(_mm_add_epi16(previous, _mm_set1_epi16(1)));
178 |             return previous;
179 |         }
180 | 
181 |         inline int16x8 unary_minus_operator_s(const int16x8& s) noexcept
182 |         {   // Negation computed as 0 - s in every lane.
183 |             return int16x8(_mm_sub_epi16(_mm_setzero_si128(), __m128i(s)));
184 |         }
185 | 
186 |         inline int16x8& pre_decrement_operator_s(int16x8& s) noexcept
187 |         {   // Subtracts 1 from every lane of s in place and returns s.
188 |             return s = int16x8(_mm_sub_epi16(__m128i(s), _mm_set1_epi16(1)));
189 |         }
190 | 
191 |         inline int16x8 post_decrement_operator_s(int16x8& s) noexcept
192 |         {   // Subtracts 1 from every lane of s and returns the value s held beforehand.
193 |             const int16x8 previous = s;
194 |             s = int16x8(_mm_sub_epi16(previous, _mm_set1_epi16(1)));
195 |             return previous;
196 |         }
197 | 
198 |         inline int16x8 bitwise_not_operator_s(const int16x8& s) noexcept
199 |         {   // Flip every bit by XOR with all-ones; -1 avoids narrowing 0xFFFFu (65535) into int16_t.
200 |             return _mm_xor_si128(s, _mm_set1_epi16(-1));
201 |         }
202 | 
203 |         inline int16x8 addition_operator_ss(
204 |             const int16x8& lhs, const int16x8& rhs) noexcept
205 |         {   // Lane-wise 16-bit sum lhs + rhs.
206 |             return int16x8(_mm_add_epi16(__m128i(lhs), __m128i(rhs)));
207 |         }
208 | 
209 |         inline int16x8 subtraction_operator_ss(
210 |             const int16x8& lhs, const int16x8& rhs) noexcept
211 |         {   // Lane-wise 16-bit difference lhs - rhs.
212 |             return int16x8(_mm_sub_epi16(__m128i(lhs), __m128i(rhs)));
213 |         }
214 | 
215 |         /*inline int16x8 multiplication_operator_ss(
216 |             const int16x8& lhs, const int16x8& rhs) noexcept
217 |         {
218 |             // TODO
219 |         }
220 | 
221 |         inline int16x8 division_operator_ss(
222 |             const int16x8& lhs, const int16x8& rhs) noexcept
223 |         {
224 |             // TODO
225 |         }
226 | 
227 |         inline int16x8 modulo_operator_ss(
228 |             const int16x8& lhs, const int16x8& rhs) noexcept
229 |         {
230 |             // TODO
231 |         }*/
232 | 
233 |         inline int16x8 bitwise_and_operator_ss(
234 |             const int16x8& lhs, const int16x8& rhs) noexcept
235 |         {   // Bitwise AND across the whole 128-bit register.
236 |             return int16x8(_mm_and_si128(__m128i(lhs), __m128i(rhs)));
237 |         }
238 | 
239 |         inline int16x8 bitwise_or_operator_ss(
240 |             const int16x8& lhs, const int16x8& rhs) noexcept
241 |         {   // Bitwise OR across the whole 128-bit register.
242 |             return int16x8(_mm_or_si128(__m128i(lhs), __m128i(rhs)));
243 |         }
244 | 
245 |         inline int16x8 bitwise_xor_operator_ss(
246 |             const int16x8& lhs, const int16x8& rhs) noexcept
247 |         {   // Bitwise XOR across the whole 128-bit register.
248 |             return int16x8(_mm_xor_si128(__m128i(lhs), __m128i(rhs)));
249 |         }
250 | 
251 |         inline int16x8 bitwise_shift_left_operator_si(
252 |             const int16x8& lhs, int rhs) noexcept
253 |         {   // Shifts every 16-bit lane left by the same count rhs.
254 |             return int16x8(_mm_slli_epi16(__m128i(lhs), rhs));
255 |         }
256 | 
257 |         inline int16x8 bitwise_shift_right_operator_si(
258 |             const int16x8& lhs, int rhs) noexcept
259 |         {   // Lanes are signed: shift arithmetically (sign-propagating) like scalar >>, not logically.
260 |             return _mm_srai_epi16(lhs, rhs);
261 |         }
262 | 
263 |         inline int16x8& addition_assignment_operator_ss(int16x8& lhs, const int16x8& rhs) noexcept
264 |         {   // lhs += rhs, lane by lane; returns lhs.
265 |             lhs = int16x8(_mm_add_epi16(lhs, rhs));
266 |             return lhs;
267 |         }
268 | 
269 |         inline int16x8& subtraction_assignment_operator_ss(int16x8& lhs, const int16x8& rhs) noexcept
270 |         {   // lhs -= rhs, lane by lane; returns lhs.
271 |             lhs = int16x8(_mm_sub_epi16(lhs, rhs));
272 |             return lhs;
273 |         }
274 | 
275 |         /*inline int16x8& multiplication_assignment_operator_ss(
276 |             int16x8& lhs, const int16x8& rhs) noexcept
277 |         {
278 |             // TODO
279 |         }
280 | 
281 |         inline int16x8& division_assignment_operator_ss(
282 |             int16x8& lhs, const int16x8& rhs) noexcept
283 |         {
284 |             // TODO
285 |         }
286 | 
287 |         inline int16x8& modulo_assignment_operator_ss(
288 |             int16x8& lhs, const int16x8& rhs) noexcept
289 |         {
290 |             // TODO
291 |         }*/
292 | 
293 |         inline int16x8& bitwise_and_assignment_operator_ss(int16x8& lhs, const int16x8& rhs) noexcept
294 |         {   // lhs &= rhs over all 128 bits; returns lhs.
295 |             lhs = int16x8(_mm_and_si128(lhs, rhs));
296 |             return lhs;
297 |         }
298 | 
299 |         inline int16x8& bitwise_or_assignment_operator_ss(int16x8& lhs, const int16x8& rhs) noexcept
300 |         {   // lhs |= rhs over all 128 bits; returns lhs.
301 |             lhs = int16x8(_mm_or_si128(lhs, rhs));
302 |             return lhs;
303 |         }
304 | 
305 |         inline int16x8& bitwise_xor_assignment_operator_ss(int16x8& lhs, const int16x8& rhs) noexcept
306 |         {   // lhs ^= rhs over all 128 bits; returns lhs.
307 |             lhs = int16x8(_mm_xor_si128(lhs, rhs));
308 |             return lhs;
309 |         }
310 | 
311 |         inline int16x8& bitwise_shift_left_assignment_operator_si(int16x8& lhs, int rhs) noexcept
312 |         {   // lhs <<= rhs applied to every 16-bit lane; returns lhs.
313 |             lhs = int16x8(_mm_slli_epi16(lhs, rhs));
314 |             return lhs;
315 |         }
316 | 
317 |         inline int16x8& bitwise_shift_right_assignment_operator_si(
318 |             int16x8& lhs, int rhs) noexcept
319 |         {   // Lanes are signed: shift arithmetically (sign-propagating) like scalar >>=, not logically.
320 |             return lhs = _mm_srai_epi16(lhs, rhs);
321 |         }
322 | 
323 |         inline bool equality_operator_ss(const int16x8& lhs, const int16x8& rhs) noexcept
324 |         {   // True only when all 16 bytes (hence all eight lanes) compare equal.
325 |             const int equal_bytes = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
326 |             return equal_bytes == 0xFFFF;
327 |         }
328 | 
329 |         inline bool inequality_operator_ss(const int16x8& lhs, const int16x8& rhs) noexcept
330 |         {   // True when at least one of the 16 bytes differs.
331 |             const int equal_bytes = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
332 |             return equal_bytes != 0xFFFF;
333 |         }
334 | 
335 |         inline int16x8 abs_s(const int16x8& s) noexcept
336 |         {   // Blend: the negated value where a lane is below zero, the original value elsewhere.
337 |             const __m128i is_negative = _mm_cmplt_epi16(s, _mm_setzero_si128());
338 |             const __m128i negated = unary_minus_operator_s(s);
339 |             const __m128i if_negative = _mm_and_si128(is_negative, negated);
340 |             return int16x8(_mm_or_si128(if_negative, _mm_andnot_si128(is_negative, s)));
341 |         }
342 | 
343 |         inline int16x8 min_ss(
344 |             const int16x8& s1, const int16x8& s2) noexcept
345 |         {   // Lane-wise signed minimum of s1 and s2.
346 |             return int16x8(_mm_min_epi16(__m128i(s1), __m128i(s2)));
347 |         }
348 | 
349 |         inline int16x8 max_ss(
350 |             const int16x8& s1, const int16x8& s2) noexcept
351 |         {   // Lane-wise signed maximum of s1 and s2.
352 |             return int16x8(_mm_max_epi16(__m128i(s1), __m128i(s2)));
353 |         }
354 | 
355 |         inline int16x8 mask_ss(
356 |             const bool16x8& conditions, const int16x8& values) noexcept
357 |         {   // Keeps the bits of values where the condition mask is set; all other bits become 0.
358 |             const __m128i keep = conditions;
359 |             return int16x8(_mm_and_si128(keep, values));
360 |         }
361 | 
362 |         inline int16x8 select_sss(
363 |             const bool16x8& conditions,
364 |             const int16x8& values,
365 |             const int16x8& otherwise) noexcept
366 |         {   // Bitwise blend: values where the mask is set, otherwise everywhere else.
367 |             const __m128i mask = conditions;
368 |             const __m128i chosen = _mm_and_si128(mask, values);
369 |             return int16x8(_mm_or_si128(chosen, _mm_andnot_si128(mask, otherwise)));
370 |         }
371 | 
372 |         inline bool16x8 less_ss(const int16x8& lhs, const int16x8& rhs) noexcept
373 |         {   // Per-lane signed lhs < rhs, returned as a lane mask.
374 |             const __m128i lt_mask = _mm_cmplt_epi16(lhs, rhs);
375 |             return lt_mask;
376 |         }
377 | 
378 |         inline bool16x8 less_equal_ss(
379 |             const int16x8& lhs, const int16x8& rhs) noexcept
380 |         {   // lhs <= rhs is NOT(lhs > rhs); -1 gives all-ones without narrowing 0xFFFFu into int16_t.
381 |             return _mm_xor_si128(_mm_cmpgt_epi16(lhs, rhs), _mm_set1_epi16(-1));
382 |         }
383 | 
384 |         inline bool16x8 greater_ss(const int16x8& lhs, const int16x8& rhs) noexcept
385 |         {   // Per-lane signed lhs > rhs, returned as a lane mask.
386 |             const __m128i gt_mask = _mm_cmpgt_epi16(lhs, rhs);
387 |             return gt_mask;
388 |         }
389 | 
390 |         inline bool16x8 greater_equal_ss(
391 |             const int16x8& lhs, const int16x8& rhs) noexcept
392 |         {   // lhs >= rhs is NOT(lhs < rhs); -1 gives all-ones without narrowing 0xFFFFu into int16_t.
393 |             return _mm_xor_si128(_mm_cmplt_epi16(lhs, rhs), _mm_set1_epi16(-1));
394 |         }
395 | 
396 |         inline bool16x8 equal_ss(const int16x8& lhs, const int16x8& rhs) noexcept
397 |         {   // Per-lane lhs == rhs, returned as a lane mask.
398 |             const __m128i eq_mask = _mm_cmpeq_epi16(lhs, rhs);
399 |             return eq_mask;
400 |         }
401 | 
402 |         inline bool16x8 not_equal_ss(
403 |             const int16x8& lhs, const int16x8& rhs) noexcept
404 |         {   // lhs != rhs is NOT(lhs == rhs); -1 gives all-ones without narrowing 0xFFFFu into int16_t.
405 |             return _mm_xor_si128(_mm_cmpeq_epi16(lhs, rhs), _mm_set1_epi16(-1));
406 |         }
407 |     }
408 | }
409 | 


--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/int32x4.sse2.hpp:
--------------------------------------------------------------------------------
  1 | //                Copyright Jo Bates 2015.
  2 | // Distributed under the Boost Software License, Version 1.0.
  3 | //    (See accompanying file LICENSE_1_0.txt or copy at
  4 | //          http://www.boost.org/LICENSE_1_0.txt)
  5 | //
  6 | //     Please report any bugs, typos, or suggestions to
  7 | //         https://github.com/Cincinesh/tue/issues
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <emmintrin.h>
 12 | 
 13 | #include <cstdint>
 14 | #include <type_traits>
 15 | 
 16 | #include "../../../simd.hpp"
 17 | 
 18 | namespace tue
 19 | {
 20 |     template<>  // SSE2-backed specialization of simd for four std::int32_t lanes.
 21 |     class alignas(tue::detail_::alignof_simd<std::int32_t, 4>())
 22 |     simd<std::int32_t, 4>
 23 |     {
 24 |         __m128i underlying_;  // The wrapped 128-bit integer register.
 25 | 
 26 |     private:
 27 |         template<typename U>  // Generic conversion: casts each of the four lanes with a scalar std::int32_t cast.
 28 |         static int32x4 explicit_cast(const simd<U, 4>& s) noexcept
 29 |         {
 30 |             return {
 31 |                 std::int32_t(s.data()[0]),
 32 |                 std::int32_t(s.data()[1]),
 33 |                 std::int32_t(s.data()[2]),
 34 |                 std::int32_t(s.data()[3]),
 35 |             };
 36 |         }
 37 | 
 38 |         inline static int32x4 explicit_cast(const bool32x4& s) noexcept;  // Defined after the class.
 39 | 
 40 |         inline static int32x4 explicit_cast(const float32x4& s) noexcept;  // Defined after the class.
 41 | 
 42 |         inline static int32x4 explicit_cast(const uint32x4& s) noexcept;  // Defined after the class.
 43 | 
 44 |     public:
 45 |         using component_type = std::int32_t;  // Scalar type of each lane.
 46 | 
 47 |         static constexpr int component_count = 4;  // Number of lanes in this vector.
 48 | 
 49 |         static constexpr bool is_accelerated = true;  // Flags this specialization as intrinsic-backed.
 50 | 
 51 |         simd() noexcept = default;  // Defaulted default constructor.
 52 | 
 53 |         explicit simd(std::int32_t x) noexcept  // Broadcast: every lane is set to x.
 54 |         :
 55 |             underlying_(_mm_set1_epi32(x))
 56 |         {
 57 |         }
 58 | 
 59 |         template<int M = 4, typename = std::enable_if_t<M == 2>>  // 2-argument form; the enable_if condition is false for the default M = 4.
 60 |         inline simd(
 61 |             std::int32_t x, std::int32_t y) noexcept;
 62 | 
 63 |         template<int M = 4, typename = std::enable_if_t<M == 4>>  // 4-argument form; the one whose condition holds by default.
 64 |         inline simd(
 65 |             std::int32_t x, std::int32_t y,
 66 |             std::int32_t z, std::int32_t w) noexcept
 67 |         :
 68 |             underlying_(_mm_setr_epi32(x, y, z, w))  // "setr" variant: lanes are passed in the order x, y, z, w.
 69 |         {
 70 |         }
 71 | 
 72 |         template<int M = 4, typename = std::enable_if_t<M == 8>>  // 8-argument form; false for the default M = 4.
 73 |         inline simd(
 74 |             std::int32_t s0, std::int32_t s1,
 75 |             std::int32_t s2, std::int32_t s3,
 76 |             std::int32_t s4, std::int32_t s5,
 77 |             std::int32_t s6, std::int32_t s7) noexcept;
 78 | 
 79 |         template<int M = 4, typename = std::enable_if_t<M == 16>>  // 16-argument form; false for the default M = 4.
 80 |         inline simd(
 81 |             std::int32_t  s0, std::int32_t  s1,
 82 |             std::int32_t  s2, std::int32_t  s3,
 83 |             std::int32_t  s4, std::int32_t  s5,
 84 |             std::int32_t  s6, std::int32_t  s7,
 85 |             std::int32_t  s8, std::int32_t  s9,
 86 |             std::int32_t s10, std::int32_t s11,
 87 |             std::int32_t s12, std::int32_t s13,
 88 |             std::int32_t s14, std::int32_t s15) noexcept;
 89 | 
 90 |         template<typename U>  // Explicit conversion from another 4-lane simd, delegating to the explicit_cast overloads.
 91 |         explicit simd(const simd<U, 4>& s) noexcept
 92 |         {
 93 |             *this = explicit_cast(s);
 94 |         }
 95 | 
 96 |         simd(__m128i underlying) noexcept  // Implicit wrap of a raw __m128i value.
 97 |         :
 98 |             underlying_(underlying)
 99 |         {
100 |         }
101 | 
102 |         operator __m128i() const noexcept  // Implicitly exposes the wrapped register.
103 |         {
104 |             return underlying_;
105 |         }
106 | 
107 |         static int32x4 zero() noexcept  // All four lanes cleared to 0.
108 |         {
109 |             return _mm_setzero_si128();
110 |         }
111 | 
112 |         static int32x4 load(const std::int32_t* data) noexcept  // Aligned load; data must meet _mm_load_si128's alignment requirement.
113 |         {
114 |             return _mm_load_si128(reinterpret_cast<const __m128i*>(data));
115 |         }
116 | 
117 |         static int32x4 loadu(const std::int32_t* data) noexcept  // Unaligned load of four lanes starting at data.
118 |         {
119 |             return _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
120 |         }
121 | 
122 |         void store(std::int32_t* data) const noexcept  // Aligned store; data must meet _mm_store_si128's alignment requirement.
123 |         {
124 |             _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_);
125 |         }
126 | 
127 |         void storeu(std::int32_t* data) const noexcept  // Unaligned store of all four lanes to data.
128 |         {
129 |             _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_);
130 |         }
131 | 
132 |         const std::int32_t* data() const noexcept  // Read-only view of the register's storage as int32_t lanes.
133 |         {
134 |             return reinterpret_cast<const std::int32_t*>(&underlying_);
135 |         }
136 | 
137 |         std::int32_t* data() noexcept  // Mutable view of the register's storage as int32_t lanes.
138 |         {
139 |             return reinterpret_cast<std::int32_t*>(&underlying_);
140 |         }
141 |     };
142 | }
143 | 
144 | #include "../sse/bool32x4.sse.hpp"
145 | #include "../sse/float32x4.sse.hpp"
146 | #include "uint32x4.sse2.hpp"
147 | 
148 | namespace tue
149 | {
150 |     inline int32x4 int32x4::explicit_cast(const bool32x4& s) noexcept
151 |     {   // Takes the mask's value cast to __m128i and wraps it as int32 lanes.
152 |         return int32x4(__m128i(s));
153 |     }
154 | 
155 |     inline int32x4 int32x4::explicit_cast(const float32x4& s) noexcept
156 |     {   // Truncate toward zero, matching the scalar std::int32_t(float) cast of the generic overload, instead of rounding.
157 |         return _mm_cvttps_epi32(s);
158 |     }
159 | 
160 |     inline int32x4 int32x4::explicit_cast(const uint32x4& s) noexcept
161 |     {   // Reuses the unsigned vector's register contents unchanged as int32 lanes.
162 |         return int32x4(__m128i(s));
163 |     }
164 | 
165 |     namespace detail_
166 |     {
167 |         inline int32x4 unary_plus_operator_s(const int32x4& s) noexcept
168 |         {   // Unary plus is the identity: hand back a copy of s.
169 |             return int32x4(s);
170 |         }
171 | 
172 |         inline int32x4& pre_increment_operator_s(int32x4& s) noexcept
173 |         {   // Adds 1 to every lane of s in place and returns s.
174 |             return s = int32x4(_mm_add_epi32(__m128i(s), _mm_set1_epi32(1)));
175 |         }
176 | 
177 |         inline int32x4 post_increment_operator_s(int32x4& s) noexcept
178 |         {   // Adds 1 to every lane of s and returns the value s held beforehand.
179 |             const int32x4 previous = s;
180 |             s = int32x4(_mm_add_epi32(previous, _mm_set1_epi32(1)));
181 |             return previous;
182 |         }
183 | 
184 |         inline int32x4 unary_minus_operator_s(const int32x4& s) noexcept
185 |         {   // Negation computed as 0 - s in every lane.
186 |             return int32x4(_mm_sub_epi32(_mm_setzero_si128(), __m128i(s)));
187 |         }
188 | 
189 |         inline int32x4& pre_decrement_operator_s(int32x4& s) noexcept
190 |         {   // Subtracts 1 from every lane of s in place and returns s.
191 |             return s = int32x4(_mm_sub_epi32(__m128i(s), _mm_set1_epi32(1)));
192 |         }
193 | 
194 |         inline int32x4 post_decrement_operator_s(int32x4& s) noexcept
195 |         {   // Subtracts 1 from every lane of s and returns the value s held beforehand.
196 |             const int32x4 previous = s;
197 |             s = int32x4(_mm_sub_epi32(previous, _mm_set1_epi32(1)));
198 |             return previous;
199 |         }
200 | 
201 |         inline int32x4 bitwise_not_operator_s(const int32x4& s) noexcept
202 |         {   // Flip every bit by XOR with all-ones; -1 avoids narrowing 0xFFFFFFFF (unsigned) into int32_t.
203 |             return _mm_xor_si128(s, _mm_set1_epi32(-1));
204 |         }
205 | 
206 |         inline int32x4 addition_operator_ss(
207 |             const int32x4& lhs, const int32x4& rhs) noexcept
208 |         {   // Lane-wise 32-bit sum lhs + rhs.
209 |             return int32x4(_mm_add_epi32(__m128i(lhs), __m128i(rhs)));
210 |         }
211 | 
212 |         inline int32x4 subtraction_operator_ss(
213 |             const int32x4& lhs, const int32x4& rhs) noexcept
214 |         {   // Lane-wise 32-bit difference lhs - rhs.
215 |             return int32x4(_mm_sub_epi32(__m128i(lhs), __m128i(rhs)));
216 |         }
217 | 
218 |         /*inline int32x4 multiplication_operator_ss(
219 |             const int32x4& lhs, const int32x4& rhs) noexcept
220 |         {
221 |             // TODO
222 |         }
223 | 
224 |         inline int32x4 division_operator_ss(
225 |             const int32x4& lhs, const int32x4& rhs) noexcept
226 |         {
227 |             // TODO
228 |         }
229 | 
230 |         inline int32x4 modulo_operator_ss(
231 |             const int32x4& lhs, const int32x4& rhs) noexcept
232 |         {
233 |             // TODO
234 |         }*/
235 | 
236 |         inline int32x4 bitwise_and_operator_ss(
237 |             const int32x4& lhs, const int32x4& rhs) noexcept
238 |         {   // Bitwise AND across the whole 128-bit register.
239 |             return int32x4(_mm_and_si128(__m128i(lhs), __m128i(rhs)));
240 |         }
241 | 
242 |         inline int32x4 bitwise_or_operator_ss(
243 |             const int32x4& lhs, const int32x4& rhs) noexcept
244 |         {   // Bitwise OR across the whole 128-bit register.
245 |             return int32x4(_mm_or_si128(__m128i(lhs), __m128i(rhs)));
246 |         }
247 | 
248 |         inline int32x4 bitwise_xor_operator_ss(
249 |             const int32x4& lhs, const int32x4& rhs) noexcept
250 |         {   // Bitwise XOR across the whole 128-bit register.
251 |             return int32x4(_mm_xor_si128(__m128i(lhs), __m128i(rhs)));
252 |         }
253 | 
254 |         inline int32x4 bitwise_shift_left_operator_si(
255 |             const int32x4& lhs, int rhs) noexcept
256 |         {   // Shifts every 32-bit lane left by the same count rhs.
257 |             return int32x4(_mm_slli_epi32(__m128i(lhs), rhs));
258 |         }
259 | 
260 |         inline int32x4 bitwise_shift_right_operator_si(
261 |             const int32x4& lhs, int rhs) noexcept
262 |         {   // Lanes are signed: shift arithmetically (sign-propagating) like scalar >>, not logically.
263 |             return _mm_srai_epi32(lhs, rhs);
264 |         }
265 | 
266 |         inline int32x4& addition_assignment_operator_ss(int32x4& lhs, const int32x4& rhs) noexcept
267 |         {   // lhs += rhs, lane by lane; returns lhs.
268 |             lhs = int32x4(_mm_add_epi32(lhs, rhs));
269 |             return lhs;
270 |         }
271 | 
272 |         inline int32x4& subtraction_assignment_operator_ss(int32x4& lhs, const int32x4& rhs) noexcept
273 |         {   // lhs -= rhs, lane by lane; returns lhs.
274 |             lhs = int32x4(_mm_sub_epi32(lhs, rhs));
275 |             return lhs;
276 |         }
277 | 
278 |         /*inline int32x4& multiplication_assignment_operator_ss(
279 |             int32x4& lhs, const int32x4& rhs) noexcept
280 |         {
281 |             // TODO
282 |         }
283 | 
284 |         inline int32x4& division_assignment_operator_ss(
285 |             int32x4& lhs, const int32x4& rhs) noexcept
286 |         {
287 |             // TODO
288 |         }
289 | 
290 |         inline int32x4& modulo_assignment_operator_ss(
291 |             int32x4& lhs, const int32x4& rhs) noexcept
292 |         {
293 |             // TODO
294 |         }*/
295 | 
296 |         inline int32x4& bitwise_and_assignment_operator_ss(int32x4& lhs, const int32x4& rhs) noexcept
297 |         {   // lhs &= rhs over all 128 bits; returns lhs.
298 |             lhs = int32x4(_mm_and_si128(lhs, rhs));
299 |             return lhs;
300 |         }
301 | 
302 |         inline int32x4& bitwise_or_assignment_operator_ss(int32x4& lhs, const int32x4& rhs) noexcept
303 |         {   // lhs |= rhs over all 128 bits; returns lhs.
304 |             lhs = int32x4(_mm_or_si128(lhs, rhs));
305 |             return lhs;
306 |         }
307 | 
308 |         inline int32x4& bitwise_xor_assignment_operator_ss(int32x4& lhs, const int32x4& rhs) noexcept
309 |         {   // lhs ^= rhs over all 128 bits; returns lhs.
310 |             lhs = int32x4(_mm_xor_si128(lhs, rhs));
311 |             return lhs;
312 |         }
313 | 
314 |         inline int32x4& bitwise_shift_left_assignment_operator_si(int32x4& lhs, int rhs) noexcept
315 |         {   // lhs <<= rhs applied to every 32-bit lane; returns lhs.
316 |             lhs = int32x4(_mm_slli_epi32(lhs, rhs));
317 |             return lhs;
318 |         }
319 | 
320 |         inline int32x4& bitwise_shift_right_assignment_operator_si(
321 |             int32x4& lhs, int rhs) noexcept
322 |         {   // Lanes are signed: shift arithmetically (sign-propagating) like scalar >>=, not logically.
323 |             return lhs = _mm_srai_epi32(lhs, rhs);
324 |         }
325 | 
326 |         inline bool equality_operator_ss(const int32x4& lhs, const int32x4& rhs) noexcept
327 |         {   // True only when all 16 bytes (hence all four lanes) compare equal.
328 |             const int equal_bytes = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
329 |             return equal_bytes == 0xFFFF;
330 |         }
331 | 
332 |         inline bool inequality_operator_ss(const int32x4& lhs, const int32x4& rhs) noexcept
333 |         {   // True when at least one of the 16 bytes differs.
334 |             const int equal_bytes = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
335 |             return equal_bytes != 0xFFFF;
336 |         }
337 | 
338 |         inline int32x4 abs_s(const int32x4& s) noexcept
339 |         {   // Blend: the negated value where a lane is below zero, the original value elsewhere.
340 |             const __m128i is_negative = _mm_cmplt_epi32(s, _mm_setzero_si128());
341 |             const __m128i negated = unary_minus_operator_s(s);
342 |             const __m128i if_negative = _mm_and_si128(is_negative, negated);
343 |             return int32x4(_mm_or_si128(if_negative, _mm_andnot_si128(is_negative, s)));
344 |         }
345 | 
346 |         /*inline int32x4 min_ss(
347 |             const int32x4& s1, const int32x4& s2) noexcept
348 |         {
349 |             // TODO
350 |         }
351 | 
352 |         inline int32x4 max_ss(
353 |             const int32x4& s1, const int32x4& s2) noexcept
354 |         {
355 |             // TODO
356 |         }*/
357 | 
358 |         inline int32x4 mask_ss(
359 |             const bool32x4& conditions, const int32x4& values) noexcept
360 |         {   // Keeps the bits of values where the condition mask is set; all other bits become 0.
361 |             const __m128i keep = conditions;
362 |             return int32x4(_mm_and_si128(keep, values));
363 |         }
364 | 
365 |         inline int32x4 select_sss(
366 |             const bool32x4& conditions,
367 |             const int32x4& values,
368 |             const int32x4& otherwise) noexcept
369 |         {   // Bitwise blend: values where the mask is set, otherwise everywhere else.
370 |             const __m128i mask = conditions;
371 |             const __m128i chosen = _mm_and_si128(mask, values);
372 |             return int32x4(_mm_or_si128(chosen, _mm_andnot_si128(mask, otherwise)));
373 |         }
374 | 
375 |         inline bool32x4 less_ss(const int32x4& lhs, const int32x4& rhs) noexcept
376 |         {   // Per-lane signed lhs < rhs, returned as a lane mask.
377 |             const __m128i lt_mask = _mm_cmplt_epi32(lhs, rhs);
378 |             return lt_mask;
379 |         }
380 | 
381 |         inline bool32x4 less_equal_ss(
382 |             const int32x4& lhs, const int32x4& rhs) noexcept
383 |         {   // lhs <= rhs is NOT(lhs > rhs); -1 gives all-ones without narrowing 0xFFFFFFFF into int32_t.
384 |             return _mm_xor_si128(
385 |                 _mm_cmpgt_epi32(lhs, rhs), _mm_set1_epi32(-1));
386 |         }
387 | 
388 |         inline bool32x4 greater_ss(const int32x4& lhs, const int32x4& rhs) noexcept
389 |         {   // Per-lane signed lhs > rhs, returned as a lane mask.
390 |             const __m128i gt_mask = _mm_cmpgt_epi32(lhs, rhs);
391 |             return gt_mask;
392 |         }
393 | 
394 |         inline bool32x4 greater_equal_ss(
395 |             const int32x4& lhs, const int32x4& rhs) noexcept
396 |         {   // lhs >= rhs is NOT(lhs < rhs); -1 gives all-ones without narrowing 0xFFFFFFFF into int32_t.
397 |             return _mm_xor_si128(
398 |                 _mm_cmplt_epi32(lhs, rhs), _mm_set1_epi32(-1));
399 |         }
400 | 
401 |         inline bool32x4 equal_ss(const int32x4& lhs, const int32x4& rhs) noexcept
402 |         {   // Per-lane lhs == rhs, returned as a lane mask.
403 |             const __m128i eq_mask = _mm_cmpeq_epi32(lhs, rhs);
404 |             return eq_mask;
405 |         }
406 | 
407 |         inline bool32x4 not_equal_ss(
408 |             const int32x4& lhs, const int32x4& rhs) noexcept
409 |         {   // lhs != rhs is NOT(lhs == rhs); -1 gives all-ones without narrowing 0xFFFFFFFF into int32_t.
410 |             return _mm_xor_si128(
411 |                 _mm_cmpeq_epi32(lhs, rhs), _mm_set1_epi32(-1));
412 |         }
413 |     }
414 | }
415 | 


--------------------------------------------------------------------------------