├── .gitattributes ├── .gitignore ├── .gitmodules ├── docs ├── README.md ├── index.md └── DoxygenLayout.xml ├── include └── tue │ ├── unused.hpp │ ├── detail_ │ ├── simd_support.hpp │ ├── is_floating_point_simd_component.hpp │ ├── simd_specializations.hpp │ ├── is_sized_bool.hpp │ ├── is_integral_simd_component.hpp │ ├── is_arithmetic_simd_component.hpp │ ├── is_simd_component.hpp │ ├── is_vec_component.hpp │ ├── simd │ │ ├── sse2 │ │ │ ├── bool64x2.sse2.hpp │ │ │ ├── bool16x8.sse2.hpp │ │ │ ├── bool8x16.sse2.hpp │ │ │ ├── uint64x2.sse2.hpp │ │ │ ├── uint16x8.sse2.hpp │ │ │ ├── uint32x4.sse2.hpp │ │ │ ├── int64x2.sse2.hpp │ │ │ ├── uint8x16.sse2.hpp │ │ │ ├── int16x8.sse2.hpp │ │ │ └── int32x4.sse2.hpp │ │ └── sse │ │ │ └── bool32x4.sse.hpp │ └── matmult.hpp │ ├── nocopy_cast.hpp │ └── sized_bool.hpp ├── tests ├── tue.tests.hpp ├── unused.tests.cpp ├── nocopy_cast.tests.cpp ├── vec.tests.cpp ├── math.tests.cpp ├── sized_bool.tests.cpp ├── quat.tests.cpp └── transform.tests.cpp ├── LICENSE_1_0.txt ├── CMakeLists.txt └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | docs/html 3 | *.user 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/mon"] 2 | path = lib/mon 3 | url = https://github.com/Cincinesh/mon.git 4 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | Documentation 2 | ------------- 3 | Tuesday documentation can be generated with 4 | [Doxygen](http://www.stack.nl/~dimitri/doxygen/) using the `Doxyfile` at the 5 
namespace tue
{
    /*!
     * \defgroup unused_hpp
     *
     * \brief The `unused()` function template.
     *
     * @{
     */

    /*!
     * \brief Suppresses unused variable warnings.
     *
     * Takes any number of arguments by forwarding reference and has an
     * empty body, so the arguments are only named at the call site; they are
     * never copied, moved from, or otherwise touched.
     *
     * \tparam Args The parameter types with reference qualifiers removed.
     */
    template<typename... Args>
    inline void unused(Args&&...) noexcept
    {
        // Intentionally empty: binding the references is the whole point.
    }

    /*!@}*/
}
// NOTE(review): the original listing has one more #include ahead of these
// two whose target was lost in extraction -- presumably the test framework
// header; restore it when applying this block.
#include <cmath>
#include <limits>

// Test constants are declared `constexpr` wherever possible; under MSVC
// (`_MSC_VER`) the macro falls back to plain `const`.
#ifdef _MSC_VER
#define CONST_OR_CONSTEXPR const
#else
#define CONST_OR_CONSTEXPR constexpr
#endif

namespace
{
    /*!
     * \brief Approximate comparison used by the test suite.
     *
     * \tparam T The type of both operands.
     *
     * \param actual    The value produced by the code under test.
     * \param expected  The reference value.
     *
     * \return `true` if any of the following holds:
     *         - `actual == expected`;
     *         - `|actual - expected| < |expected * 0.0003f|`, i.e. the
     *           values agree to within a relative tolerance of 0.03% of
     *           `expected` (so an `expected` of zero only matches exactly);
     *         - `expected` is positive or negative infinity, in which case
     *           `actual` is not inspected at all.
     */
    template<typename T>
    bool nearly_equal(T actual, T expected) noexcept
    {
        return actual == expected
            || std::abs(actual - expected) < std::abs(expected * 0.0003f)
            || std::abs(expected) == std::numeric_limits<T>::infinity();
    }
}
31 | */ 32 | #define TUE_SSE2 33 | #endif 34 | 35 | /*!@}*/ 36 | -------------------------------------------------------------------------------- /include/tue/detail_/is_floating_point_simd_component.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace tue 14 | { 15 | template 16 | struct is_floating_point_simd_component 17 | : 18 | public std::integral_constant 19 | { 20 | using std::integral_constant::integral_constant; 21 | }; 22 | 23 | template<> 24 | struct is_floating_point_simd_component 25 | : 26 | public std::integral_constant 27 | { 28 | using std::integral_constant::integral_constant; 29 | }; 30 | 31 | template<> 32 | struct is_floating_point_simd_component 33 | : 34 | public std::integral_constant 35 | { 36 | using std::integral_constant::integral_constant; 37 | }; 38 | } 39 | -------------------------------------------------------------------------------- /tests/unused.tests.cpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #include 10 | #include "tue.tests.hpp" 11 | 12 | #include 13 | 14 | namespace 15 | { 16 | using namespace tue; 17 | 18 | struct test_struct 19 | { 20 | mutable bool was_copied; 21 | 22 | test_struct() noexcept : 23 | was_copied(false) 24 | { 25 | } 26 | 27 | test_struct(const test_struct& other) noexcept : 28 | was_copied(false) 29 | { 30 | other.was_copied = true; 31 | } 32 | 33 | test_struct(test_struct&& other) noexcept : 34 | was_copied(false) 35 | { 36 | other.was_copied = true; 37 | } 38 | }; 39 | 40 | TEST_CASE(unused) 41 | { 42 | test_struct x; 43 | test_struct y; 44 | const test_struct z; 45 | tue::unused(x, std::move(y), z); 46 | test_assert(x.was_copied == false); 47 | test_assert(y.was_copied == false); 48 | test_assert(z.was_copied == false); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /LICENSE_1_0.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 
#include <cstring>

namespace tue
{
    namespace detail_
    {
        /*!
         * \brief Builds a `float` whose object representation is the 32
         *        bits of `x`.
         *
         * The bytes are copied with `std::memcpy` rather than read through
         * a `reinterpret_cast` to a floating-point reference: accessing a
         * `std::uint32_t` object through a `float` lvalue violates the
         * aliasing rules, whereas copying the bytes is well defined.
         *
         * \param x The bit pattern.
         *
         * \return The `float` with the same bytes as `x`.
         */
        inline float binary_float(std::uint32_t x) noexcept
        {
            static_assert(
                sizeof(float) == sizeof(std::uint32_t),
                "binary_float requires a 32-bit float");
            float result;
            std::memcpy(&result, &x, sizeof(result));
            return result;
        }

        /*!
         * \brief Builds a `double` whose object representation is the 64
         *        bits of `x`.
         *
         * Uses `std::memcpy` for the same reason as `binary_float()`.
         *
         * \param x The bit pattern.
         *
         * \return The `double` with the same bytes as `x`.
         */
        inline double binary_double(std::uint64_t x) noexcept
        {
            static_assert(
                sizeof(double) == sizeof(std::uint64_t),
                "binary_double requires a 64-bit double");
            double result;
            std::memcpy(&result, &x, sizeof(result));
            return result;
        }
    }
}
50 | #include "simd/sse2/int32x4.sse2.hpp" 51 | #include "simd/sse2/int64x2.sse2.hpp" 52 | #include "simd/sse2/uint8x16.sse2.hpp" 53 | #include "simd/sse2/uint16x8.sse2.hpp" 54 | #include "simd/sse2/uint32x4.sse2.hpp" 55 | #include "simd/sse2/uint64x2.sse2.hpp" 56 | 57 | #endif 58 | #endif 59 | -------------------------------------------------------------------------------- /include/tue/detail_/is_sized_bool.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace tue 15 | { 16 | enum bool8 : std::uint8_t; 17 | enum bool16 : std::uint16_t; 18 | enum bool32 : std::uint32_t; 19 | enum bool64 : std::uint64_t; 20 | 21 | template 22 | struct is_sized_bool : public std::integral_constant 23 | { 24 | using std::integral_constant::integral_constant; 25 | }; 26 | 27 | template<> 28 | struct is_sized_bool : public std::integral_constant 29 | { 30 | using std::integral_constant::integral_constant; 31 | }; 32 | 33 | template<> 34 | struct is_sized_bool : public std::integral_constant 35 | { 36 | using std::integral_constant::integral_constant; 37 | }; 38 | 39 | template<> 40 | struct is_sized_bool : public std::integral_constant 41 | { 42 | using std::integral_constant::integral_constant; 43 | }; 44 | 45 | template<> 46 | struct is_sized_bool : public std::integral_constant 47 | { 48 | using std::integral_constant::integral_constant; 49 | }; 50 | } 51 | -------------------------------------------------------------------------------- /tests/nocopy_cast.tests.cpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 
2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #include 10 | #include "tue.tests.hpp" 11 | 12 | #include 13 | 14 | namespace 15 | { 16 | using namespace tue; 17 | 18 | struct A 19 | { 20 | mutable bool was_copied; 21 | 22 | constexpr A() noexcept : 23 | was_copied(false) 24 | { 25 | } 26 | 27 | A(const A& other) noexcept : 28 | was_copied(false) 29 | { 30 | other.was_copied = true; 31 | } 32 | }; 33 | 34 | struct B 35 | { 36 | B() = delete; 37 | 38 | constexpr explicit B(const A&) noexcept 39 | { 40 | } 41 | }; 42 | 43 | TEST_CASE(nocopy_cast_same_type) 44 | { 45 | const A a1; 46 | const A& a2 = nocopy_cast(a1); 47 | test_assert(&a2 == &a1); 48 | test_assert(!a1.was_copied); 49 | 50 | CONST_OR_CONSTEXPR A a3; 51 | CONST_OR_CONSTEXPR B b1(a3); 52 | CONST_OR_CONSTEXPR B b2 = nocopy_cast(b1); 53 | unused(b2); 54 | } 55 | 56 | TEST_CASE(nocopy_cast_different_type) 57 | { 58 | CONST_OR_CONSTEXPR A a; 59 | CONST_OR_CONSTEXPR B b = nocopy_cast(a); 60 | test_assert(!a.was_copied); 61 | unused(b); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /tests/vec.tests.cpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #include 10 | #include "tue.tests.hpp" 11 | 12 | #include 13 | 14 | namespace 15 | { 16 | using namespace tue; 17 | 18 | TEST_CASE(is_simd_component) 19 | { 20 | test_assert(is_vec_component::value == false); 21 | test_assert(is_vec_component::value == true); 22 | test_assert(is_vec_component::value == true); 23 | test_assert(is_vec_component::value == true); 24 | test_assert(is_vec_component::value == true); 25 | test_assert(is_vec_component::value == true); 26 | test_assert(is_vec_component::value == true); 27 | test_assert(is_vec_component::value == true); 28 | test_assert(is_vec_component::value == true); 29 | test_assert(is_vec_component::value == true); 30 | test_assert(is_vec_component::value == true); 31 | test_assert(is_vec_component::value == true); 32 | test_assert(is_vec_component::value == true); 33 | test_assert(is_vec_component::value == true); 34 | test_assert(is_vec_component::value == true); 35 | test_assert((is_vec_component>::value == true)); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /include/tue/nocopy_cast.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | /*! 14 | * \defgroup nocopy_cast_hpp 15 | * 16 | * \brief The `nocopy_cast()` function template. 
namespace tue
{
    namespace detail_
    {
        // Selected when the requested type is exactly the argument type:
        // hands the argument back by const reference.
        template<typename T, typename U>
        inline constexpr std::enable_if_t<std::is_same<T, U>::value, const T&>
        nocopy_cast(const U& x) noexcept
        {
            return x;
        }

        // Selected when the requested type differs from the argument type:
        // produces a new value via `static_cast`.
        template<typename T, typename U>
        inline constexpr std::enable_if_t<!std::is_same<T, U>::value, T>
        nocopy_cast(const U& x) noexcept
        {
            return static_cast<T>(x);
        }
    }

    /*!
     * \addtogroup nocopy_cast_hpp
     * @{
     */

    /*!
     * \brief Casts `x` to type `T`, avoiding a copy if possible.
     *
     * \tparam T The return type.
     * \tparam U The type of parameter `x`.
     *
     * \param x The value to cast.
     *
     * \return A const reference to `x` if `x` is already of type `T`.
     *         `static_cast<T>(x)` otherwise.
     */
    template<typename T, typename U>
    inline constexpr std::conditional_t<std::is_same<T, U>::value, const T&, T>
    nocopy_cast(const U& x) noexcept
    {
        return tue::detail_::nocopy_cast<T>(x);
    }

    // Deleted forwarding-reference overload: an rvalue argument selects it,
    // so casting a temporary fails to compile.
    // NOTE(review): it is also the better match for a non-const lvalue, so
    // callers must pass a const lvalue -- confirm this is intended.
    template<typename T, typename U>
    void nocopy_cast(U&& x) = delete;

    /*!@}*/
}
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace tue 15 | { 16 | template 17 | struct is_integral_simd_component 18 | : 19 | public std::integral_constant 20 | { 21 | using std::integral_constant::integral_constant; 22 | }; 23 | 24 | template<> 25 | struct is_integral_simd_component 26 | : 27 | public std::integral_constant 28 | { 29 | using std::integral_constant::integral_constant; 30 | }; 31 | 32 | template<> 33 | struct is_integral_simd_component 34 | : 35 | public std::integral_constant 36 | { 37 | using std::integral_constant::integral_constant; 38 | }; 39 | 40 | template<> 41 | struct is_integral_simd_component 42 | : 43 | public std::integral_constant 44 | { 45 | using std::integral_constant::integral_constant; 46 | }; 47 | 48 | template<> 49 | struct is_integral_simd_component 50 | : 51 | public std::integral_constant 52 | { 53 | using std::integral_constant::integral_constant; 54 | }; 55 | 56 | template<> 57 | struct is_integral_simd_component 58 | : 59 | public std::integral_constant 60 | { 61 | using std::integral_constant::integral_constant; 62 | }; 63 | 64 | template<> 65 | struct is_integral_simd_component 66 | : 67 | public std::integral_constant 68 | { 69 | using std::integral_constant::integral_constant; 70 | }; 71 | 72 | template<> 73 | struct is_integral_simd_component 74 | : 75 | public std::integral_constant 76 | { 77 | using std::integral_constant::integral_constant; 78 | }; 79 | 80 | template<> 81 | struct is_integral_simd_component 82 | : 83 | public std::integral_constant 84 | { 85 | using std::integral_constant::integral_constant; 86 | }; 87 | } 88 | -------------------------------------------------------------------------------- /include/tue/detail_/is_arithmetic_simd_component.hpp: 
-------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace tue 15 | { 16 | template 17 | struct is_arithmetic_simd_component 18 | : 19 | public std::integral_constant 20 | { 21 | using std::integral_constant::integral_constant; 22 | }; 23 | 24 | template<> 25 | struct is_arithmetic_simd_component 26 | : 27 | public std::integral_constant 28 | { 29 | using std::integral_constant::integral_constant; 30 | }; 31 | 32 | template<> 33 | struct is_arithmetic_simd_component 34 | : 35 | public std::integral_constant 36 | { 37 | using std::integral_constant::integral_constant; 38 | }; 39 | 40 | template<> 41 | struct is_arithmetic_simd_component 42 | : 43 | public std::integral_constant 44 | { 45 | using std::integral_constant::integral_constant; 46 | }; 47 | 48 | template<> 49 | struct is_arithmetic_simd_component 50 | : 51 | public std::integral_constant 52 | { 53 | using std::integral_constant::integral_constant; 54 | }; 55 | 56 | template<> 57 | struct is_arithmetic_simd_component 58 | : 59 | public std::integral_constant 60 | { 61 | using std::integral_constant::integral_constant; 62 | }; 63 | 64 | template<> 65 | struct is_arithmetic_simd_component 66 | : 67 | public std::integral_constant 68 | { 69 | using std::integral_constant::integral_constant; 70 | }; 71 | 72 | template<> 73 | struct is_arithmetic_simd_component 74 | : 75 | public std::integral_constant 76 | { 77 | using std::integral_constant::integral_constant; 78 | }; 79 | 80 | template<> 81 | struct is_arithmetic_simd_component 82 | : 83 | public std::integral_constant 84 | { 85 | using 
std::integral_constant::integral_constant; 86 | }; 87 | 88 | template<> 89 | struct is_arithmetic_simd_component 90 | : 91 | public std::integral_constant 92 | { 93 | using std::integral_constant::integral_constant; 94 | }; 95 | 96 | template<> 97 | struct is_arithmetic_simd_component 98 | : 99 | public std::integral_constant 100 | { 101 | using std::integral_constant::integral_constant; 102 | }; 103 | } 104 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.3) 2 | project(tue) 3 | enable_testing() 4 | 5 | set(CMAKE_CXX_STANDARD 14) 6 | 7 | if(MSVC) 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj") 9 | endif() 10 | 11 | include_directories( 12 | lib/mon/include 13 | include) 14 | 15 | # mon 16 | set(MON_SOURCES 17 | lib/mon/include/mon/test_case.hpp 18 | lib/mon/include/mon/test_failure.hpp 19 | lib/mon/include/mon/test_runner.hpp 20 | lib/mon/src/main.cpp 21 | lib/mon/src/mon.test_case.cpp 22 | lib/mon/src/mon.test_failure.cpp 23 | lib/mon/src/mon.test_runner.cpp) 24 | 25 | # tue 26 | set(TUE_SOURCES 27 | include/tue/detail_/is_arithmetic_simd_component.hpp 28 | include/tue/detail_/is_floating_point_simd_component.hpp 29 | include/tue/detail_/is_integral_simd_component.hpp 30 | include/tue/detail_/is_simd_component.hpp 31 | include/tue/detail_/is_sized_bool.hpp 32 | include/tue/detail_/is_vec_component.hpp 33 | include/tue/detail_/mat2xR.hpp 34 | include/tue/detail_/mat3xR.hpp 35 | include/tue/detail_/mat4xR.hpp 36 | include/tue/detail_/matmult.hpp 37 | include/tue/detail_/simd2.hpp 38 | include/tue/detail_/simdN.hpp 39 | include/tue/detail_/simd_specializations.hpp 40 | include/tue/detail_/simd_support.hpp 41 | include/tue/detail_/simd/sse/bool32x4.sse.hpp 42 | include/tue/detail_/simd/sse/float32x4.sse.hpp 43 | include/tue/detail_/simd/sse2/bool8x16.sse2.hpp 44 | 
include/tue/detail_/simd/sse2/bool16x8.sse2.hpp 45 | include/tue/detail_/simd/sse2/bool64x2.sse2.hpp 46 | include/tue/detail_/simd/sse2/float64x2.sse2.hpp 47 | include/tue/detail_/simd/sse2/int8x16.sse2.hpp 48 | include/tue/detail_/simd/sse2/int16x8.sse2.hpp 49 | include/tue/detail_/simd/sse2/int32x4.sse2.hpp 50 | include/tue/detail_/simd/sse2/int64x2.sse2.hpp 51 | include/tue/detail_/simd/sse2/uint8x16.sse2.hpp 52 | include/tue/detail_/simd/sse2/uint16x8.sse2.hpp 53 | include/tue/detail_/simd/sse2/uint32x4.sse2.hpp 54 | include/tue/detail_/simd/sse2/uint64x2.sse2.hpp 55 | include/tue/detail_/vec2.hpp 56 | include/tue/detail_/vec3.hpp 57 | include/tue/detail_/vec4.hpp 58 | include/tue/mat.hpp 59 | include/tue/math.hpp 60 | include/tue/nocopy_cast.hpp 61 | include/tue/quat.hpp 62 | include/tue/simd.hpp 63 | include/tue/sized_bool.hpp 64 | include/tue/transform.hpp 65 | include/tue/unused.hpp 66 | include/tue/vec.hpp 67 | docs/DoxygenLayout.xml 68 | docs/index.md 69 | Doxyfile 70 | LICENSE_1_0.txt 71 | README.md) 72 | 73 | # tue.tests 74 | set(TUE_TEST_SOURCES 75 | tests/mat2xR.tests.cpp 76 | tests/mat3xR.tests.cpp 77 | tests/mat4xR.tests.cpp 78 | tests/matmult.tests.cpp 79 | tests/math.tests.cpp 80 | tests/nocopy_cast.tests.cpp 81 | tests/quat.tests.cpp 82 | tests/simd.tests.cpp 83 | tests/sized_bool.tests.cpp 84 | tests/transform.tests.cpp 85 | tests/tue.tests.hpp 86 | tests/unused.tests.cpp 87 | tests/vec.tests.cpp 88 | tests/vec2.tests.cpp 89 | tests/vec3.tests.cpp 90 | tests/vec4.tests.cpp) 91 | 92 | add_executable( 93 | tue.tests 94 | ${MON_SOURCES} 95 | ${TUE_SOURCES} 96 | ${TUE_TEST_SOURCES}) 97 | 98 | add_test( 99 | tue.tests 100 | tue.tests) 101 | 102 | # check 103 | add_custom_target( 104 | check 105 | COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure 106 | DEPENDS tue.tests) 107 | -------------------------------------------------------------------------------- /include/tue/detail_/is_simd_component.hpp: 
-------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace tue 15 | { 16 | enum bool8 : std::uint8_t; 17 | enum bool16 : std::uint16_t; 18 | enum bool32 : std::uint32_t; 19 | enum bool64 : std::uint64_t; 20 | 21 | template 22 | struct is_simd_component 23 | : 24 | public std::integral_constant 25 | { 26 | using std::integral_constant::integral_constant; 27 | }; 28 | 29 | template<> 30 | struct is_simd_component 31 | : 32 | public std::integral_constant 33 | { 34 | using std::integral_constant::integral_constant; 35 | }; 36 | 37 | template<> 38 | struct is_simd_component 39 | : 40 | public std::integral_constant 41 | { 42 | using std::integral_constant::integral_constant; 43 | }; 44 | 45 | template<> 46 | struct is_simd_component 47 | : 48 | public std::integral_constant 49 | { 50 | using std::integral_constant::integral_constant; 51 | }; 52 | 53 | template<> 54 | struct is_simd_component 55 | : 56 | public std::integral_constant 57 | { 58 | using std::integral_constant::integral_constant; 59 | }; 60 | 61 | template<> 62 | struct is_simd_component 63 | : 64 | public std::integral_constant 65 | { 66 | using std::integral_constant::integral_constant; 67 | }; 68 | 69 | template<> 70 | struct is_simd_component 71 | : 72 | public std::integral_constant 73 | { 74 | using std::integral_constant::integral_constant; 75 | }; 76 | 77 | template<> 78 | struct is_simd_component 79 | : 80 | public std::integral_constant 81 | { 82 | using std::integral_constant::integral_constant; 83 | }; 84 | 85 | template<> 86 | struct is_simd_component 87 | : 88 | public std::integral_constant 89 
| { 90 | using std::integral_constant::integral_constant; 91 | }; 92 | 93 | template<> 94 | struct is_simd_component 95 | : 96 | public std::integral_constant 97 | { 98 | using std::integral_constant::integral_constant; 99 | }; 100 | 101 | template<> 102 | struct is_simd_component 103 | : 104 | public std::integral_constant 105 | { 106 | using std::integral_constant::integral_constant; 107 | }; 108 | 109 | template<> 110 | struct is_simd_component 111 | : 112 | public std::integral_constant 113 | { 114 | using std::integral_constant::integral_constant; 115 | }; 116 | 117 | template<> 118 | struct is_simd_component 119 | : 120 | public std::integral_constant 121 | { 122 | using std::integral_constant::integral_constant; 123 | }; 124 | 125 | template<> 126 | struct is_simd_component 127 | : 128 | public std::integral_constant 129 | { 130 | using std::integral_constant::integral_constant; 131 | }; 132 | 133 | template<> 134 | struct is_simd_component 135 | : 136 | public std::integral_constant 137 | { 138 | using std::integral_constant::integral_constant; 139 | }; 140 | } 141 | -------------------------------------------------------------------------------- /include/tue/detail_/is_vec_component.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace tue 15 | { 16 | enum bool8 : std::uint8_t; 17 | enum bool16 : std::uint16_t; 18 | enum bool32 : std::uint32_t; 19 | enum bool64 : std::uint64_t; 20 | 21 | template 22 | class simd; 23 | 24 | template 25 | struct is_vec_component 26 | : 27 | public std::integral_constant 28 | { 29 | using std::integral_constant::integral_constant; 30 | }; 31 | 32 | template<> 33 | struct is_vec_component 34 | : 35 | public std::integral_constant 36 | { 37 | using std::integral_constant::integral_constant; 38 | }; 39 | 40 | template<> 41 | struct is_vec_component 42 | : 43 | public std::integral_constant 44 | { 45 | using std::integral_constant::integral_constant; 46 | }; 47 | 48 | template<> 49 | struct is_vec_component 50 | : 51 | public std::integral_constant 52 | { 53 | using std::integral_constant::integral_constant; 54 | }; 55 | 56 | template<> 57 | struct is_vec_component 58 | : 59 | public std::integral_constant 60 | { 61 | using std::integral_constant::integral_constant; 62 | }; 63 | 64 | template<> 65 | struct is_vec_component 66 | : 67 | public std::integral_constant 68 | { 69 | using std::integral_constant::integral_constant; 70 | }; 71 | 72 | template<> 73 | struct is_vec_component 74 | : 75 | public std::integral_constant 76 | { 77 | using std::integral_constant::integral_constant; 78 | }; 79 | 80 | template<> 81 | struct is_vec_component 82 | : 83 | public std::integral_constant 84 | { 85 | using std::integral_constant::integral_constant; 86 | }; 87 | 88 | template<> 89 | struct is_vec_component 90 | : 91 | public std::integral_constant 92 | { 93 | using std::integral_constant::integral_constant; 94 | }; 95 | 96 | template<> 97 | struct is_vec_component 98 | : 99 | public 
std::integral_constant 100 | { 101 | using std::integral_constant::integral_constant; 102 | }; 103 | 104 | template<> 105 | struct is_vec_component 106 | : 107 | public std::integral_constant 108 | { 109 | using std::integral_constant::integral_constant; 110 | }; 111 | 112 | template<> 113 | struct is_vec_component 114 | : 115 | public std::integral_constant 116 | { 117 | using std::integral_constant::integral_constant; 118 | }; 119 | 120 | template<> 121 | struct is_vec_component 122 | : 123 | public std::integral_constant 124 | { 125 | using std::integral_constant::integral_constant; 126 | }; 127 | 128 | template<> 129 | struct is_vec_component 130 | : 131 | public std::integral_constant 132 | { 133 | using std::integral_constant::integral_constant; 134 | }; 135 | 136 | template<> 137 | struct is_vec_component 138 | : 139 | public std::integral_constant 140 | { 141 | using std::integral_constant::integral_constant; 142 | }; 143 | 144 | template 145 | struct is_vec_component> 146 | : 147 | public std::integral_constant 148 | { 149 | using std::integral_constant::integral_constant; 150 | }; 151 | } 152 | -------------------------------------------------------------------------------- /tests/math.tests.cpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #include 10 | #include "tue.tests.hpp" 11 | 12 | #include 13 |
// Unit tests for the scalar overloads of the tue::math functions.
// NOTE(review): the include targets and the static_cast target types are
// missing in this copy; the code is left token-for-token as found.
namespace
{
    using namespace tue;

    // Transcendental functions are compared against their <cmath>
    // counterparts with nearly_equal rather than operator==.
    TEST_CASE(sin)
    {
        test_assert(nearly_equal(math::sin(1.2), std::sin(1.2)));
    }

    TEST_CASE(cos)
    {
        test_assert(nearly_equal(math::cos(1.2), std::cos(1.2)));
    }

    // sincos writes both results through its second and third arguments.
    TEST_CASE(sincos)
    {
        double s, c;
        math::sincos(1.2, s, c);
        test_assert(nearly_equal(s, std::sin(1.2)));
        test_assert(nearly_equal(c, std::cos(1.2)));
    }

    TEST_CASE(exp)
    {
        test_assert(nearly_equal(math::exp(1.2), std::exp(1.2)));
    }

    TEST_CASE(log)
    {
        test_assert(nearly_equal(math::log(1.2), std::log(1.2)));
    }

    // abs is checked exactly for double, signed int and unsigned int
    // arguments.
    TEST_CASE(abs)
    {
        test_assert(math::abs(1.2) == 1.2);
        test_assert(math::abs(-1.2) == 1.2);
        test_assert(math::abs(12) == 12);
        test_assert(math::abs(-12) == 12);
        test_assert(math::abs(12u) == 12u);
    }

    TEST_CASE(pow)
    {
        test_assert(nearly_equal(math::pow(1.2, 3.4), std::pow(1.2, 3.4)));
    }

    // recip(x) is expected to be close to 1 / x.
    TEST_CASE(recip)
    {
        test_assert(nearly_equal(math::recip(1.2), 1 / 1.2));
    }

    TEST_CASE(sqrt)
    {
        test_assert(nearly_equal(math::sqrt(1.2), std::sqrt(1.2)));
    }

    // rsqrt(x) is expected to be close to 1 / sqrt(x).
    TEST_CASE(rsqrt)
    {
        test_assert(nearly_equal(math::rsqrt(1.2), 1 / std::sqrt(1.2)));
    }

    // min/max are checked exactly, for double and for int arguments, with
    // the smaller value appearing on either side.
    TEST_CASE(min)
    {
        test_assert(math::min(1.2, 3.4) == 1.2);
        test_assert(math::min(1.2, -3.4) == -3.4);

        test_assert(math::min(12, 34) == 12);
        test_assert(math::min(12, -34) == -34);
    }

    TEST_CASE(max)
    {
        test_assert(math::max(1.2, 3.4) == 3.4);
        test_assert(math::max(1.2, -3.4) == 1.2);

        test_assert(math::max(12, 34) == 34);
        test_assert(math::max(12, -34) == 12);
    }

    // mask(condition, value) yields value for a true condition and zero for
    // a false one. Each case pairs a sized bool with a value; the 16-bit
    // case casts its operands explicitly (target type not visible here).
    TEST_CASE(mask)
    {
        test_assert(math::mask(true64, 1.2) == 1.2);
        test_assert(math::mask(false64, 1.2) == 0.0);

        test_assert(math::mask(true32, 1) == 1);
        test_assert(math::mask(false32, 1) == 0);

        const auto t = math::mask(
            true16, static_cast(1));
        const auto f = math::mask(
            false16, static_cast(1));
        test_assert(t == static_cast(1));
        test_assert(f == static_cast(0));
    }

    // select(condition, value, otherwise) yields value for a true condition
    // and otherwise for a false one.
    TEST_CASE(select)
    {
        test_assert(math::select(true64, 1.2, 3.4) == 1.2);
        test_assert(math::select(false64, 1.2, 3.4) == 3.4);

        test_assert(math::select(true32, 1, 2) == 1);
        test_assert(math::select(false32, 1, 2) == 2);

        const auto t = math::select(
            true16, static_cast(1), static_cast(2));
        const auto f = math::select(
            false16, static_cast(1), static_cast(2));
        test_assert(t == static_cast(1));
        test_assert(f == static_cast(2));
    }

    // The comparison functions return a sized bool (bool32 for int
    // operands) rather than a plain bool. Each is exercised with a
    // smaller, an equal and a larger left operand.
    TEST_CASE(less)
    {
        test_assert(math::less(1, 2) == true32);
        test_assert(math::less(2, 2) == false32);
        test_assert(math::less(3, 2) == false32);
    }

    TEST_CASE(less_equal)
    {
        test_assert(math::less_equal(1, 2) == true32);
        test_assert(math::less_equal(2, 2) == true32);
        test_assert(math::less_equal(3, 2) == false32);
    }

    TEST_CASE(greater)
    {
        test_assert(math::greater(1, 2) == false32);
        test_assert(math::greater(2, 2) == false32);
        test_assert(math::greater(3, 2) == true32);
    }

    TEST_CASE(greater_equal)
    {
        test_assert(math::greater_equal(1, 2) == false32);
        test_assert(math::greater_equal(2, 2) == true32);
        test_assert(math::greater_equal(3, 2) == true32);
    }

    // equal / not_equal are additionally checked on sized bool operands,
    // covering all four true/false combinations.
    TEST_CASE(equal)
    {
        test_assert(math::equal(1, 2) == false32);
        test_assert(math::equal(2, 2) == true32);
        test_assert(math::equal(3, 2) == false32);

        test_assert(math::equal(true32, true32) == true32);
        test_assert(math::equal(true32, false32) == false32);
        test_assert(math::equal(false32, true32) == false32);
        test_assert(math::equal(false32, false32) == true32);
    }

    TEST_CASE(not_equal)
    {
        test_assert(math::not_equal(1, 2) == true32);
        test_assert(math::not_equal(2, 2) == false32);
        test_assert(math::not_equal(3, 2) == true32);

        test_assert(math::not_equal(true32, true32) == false32);
        test_assert(math::not_equal(true32, false32) == true32);
        test_assert(math::not_equal(false32, true32) == true32);
        test_assert(math::not_equal(false32, false32) == false32);
    }
}
177 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | The Tuesday C++ Vector Math and SIMD Library 2 | ============================================ 3 | The Tuesday C++ Vector Math and SIMD Library is a library of template classes 4 | and math functions with a focus on physics and graphics applications. It 5 | provides data types commonly used in games and other simulations such as 6 | vectors, quaternions, and matrices, SIMD intrinsic wrapper classes completely 7 | separate from (but compatible with) the other types, operator overloads for 8 | combining and manipulating all these types, as well as some other common 9 | mathematical functions. It was written to match the style of the C++ Standard 10 | Library and uses modern C++ features (i.e., C++14) extensively.
11 | 12 | Major Features 13 | -------------- 14 | Tuesday provides the following unique features over other similar libraries such 15 | as [GLM](https://github.com/g-truc/glm): 16 | 17 | - The dimensions of vector and matrix types are template parameters, 18 | unlike GLM where, e.g., `tvec2`, `tvec3`, and `tvec4` are separate types. By 19 | making the dimensions template parameters, it's possible to write one template 20 | function that can operate on and/or produce vectors or matrices of multiple 21 | dimensions. For example, the transformation matrix generation functions 22 | (`translation_mat`, `rotation_mat`, etc.) can produce matrices of multiple 23 | sizes so long as they meet the minimum requirements of each transformation and 24 | are, at the largest, 4x4. 25 | 26 | - It makes heavy use of `decltype` in return types. This makes it possible for 27 | composite types to behave much more like their component types when it comes 28 | to things like implicit type conversions. For example, `fvec3 + dvec3` results 29 | in a `dvec3` just as `float + double` results in a `double`. 30 | 31 | - It uses `constexpr` whenever possible which, as it turns out, is often. 32 | 33 | - SIMD types are completely separate from vector types. This may seem 34 | counter-intuitive, but SIMD vectors aren't very efficient when used as 35 | traditional 3D vectors. The fourth component of an SIMD vector would often go 36 | to waste, and functions where multiple components interact (such as the 37 | `length` function, `dot` product, or `cross` product) would be horribly 38 | inefficient with SIMD intrinsics. Instead, SIMD instructions should be used to 39 | perform the same logic on multiple vectors in parallel. Tuesday is designed 40 | for this use case. For example, `vec3<float32x4> v` could be thought of as 4 41 | parallel 3D vectors (4 x-values, followed by 4 y-values, and finally 4 42 | z-values).
Something like `math::dot(v)` would then compute a single 43 | `float32x4` containing the dot products of those 4 parallel vectors without 44 | any inefficient component shuffling. See 45 | [this answer](http://stackoverflow.com/a/11620369/1195206) to a naive question 46 | I asked on Stack Overflow a few years back for some more rationale. 47 | 48 | - The SIMD system supports a huge number of types. You can create 2, 4, 8, 16, 49 | 32, and 64-component vectors of all the major arithmetic types (`float`, 50 | `double`, `int8_t`, `int16_t`, `int32_t`, `int64_t`, `uint8_t`, `uint16_t`, 51 | `uint32_t`, and `uint64_t`) along with sized boolean types (`bool8`, `bool16`, 52 | `bool32` and `bool64`). If SIMD-intrinsic acceleration isn't available for a 53 | particular type, there's a standard C++-compliant fallback. If a vector has 54 | too many components for acceleration, but a smaller vector with the same 55 | component type can be accelerated, then the larger vector is simply the 56 | composite of two smaller vectors. For example, if `float32x4` is accelerated 57 | but `float32x8` isn't, then `float32x8` will at least be partially-accelerated 58 | in that it's made of two `float32x4`'s. 59 | 60 | Requirements 61 | ------------ 62 | Tuesday requires Visual Studio 2015 or a fully C++14 compliant compiler such as 63 | GCC 5 or Clang 3.4. 64 | 65 | Usage 66 | ----- 67 | Tuesday is a header-only library. Simply make sure the `include` directory in 68 | the root of this project is on your include path. For GCC and Clang, you might 69 | have to provide the compiler option `-std=c++14` or higher as well. 
70 | 71 | Here's a small usage example: 72 | ~~~{.cpp} 73 | #include 74 | #include 75 | #include 76 | #include 77 | #include 78 | 79 | using namespace tue; 80 | 81 | void UpdatePose( 82 | fvec3& translation, 83 | fquat& rotation, 84 | fmat3x4& matrix, 85 | const fvec3& linearVelocity, 86 | const fvec3& angularVelocity, 87 | float deltaTime) 88 | { 89 | translation += linearVelocity * deltaTime; 90 | rotation *= transform::rotation_quat(angularVelocity * deltaTime); 91 | matrix = transform::rotation_mat(rotation) 92 | * transform::translation_mat(translation); 93 | } 94 | 95 | void SimdUpdatePoses( 96 | vec3& translations, 97 | quat& rotations, 98 | mat3x4& matrices, 99 | const vec3& linearVelocities, 100 | const vec3& angularVelocities, 101 | float deltaTime) 102 | { 103 | const float32x4 deltaTimes(deltaTime); 104 | translations += linearVelocities * deltaTimes; 105 | rotations *= transform::rotation_quat(angularVelocities * deltaTimes); 106 | matrices = transform::rotation_mat(rotations) 107 | * transform::translation_mat(translations); 108 | } 109 | ~~~ 110 | 111 | License 112 | ------- 113 | Copyright Jo Bates 2015. 114 | 115 | Distributed under the Boost Software License, Version 1.0. 116 | 117 | See accompanying file [LICENSE_1_0.txt](LICENSE_1_0.txt) or copy at 118 | http://www.boost.org/LICENSE_1_0.txt. 119 | 120 | Bug Reporting 121 | ------------- 122 | Please report any bugs, typos, or suggestions to 123 | https://github.com/Cincinesh/tue/issues. 124 | -------------------------------------------------------------------------------- /tests/sized_bool.tests.cpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #include 10 | #include "tue.tests.hpp" 11 | 12 | #include 13 |
// Unit tests for the sized boolean types (bool8/16/32/64), their true*/false*
// constants, the related type traits and the bitwise operators.
// NOTE(review): the include targets and all angle-bracket argument lists
// (static_cast, std::is_same, is_sized_bool) are missing in this copy; the
// code is left token-for-token as found.
namespace
{
    using namespace tue;

    // For each width: the type and both constants have the expected byte
    // size, the true constant equals a cast of ~0LL and the false constant
    // equals a cast of 0LL (cast target types are not visible here).
    TEST_CASE(bool8)
    {
        test_assert(sizeof(bool8) == 1);
        test_assert(sizeof(true8) == 1);
        test_assert(sizeof(false8) == 1);
        test_assert(true8 == static_cast(~0LL));
        test_assert(false8 == static_cast(0LL));
    }

    TEST_CASE(bool16)
    {
        test_assert(sizeof(bool16) == 2);
        test_assert(sizeof(true16) == 2);
        test_assert(sizeof(false16) == 2);
        test_assert(true16 == static_cast(~0LL));
        test_assert(false16 == static_cast(0LL));
    }

    TEST_CASE(bool32)
    {
        test_assert(sizeof(bool32) == 4);
        test_assert(sizeof(true32) == 4);
        test_assert(sizeof(false32) == 4);
        test_assert(true32 == static_cast(~0LL));
        test_assert(false32 == static_cast(0LL));
    }

    TEST_CASE(bool64)
    {
        test_assert(sizeof(bool64) == 8);
        test_assert(sizeof(true64) == 8);
        test_assert(sizeof(false64) == 8);
        test_assert(true64 == static_cast(~0LL));
        test_assert(false64 == static_cast(0LL));
    }

    // Uses the constants directly as conditions: every true* must be truthy
    // and every false* falsy, otherwise the test fails with the message
    // below.
    TEST_CASE(implicit_cast_to_bool)
    {
        if (!true8 || !true16 || !true32 || !true64
            || false8 || false16 || false32 || false64)
        {
            test_fail(
                "Sized bool types didn't implicitly cast to bool as expected");
        }
    }

    // Each assertion checks that some type expression (its text is missing
    // in this copy; presumably a sized_bool_t instantiation, going by the
    // test name) is the same type as the named sized bool.
    TEST_CASE(sized_bool_t)
    {
        test_assert((std::is_same, bool8>::value));
        test_assert((std::is_same, bool16>::value));
        test_assert((std::is_same, bool32>::value));
        test_assert((std::is_same, bool64>::value));
    }

    // is_sized_bool is expected to be false for the first (unseen) argument
    // and true for the remaining four (presumably bool8..bool64 -- TODO
    // confirm).
    TEST_CASE(is_sized_bool)
    {
        test_assert(is_sized_bool::value == false);
        test_assert(is_sized_bool::value == true);
        test_assert(is_sized_bool::value == true);
        test_assert(is_sized_bool::value == true);
        test_assert(is_sized_bool::value == true);
    }

    // The non-assigning operators are evaluated into constexpr variables,
    // so these tests also require the operators to be usable in constant
    // expressions.
    TEST_CASE(bitwise_not_operator)
    {
        constexpr auto nt = ~true32;
        constexpr auto nf = ~false32;
        test_assert(nt == false32);
        test_assert(nf == true32);
    }

    TEST_CASE(bitwise_and_operator)
    {
        constexpr auto tt = true32 & true32;
        constexpr auto tf = true32 & false32;
        constexpr auto ft = false32 & true32;
        constexpr auto ff = false32 & false32;
        test_assert(tt == true32);
        test_assert(tf == false32);
        test_assert(ft == false32);
        test_assert(ff == false32);
    }

    TEST_CASE(bitwise_or_operator)
    {
        constexpr auto tt = true32 | true32;
        constexpr auto tf = true32 | false32;
        constexpr auto ft = false32 | true32;
        constexpr auto ff = false32 | false32;
        test_assert(tt == true32);
        test_assert(tf == true32);
        test_assert(ft == true32);
        test_assert(ff == false32);
    }

    TEST_CASE(bitwise_xor_operator)
    {
        constexpr auto tt = true32 ^ true32;
        constexpr auto tf = true32 ^ false32;
        constexpr auto ft = false32 ^ true32;
        constexpr auto ff = false32 ^ false32;
        test_assert(tt == false32);
        test_assert(tf == true32);
        test_assert(ft == true32);
        test_assert(ff == false32);
    }

    // The compound-assignment tests check two things per combination: the
    // operator returns a reference to its left operand (address
    // comparison), and the left operand holds the expected value
    // afterwards.
    TEST_CASE(bitwise_and_assignment_operator)
    {
        auto tt = true32;
        test_assert(&(tt &= true32) == &tt);
        test_assert(tt == true32);

        auto tf = true32;
        test_assert(&(tf &= false32) == &tf);
        test_assert(tf == false32);

        auto ft = false32;
        test_assert(&(ft &= true32) == &ft);
        test_assert(ft == false32);

        auto ff = false32;
        test_assert(&(ff &= false32) == &ff);
        test_assert(ff == false32);
    }

    TEST_CASE(bitwise_or_assignment_operator)
    {
        auto tt = true32;
        test_assert(&(tt |= true32) == &tt);
        test_assert(tt == true32);

        auto tf = true32;
        test_assert(&(tf |= false32) == &tf);
        test_assert(tf == true32);

        auto ft = false32;
        test_assert(&(ft |= true32) == &ft);
        test_assert(ft == true32);

        auto ff = false32;
        test_assert(&(ff |= false32) == &ff);
        test_assert(ff == false32);
    }

    TEST_CASE(bitwise_xor_assignment_operator)
    {
        auto tt = true32;
        test_assert(&(tt ^= true32) == &tt);
        test_assert(tt == false32);

        auto tf = true32;
        test_assert(&(tf ^= false32) == &tf);
        test_assert(tf == true32);

        auto ft = false32;
        test_assert(&(ft ^= true32) == &ft);
        test_assert(ft == true32);

        auto ff = false32;
        test_assert(&(ff ^= false32) == &ff);
        test_assert(ff == false32);
    }
}
182 | -------------------------------------------------------------------------------- /docs/DoxygenLayout.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 |
165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | The Tuesday C++ Vector Math and SIMD Library 2 | ============================================ 3 | The Tuesday C++ Vector Math and SIMD Library is a library of template classes 4 | and math functions with a focus on physics and graphics applications. It 5 | provides data types commonly used in games and other simulations such as 6 | vectors, quaternions, and matrices, SIMD intrinsic wrapper classes completely 7 | separate from (but compatible with) the other types, operator overloads for 8 | combining and manipulating all these types, as well as some other common 9 | mathematical functions. It was written to match the style of the C++ Standard 10 | Library and uses modern C++ features (i.e., C++14) extensively. 11 | 12 | Major Features 13 | -------------- 14 | Tuesday provides the following unique features over other similar libraries such 15 | as [GLM](https://github.com/g-truc/glm): 16 | 17 | - The dimensions of vector and matrix types are template parameters, 18 | unlike GLM where, e.g., `tvec2`, `tvec3`, and `tvec4` are separate types. By 19 | making the dimensions template parameters, it's possible to write one template 20 | function that can operate on and/or produce vectors or matrices of multiple 21 | dimensions. For example, the transformation matrix generation functions 22 | (`translation_mat`, `rotation_mat`, etc.) can produce matrices of multiple 23 | sizes so long as they meet the minimum requirements of each transformation and 24 | are, at the largest, 4x4. 25 | 26 | - It makes heavy use of `decltype` in return types. This makes it possible for 27 | composite types to behave much more like their component types when it comes 28 | to things like implicit type conversions. 
For example, `fvec3 + dvec3` results 29 | in a `dvec3` just as `float + double` results in a `double`. 30 | 31 | - It uses `constexpr` whenever possible which, as it turns out, is often. 32 | 33 | - SIMD types are completely separate from vector types. This may seem 34 | counter-intuitive, but SIMD vectors aren't very efficient when used as 35 | traditional 3D vectors. The fourth component of an SIMD vector would often go 36 | to waste, and functions where multiple components interact (such as the 37 | `length` function, `dot` product, or `cross` product) would be horribly 38 | inefficient with SIMD intrinsics. Instead, SIMD instructions should be used to 39 | perform the same logic on multiple vectors in parallel. Tuesday is designed 40 | for this use case. For example, `vec3<float32x4> v` could be thought of as 4 41 | parallel 3D vectors (4 x-values, followed by 4 y-values, and finally 4 42 | z-values). Something like `math::dot(v)` would then compute a single 43 | `float32x4` containing the dot products of those 4 parallel vectors without 44 | any inefficient component shuffling. See 45 | [this answer](http://stackoverflow.com/a/11620369/1195206) to a naive question 46 | I asked on Stack Overflow a few years back for some more rationale. 47 | 48 | - The SIMD system supports a huge number of types. You can create 2, 4, 8, 16, 49 | 32, and 64-component vectors of all the major arithmetic types (`float`, 50 | `double`, `int8_t`, `int16_t`, `int32_t`, `int64_t`, `uint8_t`, `uint16_t`, 51 | `uint32_t`, and `uint64_t`) along with sized boolean types (`bool8`, `bool16`, 52 | `bool32` and `bool64`). If SIMD-intrinsic acceleration isn't available for a 53 | particular type, there's a standard C++-compliant fallback. If a vector has 54 | too many components for acceleration, but a smaller vector with the same 55 | component type can be accelerated, then the larger vector is simply the 56 | composite of two smaller vectors.
For example, if `float32x4` is accelerated 57 | but `float32x8` isn't, then `float32x8` will at least be partially-accelerated 58 | in that it's made of two `float32x4`'s. 59 | 60 | Requirements 61 | ------------ 62 | Tuesday requires Visual Studio 2015 or a fully C++14 compliant compiler such as 63 | GCC 5 or Clang 3.4. 64 | 65 | Usage 66 | ----- 67 | Tuesday is a header-only library. Simply make sure the `include` directory in 68 | the root of this project is on your include path. For GCC and Clang, you might 69 | have to provide the compiler option `-std=c++14` or higher as well. 70 | 71 | Here's a small usage example: 72 | ~~~{.cpp} 73 | #include 74 | #include 75 | #include 76 | #include 77 | #include 78 | 79 | using namespace tue; 80 | 81 | void UpdatePose( 82 | fvec3& translation, 83 | fquat& rotation, 84 | fmat3x4& matrix, 85 | const fvec3& linearVelocity, 86 | const fvec3& angularVelocity, 87 | float deltaTime) 88 | { 89 | translation += linearVelocity * deltaTime; 90 | rotation *= transform::rotation_quat(angularVelocity * deltaTime); 91 | matrix = transform::rotation_mat(rotation) 92 | * transform::translation_mat(translation); 93 | } 94 | 95 | void SimdUpdatePoses( 96 | vec3& translations, 97 | quat& rotations, 98 | mat3x4& matrices, 99 | const vec3& linearVelocities, 100 | const vec3& angularVelocities, 101 | float deltaTime) 102 | { 103 | const float32x4 deltaTimes(deltaTime); 104 | translations += linearVelocities * deltaTimes; 105 | rotations *= transform::rotation_quat(angularVelocities * deltaTimes); 106 | matrices = transform::rotation_mat(rotations) 107 | * transform::translation_mat(translations); 108 | } 109 | ~~~ 110 | 111 | Documentation 112 | ------------- 113 | Tuesday documentation can be generated with 114 | [Doxygen](http://www.stack.nl/~dimitri/doxygen/) using the `Doxyfile` at the 115 | root of this project. 
A copy is also published at: 116 | 117 | http://cincinesh.github.io/tue/master/docs/index.html 118 | 119 | Testing 120 | ------- 121 | Here are some tips for running the unit tests: 122 | - This repository uses git submodules. After cloning, make sure to use 123 | `git submodule init` and `git submodule update`. 124 | - This project uses a fairly simple [CMake](http://www.cmake.org/) 125 | configuration. Use CMake to generate IDE project files or build scripts and 126 | simply build the `check` target to run the unit tests. 127 | 128 | License 129 | ------- 130 | Copyright Jo Bates 2015. 131 | 132 | Distributed under the Boost Software License, Version 1.0. 133 | 134 | See accompanying file [LICENSE_1_0.txt](LICENSE_1_0.txt) or copy at 135 | http://www.boost.org/LICENSE_1_0.txt. 136 | 137 | Bug Reporting 138 | ------------- 139 | Please report any bugs, typos, or suggestions to 140 | https://github.com/Cincinesh/tue/issues. 141 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/bool64x2.sse2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include "../../../simd.hpp" 16 | #include "../../../sized_bool.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static bool64x2 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | bool64(s.data()[0]), 32 | bool64(s.data()[1]), 33 | }; 34 | } 35 | 36 | public: 37 | using component_type = bool64; 38 | 39 | static constexpr int component_count = 2; 40 | 41 | static constexpr bool is_accelerated = true; 42 | 43 | simd() noexcept = default; 44 | 45 | explicit simd(bool64 x) noexcept 46 | : 47 | underlying_(_mm_set1_epi64x(x)) 48 | { 49 | } 50 | 51 | template> 52 | inline simd( 53 | bool64 x, bool64 y) noexcept 54 | : 55 | underlying_(_mm_set_epi64x(y, x)) 56 | { 57 | } 58 | 59 | template> 60 | inline simd( 61 | bool64 x, bool64 y, bool64 z, bool64 w) noexcept; 62 | 63 | template> 64 | inline simd( 65 | bool64 s0, bool64 s1, bool64 s2, bool64 s3, 66 | bool64 s4, bool64 s5, bool64 s6, bool64 s7) noexcept; 67 | 68 | template> 69 | inline simd( 70 | bool64 s0, bool64 s1, bool64 s2, bool64 s3, 71 | bool64 s4, bool64 s5, bool64 s6, bool64 s7, 72 | bool64 s8, bool64 s9, bool64 s10, bool64 s11, 73 | bool64 s12, bool64 s13, bool64 s14, bool64 s15) noexcept; 74 | 75 | template 76 | explicit simd(const simd& s) noexcept 77 | { 78 | *this = explicit_cast(s); 79 | } 80 | 81 | simd(__m128d underlying) noexcept 82 | : 83 | underlying_(_mm_castpd_si128(underlying)) 84 | { 85 | } 86 | 87 | operator __m128d() const noexcept 88 | { 89 | return _mm_castsi128_pd(underlying_); 90 | } 91 | 92 | simd(__m128i underlying) noexcept 93 | : 94 | underlying_(underlying) 95 | { 96 
| } 97 | 98 | operator __m128i() const noexcept 99 | { 100 | return underlying_; 101 | } 102 | 103 | static bool64x2 zero() noexcept 104 | { 105 | return _mm_setzero_si128(); 106 | } 107 | 108 | static bool64x2 load(const bool64* data) noexcept 109 | { 110 | return _mm_load_si128(reinterpret_cast(data)); 111 | } 112 | 113 | static bool64x2 loadu(const bool64* data) noexcept 114 | { 115 | return _mm_loadu_si128(reinterpret_cast(data)); 116 | } 117 | 118 | void store(bool64* data) const noexcept 119 | { 120 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 121 | } 122 | 123 | void storeu(bool64* data) const noexcept 124 | { 125 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 126 | } 127 | 128 | const bool64* data() const noexcept 129 | { 130 | return reinterpret_cast(&underlying_); 131 | } 132 | 133 | bool64* data() noexcept 134 | { 135 | return reinterpret_cast(&underlying_); 136 | } 137 | }; 138 | } 139 | 140 | namespace tue 141 | { 142 | namespace detail_ 143 | { 144 | inline bool64x2 bitwise_not_operator_s( 145 | const bool64x2& s) noexcept 146 | { 147 | return _mm_xor_si128(s, bool64x2(true64)); 148 | } 149 | 150 | inline bool64x2 bitwise_and_operator_ss( 151 | const bool64x2& lhs, const bool64x2& rhs) noexcept 152 | { 153 | return _mm_and_si128(lhs, rhs); 154 | } 155 | 156 | inline bool64x2 bitwise_or_operator_ss( 157 | const bool64x2& lhs, const bool64x2& rhs) noexcept 158 | { 159 | return _mm_or_si128(lhs, rhs); 160 | } 161 | 162 | inline bool64x2 bitwise_xor_operator_ss( 163 | const bool64x2& lhs, const bool64x2& rhs) noexcept 164 | { 165 | return _mm_xor_si128(lhs, rhs); 166 | } 167 | 168 | inline bool64x2& bitwise_and_assignment_operator_ss( 169 | bool64x2& lhs, const bool64x2& rhs) noexcept 170 | { 171 | return lhs = _mm_and_si128(lhs, rhs); 172 | } 173 | 174 | inline bool64x2& bitwise_or_assignment_operator_ss( 175 | bool64x2& lhs, const bool64x2& rhs) noexcept 176 | { 177 | return lhs = _mm_or_si128(lhs, rhs); 178 | } 
179 | 180 | inline bool64x2& bitwise_xor_assignment_operator_ss( 181 | bool64x2& lhs, const bool64x2& rhs) noexcept 182 | { 183 | return lhs = _mm_xor_si128(lhs, rhs); 184 | } 185 | 186 | inline bool equality_operator_ss( 187 | bool64x2& lhs, const bool64x2& rhs) noexcept 188 | { 189 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 190 | } 191 | 192 | inline bool inequality_operator_ss( 193 | bool64x2& lhs, const bool64x2& rhs) noexcept 194 | { 195 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 196 | } 197 | 198 | inline bool64x2 mask_ss( 199 | const bool64x2& conditions, 200 | const bool64x2& values) noexcept 201 | { 202 | return _mm_and_si128(conditions, values); 203 | } 204 | 205 | inline bool64x2 select_sss( 206 | const bool64x2& conditions, 207 | const bool64x2& values, 208 | const bool64x2& otherwise) noexcept 209 | { 210 | return _mm_or_si128( 211 | _mm_and_si128(conditions, values), 212 | _mm_andnot_si128(conditions, otherwise)); 213 | } 214 | 215 | inline bool64x2 equal_ss( 216 | const bool64x2& lhs, const bool64x2& rhs) noexcept 217 | { 218 | const auto cmp = _mm_cmpeq_epi32(lhs, rhs); 219 | const auto hi = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(3, 3, 1, 1)); 220 | const auto lo = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 2, 0, 0)); 221 | return _mm_and_si128(hi, lo); 222 | } 223 | 224 | inline bool64x2 not_equal_ss( 225 | const bool64x2& lhs, const bool64x2& rhs) noexcept 226 | { 227 | return _mm_xor_si128(equal_ss(lhs, rhs), bool64x2(true64)); 228 | } 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse/bool32x4.sse.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include "../../../simd.hpp" 16 | #include "../../../sized_bool.hpp" 17 | 18 | #ifdef TUE_SSE2 19 | #include 20 | #endif 21 | 22 | namespace tue 23 | { 24 | template<> 25 | class alignas(tue::detail_::alignof_simd()) 26 | simd 27 | { 28 | __m128 underlying_; 29 | 30 | private: 31 | template 32 | static bool32x4 explicit_cast(const simd& s) noexcept 33 | { 34 | return { 35 | bool32(s.data()[0]), 36 | bool32(s.data()[1]), 37 | bool32(s.data()[2]), 38 | bool32(s.data()[3]), 39 | }; 40 | } 41 | 42 | public: 43 | using component_type = bool32; 44 | 45 | static constexpr int component_count = 4; 46 | 47 | static constexpr bool is_accelerated = true; 48 | 49 | simd() noexcept = default; 50 | 51 | explicit simd(bool32 x) noexcept 52 | : 53 | underlying_(_mm_set_ps1(tue::detail_::binary_float(x))) 54 | { 55 | } 56 | 57 | template> 58 | inline simd( 59 | bool32 x, bool32 y) noexcept; 60 | 61 | template> 62 | inline simd( 63 | bool32 x, bool32 y, bool32 z, bool32 w) noexcept 64 | : 65 | underlying_(_mm_setr_ps( 66 | tue::detail_::binary_float(x), 67 | tue::detail_::binary_float(y), 68 | tue::detail_::binary_float(z), 69 | tue::detail_::binary_float(w))) 70 | { 71 | } 72 | 73 | template> 74 | inline simd( 75 | bool32 s0, bool32 s1, bool32 s2, bool32 s3, 76 | bool32 s4, bool32 s5, bool32 s6, bool32 s7) noexcept; 77 | 78 | template> 79 | inline simd( 80 | bool32 s0, bool32 s1, bool32 s2, bool32 s3, 81 | bool32 s4, bool32 s5, bool32 s6, bool32 s7, 82 | bool32 s8, bool32 s9, bool32 s10, bool32 s11, 83 | bool32 s12, bool32 s13, bool32 s14, bool32 s15) noexcept; 84 | 85 | template 86 | explicit simd(const simd& s) noexcept 87 | { 88 | *this = explicit_cast(s); 89 | } 90 | 91 | simd(__m128 underlying) 
noexcept 92 | : 93 | underlying_(underlying) 94 | { 95 | } 96 | 97 | operator __m128() const noexcept 98 | { 99 | return underlying_; 100 | } 101 | 102 | #ifdef TUE_SSE2 103 | simd(__m128i underlying) noexcept 104 | : 105 | underlying_(_mm_castsi128_ps(underlying)) 106 | { 107 | } 108 | 109 | operator __m128i() const noexcept 110 | { 111 | return _mm_castps_si128(underlying_); 112 | } 113 | #endif 114 | 115 | static bool32x4 zero() noexcept 116 | { 117 | return _mm_setzero_ps(); 118 | } 119 | 120 | static bool32x4 load(const bool32* data) noexcept 121 | { 122 | return _mm_load_ps(reinterpret_cast(data)); 123 | } 124 | 125 | static bool32x4 loadu(const bool32* data) noexcept 126 | { 127 | return _mm_loadu_ps(reinterpret_cast(data)); 128 | } 129 | 130 | void store(bool32* data) const noexcept 131 | { 132 | _mm_store_ps(reinterpret_cast(data), underlying_); 133 | } 134 | 135 | void storeu(bool32* data) const noexcept 136 | { 137 | _mm_storeu_ps(reinterpret_cast(data), underlying_); 138 | } 139 | 140 | const bool32* data() const noexcept 141 | { 142 | return reinterpret_cast(&underlying_); 143 | } 144 | 145 | bool32* data() noexcept 146 | { 147 | return reinterpret_cast(&underlying_); 148 | } 149 | }; 150 | } 151 | 152 | namespace tue 153 | { 154 | namespace detail_ 155 | { 156 | inline bool32x4 bitwise_not_operator_s( 157 | const bool32x4& s) noexcept 158 | { 159 | return _mm_xor_ps(s, bool32x4(true32)); 160 | } 161 | 162 | inline bool32x4 bitwise_and_operator_ss( 163 | const bool32x4& lhs, const bool32x4& rhs) noexcept 164 | { 165 | return _mm_and_ps(lhs, rhs); 166 | } 167 | 168 | inline bool32x4 bitwise_or_operator_ss( 169 | const bool32x4& lhs, const bool32x4& rhs) noexcept 170 | { 171 | return _mm_or_ps(lhs, rhs); 172 | } 173 | 174 | inline bool32x4 bitwise_xor_operator_ss( 175 | const bool32x4& lhs, const bool32x4& rhs) noexcept 176 | { 177 | return _mm_xor_ps(lhs, rhs); 178 | } 179 | 180 | inline bool32x4& bitwise_and_assignment_operator_ss( 181 | bool32x4& lhs, 
const bool32x4& rhs) noexcept 182 | { 183 | return lhs = _mm_and_ps(lhs, rhs); 184 | } 185 | 186 | inline bool32x4& bitwise_or_assignment_operator_ss( 187 | bool32x4& lhs, const bool32x4& rhs) noexcept 188 | { 189 | return lhs = _mm_or_ps(lhs, rhs); 190 | } 191 | 192 | inline bool32x4& bitwise_xor_assignment_operator_ss( 193 | bool32x4& lhs, const bool32x4& rhs) noexcept 194 | { 195 | return lhs = _mm_xor_ps(lhs, rhs); 196 | } 197 | /* Whole-vector comparisons: equality_operator_ss returns true when all 16 bytes of lhs and rhs compare equal, inequality_operator_ss when at least one byte differs. Neither operand is modified, so both are taken by const reference, which also lets const operands use these overloads. */ 198 | #ifdef TUE_SSE2 199 | inline bool equality_operator_ss( 200 | const bool32x4& lhs, const bool32x4& rhs) noexcept 201 | { 202 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 203 | } 204 | 205 | inline bool inequality_operator_ss( 206 | const bool32x4& lhs, const bool32x4& rhs) noexcept 207 | { 208 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 209 | } 210 | #endif 211 | 212 | inline bool32x4 mask_ss( 213 | const bool32x4& conditions, 214 | const bool32x4& values) noexcept 215 | { 216 | return _mm_and_ps(conditions, values); 217 | } 218 | 219 | inline bool32x4 select_sss( 220 | const bool32x4& conditions, 221 | const bool32x4& values, 222 | const bool32x4& otherwise) noexcept 223 | { 224 | return _mm_or_ps( 225 | _mm_and_ps(conditions, values), 226 | _mm_andnot_ps(conditions, otherwise)); 227 | } 228 | 229 | #ifdef TUE_SSE2 230 | inline bool32x4 equal_ss( 231 | const bool32x4& lhs, const bool32x4& rhs) noexcept 232 | { 233 | return _mm_cmpeq_epi32(lhs, rhs); 234 | } 235 | 236 | inline bool32x4 not_equal_ss( 237 | const bool32x4& lhs, const bool32x4& rhs) noexcept 238 | { 239 | return _mm_xor_si128(_mm_cmpeq_epi32(lhs, rhs), bool32x4(true32)); 240 | } 241 | #endif 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/bool16x8.sse2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include "../../../simd.hpp" 16 | #include "../../../sized_bool.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static bool16x8 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | bool16(s.data()[0]), 32 | bool16(s.data()[1]), 33 | bool16(s.data()[2]), 34 | bool16(s.data()[3]), 35 | bool16(s.data()[4]), 36 | bool16(s.data()[5]), 37 | bool16(s.data()[6]), 38 | bool16(s.data()[7]), 39 | }; 40 | } 41 | 42 | inline static bool16x8 explicit_cast(const int16x8& s) noexcept; 43 | 44 | inline static bool16x8 explicit_cast(const uint16x8& s) noexcept; 45 | 46 | public: 47 | using component_type = bool16; 48 | 49 | static constexpr int component_count = 8; 50 | 51 | static constexpr bool is_accelerated = true; 52 | 53 | simd() noexcept = default; 54 | 55 | explicit simd(bool16 x) noexcept 56 | : 57 | underlying_(_mm_set1_epi16(x)) 58 | { 59 | } 60 | 61 | template> 62 | inline simd( 63 | bool16 x, bool16 y) noexcept; 64 | 65 | template> 66 | inline simd( 67 | bool16 x, bool16 y, bool16 z, bool16 w) noexcept; 68 | 69 | template> 70 | inline simd( 71 | bool16 s0, bool16 s1, bool16 s2, bool16 s3, 72 | bool16 s4, bool16 s5, bool16 s6, bool16 s7) noexcept 73 | : 74 | underlying_(_mm_setr_epi16( 75 | s0, s1, s2, s3, s4, s5, s6, s7)) 76 | { 77 | } 78 | 79 | template> 80 | inline simd( 81 | bool16 s0, bool16 s1, bool16 s2, bool16 s3, 82 | bool16 s4, bool16 s5, bool16 s6, bool16 s7, 83 | bool16 s8, bool16 s9, bool16 s10, bool16 s11, 84 | bool16 s12, bool16 s13, bool16 s14, bool16 s15) noexcept; 85 | 86 | template 87 | explicit simd(const simd& s) noexcept 88 | { 89 
| *this = explicit_cast(s); 90 | } 91 | 92 | simd(__m128i underlying) noexcept 93 | : 94 | underlying_(underlying) 95 | { 96 | } 97 | 98 | operator __m128i() const noexcept 99 | { 100 | return underlying_; 101 | } 102 | 103 | static bool16x8 zero() noexcept 104 | { 105 | return _mm_setzero_si128(); 106 | } 107 | 108 | static bool16x8 load(const bool16* data) noexcept 109 | { 110 | return _mm_load_si128(reinterpret_cast(data)); 111 | } 112 | 113 | static bool16x8 loadu(const bool16* data) noexcept 114 | { 115 | return _mm_loadu_si128(reinterpret_cast(data)); 116 | } 117 | 118 | void store(bool16* data) const noexcept 119 | { 120 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 121 | } 122 | 123 | void storeu(bool16* data) const noexcept 124 | { 125 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 126 | } 127 | 128 | const bool16* data() const noexcept 129 | { 130 | return reinterpret_cast(&underlying_); 131 | } 132 | 133 | bool16* data() noexcept 134 | { 135 | return reinterpret_cast(&underlying_); 136 | } 137 | }; 138 | } 139 | 140 | #include "int16x8.sse2.hpp" 141 | #include "uint16x8.sse2.hpp" 142 | 143 | namespace tue 144 | { 145 | inline bool16x8 bool16x8::explicit_cast(const int16x8& s) noexcept 146 | { 147 | return __m128i(s); 148 | } 149 | 150 | inline bool16x8 bool16x8::explicit_cast(const uint16x8& s) noexcept 151 | { 152 | return __m128i(s); 153 | } 154 | 155 | namespace detail_ 156 | { 157 | inline bool16x8 bitwise_not_operator_s( 158 | const bool16x8& s) noexcept 159 | { 160 | return _mm_xor_si128(s, bool16x8(true16)); 161 | } 162 | 163 | inline bool16x8 bitwise_and_operator_ss( 164 | const bool16x8& lhs, const bool16x8& rhs) noexcept 165 | { 166 | return _mm_and_si128(lhs, rhs); 167 | } 168 | 169 | inline bool16x8 bitwise_or_operator_ss( 170 | const bool16x8& lhs, const bool16x8& rhs) noexcept 171 | { 172 | return _mm_or_si128(lhs, rhs); 173 | } 174 | 175 | inline bool16x8 bitwise_xor_operator_ss( 176 | const 
bool16x8& lhs, const bool16x8& rhs) noexcept 177 | { 178 | return _mm_xor_si128(lhs, rhs); 179 | } 180 | 181 | inline bool16x8& bitwise_and_assignment_operator_ss( 182 | bool16x8& lhs, const bool16x8& rhs) noexcept 183 | { 184 | return lhs = _mm_and_si128(lhs, rhs); 185 | } 186 | 187 | inline bool16x8& bitwise_or_assignment_operator_ss( 188 | bool16x8& lhs, const bool16x8& rhs) noexcept 189 | { 190 | return lhs = _mm_or_si128(lhs, rhs); 191 | } 192 | 193 | inline bool16x8& bitwise_xor_assignment_operator_ss( 194 | bool16x8& lhs, const bool16x8& rhs) noexcept 195 | { 196 | return lhs = _mm_xor_si128(lhs, rhs); 197 | } 198 | /* Whole-vector comparisons: equality_operator_ss returns true when all 16 bytes of lhs and rhs compare equal, inequality_operator_ss when at least one byte differs. Neither operand is modified, so both are taken by const reference, which also lets const operands use these overloads. */ 199 | inline bool equality_operator_ss( 200 | const bool16x8& lhs, const bool16x8& rhs) noexcept 201 | { 202 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 203 | } 204 | 205 | inline bool inequality_operator_ss( 206 | const bool16x8& lhs, const bool16x8& rhs) noexcept 207 | { 208 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 209 | } 210 | 211 | inline bool16x8 mask_ss( 212 | const bool16x8& conditions, 213 | const bool16x8& values) noexcept 214 | { 215 | return _mm_and_si128(conditions, values); 216 | } 217 | 218 | inline bool16x8 select_sss( 219 | const bool16x8& conditions, 220 | const bool16x8& values, 221 | const bool16x8& otherwise) noexcept 222 | { 223 | return _mm_or_si128( 224 | _mm_and_si128(conditions, values), 225 | _mm_andnot_si128(conditions, otherwise)); 226 | } 227 | 228 | inline bool16x8 equal_ss( 229 | const bool16x8& lhs, const bool16x8& rhs) noexcept 230 | { 231 | return _mm_cmpeq_epi16(lhs, rhs); 232 | } 233 | 234 | inline bool16x8 not_equal_ss( 235 | const bool16x8& lhs, const bool16x8& rhs) noexcept 236 | { 237 | return _mm_xor_si128(_mm_cmpeq_epi16(lhs, rhs), bool16x8(true16)); 238 | } 239 | } 240 | } 241 | -------------------------------------------------------------------------------- /include/tue/sized_bool.hpp: -------------------------------------------------------------------------------- 1 | // 
Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tue 16 | { 17 | /*! 18 | * \defgroup sized_bool_hpp 19 | * 20 | * \brief The sized boolean types and their associated utility structs 21 | * and functions. 22 | * @{ 23 | */ 24 | 25 | /*! 26 | * \brief An 8-bit boolean value. 27 | */ 28 | enum bool8 : std::uint8_t 29 | { 30 | /*! 31 | * \brief An 8-bit boolean value with all 8 bits set to 1 (true). 32 | */ 33 | true8 = UINT8_C(0xFF), 34 | 35 | /*! 36 | * \brief An 8-bit boolean value with all 8 bits set to 0 (false). 37 | */ 38 | false8 = UINT8_C(0x00), 39 | }; 40 | 41 | /*! 42 | * \brief A 16-bit boolean value. 43 | */ 44 | enum bool16 : std::uint16_t 45 | { 46 | /*! 47 | * \brief A 16-bit boolean value with all 16 bits set to 1 (true). 48 | */ 49 | true16 = UINT16_C(0xFFFF), 50 | 51 | /*! 52 | * \brief A 16-bit boolean value with all 16 bits set to 0 (false). 53 | */ 54 | false16 = UINT16_C(0x0000), 55 | }; 56 | 57 | /*! 58 | * \brief A 32-bit boolean value. 59 | */ 60 | enum bool32 : std::uint32_t 61 | { 62 | /*! 63 | * \brief A 32-bit boolean value with all 32 bits set to 1 (true). 64 | */ 65 | true32 = UINT32_C(0xFFFFFFFF), 66 | 67 | /*! 68 | * \brief A 32-bit boolean value with all 32 bits set to 0 (false). 69 | */ 70 | false32 = UINT32_C(0x00000000), 71 | }; 72 | 73 | /*! 74 | * \brief A 64-bit boolean value. 75 | */ 76 | enum bool64 : std::uint64_t 77 | { 78 | /*! 79 | * \brief A 64-bit boolean value with all 64 bits set to 1 (true). 80 | */ 81 | true64 = UINT64_C(0xFFFFFFFFFFFFFFFF), 82 | 83 | /*! 84 | * \brief A 64-bit boolean value with all 64 bits set to 0 (false). 
85 | */ 86 | false64 = UINT64_C(0x0000000000000000), 87 | }; 88 | 89 | /**/ 90 | namespace detail_ 91 | { /* Maps a size in bytes (1, 2, 4 or 8) to the sized boolean type of that size via the nested `type` alias. */ 92 | template struct sized_bool_utils; 93 | template<> struct sized_bool_utils<1> { using type = bool8; }; 94 | template<> struct sized_bool_utils<2> { using type = bool16; }; 95 | template<> struct sized_bool_utils<4> { using type = bool32; }; 96 | template<> struct sized_bool_utils<8> { using type = bool64; }; 97 | } 98 | 99 | /*! 100 | * \brief A type alias for the sized boolean type (`bool8`, `bool16`, 101 | * `bool32`, or `bool64`) with the given size. 102 | * 103 | * \tparam Size The desired sized boolean type size (in bytes). 104 | */ 105 | template 106 | using sized_bool_t = typename detail_::sized_bool_utils::type; 107 | 108 | /*! 109 | * \brief Checks if a type is a sized boolean type. 110 | * \details Extends `std::integral_constant<bool, true>` for the following 111 | * types and `std::integral_constant<bool, false>` otherwise: 112 | * - `tue::bool8` 113 | * - `tue::bool16` 114 | * - `tue::bool32` 115 | * - `tue::bool64` 116 | * 117 | * \tparam T The type to check. 118 | */ 119 | template 120 | struct is_sized_bool; 121 | 122 | /*!@}*/ 123 | } 124 | 125 | #include "detail_/is_sized_bool.hpp" 126 | 127 | namespace tue 128 | { 129 | /*! 130 | * \addtogroup sized_bool_hpp 131 | * @{ 132 | */ 133 | 134 | /*! 135 | * \brief Computes the bitwise NOT of `b`. 136 | * 137 | * \tparam T The type of parameter `b`. 138 | * 139 | * \param b A sized boolean value. 140 | * 141 | * \return The bitwise NOT of `b`. 142 | */ 143 | template 144 | inline constexpr std::enable_if_t::value, T> 145 | operator~(T b) noexcept 146 | { 147 | using U = std::underlying_type_t; 148 | return T(~U(b)); 149 | } 150 | 151 | /*! 152 | * \brief Computes the bitwise AND of `lhs` and `rhs`. 153 | * 154 | * \tparam T The type of parameters `lhs` and `rhs`. 155 | * 156 | * \param lhs The left-hand side operand. 157 | * \param rhs The right-hand side operand. 
158 | * 159 | * \return The bitwise AND of `lhs` and `rhs`. 160 | */ 161 | template 162 | inline constexpr std::enable_if_t::value, T> 163 | operator&(T lhs, T rhs) noexcept 164 | { 165 | using U = std::underlying_type_t; 166 | return T(U(lhs) & U(rhs)); 167 | } 168 | 169 | /*! 170 | * \brief Computes the bitwise OR of `lhs` and `rhs`. 171 | * 172 | * \tparam T The type of parameters `lhs` and `rhs`. 173 | * 174 | * \param lhs The left-hand side operand. 175 | * \param rhs The right-hand side operand. 176 | * 177 | * \return The bitwise OR of `lhs` and `rhs`. 178 | */ 179 | template 180 | inline constexpr std::enable_if_t::value, T> 181 | operator|(T lhs, T rhs) noexcept 182 | { 183 | using U = std::underlying_type_t; 184 | return T(U(lhs) | U(rhs)); 185 | } 186 | 187 | /*! 188 | * \brief Computes the bitwise XOR of `lhs` and `rhs`. 189 | * 190 | * \tparam T The type of parameters `lhs` and `rhs`. 191 | * 192 | * \param lhs The left-hand side operand. 193 | * \param rhs The right-hand side operand. 194 | * 195 | * \return The bitwise XOR of `lhs` and `rhs`. 196 | */ 197 | template 198 | inline constexpr std::enable_if_t::value, T> 199 | operator^(T lhs, T rhs) noexcept 200 | { 201 | using U = std::underlying_type_t; 202 | return T(U(lhs) ^ U(rhs)); 203 | } 204 | 205 | /*! 206 | * \brief Bitwise ANDs `lhs` with `rhs`. 207 | * 208 | * \tparam T The type of parameters `lhs` and `rhs`. 209 | * 210 | * \param lhs The left-hand side operand. 211 | * \param rhs The right-hand side operand. 212 | * 213 | * \return A reference to `lhs`. 214 | */ 215 | template 216 | inline std::enable_if_t::value, T&> 217 | operator&=(T& lhs, T rhs) noexcept 218 | { 219 | return lhs = lhs & rhs; 220 | } 221 | 222 | /*! 223 | * \brief Bitwise ORs `lhs` with `rhs`. 224 | * 225 | * \tparam T The type of parameters `lhs` and `rhs`. 226 | * 227 | * \param lhs The left-hand side operand. 228 | * \param rhs The right-hand side operand. 229 | * 230 | * \return A reference to `lhs`. 
231 | */ 232 | template 233 | inline std::enable_if_t::value, T&> 234 | operator|=(T& lhs, T rhs) noexcept 235 | { 236 | return lhs = lhs | rhs; 237 | } 238 | 239 | /*! 240 | * \brief Bitwise XORs `lhs` with `rhs`. 241 | * 242 | * \tparam T The type of parameters `lhs` and `rhs`. 243 | * 244 | * \param lhs The left-hand side operand. 245 | * \param rhs The right-hand side operand. 246 | * 247 | * \return A reference to `lhs`. 248 | */ 249 | template 250 | inline std::enable_if_t::value, T&> 251 | operator^=(T& lhs, T rhs) noexcept 252 | { 253 | return lhs = lhs ^ rhs; 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/bool8x16.sse2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include "../../../simd.hpp" 16 | #include "../../../sized_bool.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static bool8x16 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | bool8(s.data()[0]), 32 | bool8(s.data()[1]), 33 | bool8(s.data()[2]), 34 | bool8(s.data()[3]), 35 | bool8(s.data()[4]), 36 | bool8(s.data()[5]), 37 | bool8(s.data()[6]), 38 | bool8(s.data()[7]), 39 | bool8(s.data()[8]), 40 | bool8(s.data()[9]), 41 | bool8(s.data()[10]), 42 | bool8(s.data()[11]), 43 | bool8(s.data()[12]), 44 | bool8(s.data()[13]), 45 | bool8(s.data()[14]), 46 | bool8(s.data()[15]), 47 | }; 48 | } 49 | 50 | inline static bool8x16 explicit_cast(const int8x16& s) 
noexcept; 51 | 52 | inline static bool8x16 explicit_cast(const uint8x16& s) noexcept; 53 | 54 | public: 55 | using component_type = bool8; 56 | 57 | static constexpr int component_count = 16; 58 | 59 | static constexpr bool is_accelerated = true; 60 | 61 | simd() noexcept = default; 62 | 63 | explicit simd(bool8 x) noexcept 64 | : 65 | underlying_(_mm_set1_epi8(x)) 66 | { 67 | } 68 | 69 | template> 70 | inline simd( 71 | bool8 x, bool8 y) noexcept; 72 | 73 | template> 74 | inline simd( 75 | bool8 x, bool8 y, bool8 z, bool8 w) noexcept; 76 | 77 | template> 78 | inline simd( 79 | bool8 s0, bool8 s1, bool8 s2, bool8 s3, 80 | bool8 s4, bool8 s5, bool8 s6, bool8 s7) noexcept; 81 | 82 | template> 83 | inline simd( 84 | bool8 s0, bool8 s1, bool8 s2, bool8 s3, 85 | bool8 s4, bool8 s5, bool8 s6, bool8 s7, 86 | bool8 s8, bool8 s9, bool8 s10, bool8 s11, 87 | bool8 s12, bool8 s13, bool8 s14, bool8 s15) noexcept 88 | : 89 | underlying_(_mm_setr_epi8( 90 | s0, s1, s2, s3, s4, s5, s6, s7, 91 | s8, s9, s10, s11, s12, s13, s14, s15)) 92 | { 93 | } 94 | 95 | template 96 | explicit simd(const simd& s) noexcept 97 | { 98 | *this = explicit_cast(s); 99 | } 100 | 101 | simd(__m128i underlying) noexcept 102 | : 103 | underlying_(underlying) 104 | { 105 | } 106 | 107 | operator __m128i() const noexcept 108 | { 109 | return underlying_; 110 | } 111 | 112 | static bool8x16 zero() noexcept 113 | { 114 | return _mm_setzero_si128(); 115 | } 116 | 117 | static bool8x16 load(const bool8* data) noexcept 118 | { 119 | return _mm_load_si128(reinterpret_cast(data)); 120 | } 121 | 122 | static bool8x16 loadu(const bool8* data) noexcept 123 | { 124 | return _mm_loadu_si128(reinterpret_cast(data)); 125 | } 126 | 127 | void store(bool8* data) const noexcept 128 | { 129 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 130 | } 131 | 132 | void storeu(bool8* data) const noexcept 133 | { 134 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 135 | } 136 | 137 | const bool8* 
data() const noexcept 138 | { 139 | return reinterpret_cast(&underlying_); 140 | } 141 | 142 | bool8* data() noexcept 143 | { 144 | return reinterpret_cast(&underlying_); 145 | } 146 | }; 147 | } 148 | 149 | #include "int8x16.sse2.hpp" 150 | #include "uint8x16.sse2.hpp" 151 | 152 | namespace tue 153 | { 154 | inline bool8x16 bool8x16::explicit_cast(const int8x16& s) noexcept 155 | { 156 | return __m128i(s); 157 | } 158 | 159 | inline bool8x16 bool8x16::explicit_cast(const uint8x16& s) noexcept 160 | { 161 | return __m128i(s); 162 | } 163 | 164 | namespace detail_ 165 | { 166 | inline bool8x16 bitwise_not_operator_s( 167 | const bool8x16& s) noexcept 168 | { 169 | return _mm_xor_si128(s, bool8x16(true8)); 170 | } 171 | 172 | inline bool8x16 bitwise_and_operator_ss( 173 | const bool8x16& lhs, const bool8x16& rhs) noexcept 174 | { 175 | return _mm_and_si128(lhs, rhs); 176 | } 177 | 178 | inline bool8x16 bitwise_or_operator_ss( 179 | const bool8x16& lhs, const bool8x16& rhs) noexcept 180 | { 181 | return _mm_or_si128(lhs, rhs); 182 | } 183 | 184 | inline bool8x16 bitwise_xor_operator_ss( 185 | const bool8x16& lhs, const bool8x16& rhs) noexcept 186 | { 187 | return _mm_xor_si128(lhs, rhs); 188 | } 189 | 190 | inline bool8x16& bitwise_and_assignment_operator_ss( 191 | bool8x16& lhs, const bool8x16& rhs) noexcept 192 | { 193 | return lhs = _mm_and_si128(lhs, rhs); 194 | } 195 | 196 | inline bool8x16& bitwise_or_assignment_operator_ss( 197 | bool8x16& lhs, const bool8x16& rhs) noexcept 198 | { 199 | return lhs = _mm_or_si128(lhs, rhs); 200 | } 201 | 202 | inline bool8x16& bitwise_xor_assignment_operator_ss( 203 | bool8x16& lhs, const bool8x16& rhs) noexcept 204 | { 205 | return lhs = _mm_xor_si128(lhs, rhs); 206 | } 207 | /* Whole-vector comparisons: equality_operator_ss returns true when all 16 bytes of lhs and rhs compare equal, inequality_operator_ss when at least one byte differs. Neither operand is modified, so both are taken by const reference, which also lets const operands use these overloads. */ 208 | inline bool equality_operator_ss( 209 | const bool8x16& lhs, const bool8x16& rhs) noexcept 210 | { 211 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 212 | } 213 | 214 | inline bool inequality_operator_ss( 215 | const bool8x16& lhs, const
bool8x16& rhs) noexcept 216 | { 217 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 218 | } 219 | 220 | inline bool8x16 mask_ss( 221 | const bool8x16& conditions, 222 | const bool8x16& values) noexcept 223 | { 224 | return _mm_and_si128(conditions, values); 225 | } 226 | 227 | inline bool8x16 select_sss( 228 | const bool8x16& conditions, 229 | const bool8x16& values, 230 | const bool8x16& otherwise) noexcept 231 | { 232 | return _mm_or_si128( 233 | _mm_and_si128(conditions, values), 234 | _mm_andnot_si128(conditions, otherwise)); 235 | } 236 | 237 | inline bool8x16 equal_ss( 238 | const bool8x16& lhs, const bool8x16& rhs) noexcept 239 | { 240 | return _mm_cmpeq_epi8(lhs, rhs); 241 | } 242 | 243 | inline bool8x16 not_equal_ss( 244 | const bool8x16& lhs, const bool8x16& rhs) noexcept 245 | { 246 | return _mm_xor_si128(_mm_cmpeq_epi8(lhs, rhs), bool8x16(true8)); 247 | } 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /include/tue/detail_/matmult.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include "../mat.hpp" 14 | #include "../vec.hpp" 15 | 16 | namespace tue 17 | { 18 | namespace detail_ 19 | { /* matmult_component_mv: returns the sum over k of lhs[k][j] * rhs[k]. The three overloads below differ only in the number of terms summed (2, 3 or 4). */ 20 | template 21 | inline constexpr decltype(std::declval() * std::declval()) 22 | matmult_component_mv( 23 | const mat& lhs, const vec& rhs, int j) noexcept 24 | { 25 | return lhs[0][j] * rhs[0] 26 | + lhs[1][j] * rhs[1]; 27 | } 28 | 29 | template 30 | inline constexpr decltype(std::declval() * std::declval()) 31 | matmult_component_mv( 32 | const mat& lhs, const vec& rhs, int j) noexcept 33 | { 34 | return lhs[0][j] * rhs[0] 35 | + lhs[1][j] * rhs[1] 36 | + lhs[2][j] * rhs[2]; 37 | } 38 | 39 | template 40 | inline constexpr decltype(std::declval() * std::declval()) 41 | matmult_component_mv( 42 | const mat& lhs, const vec& rhs, int j) noexcept 43 | { 44 | return lhs[0][j] * rhs[0] 45 | + lhs[1][j] * rhs[1] 46 | + lhs[2][j] * rhs[2] 47 | + lhs[3][j] * rhs[3]; 48 | } 49 | /* matmult_component_mm: returns the sum over k of lhs[k][j] * rhs[i][k]. The three overloads below differ only in the number of terms summed (2, 3 or 4). */ 50 | template 51 | inline constexpr decltype(std::declval() * std::declval()) 52 | matmult_component_mm( 53 | const mat& lhs, const mat& rhs, 54 | int i, int j) noexcept 55 | { 56 | return lhs[0][j] * rhs[i][0] 57 | + lhs[1][j] * rhs[i][1]; 58 | } 59 | 60 | template 61 | inline constexpr decltype(std::declval() * std::declval()) 62 | matmult_component_mm( 63 | const mat& lhs, const mat& rhs, 64 | int i, int j) noexcept 65 | { 66 | return lhs[0][j] * rhs[i][0] 67 | + lhs[1][j] * rhs[i][1] 68 | + lhs[2][j] * rhs[i][2]; 69 | } 70 | 71 | template 72 | inline constexpr decltype(std::declval() * std::declval()) 73 | matmult_component_mm( 74 | const mat& lhs, const mat& rhs, 75 | int i, int j) noexcept 76 | { 77 | return lhs[0][j] * rhs[i][0] 78 | + lhs[1][j] * rhs[i][1] 79 | + lhs[2][j] * rhs[i][2] 80 | + lhs[3][j] * rhs[i][3]; 81 | } 82 
| 83 | template 84 | inline constexpr vec() * std::declval()), 2> 85 | matmult_column_mm( 86 | const mat& lhs, const mat& rhs, int i) noexcept 87 | { 88 | return { 89 | tue::detail_::matmult_component_mm(lhs, rhs, i, 0), 90 | tue::detail_::matmult_component_mm(lhs, rhs, i, 1), 91 | }; 92 | } 93 | 94 | template 95 | inline constexpr vec() * std::declval()), 3> 96 | matmult_column_mm( 97 | const mat& lhs, const mat& rhs, int i) noexcept 98 | { 99 | return { 100 | tue::detail_::matmult_component_mm(lhs, rhs, i, 0), 101 | tue::detail_::matmult_component_mm(lhs, rhs, i, 1), 102 | tue::detail_::matmult_component_mm(lhs, rhs, i, 2), 103 | }; 104 | } 105 | 106 | template 107 | inline constexpr vec() * std::declval()), 4> 108 | matmult_column_mm( 109 | const mat& lhs, const mat& rhs, int i) noexcept 110 | { 111 | return { 112 | tue::detail_::matmult_component_mm(lhs, rhs, i, 0), 113 | tue::detail_::matmult_component_mm(lhs, rhs, i, 1), 114 | tue::detail_::matmult_component_mm(lhs, rhs, i, 2), 115 | tue::detail_::matmult_component_mm(lhs, rhs, i, 3), 116 | }; 117 | } 118 | 119 | template 120 | inline constexpr vec() * std::declval()), 2> 121 | multiplication_operator_vm( 122 | const vec& lhs, const mat& rhs) noexcept 123 | { 124 | return { 125 | tue::detail_::dot_vv(lhs, rhs[0]), 126 | tue::detail_::dot_vv(lhs, rhs[1]), 127 | }; 128 | } 129 | 130 | template 131 | inline constexpr vec() * std::declval()), 3> 132 | multiplication_operator_vm( 133 | const vec& lhs, const mat& rhs) noexcept 134 | { 135 | return { 136 | tue::detail_::dot_vv(lhs, rhs[0]), 137 | tue::detail_::dot_vv(lhs, rhs[1]), 138 | tue::detail_::dot_vv(lhs, rhs[2]), 139 | }; 140 | } 141 | 142 | template 143 | inline constexpr vec() * std::declval()), 4> 144 | multiplication_operator_vm( 145 | const vec& lhs, const mat& rhs) noexcept 146 | { 147 | return { 148 | tue::detail_::dot_vv(lhs, rhs[0]), 149 | tue::detail_::dot_vv(lhs, rhs[1]), 150 | tue::detail_::dot_vv(lhs, rhs[2]), 151 | tue::detail_::dot_vv(lhs, 
rhs[3]), 152 | }; 153 | } 154 | 155 | template 156 | inline constexpr vec() * std::declval()), 2> 157 | multiplication_operator_mv( 158 | const mat& lhs, const vec& rhs) noexcept 159 | { 160 | return { 161 | tue::detail_::matmult_component_mv(lhs, rhs, 0), 162 | tue::detail_::matmult_component_mv(lhs, rhs, 1), 163 | }; 164 | } 165 | 166 | template 167 | inline constexpr vec() * std::declval()), 3> 168 | multiplication_operator_mv( 169 | const mat& lhs, const vec& rhs) noexcept 170 | { 171 | return { 172 | tue::detail_::matmult_component_mv(lhs, rhs, 0), 173 | tue::detail_::matmult_component_mv(lhs, rhs, 1), 174 | tue::detail_::matmult_component_mv(lhs, rhs, 2), 175 | }; 176 | } 177 | 178 | template 179 | inline constexpr vec() * std::declval()), 4> 180 | multiplication_operator_mv( 181 | const mat& lhs, const vec& rhs) noexcept 182 | { 183 | return { 184 | tue::detail_::matmult_component_mv(lhs, rhs, 0), 185 | tue::detail_::matmult_component_mv(lhs, rhs, 1), 186 | tue::detail_::matmult_component_mv(lhs, rhs, 2), 187 | tue::detail_::matmult_component_mv(lhs, rhs, 3), 188 | }; 189 | } 190 | 191 | template 192 | inline constexpr mat() * std::declval()), 2, R> 194 | multiplication_operator_mm( 195 | const mat& lhs, const mat& rhs) noexcept 196 | { 197 | return { 198 | tue::detail_::matmult_column_mm(lhs, rhs, 0), 199 | tue::detail_::matmult_column_mm(lhs, rhs, 1), 200 | }; 201 | } 202 | 203 | template 204 | inline constexpr mat() * std::declval()), 3, R> 206 | multiplication_operator_mm( 207 | const mat& lhs, const mat& rhs) noexcept 208 | { 209 | return { 210 | tue::detail_::matmult_column_mm(lhs, rhs, 0), 211 | tue::detail_::matmult_column_mm(lhs, rhs, 1), 212 | tue::detail_::matmult_column_mm(lhs, rhs, 2), 213 | }; 214 | } 215 | 216 | template 217 | inline constexpr mat() * std::declval()), 4, R> 219 | multiplication_operator_mm( 220 | const mat& lhs, const mat& rhs) noexcept 221 | { 222 | return { 223 | tue::detail_::matmult_column_mm(lhs, rhs, 0), 224 | 
tue::detail_::matmult_column_mm(lhs, rhs, 1), 225 | tue::detail_::matmult_column_mm(lhs, rhs, 2), 226 | tue::detail_::matmult_column_mm(lhs, rhs, 3), 227 | }; 228 | } 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /tests/quat.tests.cpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #include 10 | #include "tue.tests.hpp" 11 | 12 | #include 13 | #include 14 | 15 | namespace 16 | { 17 | using namespace tue; 18 | 19 | TEST_CASE(size) 20 | { 21 | test_assert(sizeof(quat) == sizeof(short[4])); 22 | test_assert(sizeof(fquat) == sizeof(float[4])); 23 | test_assert(sizeof(dquat) == sizeof(double[4])); 24 | } 25 | 26 | TEST_CASE(alignment) 27 | { 28 | test_assert(alignof(quat) == alignof(short[4])); 29 | test_assert(alignof(fquat) == alignof(float[4])); 30 | test_assert(alignof(dquat) <= alignof(double[4])); 31 | } 32 | 33 | TEST_CASE(component_type) 34 | { 35 | test_assert(( 36 | std::is_same::component_type, short>::value)); 37 | test_assert(( 38 | std::is_same::value)); 39 | test_assert(( 40 | std::is_same::value)); 41 | } 42 | 43 | TEST_CASE(component_count) 44 | { 45 | constexpr auto qs = quat::component_count; 46 | constexpr auto fq = fquat::component_count; 47 | constexpr auto dq = dquat::component_count; 48 | test_assert(qs == 4); 49 | test_assert(fq == 4); 50 | test_assert(dq == 4); 51 | } 52 | 53 | TEST_CASE(default_constructor) 54 | { 55 | dquat q; 56 | unused(q); 57 | } 58 | 59 | TEST_CASE(individual_components_constructor) 60 | { 61 | CONST_OR_CONSTEXPR dquat q = { 1.2, 3.4, 5.6, 7.8 }; 62 | test_assert(q[0] == 1.2); 63 | test_assert(q[1] == 3.4); 64 | 
test_assert(q[2] == 5.6); 65 | test_assert(q[3] == 7.8); 66 | } 67 | 68 | TEST_CASE(extend_vec3_constructor) 69 | { 70 | CONST_OR_CONSTEXPR dquat q = { { 1.2, 3.4, 5.6 }, 7.8 }; 71 | test_assert(q[0] == 1.2); 72 | test_assert(q[1] == 3.4); 73 | test_assert(q[2] == 5.6); 74 | test_assert(q[3] == 7.8); 75 | } 76 | 77 | TEST_CASE(vec4_constructor) 78 | { 79 | CONST_OR_CONSTEXPR dvec4 v(1.2, 3.4, 5.6, 7.8); 80 | CONST_OR_CONSTEXPR dquat q(v); 81 | test_assert(q[0] == 1.2); 82 | test_assert(q[1] == 3.4); 83 | test_assert(q[2] == 5.6); 84 | test_assert(q[3] == 7.8); 85 | } 86 | 87 | TEST_CASE(explicit_conversion_constructor) 88 | { 89 | CONST_OR_CONSTEXPR dquat dq(1.2, 3.4, 5.6, 7.8); 90 | CONST_OR_CONSTEXPR fquat fq(dq); 91 | test_assert(fq[0] == 1.2f); 92 | test_assert(fq[1] == 3.4f); 93 | test_assert(fq[2] == 5.6f); 94 | test_assert(fq[3] == 7.8f); 95 | } 96 | 97 | TEST_CASE(implicit_conversion_operator) 98 | { 99 | CONST_OR_CONSTEXPR fquat fq(1.2f, 3.4f, 5.6f, 7.8f); 100 | CONST_OR_CONSTEXPR dquat dq = fq; 101 | test_assert(dq[0] == 1.2f); 102 | test_assert(dq[1] == 3.4f); 103 | test_assert(dq[2] == 5.6f); 104 | test_assert(dq[3] == 7.8f); 105 | } 106 | 107 | TEST_CASE(identity) 108 | { 109 | CONST_OR_CONSTEXPR auto q = dquat::identity(); 110 | test_assert(q == dquat(0.0, 0.0, 0.0, 1.0)); 111 | } 112 | 113 | TEST_CASE(subscript_operator) 114 | { 115 | CONST_OR_CONSTEXPR dquat ceq(1.2, 3.4, 5.6, 7.8); 116 | CONST_OR_CONSTEXPR auto ceq0 = ceq[0]; 117 | CONST_OR_CONSTEXPR auto ceq1 = ceq[1]; 118 | CONST_OR_CONSTEXPR auto ceq2 = ceq[2]; 119 | CONST_OR_CONSTEXPR auto ceq3 = ceq[3]; 120 | test_assert(ceq0 == 1.2); 121 | test_assert(ceq1 == 3.4); 122 | test_assert(ceq2 == 5.6); 123 | test_assert(ceq3 == 7.8); 124 | 125 | const auto cq = ceq; 126 | const auto& cq0 = cq[0]; 127 | const auto& cq1 = cq[1]; 128 | const auto& cq2 = cq[2]; 129 | const auto& cq3 = cq[3]; 130 | test_assert(&cq0 == cq.data() + 0); 131 | test_assert(&cq1 == cq.data() + 1); 132 | test_assert(&cq2 == 
cq.data() + 2); 133 | test_assert(&cq3 == cq.data() + 3); 134 | 135 | auto q = ceq; 136 | auto& q0 = q[0]; 137 | auto& q1 = q[1]; 138 | auto& q2 = q[2]; 139 | auto& q3 = q[3]; 140 | test_assert(&q0 == q.data() + 0); 141 | test_assert(&q1 == q.data() + 1); 142 | test_assert(&q2 == q.data() + 2); 143 | test_assert(&q3 == q.data() + 3); 144 | } 145 | 146 | TEST_CASE(data) 147 | { 148 | const dquat dq(1.2, 3.4, 5.6, 7.8); 149 | test_assert(dq.data() == static_cast(&dq)); 150 | 151 | dquat q; 152 | test_assert(q.data() == static_cast(&q)); 153 | } 154 | 155 | TEST_CASE(x_y_z_w) 156 | { 157 | CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8); 158 | CONST_OR_CONSTEXPR auto x = q.x(); 159 | CONST_OR_CONSTEXPR auto y = q.y(); 160 | CONST_OR_CONSTEXPR auto z = q.z(); 161 | CONST_OR_CONSTEXPR auto w = q.w(); 162 | test_assert(x == 1.2); 163 | test_assert(y == 3.4); 164 | test_assert(z == 5.6); 165 | test_assert(w == 7.8); 166 | } 167 | 168 | TEST_CASE(xyz) 169 | { 170 | CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8); 171 | CONST_OR_CONSTEXPR auto xyz = q.xyz(); 172 | test_assert(xyz == dvec3(1.2, 3.4, 5.6)); 173 | } 174 | 175 | TEST_CASE(xyzw) 176 | { 177 | CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8); 178 | CONST_OR_CONSTEXPR auto xyzw = q.xyzw(); 179 | test_assert(xyzw == dvec4(1.2, 3.4, 5.6, 7.8)); 180 | } 181 | 182 | TEST_CASE(set_x_y_z_w) 183 | { 184 | dquat q(1.2, 3.4, 5.6, 7.8); 185 | q.set_x(9.10); 186 | q.set_y(11.12); 187 | q.set_z(13.14); 188 | q.set_w(15.16); 189 | test_assert(q[0] == 9.10); 190 | test_assert(q[1] == 11.12); 191 | test_assert(q[2] == 13.14); 192 | test_assert(q[3] == 15.16); 193 | } 194 | 195 | TEST_CASE(set_xyz) 196 | { 197 | dquat q(1.2, 3.4, 5.6, 7.8); 198 | 199 | q.set_xyz(9.10, 11.12, 13.14); 200 | test_assert(q[0] == 9.10); 201 | test_assert(q[1] == 11.12); 202 | test_assert(q[2] == 13.14); 203 | test_assert(q[3] == 7.8); 204 | 205 | q.set_xyz(dvec3(15.16, 17.18, 19.20)); 206 | test_assert(q[0] == 15.16); 207 | test_assert(q[1] == 17.18); 208 
| test_assert(q[2] == 19.20); 209 | test_assert(q[3] == 7.8); 210 | } 211 | 212 | TEST_CASE(set_xyzw) 213 | { 214 | dquat q(1.2, 3.4, 5.6, 7.8); 215 | 216 | q.set_xyzw(9.10, 11.12, 13.14, 15.16); 217 | test_assert(q[0] == 9.10); 218 | test_assert(q[1] == 11.12); 219 | test_assert(q[2] == 13.14); 220 | test_assert(q[3] == 15.16); 221 | 222 | q.set_xyzw(dvec3(17.18, 19.20, 21.22), 23.24); 223 | test_assert(q[0] == 17.18); 224 | test_assert(q[1] == 19.20); 225 | test_assert(q[2] == 21.22); 226 | test_assert(q[3] == 23.24); 227 | 228 | q.set_xyzw(dvec4(25.26, 27.28, 29.30, 31.32)); 229 | test_assert(q[0] == 25.26); 230 | test_assert(q[1] == 27.28); 231 | test_assert(q[2] == 29.30); 232 | test_assert(q[3] == 31.32); 233 | } 234 | 235 | TEST_CASE(v) 236 | { 237 | CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8); 238 | CONST_OR_CONSTEXPR auto v = q.v(); 239 | test_assert(v == dvec3(1.2, 3.4, 5.6)); 240 | } 241 | 242 | TEST_CASE(s) 243 | { 244 | CONST_OR_CONSTEXPR dquat q(1.2, 3.4, 5.6, 7.8); 245 | CONST_OR_CONSTEXPR auto s = q.s(); 246 | test_assert(s == 7.8); 247 | } 248 | 249 | TEST_CASE(set_v) 250 | { 251 | dquat q(1.2, 3.4, 5.6, 7.8); 252 | 253 | q.set_v(9.10, 11.12, 13.14); 254 | test_assert(q[0] == 9.10); 255 | test_assert(q[1] == 11.12); 256 | test_assert(q[2] == 13.14); 257 | test_assert(q[3] == 7.8); 258 | 259 | q.set_v(dvec3(15.16, 17.18, 19.20)); 260 | test_assert(q[0] == 15.16); 261 | test_assert(q[1] == 17.18); 262 | test_assert(q[2] == 19.20); 263 | test_assert(q[3] == 7.8); 264 | } 265 | 266 | TEST_CASE(set_s) 267 | { 268 | dquat q(1.2, 3.4, 5.6, 7.8); 269 | q.set_s(9.10); 270 | test_assert(q[0] == 1.2); 271 | test_assert(q[1] == 3.4); 272 | test_assert(q[2] == 5.6); 273 | test_assert(q[3] == 9.10); 274 | } 275 | 276 | TEST_CASE(multiplication_assignment_operator) 277 | { 278 | const dquat q1(1.2, 3.4, 5.6, 7.8); 279 | const fquat q2(9.10f, 11.12f, 13.14f, 15.16f); 280 | 281 | auto q3 = q1; 282 | test_assert(&(q3 *= q2) == &q3); 283 | test_assert(q3 == q1 * 
q2); 284 | } 285 | 286 | TEST_CASE(multiplication_operator) 287 | { 288 | CONST_OR_CONSTEXPR dquat q1(1.2, 3.4, 5.6, 7.8); 289 | CONST_OR_CONSTEXPR fquat q2(9.10f, 11.12f, 13.14f, 15.16f); 290 | CONST_OR_CONSTEXPR auto q3 = q1 * q2; 291 | 292 | test_assert( 293 | q3.v() == 294 | q2.s()*q1.v() + q1.s()*q2.v() + math::cross(q2.v(), q1.v())); 295 | 296 | test_assert(nearly_equal( 297 | q3.s(), 298 | q2.s()*q1.s() - math::dot(q2.v(), q1.v()))); 299 | } 300 | 301 | TEST_CASE(vec_multiplication_operator) 302 | { 303 | CONST_OR_CONSTEXPR dvec3 v1(1.2, 3.4, 5.6); 304 | CONST_OR_CONSTEXPR fquat q(7.8f, 9.10f, 11.12f, 13.14f); 305 | CONST_OR_CONSTEXPR auto v2 = v1 * q; 306 | test_assert(v2 == (q * dquat(v1, 0.0) * dquat(-q.v(), q.s())).v()); 307 | } 308 | 309 | TEST_CASE(equality_operator) 310 | { 311 | CONST_OR_CONSTEXPR fquat q1(1.2f, 3.4f, 5.6f, 7.8f); 312 | CONST_OR_CONSTEXPR dquat q2(1.2f, 3.4f, 5.6f, 7.8f); 313 | CONST_OR_CONSTEXPR dquat q3(1.2f, 3.4f, 5.6f, 0.0f); 314 | CONST_OR_CONSTEXPR dquat q4(1.2f, 3.4f, 0.0f, 7.8f); 315 | CONST_OR_CONSTEXPR dquat q5(1.2f, 0.0f, 5.6f, 7.8f); 316 | CONST_OR_CONSTEXPR dquat q6(0.0f, 3.4f, 5.6f, 7.8f); 317 | CONST_OR_CONSTEXPR auto result1 = (q1 == q2); 318 | CONST_OR_CONSTEXPR auto result2 = (q1 == q3); 319 | CONST_OR_CONSTEXPR auto result3 = (q1 == q4); 320 | CONST_OR_CONSTEXPR auto result4 = (q1 == q5); 321 | CONST_OR_CONSTEXPR auto result5 = (q1 == q6); 322 | test_assert(result1 == true); 323 | test_assert(result2 == false); 324 | test_assert(result3 == false); 325 | test_assert(result4 == false); 326 | test_assert(result5 == false); 327 | } 328 | 329 | TEST_CASE(inequality_operator) 330 | { 331 | CONST_OR_CONSTEXPR fquat q1(1.2f, 3.4f, 5.6f, 7.8f); 332 | CONST_OR_CONSTEXPR dquat q2(1.2f, 3.4f, 5.6f, 7.8f); 333 | CONST_OR_CONSTEXPR dquat q3(1.2f, 3.4f, 5.6f, 0.0f); 334 | CONST_OR_CONSTEXPR dquat q4(1.2f, 3.4f, 0.0f, 7.8f); 335 | CONST_OR_CONSTEXPR dquat q5(1.2f, 0.0f, 5.6f, 7.8f); 336 | CONST_OR_CONSTEXPR dquat q6(0.0f, 3.4f, 
5.6f, 7.8f); 337 | CONST_OR_CONSTEXPR auto result1 = (q1 != q2); 338 | CONST_OR_CONSTEXPR auto result2 = (q1 != q3); 339 | CONST_OR_CONSTEXPR auto result3 = (q1 != q4); 340 | CONST_OR_CONSTEXPR auto result4 = (q1 != q5); 341 | CONST_OR_CONSTEXPR auto result5 = (q1 != q6); 342 | test_assert(result1 == false); 343 | test_assert(result2 == true); 344 | test_assert(result3 == true); 345 | test_assert(result4 == true); 346 | test_assert(result5 == true); 347 | } 348 | 349 | TEST_CASE(normalize) 350 | { 351 | const auto q = math::normalize(dquat(1.2, 3.4, 5.6, 7.8)); 352 | test_assert(q == dquat(math::normalize(q.xyzw()))); 353 | } 354 | 355 | TEST_CASE(conjugate) 356 | { 357 | CONST_OR_CONSTEXPR auto q = math::conjugate(dquat(1.2, 3.4, 5.6, 7.8)); 358 | test_assert(q == dquat(-1.2, -3.4, -5.6, 7.8)); 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/uint64x2.sse2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "../../../simd.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static uint64x2 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | std::uint64_t(s.data()[0]), 32 | std::uint64_t(s.data()[1]), 33 | }; 34 | } 35 | 36 | inline static uint64x2 explicit_cast(const bool64x2& s) noexcept; 37 | 38 | inline static uint64x2 explicit_cast(const int64x2& s) noexcept; 39 | 40 | public: 41 | using component_type = std::uint64_t; 42 | 43 | static constexpr int component_count = 2; 44 | 45 | static constexpr bool is_accelerated = true; 46 | 47 | simd() noexcept = default; 48 | 49 | explicit simd(std::uint64_t x) noexcept 50 | : 51 | underlying_(_mm_set1_epi64x(x)) 52 | { 53 | } 54 | 55 | template> 56 | inline simd( 57 | std::uint64_t x, std::uint64_t y) noexcept 58 | : 59 | underlying_(_mm_set_epi64x(y, x)) 60 | { 61 | } 62 | 63 | template> 64 | inline simd( 65 | std::uint64_t x, std::uint64_t y, 66 | std::uint64_t z, std::uint64_t w) noexcept; 67 | 68 | template> 69 | inline simd( 70 | std::uint64_t s0, std::uint64_t s1, 71 | std::uint64_t s2, std::uint64_t s3, 72 | std::uint64_t s4, std::uint64_t s5, 73 | std::uint64_t s6, std::uint64_t s7) noexcept; 74 | 75 | template> 76 | inline simd( 77 | std::uint64_t s0, std::uint64_t s1, 78 | std::uint64_t s2, std::uint64_t s3, 79 | std::uint64_t s4, std::uint64_t s5, 80 | std::uint64_t s6, std::uint64_t s7, 81 | std::uint64_t s8, std::uint64_t s9, 82 | std::uint64_t s10, std::uint64_t s11, 83 | std::uint64_t s12, std::uint64_t s13, 84 | std::uint64_t s14, std::uint64_t s15) noexcept; 85 | 86 | 
template 87 | explicit simd(const simd& s) noexcept 88 | { 89 | *this = explicit_cast(s); 90 | } 91 | 92 | simd(__m128i underlying) noexcept 93 | : 94 | underlying_(underlying) 95 | { 96 | } 97 | 98 | operator __m128i() const noexcept 99 | { 100 | return underlying_; 101 | } 102 | 103 | static uint64x2 zero() noexcept 104 | { 105 | return _mm_setzero_si128(); 106 | } 107 | 108 | static uint64x2 load(const std::uint64_t* data) noexcept 109 | { 110 | return _mm_load_si128(reinterpret_cast(data)); 111 | } 112 | 113 | static uint64x2 loadu(const std::uint64_t* data) noexcept 114 | { 115 | return _mm_loadu_si128(reinterpret_cast(data)); 116 | } 117 | 118 | void store(std::uint64_t* data) const noexcept 119 | { 120 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 121 | } 122 | 123 | void storeu(std::uint64_t* data) const noexcept 124 | { 125 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 126 | } 127 | 128 | const std::uint64_t* data() const noexcept 129 | { 130 | return reinterpret_cast(&underlying_); 131 | } 132 | 133 | std::uint64_t* data() noexcept 134 | { 135 | return reinterpret_cast(&underlying_); 136 | } 137 | }; 138 | } 139 | 140 | #include "bool64x2.sse2.hpp" 141 | #include "int64x2.sse2.hpp" 142 | 143 | namespace tue 144 | { 145 | inline uint64x2 uint64x2::explicit_cast(const bool64x2& s) noexcept 146 | { 147 | return __m128i(s); 148 | } 149 | 150 | inline uint64x2 uint64x2::explicit_cast(const int64x2& s) noexcept 151 | { 152 | return __m128i(s); 153 | } 154 | 155 | namespace detail_ 156 | { 157 | inline uint64x2& pre_increment_operator_s(uint64x2& s) noexcept 158 | { 159 | return s = _mm_add_epi64(s, uint64x2(1)); 160 | } 161 | 162 | inline uint64x2 post_increment_operator_s(uint64x2& s) noexcept 163 | { 164 | const auto result = s; 165 | s = _mm_add_epi64(s, uint64x2(1)); 166 | return result; 167 | } 168 | 169 | inline uint64x2& pre_decrement_operator_s(uint64x2& s) noexcept 170 | { 171 | return s = _mm_sub_epi64(s, 
uint64x2(1)); 172 | } 173 | 174 | inline uint64x2 post_decrement_operator_s(uint64x2& s) noexcept 175 | { 176 | const auto result = s; 177 | s = _mm_sub_epi64(s, uint64x2(1)); 178 | return result; 179 | } 180 | 181 | inline uint64x2 bitwise_not_operator_s(const uint64x2& s) noexcept 182 | { 183 | return _mm_xor_si128(s, uint64x2(~0ull)); 184 | } 185 | 186 | inline uint64x2 addition_operator_ss( 187 | const uint64x2& lhs, const uint64x2& rhs) noexcept 188 | { 189 | return _mm_add_epi64(lhs, rhs); 190 | } 191 | 192 | inline uint64x2 subtraction_operator_ss( 193 | const uint64x2& lhs, const uint64x2& rhs) noexcept 194 | { 195 | return _mm_sub_epi64(lhs, rhs); 196 | } 197 | 198 | /*inline uint64x2 multiplication_operator_ss( 199 | const uint64x2& lhs, const uint64x2& rhs) noexcept 200 | { 201 | // TODO 202 | } 203 | 204 | inline uint64x2 division_operator_ss( 205 | const uint64x2& lhs, const uint64x2& rhs) noexcept 206 | { 207 | // TODO 208 | } 209 | 210 | inline uint64x2 modulo_operator_ss( 211 | const uint64x2& lhs, const uint64x2& rhs) noexcept 212 | { 213 | // TODO 214 | }*/ 215 | 216 | inline uint64x2 bitwise_and_operator_ss( 217 | const uint64x2& lhs, const uint64x2& rhs) noexcept 218 | { 219 | return _mm_and_si128(lhs, rhs); 220 | } 221 | 222 | inline uint64x2 bitwise_or_operator_ss( 223 | const uint64x2& lhs, const uint64x2& rhs) noexcept 224 | { 225 | return _mm_or_si128(lhs, rhs); 226 | } 227 | 228 | inline uint64x2 bitwise_xor_operator_ss( 229 | const uint64x2& lhs, const uint64x2& rhs) noexcept 230 | { 231 | return _mm_xor_si128(lhs, rhs); 232 | } 233 | 234 | inline uint64x2 bitwise_shift_left_operator_si( 235 | const uint64x2& lhs, int rhs) noexcept 236 | { 237 | return _mm_slli_epi64(lhs, rhs); 238 | } 239 | 240 | inline uint64x2 bitwise_shift_right_operator_si( 241 | const uint64x2& lhs, int rhs) noexcept 242 | { 243 | return _mm_srli_epi64(lhs, rhs); 244 | } 245 | 246 | inline uint64x2& addition_assignment_operator_ss( 247 | uint64x2& lhs, const 
uint64x2& rhs) noexcept 248 | { 249 | return lhs = _mm_add_epi64(lhs, rhs); 250 | } 251 | 252 | inline uint64x2& subtraction_assignment_operator_ss( 253 | uint64x2& lhs, const uint64x2& rhs) noexcept 254 | { 255 | return lhs = _mm_sub_epi64(lhs, rhs); 256 | } 257 | 258 | /*inline uint64x2& multiplication_assignment_operator_ss( 259 | uint64x2& lhs, const uint64x2& rhs) noexcept 260 | { 261 | // TODO 262 | } 263 | 264 | inline uint64x2& division_assignment_operator_ss( 265 | uint64x2& lhs, const uint64x2& rhs) noexcept 266 | { 267 | // TODO 268 | } 269 | 270 | inline uint64x2& modulo_assignment_operator_ss( 271 | uint64x2& lhs, const uint64x2& rhs) noexcept 272 | { 273 | // TODO 274 | }*/ 275 | 276 | inline uint64x2& bitwise_and_assignment_operator_ss( 277 | uint64x2& lhs, const uint64x2& rhs) noexcept 278 | { 279 | return lhs = _mm_and_si128(lhs, rhs); 280 | } 281 | 282 | inline uint64x2& bitwise_or_assignment_operator_ss( 283 | uint64x2& lhs, const uint64x2& rhs) noexcept 284 | { 285 | return lhs = _mm_or_si128(lhs, rhs); 286 | } 287 | 288 | inline uint64x2& bitwise_xor_assignment_operator_ss( 289 | uint64x2& lhs, const uint64x2& rhs) noexcept 290 | { 291 | return lhs = _mm_xor_si128(lhs, rhs); 292 | } 293 | 294 | inline uint64x2& bitwise_shift_left_assignment_operator_si( 295 | uint64x2& lhs, int rhs) noexcept 296 | { 297 | return lhs = _mm_slli_epi64(lhs, rhs); 298 | } 299 | 300 | inline uint64x2& bitwise_shift_right_assignment_operator_si( 301 | uint64x2& lhs, int rhs) noexcept 302 | { 303 | return lhs = _mm_srli_epi64(lhs, rhs); 304 | } 305 | 306 | inline bool equality_operator_ss( 307 | const uint64x2& lhs, const uint64x2& rhs) noexcept 308 | { 309 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 310 | } 311 | 312 | inline bool inequality_operator_ss( 313 | const uint64x2& lhs, const uint64x2& rhs) noexcept 314 | { 315 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 316 | } 317 | 318 | inline uint64x2 abs_s(const uint64x2& s) 
noexcept 319 | { 320 | return s; 321 | } 322 | 323 | /*inline uint64x2 min_ss( 324 | const uint64x2& s1, const uint64x2& s2) noexcept 325 | { 326 | // TODO 327 | } 328 | 329 | inline uint64x2 max_ss( 330 | const uint64x2& s1, const uint64x2& s2) noexcept 331 | { 332 | // TODO 333 | }*/ 334 | 335 | inline uint64x2 mask_ss( 336 | const bool64x2& conditions, 337 | const uint64x2& values) noexcept 338 | { 339 | return _mm_and_si128(conditions, values); 340 | } 341 | 342 | inline uint64x2 select_sss( 343 | const bool64x2& conditions, 344 | const uint64x2& values, 345 | const uint64x2& otherwise) noexcept 346 | { 347 | return _mm_or_si128( 348 | _mm_and_si128(conditions, values), 349 | _mm_andnot_si128(conditions, otherwise)); 350 | } 351 | 352 | /*inline bool64x2 less_ss( 353 | const uint64x2& lhs, const uint64x2& rhs) noexcept 354 | { 355 | // TODO 356 | } 357 | 358 | inline bool64x2 less_equal_ss( 359 | const uint64x2& lhs, const uint64x2& rhs) noexcept 360 | { 361 | // TODO 362 | } 363 | 364 | inline bool64x2 greater_ss( 365 | const uint64x2& lhs, const uint64x2& rhs) noexcept 366 | { 367 | // TODO 368 | } 369 | 370 | inline bool64x2 greater_equal_ss( 371 | const uint64x2& lhs, const uint64x2& rhs) noexcept 372 | { 373 | // TODO 374 | }*/ 375 | 376 | inline bool64x2 equal_ss( 377 | const uint64x2& lhs, const uint64x2& rhs) noexcept 378 | { 379 | const auto cmp = _mm_cmpeq_epi32(lhs, rhs); 380 | const auto hi = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(3, 3, 1, 1)); 381 | const auto lo = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 2, 0, 0)); 382 | return _mm_and_si128(hi, lo); 383 | } 384 | 385 | inline bool64x2 not_equal_ss( 386 | const uint64x2& lhs, const uint64x2& rhs) noexcept 387 | { 388 | return _mm_xor_si128(equal_ss(lhs, rhs), uint64x2(~0ull)); 389 | } 390 | } 391 | } 392 | -------------------------------------------------------------------------------- /tests/transform.tests.cpp: -------------------------------------------------------------------------------- 1 | // 
Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #include 10 | #include "tue.tests.hpp" 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | namespace 17 | { 18 | using namespace tue; 19 | 20 | TEST_CASE(axis_angle_from_rotation_vec) 21 | { 22 | const auto aa1 = transform::axis_angle(1.2, 3.4, 5.6); 23 | test_assert(aa1.xyz() == math::normalize(dvec3(1.2, 3.4, 5.6))); 24 | test_assert(aa1.a() == math::length(dvec3(1.2, 3.4, 5.6))); 25 | 26 | const auto aa2 = transform::axis_angle(0.0, 0.0, 0.0); 27 | test_assert(aa2.xyz() == dvec3::z_axis()); 28 | test_assert(aa2.a() == 0.0); 29 | 30 | const auto aa3 = transform::axis_angle(dvec3(1.2, 3.4, 5.6)); 31 | test_assert(aa3 == aa1); 32 | 33 | const auto aa4 = transform::axis_angle(dvec3(0.0, 0.0, 0.0)); 34 | test_assert(aa4 == aa2); 35 | } 36 | 37 | TEST_CASE(rotation_vec_from_axis_angle) 38 | { 39 | CONST_OR_CONSTEXPR auto rv1 = 40 | transform::rotation_vec(1.2, 3.4, 5.6, 7.8); 41 | test_assert(rv1 == dvec3(1.2, 3.4, 5.6) * 7.8); 42 | 43 | CONST_OR_CONSTEXPR auto rv2 = 44 | transform::rotation_vec(dvec3(1.2, 3.4, 5.6), 7.8); 45 | test_assert(rv2 == rv1); 46 | 47 | CONST_OR_CONSTEXPR auto rv3 = 48 | transform::rotation_vec(dvec4(1.2, 3.4, 5.6, 7.8)); 49 | test_assert(rv3 == rv1); 50 | } 51 | 52 | TEST_CASE(rotation_quat_from_axis_angle) 53 | { 54 | const auto rq1 = transform::rotation_quat(dvec3(1.2, 3.4, 5.6), 7.8); 55 | test_assert(rq1.v() == dvec3(1.2, 3.4, 5.6) * math::sin(7.8/2)); 56 | test_assert(rq1.s() == math::cos(7.8/2)); 57 | 58 | const auto rq2 = transform::rotation_quat(1.2, 3.4, 5.6, 7.8); 59 | test_assert(rq2 == rq1); 60 | 61 | const auto rq3 = transform::rotation_quat(dvec4(1.2, 3.4, 5.6, 7.8)); 62 | test_assert(rq3 == rq1); 63 | } 64 | 65 | 
TEST_CASE(rotation_quat_from_rotation_vec) 66 | { 67 | const auto rq1 = transform::rotation_quat(1.2, 3.4, 5.6); 68 | test_assert(rq1 == 69 | transform::rotation_quat(transform::axis_angle(1.2, 3.4, 5.6))); 70 | 71 | const auto rq2 = transform::rotation_quat(0.0, 0.0, 0.0); 72 | test_assert(rq2 == dquat::identity()); 73 | 74 | const auto rq3 = transform::rotation_quat(dvec3(1.2, 3.4, 5.6)); 75 | test_assert(rq3 == rq1); 76 | 77 | const auto rq4 = transform::rotation_quat(dvec3(0.0, 0.0, 0.0)); 78 | test_assert(rq4 == rq2); 79 | } 80 | 81 | TEST_CASE(translation_mat_2d) 82 | { 83 | CONST_OR_CONSTEXPR auto m1 = 84 | transform::translation_mat(1.2, 3.4); 85 | test_assert(m1[0] == dvec4(1.0, 0.0, 0.0, 1.2)); 86 | test_assert(m1[1] == dvec4(0.0, 1.0, 0.0, 3.4)); 87 | test_assert(m1[2] == dvec4(0.0, 0.0, 1.0, 0.0)); 88 | test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0)); 89 | 90 | CONST_OR_CONSTEXPR auto m2 = 91 | transform::translation_mat(1.2, 3.4); 92 | test_assert(m2[0] == dvec3(1.0, 0.0, 1.2)); 93 | test_assert(m2[1] == dvec3(0.0, 1.0, 3.4)); 94 | 95 | CONST_OR_CONSTEXPR auto m3 = 96 | transform::translation_mat(dvec2(1.2, 3.4)); 97 | test_assert(m3 == m1); 98 | 99 | CONST_OR_CONSTEXPR auto m4 = 100 | transform::translation_mat(dvec2(1.2, 3.4)); 101 | test_assert(m4 == m2); 102 | } 103 | 104 | TEST_CASE(translation_mat_3d) 105 | { 106 | CONST_OR_CONSTEXPR auto m1 = 107 | transform::translation_mat(1.2, 3.4, 5.6); 108 | test_assert(m1[0] == dvec4(1.0, 0.0, 0.0, 1.2)); 109 | test_assert(m1[1] == dvec4(0.0, 1.0, 0.0, 3.4)); 110 | test_assert(m1[2] == dvec4(0.0, 0.0, 1.0, 5.6)); 111 | test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0)); 112 | 113 | CONST_OR_CONSTEXPR auto m2 = 114 | transform::translation_mat(1.2, 3.4, 5.6); 115 | test_assert(m2 == dmat3x4(m1)); 116 | 117 | CONST_OR_CONSTEXPR auto m3 = 118 | transform::translation_mat(dvec3(1.2, 3.4, 5.6)); 119 | test_assert(m3 == m1); 120 | 121 | CONST_OR_CONSTEXPR auto m4 = 122 | transform::translation_mat(dvec3(1.2, 3.4, 
5.6)); 123 | test_assert(m4 == m2); 124 | } 125 | 126 | TEST_CASE(rotation_mat_2d) 127 | { 128 | const auto m1 = transform::rotation_mat(1.2); 129 | test_assert(m1[0] == dvec4(math::cos(1.2), -math::sin(1.2), 0.0, 0.0)); 130 | test_assert(m1[1] == dvec4(math::sin(1.2), math::cos(1.2), 0.0, 0.0)); 131 | test_assert(m1[2] == dvec4( 0.0 , 0.0 , 1.0, 0.0)); 132 | test_assert(m1[3] == dvec4( 0.0 , 0.0 , 0.0, 1.0)); 133 | 134 | const auto m2 = transform::rotation_mat(1.2); 135 | test_assert(m2 == dmat2x2(m1)); 136 | } 137 | 138 | TEST_CASE(rotation_mat_from_axis_angle) 139 | { 140 | double s, c; 141 | math::sincos(7.8, s, c); 142 | 143 | const auto m1 = transform::rotation_mat(1.2, 3.4, 5.6, 7.8); 144 | test_assert(nearly_equal(m1[0][0], 1.2*1.2*(1-c) + c)); 145 | test_assert(nearly_equal(m1[0][1], 1.2*3.4*(1-c) - 5.6*s)); 146 | test_assert(nearly_equal(m1[0][2], 1.2*5.6*(1-c) + 3.4*s)); 147 | test_assert(nearly_equal(m1[0][3], 0.0)); 148 | test_assert(nearly_equal(m1[1][0], 1.2*3.4*(1-c) + 5.6*s)); 149 | test_assert(nearly_equal(m1[1][1], 3.4*3.4*(1-c) + c)); 150 | test_assert(nearly_equal(m1[1][2], 3.4*5.6*(1-c) - 1.2*s)); 151 | test_assert(nearly_equal(m1[1][3], 0.0)); 152 | test_assert(nearly_equal(m1[2][0], 1.2*5.6*(1-c) - 3.4*s)); 153 | test_assert(nearly_equal(m1[2][1], 3.4*5.6*(1-c) + 1.2*s)); 154 | test_assert(nearly_equal(m1[2][2], 5.6*5.6*(1-c) + c)); 155 | test_assert(nearly_equal(m1[2][3], 0.0)); 156 | test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0)); 157 | 158 | const auto m2 = transform::rotation_mat( 159 | 1.2, 3.4, 5.6, 7.8); 160 | test_assert(m2 == dmat3x3(m1)); 161 | 162 | const auto m3 = transform::rotation_mat( 163 | dvec3(1.2, 3.4, 5.6), 7.8); 164 | test_assert(m3 == m1); 165 | 166 | const auto m4 = transform::rotation_mat( 167 | dvec3(1.2, 3.4, 5.6), 7.8); 168 | test_assert(m4 == m2); 169 | 170 | const auto m5 = transform::rotation_mat( 171 | dvec4(1.2, 3.4, 5.6, 7.8)); 172 | test_assert(m5 == m1); 173 | 174 | const auto m6 = 
transform::rotation_mat( 175 | dvec4(1.2, 3.4, 5.6, 7.8)); 176 | test_assert(m6 == m2); 177 | } 178 | 179 | TEST_CASE(rotation_mat_from_rotation_vec) 180 | { 181 | const auto m1 = transform::rotation_mat( 182 | 1.2, 3.4, 5.6); 183 | test_assert(m1 == 184 | transform::rotation_mat(transform::axis_angle(1.2, 3.4, 5.6))); 185 | 186 | const auto m2 = transform::rotation_mat( 187 | 1.2, 3.4, 5.6); 188 | test_assert(m2 == dmat3x3(m1)); 189 | 190 | const auto m3 = transform::rotation_mat( 191 | dvec3(1.2, 3.4, 5.6)); 192 | test_assert(m3 == m1); 193 | 194 | const auto m4 = transform::rotation_mat( 195 | dvec3(1.2, 3.4, 5.6)); 196 | test_assert(m4 == m2); 197 | } 198 | 199 | TEST_CASE(rotation_mat_from_rotation_quat) 200 | { 201 | CONST_OR_CONSTEXPR auto x = 1.2, y = 3.4, z = 5.6, w = 7.8; 202 | CONST_OR_CONSTEXPR auto m1 = 203 | transform::rotation_mat(dquat(x, y, z, w)); 204 | test_assert(m1[0][0] == 1 - 2*y*y - 2*z*z); 205 | test_assert(m1[0][1] == 2*x*y - 2*z*w); 206 | test_assert(m1[0][2] == 2*x*z + 2*y*w); 207 | test_assert(m1[0][3] == 0.0); 208 | test_assert(m1[1][0] == 2*x*y + 2*z*w); 209 | test_assert(m1[1][1] == 1 - 2*x*x - 2*z*z); 210 | test_assert(m1[1][2] == 2*y*z - 2*x*w); 211 | test_assert(m1[1][3] == 0.0); 212 | test_assert(m1[2][0] == 2*x*z - 2*y*w); 213 | test_assert(m1[2][1] == 2*y*z + 2*x*w); 214 | test_assert(m1[2][2] == 1 - 2*x*x - 2*y*y); 215 | test_assert(m1[2][3] == 0.0); 216 | test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0)); 217 | 218 | CONST_OR_CONSTEXPR auto m2 = 219 | transform::rotation_mat(dquat(x, y, z, w)); 220 | test_assert(m2 == dmat3x3(m1)); 221 | } 222 | 223 | TEST_CASE(scale_mat_2d) 224 | { 225 | CONST_OR_CONSTEXPR auto m1 = 226 | transform::scale_mat(1.2, 3.4); 227 | test_assert(m1[0] == dvec4(1.2, 0.0, 0.0, 0.0)); 228 | test_assert(m1[1] == dvec4(0.0, 3.4, 0.0, 0.0)); 229 | test_assert(m1[2] == dvec4(0.0, 0.0, 1.0, 0.0)); 230 | test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0)); 231 | 232 | CONST_OR_CONSTEXPR auto m2 = 233 | 
transform::scale_mat(1.2, 3.4); 234 | test_assert(m2 == dmat2x2(m2)); 235 | 236 | CONST_OR_CONSTEXPR auto m3 = 237 | transform::scale_mat(dvec2(1.2, 3.4)); 238 | test_assert(m3 == m1); 239 | 240 | CONST_OR_CONSTEXPR auto m4 = 241 | transform::scale_mat(dvec2(1.2, 3.4)); 242 | test_assert(m4 == m2); 243 | } 244 | 245 | TEST_CASE(scale_mat_3d) 246 | { 247 | CONST_OR_CONSTEXPR auto m1 = 248 | transform::scale_mat(1.2, 3.4, 5.6); 249 | test_assert(m1[0] == dvec4(1.2, 0.0, 0.0, 0.0)); 250 | test_assert(m1[1] == dvec4(0.0, 3.4, 0.0, 0.0)); 251 | test_assert(m1[2] == dvec4(0.0, 0.0, 5.6, 0.0)); 252 | test_assert(m1[3] == dvec4(0.0, 0.0, 0.0, 1.0)); 253 | 254 | CONST_OR_CONSTEXPR auto m2 = 255 | transform::scale_mat(1.2, 3.4, 5.6); 256 | test_assert(m2 == dmat3x3(m1)); 257 | 258 | CONST_OR_CONSTEXPR auto m3 = 259 | transform::scale_mat(dvec3(1.2, 3.4, 5.6)); 260 | test_assert(m3 == m1); 261 | 262 | CONST_OR_CONSTEXPR auto m4 = 263 | transform::scale_mat(dvec3(1.2, 3.4, 5.6)); 264 | test_assert(m4 == m2); 265 | } 266 | 267 | TEST_CASE(perspective_mat) 268 | { 269 | const auto m1 = transform::perspective_mat(1.2, 3.4, 5.6, 7.8); 270 | test_assert(nearly_equal( 271 | m1[0][0], math::cos(1.2/2) / math::sin(1.2/2) / 3.4)); 272 | test_assert(m1[0][1] == 0.0); 273 | test_assert(m1[0][2] == 0.0); 274 | test_assert(m1[0][3] == 0.0); 275 | test_assert(m1[1][0] == 0.0); 276 | test_assert(nearly_equal( 277 | m1[1][1], math::cos(1.2/2) / math::sin(1.2/2))); 278 | test_assert(m1[1][2] == 0.0); 279 | test_assert(m1[1][3] == 0.0); 280 | test_assert(m1[2][0] == 0.0); 281 | test_assert(m1[2][1] == 0.0); 282 | test_assert(m1[2][2] == (5.6+7.8) / (5.6-7.8)); 283 | test_assert(m1[2][3] == 2 * (5.6*7.8) / (5.6-7.8)); 284 | test_assert(m1[3][0] == 0.0); 285 | test_assert(m1[3][1] == 0.0); 286 | test_assert(m1[3][2] == -1.0); 287 | test_assert(m1[3][3] == 0.0); 288 | 289 | const auto m2 = transform::perspective_mat( 290 | 1.2, 3.4, 5.6, 7.8); 291 | test_assert(m2 == m1); 292 | } 293 | 294 | 
TEST_CASE(ortho_mat) // checks every entry of the orthographic matrix built from (1.2, 3.4, 5.6, 7.8), then the reduced-size variant
295 |     {
296 |         CONST_OR_CONSTEXPR auto m1 = transform::ortho_mat(1.2, 3.4, 5.6, 7.8);
297 |         test_assert(m1[0][0] == 2 / 1.2);
298 |         test_assert(m1[0][1] == 0.0);
299 |         test_assert(m1[0][2] == 0.0);
300 |         test_assert(m1[0][3] == 0.0);
301 |         test_assert(m1[1][0] == 0.0);
302 |         test_assert(m1[1][1] == 2 / 3.4);
303 |         test_assert(m1[1][2] == 0.0);
304 |         test_assert(m1[1][3] == 0.0);
305 |         test_assert(m1[2][0] == 0.0);
306 |         test_assert(m1[2][1] == 0.0);
307 |         test_assert(m1[2][2] == 2 / (5.6-7.8));
308 |         test_assert(m1[2][3] == (5.6+7.8) / (5.6-7.8));
309 |         test_assert(m1[3][0] == 0.0);
310 |         test_assert(m1[3][1] == 0.0);
311 |         test_assert(m1[3][2] == 0.0);
312 |         test_assert(m1[3][3] == 1.0);
313 |
314 |         const auto m2 = transform::ortho_mat(1.2, 3.4, 5.6, 7.8);
315 |         test_assert(m2 == dmat3x4(m1)); // compare with the truncated full-size result m1 (was m2 against itself, which is always true)
316 |     }
317 | }
318 |
--------------------------------------------------------------------------------
/include/tue/detail_/simd/sse2/uint16x8.sse2.hpp:
--------------------------------------------------------------------------------
1 | // Copyright Jo Bates 2015.
2 | // Distributed under the Boost Software License, Version 1.0.
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "../../../simd.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static uint16x8 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | std::uint16_t(s.data()[0]), 32 | std::uint16_t(s.data()[1]), 33 | std::uint16_t(s.data()[2]), 34 | std::uint16_t(s.data()[3]), 35 | std::uint16_t(s.data()[4]), 36 | std::uint16_t(s.data()[5]), 37 | std::uint16_t(s.data()[6]), 38 | std::uint16_t(s.data()[7]), 39 | }; 40 | } 41 | 42 | inline static uint16x8 explicit_cast(const bool16x8& s) noexcept; 43 | 44 | inline static uint16x8 explicit_cast(const int16x8& s) noexcept; 45 | 46 | public: 47 | using component_type = std::uint16_t; 48 | 49 | static constexpr int component_count = 8; 50 | 51 | static constexpr bool is_accelerated = true; 52 | 53 | simd() noexcept = default; 54 | 55 | explicit simd(std::uint16_t x) noexcept 56 | : 57 | underlying_(_mm_set1_epi16(x)) 58 | { 59 | } 60 | 61 | template> 62 | inline simd( 63 | std::uint16_t x, std::uint16_t y) noexcept; 64 | 65 | template> 66 | inline simd( 67 | std::uint16_t x, std::uint16_t y, 68 | std::uint16_t z, std::uint16_t w) noexcept; 69 | 70 | template> 71 | inline simd( 72 | std::uint16_t s0, std::uint16_t s1, 73 | std::uint16_t s2, std::uint16_t s3, 74 | std::uint16_t s4, std::uint16_t s5, 75 | std::uint16_t s6, std::uint16_t s7) noexcept 76 | : 77 | underlying_(_mm_setr_epi16( 78 | s0, s1, s2, s3, s4, s5, s6, s7)) 79 | { 80 | } 81 | 82 | template> 83 | inline simd( 84 | std::uint16_t s0, std::uint16_t s1, 85 | std::uint16_t s2, std::uint16_t s3, 86 | std::uint16_t s4, std::uint16_t s5, 87 | 
std::uint16_t s6, std::uint16_t s7, 88 | std::uint16_t s8, std::uint16_t s9, 89 | std::uint16_t s10, std::uint16_t s11, 90 | std::uint16_t s12, std::uint16_t s13, 91 | std::uint16_t s14, std::uint16_t s15) noexcept; 92 | 93 | template 94 | explicit simd(const simd& s) noexcept 95 | { 96 | *this = explicit_cast(s); 97 | } 98 | 99 | simd(__m128i underlying) noexcept 100 | : 101 | underlying_(underlying) 102 | { 103 | } 104 | 105 | operator __m128i() const noexcept 106 | { 107 | return underlying_; 108 | } 109 | 110 | static uint16x8 zero() noexcept 111 | { 112 | return _mm_setzero_si128(); 113 | } 114 | 115 | static uint16x8 load(const std::uint16_t* data) noexcept 116 | { 117 | return _mm_load_si128(reinterpret_cast(data)); 118 | } 119 | 120 | static uint16x8 loadu(const std::uint16_t* data) noexcept 121 | { 122 | return _mm_loadu_si128(reinterpret_cast(data)); 123 | } 124 | 125 | void store(std::uint16_t* data) const noexcept 126 | { 127 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 128 | } 129 | 130 | void storeu(std::uint16_t* data) const noexcept 131 | { 132 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 133 | } 134 | 135 | const std::uint16_t* data() const noexcept 136 | { 137 | return reinterpret_cast(&underlying_); 138 | } 139 | 140 | std::uint16_t* data() noexcept 141 | { 142 | return reinterpret_cast(&underlying_); 143 | } 144 | }; 145 | } 146 | 147 | #include "bool16x8.sse2.hpp" 148 | #include "int16x8.sse2.hpp" 149 | 150 | namespace tue 151 | { 152 | inline uint16x8 uint16x8::explicit_cast(const bool16x8& s) noexcept 153 | { 154 | return __m128i(s); 155 | } 156 | 157 | inline uint16x8 uint16x8::explicit_cast(const int16x8& s) noexcept 158 | { 159 | return __m128i(s); 160 | } 161 | 162 | namespace detail_ 163 | { 164 | inline uint16x8& pre_increment_operator_s(uint16x8& s) noexcept 165 | { 166 | return s = _mm_add_epi16(s, uint16x8(1)); 167 | } 168 | 169 | inline uint16x8 post_increment_operator_s(uint16x8& s) 
noexcept 170 | { 171 | const auto result = s; 172 | s = _mm_add_epi16(s, uint16x8(1)); 173 | return result; 174 | } 175 | 176 | inline uint16x8& pre_decrement_operator_s(uint16x8& s) noexcept 177 | { 178 | return s = _mm_sub_epi16(s, uint16x8(1)); 179 | } 180 | 181 | inline uint16x8 post_decrement_operator_s(uint16x8& s) noexcept 182 | { 183 | const auto result = s; 184 | s = _mm_sub_epi16(s, uint16x8(1)); 185 | return result; 186 | } 187 | 188 | inline uint16x8 bitwise_not_operator_s(const uint16x8& s) noexcept 189 | { 190 | return _mm_xor_si128(s, uint16x8(0xFFFF)); 191 | } 192 | 193 | inline uint16x8 addition_operator_ss( 194 | const uint16x8& lhs, const uint16x8& rhs) noexcept 195 | { 196 | return _mm_add_epi16(lhs, rhs); 197 | } 198 | 199 | inline uint16x8 subtraction_operator_ss( 200 | const uint16x8& lhs, const uint16x8& rhs) noexcept 201 | { 202 | return _mm_sub_epi16(lhs, rhs); 203 | } 204 | 205 | /*inline uint16x8 multiplication_operator_ss( 206 | const uint16x8& lhs, const uint16x8& rhs) noexcept 207 | { 208 | // TODO 209 | } 210 | 211 | inline uint16x8 division_operator_ss( 212 | const uint16x8& lhs, const uint16x8& rhs) noexcept 213 | { 214 | // TODO 215 | } 216 | 217 | inline uint16x8 modulo_operator_ss( 218 | const uint16x8& lhs, const uint16x8& rhs) noexcept 219 | { 220 | // TODO 221 | }*/ 222 | 223 | inline uint16x8 bitwise_and_operator_ss( 224 | const uint16x8& lhs, const uint16x8& rhs) noexcept 225 | { 226 | return _mm_and_si128(lhs, rhs); 227 | } 228 | 229 | inline uint16x8 bitwise_or_operator_ss( 230 | const uint16x8& lhs, const uint16x8& rhs) noexcept 231 | { 232 | return _mm_or_si128(lhs, rhs); 233 | } 234 | 235 | inline uint16x8 bitwise_xor_operator_ss( 236 | const uint16x8& lhs, const uint16x8& rhs) noexcept 237 | { 238 | return _mm_xor_si128(lhs, rhs); 239 | } 240 | 241 | inline uint16x8 bitwise_shift_left_operator_si( 242 | const uint16x8& lhs, int rhs) noexcept 243 | { 244 | return _mm_slli_epi16(lhs, rhs); 245 | } 246 | 247 | inline 
uint16x8 bitwise_shift_right_operator_si( 248 | const uint16x8& lhs, int rhs) noexcept 249 | { 250 | return _mm_srli_epi16(lhs, rhs); 251 | } 252 | 253 | inline uint16x8& addition_assignment_operator_ss( 254 | uint16x8& lhs, const uint16x8& rhs) noexcept 255 | { 256 | return lhs = _mm_add_epi16(lhs, rhs); 257 | } 258 | 259 | inline uint16x8& subtraction_assignment_operator_ss( 260 | uint16x8& lhs, const uint16x8& rhs) noexcept 261 | { 262 | return lhs = _mm_sub_epi16(lhs, rhs); 263 | } 264 | 265 | /*inline uint16x8& multiplication_assignment_operator_ss( 266 | uint16x8& lhs, const uint16x8& rhs) noexcept 267 | { 268 | // TODO 269 | } 270 | 271 | inline uint16x8& division_assignment_operator_ss( 272 | uint16x8& lhs, const uint16x8& rhs) noexcept 273 | { 274 | // TODO 275 | } 276 | 277 | inline uint16x8& modulo_assignment_operator_ss( 278 | uint16x8& lhs, const uint16x8& rhs) noexcept 279 | { 280 | // TODO 281 | }*/ 282 | 283 | inline uint16x8& bitwise_and_assignment_operator_ss( 284 | uint16x8& lhs, const uint16x8& rhs) noexcept 285 | { 286 | return lhs = _mm_and_si128(lhs, rhs); 287 | } 288 | 289 | inline uint16x8& bitwise_or_assignment_operator_ss( 290 | uint16x8& lhs, const uint16x8& rhs) noexcept 291 | { 292 | return lhs = _mm_or_si128(lhs, rhs); 293 | } 294 | 295 | inline uint16x8& bitwise_xor_assignment_operator_ss( 296 | uint16x8& lhs, const uint16x8& rhs) noexcept 297 | { 298 | return lhs = _mm_xor_si128(lhs, rhs); 299 | } 300 | 301 | inline uint16x8& bitwise_shift_left_assignment_operator_si( 302 | uint16x8& lhs, int rhs) noexcept 303 | { 304 | return lhs = _mm_slli_epi16(lhs, rhs); 305 | } 306 | 307 | inline uint16x8& bitwise_shift_right_assignment_operator_si( 308 | uint16x8& lhs, int rhs) noexcept 309 | { 310 | return lhs = _mm_srli_epi16(lhs, rhs); 311 | } 312 | 313 | inline bool equality_operator_ss( 314 | const uint16x8& lhs, const uint16x8& rhs) noexcept 315 | { 316 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 317 | } 318 | 319 | 
inline bool inequality_operator_ss( 320 | const uint16x8& lhs, const uint16x8& rhs) noexcept 321 | { 322 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; /* a full match sets all 16 byte-mask bits, so any other value means some byte differs */ 323 | } 324 | /* Absolute value: returns s unchanged. */ 325 | inline uint16x8 abs_s(const uint16x8& s) noexcept 326 | { 327 | return s; 328 | } 329 | 330 | /*inline uint16x8 min_ss( 331 | const uint16x8& s1, const uint16x8& s2) noexcept 332 | { 333 | // TODO 334 | } 335 | 336 | inline uint16x8 max_ss( 337 | const uint16x8& s1, const uint16x8& s2) noexcept 338 | { 339 | // TODO 340 | }*/ 341 | /* Bitwise AND of conditions and values. NOTE(review): presumably each bool16x8 lane is all-ones or all-zeros, so false lanes become 0 - confirm in bool16x8.sse2.hpp. */ 342 | inline uint16x8 mask_ss( 343 | const bool16x8& conditions, 344 | const uint16x8& values) noexcept 345 | { 346 | return _mm_and_si128(conditions, values); 347 | } 348 | /* Bitwise blend: (conditions & values) | (~conditions & otherwise). */ 349 | inline uint16x8 select_sss( 350 | const bool16x8& conditions, 351 | const uint16x8& values, 352 | const uint16x8& otherwise) noexcept 353 | { 354 | return _mm_or_si128( 355 | _mm_and_si128(conditions, values), 356 | _mm_andnot_si128(conditions, otherwise)); 357 | } 358 | 359 | /*inline bool16x8 less_ss( 360 | const uint16x8& lhs, const uint16x8& rhs) noexcept 361 | { 362 | // TODO 363 | } 364 | 365 | inline bool16x8 less_equal_ss( 366 | const uint16x8& lhs, const uint16x8& rhs) noexcept 367 | { 368 | // TODO 369 | } 370 | 371 | inline bool16x8 greater_ss( 372 | const uint16x8& lhs, const uint16x8& rhs) noexcept 373 | { 374 | // TODO 375 | } 376 | 377 | inline bool16x8 greater_equal_ss( 378 | const uint16x8& lhs, const uint16x8& rhs) noexcept 379 | { 380 | // TODO 381 | }*/ 382 | /* Lane-wise equality: compares the eight 16-bit lanes with _mm_cmpeq_epi16. */ 383 | inline bool16x8 equal_ss( 384 | const uint16x8& lhs, const uint16x8& rhs) noexcept 385 | { 386 | return _mm_cmpeq_epi16(lhs, rhs); 387 | } 388 | /* Lane-wise inequality: the equality mask with every bit flipped (XOR with all-ones). */ 389 | inline bool16x8 not_equal_ss( 390 | const uint16x8& lhs, const uint16x8& rhs) noexcept 391 | { 392 | return _mm_xor_si128(_mm_cmpeq_epi16(lhs, rhs), uint16x8(0xFFFF)); 393 | } 394 | } 395 | } 396 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/uint32x4.sse2.hpp: 
-------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "../../../simd.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static uint32x4 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | std::uint32_t(s.data()[0]), 32 | std::uint32_t(s.data()[1]), 33 | std::uint32_t(s.data()[2]), 34 | std::uint32_t(s.data()[3]), 35 | }; 36 | } 37 | 38 | inline static uint32x4 explicit_cast(const bool32x4& s) noexcept; 39 | 40 | inline static uint32x4 explicit_cast(const float32x4& s) noexcept; 41 | 42 | inline static uint32x4 explicit_cast(const int32x4& s) noexcept; 43 | 44 | public: 45 | using component_type = std::uint32_t; 46 | 47 | static constexpr int component_count = 4; 48 | 49 | static constexpr bool is_accelerated = true; 50 | 51 | simd() noexcept = default; 52 | 53 | explicit simd(std::uint32_t x) noexcept 54 | : 55 | underlying_(_mm_set1_epi32(x)) 56 | { 57 | } 58 | 59 | template> 60 | inline simd( 61 | std::uint32_t x, std::uint32_t y) noexcept; 62 | 63 | template> 64 | simd( 65 | std::uint32_t x, std::uint32_t y, 66 | std::uint32_t z, std::uint32_t w) noexcept 67 | : 68 | underlying_(_mm_setr_epi32(x, y, z, w)) 69 | { 70 | } 71 | 72 | template> 73 | inline simd( 74 | std::uint32_t s0, std::uint32_t s1, 75 | std::uint32_t s2, std::uint32_t s3, 76 | std::uint32_t s4, std::uint32_t s5, 77 | std::uint32_t s6, std::uint32_t s7) noexcept; 78 | 79 | template> 80 | inline simd( 81 | std::uint32_t s0, std::uint32_t 
s1, 82 | std::uint32_t s2, std::uint32_t s3, 83 | std::uint32_t s4, std::uint32_t s5, 84 | std::uint32_t s6, std::uint32_t s7, 85 | std::uint32_t s8, std::uint32_t s9, 86 | std::uint32_t s10, std::uint32_t s11, 87 | std::uint32_t s12, std::uint32_t s13, 88 | std::uint32_t s14, std::uint32_t s15) noexcept; 89 | 90 | template 91 | explicit simd(const simd& s) noexcept 92 | { 93 | *this = explicit_cast(s); 94 | } 95 | 96 | simd(__m128i underlying) noexcept 97 | : 98 | underlying_(underlying) 99 | { 100 | } 101 | 102 | operator __m128i() const noexcept 103 | { 104 | return underlying_; 105 | } 106 | /* Returns a vector with every bit cleared. */ 107 | static uint32x4 zero() noexcept 108 | { 109 | return _mm_setzero_si128(); 110 | } 111 | /* Loads four components from data via _mm_load_si128 (aligned load: data must be 16-byte aligned). */ 112 | static uint32x4 load(const std::uint32_t* data) noexcept 113 | { 114 | return _mm_load_si128(reinterpret_cast(data)); 115 | } 116 | /* Loads four components from data via _mm_loadu_si128 (no alignment requirement). */ 117 | static uint32x4 loadu(const std::uint32_t* data) noexcept 118 | { 119 | return _mm_loadu_si128(reinterpret_cast(data)); 120 | } 121 | /* Stores the four components to data via _mm_store_si128 (aligned store: data must be 16-byte aligned). */ 122 | void store(std::uint32_t* data) const noexcept 123 | { 124 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 125 | } 126 | /* Stores the four components to data via _mm_storeu_si128 (no alignment requirement). */ 127 | void storeu(std::uint32_t* data) const noexcept 128 | { 129 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 130 | } 131 | 132 | const std::uint32_t* data() const noexcept 133 | { 134 | return reinterpret_cast(&underlying_); 135 | } 136 | 137 | std::uint32_t* data() noexcept 138 | { 139 | return reinterpret_cast(&underlying_); 140 | } 141 | }; 142 | } 143 | 144 | #include "../sse/bool32x4.sse.hpp" 145 | #include "../sse/float32x4.sse.hpp" 146 | #include "int32x4.sse2.hpp" 147 | 148 | namespace tue 149 | { 150 | inline uint32x4 uint32x4::explicit_cast(const bool32x4& s) noexcept 151 | { 152 | return __m128i(s); 153 | } 154 | /* Converts each float lane to an integer lane, truncating toward zero like the std::uint32_t(...) conversions in the generic explicit_cast (the rounding _mm_cvtps_epi32 turned 1.5f into 2). NOTE(review): the underlying conversion is signed, so inputs >= 2^31 are still out of its range - confirm whether callers need them. */ 155 | inline uint32x4 uint32x4::explicit_cast(const float32x4& s) noexcept 156 | { 157 | return _mm_cvttps_epi32(s); 158 | } 159 | 160 | inline uint32x4 uint32x4::explicit_cast(const int32x4& s) noexcept 161 | { 162 | return 
__m128i(s); 163 | } 164 | 165 | namespace detail_ 166 | { 167 | inline uint32x4& pre_increment_operator_s(uint32x4& s) noexcept 168 | { 169 | return s = _mm_add_epi32(s, uint32x4(1)); 170 | } 171 | 172 | inline uint32x4 post_increment_operator_s(uint32x4& s) noexcept 173 | { 174 | const auto result = s; 175 | s = _mm_add_epi32(s, uint32x4(1)); 176 | return result; 177 | } 178 | 179 | inline uint32x4& pre_decrement_operator_s(uint32x4& s) noexcept 180 | { 181 | return s = _mm_sub_epi32(s, uint32x4(1)); 182 | } 183 | 184 | inline uint32x4 post_decrement_operator_s(uint32x4& s) noexcept 185 | { 186 | const auto result = s; 187 | s = _mm_sub_epi32(s, uint32x4(1)); 188 | return result; 189 | } 190 | 191 | inline uint32x4 bitwise_not_operator_s(const uint32x4& s) noexcept 192 | { 193 | return _mm_xor_si128(s, uint32x4(0xFFFFFFFF)); 194 | } 195 | 196 | inline uint32x4 addition_operator_ss( 197 | const uint32x4& lhs, const uint32x4& rhs) noexcept 198 | { 199 | return _mm_add_epi32(lhs, rhs); 200 | } 201 | 202 | inline uint32x4 subtraction_operator_ss( 203 | const uint32x4& lhs, const uint32x4& rhs) noexcept 204 | { 205 | return _mm_sub_epi32(lhs, rhs); 206 | } 207 | 208 | /*inline uint32x4 multiplication_operator_ss( 209 | const uint32x4& lhs, const uint32x4& rhs) noexcept 210 | { 211 | // TODO 212 | } 213 | 214 | inline uint32x4 division_operator_ss( 215 | const uint32x4& lhs, const uint32x4& rhs) noexcept 216 | { 217 | // TODO 218 | } 219 | 220 | inline uint32x4 modulo_operator_ss( 221 | const uint32x4& lhs, const uint32x4& rhs) noexcept 222 | { 223 | // TODO 224 | }*/ 225 | 226 | inline uint32x4 bitwise_and_operator_ss( 227 | const uint32x4& lhs, const uint32x4& rhs) noexcept 228 | { 229 | return _mm_and_si128(lhs, rhs); 230 | } 231 | 232 | inline uint32x4 bitwise_or_operator_ss( 233 | const uint32x4& lhs, const uint32x4& rhs) noexcept 234 | { 235 | return _mm_or_si128(lhs, rhs); 236 | } 237 | 238 | inline uint32x4 bitwise_xor_operator_ss( 239 | const uint32x4& lhs, 
const uint32x4& rhs) noexcept 240 | { 241 | return _mm_xor_si128(lhs, rhs); 242 | } 243 | 244 | inline uint32x4 bitwise_shift_left_operator_si( 245 | const uint32x4& lhs, int rhs) noexcept 246 | { 247 | return _mm_slli_epi32(lhs, rhs); 248 | } 249 | 250 | inline uint32x4 bitwise_shift_right_operator_si( 251 | const uint32x4& lhs, int rhs) noexcept 252 | { 253 | return _mm_srli_epi32(lhs, rhs); 254 | } 255 | 256 | inline uint32x4& addition_assignment_operator_ss( 257 | uint32x4& lhs, const uint32x4& rhs) noexcept 258 | { 259 | return lhs = _mm_add_epi32(lhs, rhs); 260 | } 261 | 262 | inline uint32x4& subtraction_assignment_operator_ss( 263 | uint32x4& lhs, const uint32x4& rhs) noexcept 264 | { 265 | return lhs = _mm_sub_epi32(lhs, rhs); 266 | } 267 | 268 | /*inline uint32x4& multiplication_assignment_operator_ss( 269 | uint32x4& lhs, const uint32x4& rhs) noexcept 270 | { 271 | // TODO 272 | } 273 | 274 | inline uint32x4& division_assignment_operator_ss( 275 | uint32x4& lhs, const uint32x4& rhs) noexcept 276 | { 277 | // TODO 278 | } 279 | 280 | inline uint32x4& modulo_assignment_operator_ss( 281 | uint32x4& lhs, const uint32x4& rhs) noexcept 282 | { 283 | // TODO 284 | }*/ 285 | 286 | inline uint32x4& bitwise_and_assignment_operator_ss( 287 | uint32x4& lhs, const uint32x4& rhs) noexcept 288 | { 289 | return lhs = _mm_and_si128(lhs, rhs); 290 | } 291 | 292 | inline uint32x4& bitwise_or_assignment_operator_ss( 293 | uint32x4& lhs, const uint32x4& rhs) noexcept 294 | { 295 | return lhs = _mm_or_si128(lhs, rhs); 296 | } 297 | 298 | inline uint32x4& bitwise_xor_assignment_operator_ss( 299 | uint32x4& lhs, const uint32x4& rhs) noexcept 300 | { 301 | return lhs = _mm_xor_si128(lhs, rhs); 302 | } 303 | 304 | inline uint32x4& bitwise_shift_left_assignment_operator_si( 305 | uint32x4& lhs, int rhs) noexcept 306 | { 307 | return lhs = _mm_slli_epi32(lhs, rhs); 308 | } 309 | 310 | inline uint32x4& bitwise_shift_right_assignment_operator_si( 311 | uint32x4& lhs, int rhs) 
noexcept 312 | { 313 | return lhs = _mm_srli_epi32(lhs, rhs); 314 | } 315 | 316 | inline bool equality_operator_ss( 317 | const uint32x4& lhs, const uint32x4& rhs) noexcept 318 | { 319 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 320 | } 321 | 322 | inline bool inequality_operator_ss( 323 | const uint32x4& lhs, const uint32x4& rhs) noexcept 324 | { 325 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 326 | } 327 | 328 | inline uint32x4 abs_s(const uint32x4& s) noexcept 329 | { 330 | return s; 331 | } 332 | 333 | /*inline uint32x4 min_ss( 334 | const uint32x4& s1, const uint32x4& s2) noexcept 335 | { 336 | // TODO 337 | } 338 | 339 | inline uint32x4 max_ss( 340 | const uint32x4& s1, const uint32x4& s2) noexcept 341 | { 342 | // TODO 343 | }*/ 344 | 345 | inline uint32x4 mask_ss( 346 | const bool32x4& conditions, 347 | const uint32x4& values) noexcept 348 | { 349 | return _mm_and_si128(conditions, values); 350 | } 351 | 352 | inline uint32x4 select_sss( 353 | const bool32x4& conditions, 354 | const uint32x4& values, 355 | const uint32x4& otherwise) noexcept 356 | { 357 | return _mm_or_si128( 358 | _mm_and_si128(conditions, values), 359 | _mm_andnot_si128(conditions, otherwise)); 360 | } 361 | 362 | /*inline bool32x4 less_ss( 363 | const uint32x4& lhs, const uint32x4& rhs) noexcept 364 | { 365 | // TODO 366 | } 367 | 368 | inline bool32x4 less_equal_ss( 369 | const uint32x4& lhs, const uint32x4& rhs) noexcept 370 | { 371 | // TODO 372 | } 373 | 374 | inline bool32x4 greater_ss( 375 | const uint32x4& lhs, const uint32x4& rhs) noexcept 376 | { 377 | // TODO 378 | } 379 | 380 | inline bool32x4 greater_equal_ss( 381 | const uint32x4& lhs, const uint32x4& rhs) noexcept 382 | { 383 | // TODO 384 | }*/ 385 | 386 | inline bool32x4 equal_ss( 387 | const uint32x4& lhs, const uint32x4& rhs) noexcept 388 | { 389 | return _mm_cmpeq_epi32(lhs, rhs); 390 | } 391 | 392 | inline bool32x4 not_equal_ss( 393 | const uint32x4& lhs, const uint32x4& rhs) 
noexcept 394 | { 395 | return _mm_xor_si128( 396 | _mm_cmpeq_epi32(lhs, rhs), uint32x4(0xFFFFFFFF)); 397 | } 398 | } 399 | } 400 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/int64x2.sse2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "../../../simd.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static int64x2 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | std::int64_t(s.data()[0]), 32 | std::int64_t(s.data()[1]), 33 | }; 34 | } 35 | 36 | inline static int64x2 explicit_cast(const bool64x2& s) noexcept; 37 | 38 | inline static int64x2 explicit_cast(const uint64x2& s) noexcept; 39 | 40 | public: 41 | using component_type = std::int64_t; 42 | 43 | static constexpr int component_count = 2; 44 | 45 | static constexpr bool is_accelerated = true; 46 | 47 | simd() noexcept = default; 48 | 49 | explicit simd(std::int64_t x) noexcept 50 | : 51 | underlying_(_mm_set1_epi64x(x)) 52 | { 53 | } 54 | 55 | template> 56 | inline simd( 57 | std::int64_t x, std::int64_t y) noexcept 58 | : 59 | underlying_(_mm_set_epi64x(y, x)) 60 | { 61 | } 62 | 63 | template> 64 | inline simd( 65 | std::int64_t x, std::int64_t y, 66 | std::int64_t z, std::int64_t w) noexcept; 67 | 68 | template> 69 | inline simd( 70 | std::int64_t s0, std::int64_t s1, 71 | std::int64_t s2, std::int64_t s3, 72 | std::int64_t s4, std::int64_t s5, 73 | std::int64_t s6, 
std::int64_t s7) noexcept; 74 | 75 | template> 76 | inline simd( 77 | std::int64_t s0, std::int64_t s1, 78 | std::int64_t s2, std::int64_t s3, 79 | std::int64_t s4, std::int64_t s5, 80 | std::int64_t s6, std::int64_t s7, 81 | std::int64_t s8, std::int64_t s9, 82 | std::int64_t s10, std::int64_t s11, 83 | std::int64_t s12, std::int64_t s13, 84 | std::int64_t s14, std::int64_t s15) noexcept; 85 | 86 | template 87 | explicit simd(const simd& s) noexcept 88 | { 89 | *this = explicit_cast(s); 90 | } 91 | 92 | simd(__m128i underlying) noexcept 93 | : 94 | underlying_(underlying) 95 | { 96 | } 97 | 98 | operator __m128i() const noexcept 99 | { 100 | return underlying_; 101 | } 102 | 103 | static int64x2 zero() noexcept 104 | { 105 | return _mm_setzero_si128(); 106 | } 107 | 108 | static int64x2 load(const std::int64_t* data) noexcept 109 | { 110 | return _mm_load_si128(reinterpret_cast(data)); 111 | } 112 | 113 | static int64x2 loadu(const std::int64_t* data) noexcept 114 | { 115 | return _mm_loadu_si128(reinterpret_cast(data)); 116 | } 117 | 118 | void store(std::int64_t* data) const noexcept 119 | { 120 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 121 | } 122 | 123 | void storeu(std::int64_t* data) const noexcept 124 | { 125 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 126 | } 127 | 128 | const std::int64_t* data() const noexcept 129 | { 130 | return reinterpret_cast(&underlying_); 131 | } 132 | 133 | std::int64_t* data() noexcept 134 | { 135 | return reinterpret_cast(&underlying_); 136 | } 137 | }; 138 | } 139 | 140 | #include "bool64x2.sse2.hpp" 141 | #include "uint64x2.sse2.hpp" 142 | 143 | namespace tue 144 | { 145 | inline int64x2 int64x2::explicit_cast(const bool64x2& s) noexcept 146 | { 147 | return __m128i(s); 148 | } 149 | 150 | inline int64x2 int64x2::explicit_cast(const uint64x2& s) noexcept 151 | { 152 | return __m128i(s); 153 | } 154 | 155 | namespace detail_ 156 | { 157 | inline int64x2 unary_plus_operator_s(const 
int64x2& s) noexcept 158 | { 159 | return s; 160 | } 161 | 162 | inline int64x2& pre_increment_operator_s(int64x2& s) noexcept 163 | { 164 | return s = _mm_add_epi64(s, int64x2(1)); 165 | } 166 | 167 | inline int64x2 post_increment_operator_s(int64x2& s) noexcept 168 | { 169 | const auto result = s; 170 | s = _mm_add_epi64(s, int64x2(1)); 171 | return result; 172 | } 173 | 174 | inline int64x2 unary_minus_operator_s(const int64x2& s) noexcept 175 | { 176 | return _mm_sub_epi64(_mm_setzero_si128(), s); 177 | } 178 | 179 | inline int64x2& pre_decrement_operator_s(int64x2& s) noexcept 180 | { 181 | return s = _mm_sub_epi64(s, int64x2(1)); 182 | } 183 | 184 | inline int64x2 post_decrement_operator_s(int64x2& s) noexcept 185 | { 186 | const auto result = s; 187 | s = _mm_sub_epi64(s, int64x2(1)); 188 | return result; 189 | } 190 | 191 | inline int64x2 bitwise_not_operator_s(const int64x2& s) noexcept 192 | { 193 | return _mm_xor_si128(s, int64x2(~0ull)); 194 | } 195 | 196 | inline int64x2 addition_operator_ss( 197 | const int64x2& lhs, const int64x2& rhs) noexcept 198 | { 199 | return _mm_add_epi64(lhs, rhs); 200 | } 201 | 202 | inline int64x2 subtraction_operator_ss( 203 | const int64x2& lhs, const int64x2& rhs) noexcept 204 | { 205 | return _mm_sub_epi64(lhs, rhs); 206 | } 207 | 208 | /*inline int64x2 multiplication_operator_ss( 209 | const int64x2& lhs, const int64x2& rhs) noexcept 210 | { 211 | // TODO 212 | } 213 | 214 | inline int64x2 division_operator_ss( 215 | const int64x2& lhs, const int64x2& rhs) noexcept 216 | { 217 | // TODO 218 | } 219 | 220 | inline int64x2 modulo_operator_ss( 221 | const int64x2& lhs, const int64x2& rhs) noexcept 222 | { 223 | // TODO 224 | }*/ 225 | 226 | inline int64x2 bitwise_and_operator_ss( 227 | const int64x2& lhs, const int64x2& rhs) noexcept 228 | { 229 | return _mm_and_si128(lhs, rhs); 230 | } 231 | 232 | inline int64x2 bitwise_or_operator_ss( 233 | const int64x2& lhs, const int64x2& rhs) noexcept 234 | { 235 | return 
_mm_or_si128(lhs, rhs); 236 | } 237 | 238 | inline int64x2 bitwise_xor_operator_ss( 239 | const int64x2& lhs, const int64x2& rhs) noexcept 240 | { 241 | return _mm_xor_si128(lhs, rhs); 242 | } 243 | /* Shifts each 64-bit lane left by rhs bits, filling the low bits with zeros. */ 244 | inline int64x2 bitwise_shift_left_operator_si( 245 | const int64x2& lhs, int rhs) noexcept 246 | { 247 | return _mm_slli_epi64(lhs, rhs); 248 | } 249 | /* Arithmetic (sign-extending) right shift of each signed 64-bit lane by rhs bits. SSE2 has no 64-bit arithmetic shift, so the logical shift is OR-ed with the lane's sign mask moved into the vacated high bits; for rhs == 0 the mask is shifted out entirely (count 64 yields zero). */ 250 | inline int64x2 bitwise_shift_right_operator_si( 251 | const int64x2& lhs, int rhs) noexcept 252 | { 253 | return _mm_or_si128(_mm_srli_epi64(lhs, rhs), _mm_slli_epi64(_mm_shuffle_epi32(_mm_srai_epi32(lhs, 31), _MM_SHUFFLE(3, 3, 1, 1)), 64 - rhs)); 254 | } 255 | 256 | inline int64x2& addition_assignment_operator_ss( 257 | int64x2& lhs, const int64x2& rhs) noexcept 258 | { 259 | return lhs = _mm_add_epi64(lhs, rhs); 260 | } 261 | 262 | inline int64x2& subtraction_assignment_operator_ss( 263 | int64x2& lhs, const int64x2& rhs) noexcept 264 | { 265 | return lhs = _mm_sub_epi64(lhs, rhs); 266 | } 267 | 268 | /*inline int64x2& multiplication_assignment_operator_ss( 269 | int64x2& lhs, const int64x2& rhs) noexcept 270 | { 271 | // TODO 272 | } 273 | 274 | inline int64x2& division_assignment_operator_ss( 275 | int64x2& lhs, const int64x2& rhs) noexcept 276 | { 277 | // TODO 278 | } 279 | 280 | inline int64x2& modulo_assignment_operator_ss( 281 | int64x2& lhs, const int64x2& rhs) noexcept 282 | { 283 | // TODO 284 | }*/ 285 | 286 | inline int64x2& bitwise_and_assignment_operator_ss( 287 | int64x2& lhs, const int64x2& rhs) noexcept 288 | { 289 | return lhs = _mm_and_si128(lhs, rhs); 290 | } 291 | 292 | inline int64x2& bitwise_or_assignment_operator_ss( 293 | int64x2& lhs, const int64x2& rhs) noexcept 294 | { 295 | return lhs = _mm_or_si128(lhs, rhs); 296 | } 297 | 298 | inline int64x2& bitwise_xor_assignment_operator_ss( 299 | int64x2& lhs, const int64x2& rhs) noexcept 300 | { 301 | return lhs = _mm_xor_si128(lhs, rhs); 302 | } 303 | 304 | inline int64x2& bitwise_shift_left_assignment_operator_si( 305 | int64x2& lhs, int rhs) noexcept 306 | { 307 | return lhs = _mm_slli_epi64(lhs, rhs); 308 | } 309 | 310 | inline int64x2& 
bitwise_shift_right_assignment_operator_si( 311 | int64x2& lhs, int rhs) noexcept 312 | { /* Arithmetic (sign-extending) right shift in place: SSE2 has no 64-bit arithmetic shift, so the logical shift is OR-ed with the lane's sign mask moved into the vacated high bits (count 64 yields zero when rhs == 0). */ 313 | return lhs = _mm_or_si128(_mm_srli_epi64(lhs, rhs), _mm_slli_epi64(_mm_shuffle_epi32(_mm_srai_epi32(lhs, 31), _MM_SHUFFLE(3, 3, 1, 1)), 64 - rhs)); 314 | } 315 | /* True when every byte of lhs and rhs compares equal (all 16 byte-mask bits set). */ 316 | inline bool equality_operator_ss( 317 | const int64x2& lhs, const int64x2& rhs) noexcept 318 | { 319 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 320 | } 321 | /* True when at least one byte of lhs and rhs differs. */ 322 | inline bool inequality_operator_ss( 323 | const int64x2& lhs, const int64x2& rhs) noexcept 324 | { 325 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 326 | } 327 | /* Absolute value of each 64-bit lane: selects the negated value where the lane is negative, the original elsewhere. */ 328 | inline int64x2 abs_s(const int64x2& s) noexcept 329 | { 330 | const auto nmask = _mm_shuffle_epi32( 331 | _mm_cmplt_epi32(s, _mm_setzero_si128()), 332 | _MM_SHUFFLE(3, 3, 1, 1)); /* sign test on the 32-bit halves, then the high half's result copied across each 64-bit lane */ 333 | return _mm_or_si128( 334 | _mm_and_si128(nmask, unary_minus_operator_s(s)), 335 | _mm_andnot_si128(nmask, s)); 336 | } 337 | 338 | /*inline int64x2 min_ss( 339 | const int64x2& s1, const int64x2& s2) noexcept 340 | { 341 | // TODO 342 | } 343 | 344 | inline int64x2 max_ss( 345 | const int64x2& s1, const int64x2& s2) noexcept 346 | { 347 | // TODO 348 | }*/ 349 | /* Bitwise AND of conditions and values. */ 350 | inline int64x2 mask_ss( 351 | const bool64x2& conditions, 352 | const int64x2& values) noexcept 353 | { 354 | return _mm_and_si128(conditions, values); 355 | } 356 | /* Bitwise blend: (conditions & values) | (~conditions & otherwise). */ 357 | inline int64x2 select_sss( 358 | const bool64x2& conditions, 359 | const int64x2& values, 360 | const int64x2& otherwise) noexcept 361 | { 362 | return _mm_or_si128( 363 | _mm_and_si128(conditions, values), 364 | _mm_andnot_si128(conditions, otherwise)); 365 | } 366 | 367 | /*inline bool64x2 less_ss( 368 | const int64x2& lhs, const int64x2& rhs) noexcept 369 | { 370 | // TODO 371 | } 372 | 373 | inline bool64x2 less_equal_ss( 374 | const int64x2& lhs, const int64x2& rhs) noexcept 375 | { 376 | // TODO 377 | } 378 | 379 | inline bool64x2 greater_ss( 380 | const int64x2& lhs, const int64x2& rhs) noexcept 381 | { 382 | // TODO 383 | } 384 | 385 | inline bool64x2 greater_equal_ss( 386 | const int64x2& lhs, const int64x2& rhs) noexcept 387 
| { 388 | // TODO 389 | }*/ 390 | 391 | inline bool64x2 equal_ss( 392 | const int64x2& lhs, const int64x2& rhs) noexcept 393 | { 394 | const auto cmp = _mm_cmpeq_epi32(lhs, rhs); 395 | const auto hi = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(3, 3, 1, 1)); 396 | const auto lo = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 2, 0, 0)); 397 | return _mm_and_si128(hi, lo); 398 | } 399 | 400 | inline bool64x2 not_equal_ss( 401 | const int64x2& lhs, const int64x2& rhs) noexcept 402 | { 403 | return _mm_xor_si128(equal_ss(lhs, rhs), int64x2(~0ull)); 404 | } 405 | } 406 | } 407 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/uint8x16.sse2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "../../../simd.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static uint8x16 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | std::uint8_t(s.data()[0]), 32 | std::uint8_t(s.data()[1]), 33 | std::uint8_t(s.data()[2]), 34 | std::uint8_t(s.data()[3]), 35 | std::uint8_t(s.data()[4]), 36 | std::uint8_t(s.data()[5]), 37 | std::uint8_t(s.data()[6]), 38 | std::uint8_t(s.data()[7]), 39 | std::uint8_t(s.data()[8]), 40 | std::uint8_t(s.data()[9]), 41 | std::uint8_t(s.data()[10]), 42 | std::uint8_t(s.data()[11]), 43 | std::uint8_t(s.data()[12]), 44 | std::uint8_t(s.data()[13]), 45 | std::uint8_t(s.data()[14]), 46 | std::uint8_t(s.data()[15]), 47 | }; 48 | } 49 | 50 | inline 
static uint8x16 explicit_cast(const bool8x16& s) noexcept; 51 | 52 | inline static uint8x16 explicit_cast(const int8x16& s) noexcept; 53 | 54 | public: 55 | using component_type = std::uint8_t; 56 | 57 | static constexpr int component_count = 16; 58 | 59 | static constexpr bool is_accelerated = true; 60 | 61 | simd() noexcept = default; 62 | 63 | explicit simd(std::uint8_t x) noexcept 64 | : 65 | underlying_(_mm_set1_epi8(x)) 66 | { 67 | } 68 | 69 | template> 70 | inline simd( 71 | std::uint8_t x, std::uint8_t y) noexcept; 72 | 73 | template> 74 | inline simd( 75 | std::uint8_t x, std::uint8_t y, 76 | std::uint8_t z, std::uint8_t w) noexcept; 77 | 78 | template> 79 | inline simd( 80 | std::uint8_t s0, std::uint8_t s1, 81 | std::uint8_t s2, std::uint8_t s3, 82 | std::uint8_t s4, std::uint8_t s5, 83 | std::uint8_t s6, std::uint8_t s7) noexcept; 84 | 85 | template> 86 | inline simd( 87 | std::uint8_t s0, std::uint8_t s1, 88 | std::uint8_t s2, std::uint8_t s3, 89 | std::uint8_t s4, std::uint8_t s5, 90 | std::uint8_t s6, std::uint8_t s7, 91 | std::uint8_t s8, std::uint8_t s9, 92 | std::uint8_t s10, std::uint8_t s11, 93 | std::uint8_t s12, std::uint8_t s13, 94 | std::uint8_t s14, std::uint8_t s15) noexcept 95 | : 96 | underlying_(_mm_setr_epi8( 97 | s0, s1, s2, s3, s4, s5, s6, s7, 98 | s8, s9, s10, s11, s12, s13, s14, s15)) 99 | { 100 | } 101 | 102 | template 103 | explicit simd(const simd& s) noexcept 104 | { 105 | *this = explicit_cast(s); 106 | } 107 | 108 | simd(__m128i underlying) noexcept 109 | : 110 | underlying_(underlying) 111 | { 112 | } 113 | 114 | operator __m128i() const noexcept 115 | { 116 | return underlying_; 117 | } 118 | 119 | static uint8x16 zero() noexcept 120 | { 121 | return _mm_setzero_si128(); 122 | } 123 | 124 | static uint8x16 load(const std::uint8_t* data) noexcept 125 | { 126 | return _mm_load_si128(reinterpret_cast(data)); 127 | } 128 | 129 | static uint8x16 loadu(const std::uint8_t* data) noexcept 130 | { 131 | return 
_mm_loadu_si128(reinterpret_cast(data)); 132 | } 133 | 134 | void store(std::uint8_t* data) const noexcept 135 | { 136 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 137 | } 138 | 139 | void storeu(std::uint8_t* data) const noexcept 140 | { 141 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 142 | } 143 | 144 | const std::uint8_t* data() const noexcept 145 | { 146 | return reinterpret_cast(&underlying_); 147 | } 148 | 149 | std::uint8_t* data() noexcept 150 | { 151 | return reinterpret_cast(&underlying_); 152 | } 153 | }; 154 | } 155 | 156 | #include "bool8x16.sse2.hpp" 157 | #include "int8x16.sse2.hpp" 158 | 159 | namespace tue 160 | { 161 | inline uint8x16 uint8x16::explicit_cast(const bool8x16& s) noexcept 162 | { 163 | return __m128i(s); 164 | } 165 | 166 | inline uint8x16 uint8x16::explicit_cast(const int8x16& s) noexcept 167 | { 168 | return __m128i(s); 169 | } 170 | 171 | namespace detail_ 172 | { 173 | inline uint8x16& pre_increment_operator_s(uint8x16& s) noexcept 174 | { 175 | return s = _mm_add_epi8(s, uint8x16(1)); 176 | } 177 | 178 | inline uint8x16 post_increment_operator_s(uint8x16& s) noexcept 179 | { 180 | const auto result = s; 181 | s = _mm_add_epi8(s, uint8x16(1)); 182 | return result; 183 | } 184 | 185 | inline uint8x16& pre_decrement_operator_s(uint8x16& s) noexcept 186 | { 187 | return s = _mm_sub_epi8(s, uint8x16(1)); 188 | } 189 | 190 | inline uint8x16 post_decrement_operator_s(uint8x16& s) noexcept 191 | { 192 | const auto result = s; 193 | s = _mm_sub_epi8(s, uint8x16(1)); 194 | return result; 195 | } 196 | 197 | inline uint8x16 bitwise_not_operator_s(const uint8x16& s) noexcept 198 | { 199 | return _mm_xor_si128(s, uint8x16(0xFF)); 200 | } 201 | 202 | inline uint8x16 addition_operator_ss( 203 | const uint8x16& lhs, const uint8x16& rhs) noexcept 204 | { 205 | return _mm_add_epi8(lhs, rhs); 206 | } 207 | 208 | inline uint8x16 subtraction_operator_ss( 209 | const uint8x16& lhs, const uint8x16& rhs) 
noexcept 210 | { 211 | return _mm_sub_epi8(lhs, rhs); 212 | } 213 | 214 | /*inline uint8x16 multiplication_operator_ss( 215 | const uint8x16& lhs, const uint8x16& rhs) noexcept 216 | { 217 | // TODO 218 | } 219 | 220 | inline uint8x16 division_operator_ss( 221 | const uint8x16& lhs, const uint8x16& rhs) noexcept 222 | { 223 | // TODO 224 | } 225 | 226 | inline uint8x16 modulo_operator_ss( 227 | const uint8x16& lhs, const uint8x16& rhs) noexcept 228 | { 229 | // TODO 230 | }*/ 231 | 232 | inline uint8x16 bitwise_and_operator_ss( 233 | const uint8x16& lhs, const uint8x16& rhs) noexcept 234 | { 235 | return _mm_and_si128(lhs, rhs); 236 | } 237 | 238 | inline uint8x16 bitwise_or_operator_ss( 239 | const uint8x16& lhs, const uint8x16& rhs) noexcept 240 | { 241 | return _mm_or_si128(lhs, rhs); 242 | } 243 | 244 | inline uint8x16 bitwise_xor_operator_ss( 245 | const uint8x16& lhs, const uint8x16& rhs) noexcept 246 | { 247 | return _mm_xor_si128(lhs, rhs); 248 | } 249 | 250 | /*inline uint8x16 bitwise_shift_left_operator_si( 251 | const uint8x16& lhs, int rhs) noexcept 252 | { 253 | // TODO 254 | } 255 | 256 | inline uint8x16 bitwise_shift_right_operator_si( 257 | const uint8x16& lhs, int rhs) noexcept 258 | { 259 | // TODO 260 | }*/ 261 | 262 | inline uint8x16& addition_assignment_operator_ss( 263 | uint8x16& lhs, const uint8x16& rhs) noexcept 264 | { 265 | return lhs = _mm_add_epi8(lhs, rhs); 266 | } 267 | 268 | inline uint8x16& subtraction_assignment_operator_ss( 269 | uint8x16& lhs, const uint8x16& rhs) noexcept 270 | { 271 | return lhs = _mm_sub_epi8(lhs, rhs); 272 | } 273 | 274 | /*inline uint8x16& multiplication_assignment_operator_ss( 275 | uint8x16& lhs, const uint8x16& rhs) noexcept 276 | { 277 | // TODO 278 | } 279 | 280 | inline uint8x16& division_assignment_operator_ss( 281 | uint8x16& lhs, const uint8x16& rhs) noexcept 282 | { 283 | // TODO 284 | } 285 | 286 | inline uint8x16& modulo_assignment_operator_ss( 287 | uint8x16& lhs, const uint8x16& rhs) noexcept 
288 | { 289 | // TODO 290 | }*/ 291 | 292 | inline uint8x16& bitwise_and_assignment_operator_ss( 293 | uint8x16& lhs, const uint8x16& rhs) noexcept 294 | { 295 | return lhs = _mm_and_si128(lhs, rhs); 296 | } 297 | 298 | inline uint8x16& bitwise_or_assignment_operator_ss( 299 | uint8x16& lhs, const uint8x16& rhs) noexcept 300 | { 301 | return lhs = _mm_or_si128(lhs, rhs); 302 | } 303 | 304 | inline uint8x16& bitwise_xor_assignment_operator_ss( 305 | uint8x16& lhs, const uint8x16& rhs) noexcept 306 | { 307 | return lhs = _mm_xor_si128(lhs, rhs); 308 | } 309 | 310 | /*inline uint8x16& bitwise_shift_left_assignment_operator_si( 311 | uint8x16& lhs, int rhs) noexcept 312 | { 313 | // TODO 314 | } 315 | 316 | inline uint8x16& bitwise_shift_right_assignment_operator_si( 317 | uint8x16& lhs, int rhs) noexcept 318 | { 319 | // TODO 320 | }*/ 321 | 322 | inline bool equality_operator_ss( 323 | const uint8x16& lhs, const uint8x16& rhs) noexcept 324 | { 325 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 326 | } 327 | 328 | inline bool inequality_operator_ss( 329 | const uint8x16& lhs, const uint8x16& rhs) noexcept 330 | { 331 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 332 | } 333 | 334 | inline uint8x16 abs_s(const uint8x16& s) noexcept 335 | { 336 | return s; 337 | } 338 | 339 | inline uint8x16 min_ss( 340 | const uint8x16& s1, const uint8x16& s2) noexcept 341 | { 342 | return _mm_min_epu8(s1, s2); 343 | } 344 | 345 | inline uint8x16 max_ss( 346 | const uint8x16& s1, const uint8x16& s2) noexcept 347 | { 348 | return _mm_max_epu8(s1, s2); 349 | } 350 | 351 | inline uint8x16 mask_ss( 352 | const bool8x16& conditions, 353 | const uint8x16& values) noexcept 354 | { 355 | return _mm_and_si128(conditions, values); 356 | } 357 | 358 | inline uint8x16 select_sss( 359 | const bool8x16& conditions, 360 | const uint8x16& values, 361 | const uint8x16& otherwise) noexcept 362 | { 363 | return _mm_or_si128( 364 | _mm_and_si128(conditions, values), 365 
| _mm_andnot_si128(conditions, otherwise)); 366 | } 367 | 368 | /*inline bool8x16 less_ss( 369 | const uint8x16& lhs, const uint8x16& rhs) noexcept 370 | { 371 | // TODO 372 | } 373 | 374 | inline bool8x16 less_equal_ss( 375 | const uint8x16& lhs, const uint8x16& rhs) noexcept 376 | { 377 | // TODO 378 | } 379 | 380 | inline bool8x16 greater_ss( 381 | const uint8x16& lhs, const uint8x16& rhs) noexcept 382 | { 383 | // TODO 384 | } 385 | 386 | inline bool8x16 greater_equal_ss( 387 | const uint8x16& lhs, const uint8x16& rhs) noexcept 388 | { 389 | // TODO 390 | }*/ 391 | 392 | inline bool8x16 equal_ss( 393 | const uint8x16& lhs, const uint8x16& rhs) noexcept 394 | { 395 | return _mm_cmpeq_epi8(lhs, rhs); 396 | } 397 | 398 | inline bool8x16 not_equal_ss( 399 | const uint8x16& lhs, const uint8x16& rhs) noexcept 400 | { 401 | return _mm_xor_si128(_mm_cmpeq_epi8(lhs, rhs), uint8x16(0xFF)); 402 | } 403 | } 404 | } 405 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/int16x8.sse2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "../../../simd.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static int16x8 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | std::int16_t(s.data()[0]), 32 | std::int16_t(s.data()[1]), 33 | std::int16_t(s.data()[2]), 34 | std::int16_t(s.data()[3]), 35 | std::int16_t(s.data()[4]), 36 | std::int16_t(s.data()[5]), 37 | std::int16_t(s.data()[6]), 38 | std::int16_t(s.data()[7]), 39 | }; 40 | } 41 | 42 | inline static int16x8 explicit_cast(const bool16x8& s) noexcept; 43 | 44 | inline static int16x8 explicit_cast(const uint16x8& s) noexcept; 45 | 46 | public: 47 | using component_type = std::int16_t; 48 | 49 | static constexpr int component_count = 8; 50 | 51 | static constexpr bool is_accelerated = true; 52 | 53 | simd() noexcept = default; 54 | 55 | explicit simd(std::int16_t x) noexcept 56 | : 57 | underlying_(_mm_set1_epi16(x)) 58 | { 59 | } 60 | 61 | template> 62 | inline simd( 63 | std::int16_t x, std::int16_t y) noexcept; 64 | 65 | template> 66 | inline simd( 67 | std::int16_t x, std::int16_t y, 68 | std::int16_t z, std::int16_t w) noexcept; 69 | 70 | template> 71 | inline simd( 72 | std::int16_t s0, std::int16_t s1, 73 | std::int16_t s2, std::int16_t s3, 74 | std::int16_t s4, std::int16_t s5, 75 | std::int16_t s6, std::int16_t s7) noexcept 76 | : 77 | underlying_(_mm_setr_epi16( 78 | s0, s1, s2, s3, s4, s5, s6, s7)) 79 | { 80 | } 81 | 82 | template> 83 | inline simd( 84 | std::int16_t s0, std::int16_t s1, 85 | std::int16_t s2, std::int16_t s3, 86 | std::int16_t s4, std::int16_t s5, 87 | std::int16_t s6, std::int16_t 
s7, 88 | std::int16_t s8, std::int16_t s9, 89 | std::int16_t s10, std::int16_t s11, 90 | std::int16_t s12, std::int16_t s13, 91 | std::int16_t s14, std::int16_t s15) noexcept; 92 | 93 | template 94 | explicit simd(const simd& s) noexcept 95 | { 96 | *this = explicit_cast(s); 97 | } 98 | 99 | simd(__m128i underlying) noexcept 100 | : 101 | underlying_(underlying) 102 | { 103 | } 104 | 105 | operator __m128i() const noexcept 106 | { 107 | return underlying_; 108 | } 109 | 110 | static int16x8 zero() noexcept 111 | { 112 | return _mm_setzero_si128(); 113 | } 114 | 115 | static int16x8 load(const std::int16_t* data) noexcept 116 | { 117 | return _mm_load_si128(reinterpret_cast(data)); 118 | } 119 | 120 | static int16x8 loadu(const std::int16_t* data) noexcept 121 | { 122 | return _mm_loadu_si128(reinterpret_cast(data)); 123 | } 124 | 125 | void store(std::int16_t* data) const noexcept 126 | { 127 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 128 | } 129 | 130 | void storeu(std::int16_t* data) const noexcept 131 | { 132 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 133 | } 134 | 135 | const std::int16_t* data() const noexcept 136 | { 137 | return reinterpret_cast(&underlying_); 138 | } 139 | 140 | std::int16_t* data() noexcept 141 | { 142 | return reinterpret_cast(&underlying_); 143 | } 144 | }; 145 | } 146 | 147 | #include "bool16x8.sse2.hpp" 148 | #include "uint16x8.sse2.hpp" 149 | 150 | namespace tue 151 | { 152 | inline int16x8 int16x8::explicit_cast(const bool16x8& s) noexcept 153 | { 154 | return __m128i(s); 155 | } 156 | 157 | inline int16x8 int16x8::explicit_cast(const uint16x8& s) noexcept 158 | { 159 | return __m128i(s); 160 | } 161 | 162 | namespace detail_ 163 | { 164 | inline int16x8 unary_plus_operator_s(const int16x8& s) noexcept 165 | { 166 | return s; 167 | } 168 | 169 | inline int16x8& pre_increment_operator_s(int16x8& s) noexcept 170 | { 171 | return s = _mm_add_epi16(s, int16x8(1)); 172 | } 173 | 174 | inline 
int16x8 post_increment_operator_s(int16x8& s) noexcept 175 | { 176 | const auto result = s; 177 | s = _mm_add_epi16(s, int16x8(1)); 178 | return result; 179 | } 180 | 181 | inline int16x8 unary_minus_operator_s(const int16x8& s) noexcept 182 | { 183 | return _mm_sub_epi16(_mm_setzero_si128(), s); 184 | } 185 | 186 | inline int16x8& pre_decrement_operator_s(int16x8& s) noexcept 187 | { 188 | return s = _mm_sub_epi16(s, int16x8(1)); 189 | } 190 | 191 | inline int16x8 post_decrement_operator_s(int16x8& s) noexcept 192 | { 193 | const auto result = s; 194 | s = _mm_sub_epi16(s, int16x8(1)); 195 | return result; 196 | } 197 | 198 | inline int16x8 bitwise_not_operator_s(const int16x8& s) noexcept 199 | { 200 | return _mm_xor_si128(s, int16x8(0xFFFFu)); 201 | } 202 | 203 | inline int16x8 addition_operator_ss( 204 | const int16x8& lhs, const int16x8& rhs) noexcept 205 | { 206 | return _mm_add_epi16(lhs, rhs); 207 | } 208 | 209 | inline int16x8 subtraction_operator_ss( 210 | const int16x8& lhs, const int16x8& rhs) noexcept 211 | { 212 | return _mm_sub_epi16(lhs, rhs); 213 | } 214 | 215 | /*inline int16x8 multiplication_operator_ss( 216 | const int16x8& lhs, const int16x8& rhs) noexcept 217 | { 218 | // TODO 219 | } 220 | 221 | inline int16x8 division_operator_ss( 222 | const int16x8& lhs, const int16x8& rhs) noexcept 223 | { 224 | // TODO 225 | } 226 | 227 | inline int16x8 modulo_operator_ss( 228 | const int16x8& lhs, const int16x8& rhs) noexcept 229 | { 230 | // TODO 231 | }*/ 232 | 233 | inline int16x8 bitwise_and_operator_ss( 234 | const int16x8& lhs, const int16x8& rhs) noexcept 235 | { 236 | return _mm_and_si128(lhs, rhs); 237 | } 238 | 239 | inline int16x8 bitwise_or_operator_ss( 240 | const int16x8& lhs, const int16x8& rhs) noexcept 241 | { 242 | return _mm_or_si128(lhs, rhs); 243 | } 244 | 245 | inline int16x8 bitwise_xor_operator_ss( 246 | const int16x8& lhs, const int16x8& rhs) noexcept 247 | { 248 | return _mm_xor_si128(lhs, rhs); 249 | } 250 | 251 | inline 
int16x8 bitwise_shift_left_operator_si( 252 | const int16x8& lhs, int rhs) noexcept 253 | { 254 | return _mm_slli_epi16(lhs, rhs); 255 | } 256 | 257 | inline int16x8 bitwise_shift_right_operator_si( 258 | const int16x8& lhs, int rhs) noexcept 259 | { 260 | return _mm_srli_epi16(lhs, rhs); 261 | } 262 | 263 | inline int16x8& addition_assignment_operator_ss( 264 | int16x8& lhs, const int16x8& rhs) noexcept 265 | { 266 | return lhs = _mm_add_epi16(lhs, rhs); 267 | } 268 | 269 | inline int16x8& subtraction_assignment_operator_ss( 270 | int16x8& lhs, const int16x8& rhs) noexcept 271 | { 272 | return lhs = _mm_sub_epi16(lhs, rhs); 273 | } 274 | 275 | /*inline int16x8& multiplication_assignment_operator_ss( 276 | int16x8& lhs, const int16x8& rhs) noexcept 277 | { 278 | // TODO 279 | } 280 | 281 | inline int16x8& division_assignment_operator_ss( 282 | int16x8& lhs, const int16x8& rhs) noexcept 283 | { 284 | // TODO 285 | } 286 | 287 | inline int16x8& modulo_assignment_operator_ss( 288 | int16x8& lhs, const int16x8& rhs) noexcept 289 | { 290 | // TODO 291 | }*/ 292 | 293 | inline int16x8& bitwise_and_assignment_operator_ss( 294 | int16x8& lhs, const int16x8& rhs) noexcept 295 | { 296 | return lhs = _mm_and_si128(lhs, rhs); 297 | } 298 | 299 | inline int16x8& bitwise_or_assignment_operator_ss( 300 | int16x8& lhs, const int16x8& rhs) noexcept 301 | { 302 | return lhs = _mm_or_si128(lhs, rhs); 303 | } 304 | 305 | inline int16x8& bitwise_xor_assignment_operator_ss( 306 | int16x8& lhs, const int16x8& rhs) noexcept 307 | { 308 | return lhs = _mm_xor_si128(lhs, rhs); 309 | } 310 | 311 | inline int16x8& bitwise_shift_left_assignment_operator_si( 312 | int16x8& lhs, int rhs) noexcept 313 | { 314 | return lhs = _mm_slli_epi16(lhs, rhs); 315 | } 316 | 317 | inline int16x8& bitwise_shift_right_assignment_operator_si( 318 | int16x8& lhs, int rhs) noexcept 319 | { 320 | return lhs = _mm_srli_epi16(lhs, rhs); 321 | } 322 | 323 | inline bool equality_operator_ss( 324 | const int16x8& lhs, 
const int16x8& rhs) noexcept 325 | { 326 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 327 | } 328 | 329 | inline bool inequality_operator_ss( 330 | const int16x8& lhs, const int16x8& rhs) noexcept 331 | { 332 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 333 | } 334 | 335 | inline int16x8 abs_s(const int16x8& s) noexcept 336 | { 337 | const auto nmask = _mm_cmplt_epi16(s, _mm_setzero_si128()); 338 | return _mm_or_si128( 339 | _mm_and_si128(nmask, unary_minus_operator_s(s)), 340 | _mm_andnot_si128(nmask, s)); 341 | } 342 | 343 | inline int16x8 min_ss( 344 | const int16x8& s1, const int16x8& s2) noexcept 345 | { 346 | return _mm_min_epi16(s1, s2); 347 | } 348 | 349 | inline int16x8 max_ss( 350 | const int16x8& s1, const int16x8& s2) noexcept 351 | { 352 | return _mm_max_epi16(s1, s2); 353 | } 354 | 355 | inline int16x8 mask_ss( 356 | const bool16x8& conditions, 357 | const int16x8& values) noexcept 358 | { 359 | return _mm_and_si128(conditions, values); 360 | } 361 | 362 | inline int16x8 select_sss( 363 | const bool16x8& conditions, 364 | const int16x8& values, 365 | const int16x8& otherwise) noexcept 366 | { 367 | return _mm_or_si128( 368 | _mm_and_si128(conditions, values), 369 | _mm_andnot_si128(conditions, otherwise)); 370 | } 371 | 372 | inline bool16x8 less_ss( 373 | const int16x8& lhs, const int16x8& rhs) noexcept 374 | { 375 | return _mm_cmplt_epi16(lhs, rhs); 376 | } 377 | 378 | inline bool16x8 less_equal_ss( 379 | const int16x8& lhs, const int16x8& rhs) noexcept 380 | { 381 | return _mm_xor_si128(_mm_cmpgt_epi16(lhs, rhs), int16x8(0xFFFFu)); 382 | } 383 | 384 | inline bool16x8 greater_ss( 385 | const int16x8& lhs, const int16x8& rhs) noexcept 386 | { 387 | return _mm_cmpgt_epi16(lhs, rhs); 388 | } 389 | 390 | inline bool16x8 greater_equal_ss( 391 | const int16x8& lhs, const int16x8& rhs) noexcept 392 | { 393 | return _mm_xor_si128(_mm_cmplt_epi16(lhs, rhs), int16x8(0xFFFFu)); 394 | } 395 | 396 | inline bool16x8 equal_ss( 
397 | const int16x8& lhs, const int16x8& rhs) noexcept 398 | { 399 | return _mm_cmpeq_epi16(lhs, rhs); 400 | } 401 | 402 | inline bool16x8 not_equal_ss( 403 | const int16x8& lhs, const int16x8& rhs) noexcept 404 | { 405 | return _mm_xor_si128(_mm_cmpeq_epi16(lhs, rhs), int16x8(0xFFFFu)); 406 | } 407 | } 408 | } 409 | -------------------------------------------------------------------------------- /include/tue/detail_/simd/sse2/int32x4.sse2.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Jo Bates 2015. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | // 6 | // Please report any bugs, typos, or suggestions to 7 | // https://github.com/Cincinesh/tue/issues 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "../../../simd.hpp" 17 | 18 | namespace tue 19 | { 20 | template<> 21 | class alignas(tue::detail_::alignof_simd()) 22 | simd 23 | { 24 | __m128i underlying_; 25 | 26 | private: 27 | template 28 | static int32x4 explicit_cast(const simd& s) noexcept 29 | { 30 | return { 31 | std::int32_t(s.data()[0]), 32 | std::int32_t(s.data()[1]), 33 | std::int32_t(s.data()[2]), 34 | std::int32_t(s.data()[3]), 35 | }; 36 | } 37 | 38 | inline static int32x4 explicit_cast(const bool32x4& s) noexcept; 39 | 40 | inline static int32x4 explicit_cast(const float32x4& s) noexcept; 41 | 42 | inline static int32x4 explicit_cast(const uint32x4& s) noexcept; 43 | 44 | public: 45 | using component_type = std::int32_t; 46 | 47 | static constexpr int component_count = 4; 48 | 49 | static constexpr bool is_accelerated = true; 50 | 51 | simd() noexcept = default; 52 | 53 | explicit simd(std::int32_t x) noexcept 54 | : 55 | underlying_(_mm_set1_epi32(x)) 56 | { 57 | } 58 | 59 | template> 60 | inline simd( 61 | std::int32_t x, std::int32_t y) noexcept; 62 | 63 | template> 64 | inline 
simd( 65 | std::int32_t x, std::int32_t y, 66 | std::int32_t z, std::int32_t w) noexcept 67 | : 68 | underlying_(_mm_setr_epi32(x, y, z, w)) 69 | { 70 | } 71 | 72 | template> 73 | inline simd( 74 | std::int32_t s0, std::int32_t s1, 75 | std::int32_t s2, std::int32_t s3, 76 | std::int32_t s4, std::int32_t s5, 77 | std::int32_t s6, std::int32_t s7) noexcept; 78 | 79 | template> 80 | inline simd( 81 | std::int32_t s0, std::int32_t s1, 82 | std::int32_t s2, std::int32_t s3, 83 | std::int32_t s4, std::int32_t s5, 84 | std::int32_t s6, std::int32_t s7, 85 | std::int32_t s8, std::int32_t s9, 86 | std::int32_t s10, std::int32_t s11, 87 | std::int32_t s12, std::int32_t s13, 88 | std::int32_t s14, std::int32_t s15) noexcept; 89 | 90 | template 91 | explicit simd(const simd& s) noexcept 92 | { 93 | *this = explicit_cast(s); 94 | } 95 | 96 | simd(__m128i underlying) noexcept 97 | : 98 | underlying_(underlying) 99 | { 100 | } 101 | 102 | operator __m128i() const noexcept 103 | { 104 | return underlying_; 105 | } 106 | 107 | static int32x4 zero() noexcept 108 | { 109 | return _mm_setzero_si128(); 110 | } 111 | 112 | static int32x4 load(const std::int32_t* data) noexcept 113 | { 114 | return _mm_load_si128(reinterpret_cast(data)); 115 | } 116 | 117 | static int32x4 loadu(const std::int32_t* data) noexcept 118 | { 119 | return _mm_loadu_si128(reinterpret_cast(data)); 120 | } 121 | 122 | void store(std::int32_t* data) const noexcept 123 | { 124 | _mm_store_si128(reinterpret_cast<__m128i*>(data), underlying_); 125 | } 126 | 127 | void storeu(std::int32_t* data) const noexcept 128 | { 129 | _mm_storeu_si128(reinterpret_cast<__m128i*>(data), underlying_); 130 | } 131 | 132 | const std::int32_t* data() const noexcept 133 | { 134 | return reinterpret_cast(&underlying_); 135 | } 136 | 137 | std::int32_t* data() noexcept 138 | { 139 | return reinterpret_cast(&underlying_); 140 | } 141 | }; 142 | } 143 | 144 | #include "../sse/bool32x4.sse.hpp" 145 | #include "../sse/float32x4.sse.hpp" 146 
| #include "uint32x4.sse2.hpp" 147 | 148 | namespace tue 149 | { 150 | inline int32x4 int32x4::explicit_cast(const bool32x4& s) noexcept 151 | { 152 | return __m128i(s); 153 | } 154 | 155 | inline int32x4 int32x4::explicit_cast(const float32x4& s) noexcept 156 | { 157 | return _mm_cvtps_epi32(s); 158 | } 159 | 160 | inline int32x4 int32x4::explicit_cast(const uint32x4& s) noexcept 161 | { 162 | return __m128i(s); 163 | } 164 | 165 | namespace detail_ 166 | { 167 | inline int32x4 unary_plus_operator_s(const int32x4& s) noexcept 168 | { 169 | return s; 170 | } 171 | 172 | inline int32x4& pre_increment_operator_s(int32x4& s) noexcept 173 | { 174 | return s = _mm_add_epi32(s, int32x4(1)); 175 | } 176 | 177 | inline int32x4 post_increment_operator_s(int32x4& s) noexcept 178 | { 179 | const auto result = s; 180 | s = _mm_add_epi32(s, int32x4(1)); 181 | return result; 182 | } 183 | 184 | inline int32x4 unary_minus_operator_s(const int32x4& s) noexcept 185 | { 186 | return _mm_sub_epi32(_mm_setzero_si128(), s); 187 | } 188 | 189 | inline int32x4& pre_decrement_operator_s(int32x4& s) noexcept 190 | { 191 | return s = _mm_sub_epi32(s, int32x4(1)); 192 | } 193 | 194 | inline int32x4 post_decrement_operator_s(int32x4& s) noexcept 195 | { 196 | const auto result = s; 197 | s = _mm_sub_epi32(s, int32x4(1)); 198 | return result; 199 | } 200 | 201 | inline int32x4 bitwise_not_operator_s(const int32x4& s) noexcept 202 | { 203 | return _mm_xor_si128(s, int32x4(0xFFFFFFFF)); 204 | } 205 | 206 | inline int32x4 addition_operator_ss( 207 | const int32x4& lhs, const int32x4& rhs) noexcept 208 | { 209 | return _mm_add_epi32(lhs, rhs); 210 | } 211 | 212 | inline int32x4 subtraction_operator_ss( 213 | const int32x4& lhs, const int32x4& rhs) noexcept 214 | { 215 | return _mm_sub_epi32(lhs, rhs); 216 | } 217 | 218 | /*inline int32x4 multiplication_operator_ss( 219 | const int32x4& lhs, const int32x4& rhs) noexcept 220 | { 221 | // TODO 222 | } 223 | 224 | inline int32x4 division_operator_ss( 
225 | const int32x4& lhs, const int32x4& rhs) noexcept 226 | { 227 | // TODO 228 | } 229 | 230 | inline int32x4 modulo_operator_ss( 231 | const int32x4& lhs, const int32x4& rhs) noexcept 232 | { 233 | // TODO 234 | }*/ 235 | 236 | inline int32x4 bitwise_and_operator_ss( 237 | const int32x4& lhs, const int32x4& rhs) noexcept 238 | { 239 | return _mm_and_si128(lhs, rhs); 240 | } 241 | 242 | inline int32x4 bitwise_or_operator_ss( 243 | const int32x4& lhs, const int32x4& rhs) noexcept 244 | { 245 | return _mm_or_si128(lhs, rhs); 246 | } 247 | 248 | inline int32x4 bitwise_xor_operator_ss( 249 | const int32x4& lhs, const int32x4& rhs) noexcept 250 | { 251 | return _mm_xor_si128(lhs, rhs); 252 | } 253 | 254 | inline int32x4 bitwise_shift_left_operator_si( 255 | const int32x4& lhs, int rhs) noexcept 256 | { 257 | return _mm_slli_epi32(lhs, rhs); 258 | } 259 | 260 | inline int32x4 bitwise_shift_right_operator_si( 261 | const int32x4& lhs, int rhs) noexcept 262 | { 263 | return _mm_srli_epi32(lhs, rhs); 264 | } 265 | 266 | inline int32x4& addition_assignment_operator_ss( 267 | int32x4& lhs, const int32x4& rhs) noexcept 268 | { 269 | return lhs = _mm_add_epi32(lhs, rhs); 270 | } 271 | 272 | inline int32x4& subtraction_assignment_operator_ss( 273 | int32x4& lhs, const int32x4& rhs) noexcept 274 | { 275 | return lhs = _mm_sub_epi32(lhs, rhs); 276 | } 277 | 278 | /*inline int32x4& multiplication_assignment_operator_ss( 279 | int32x4& lhs, const int32x4& rhs) noexcept 280 | { 281 | // TODO 282 | } 283 | 284 | inline int32x4& division_assignment_operator_ss( 285 | int32x4& lhs, const int32x4& rhs) noexcept 286 | { 287 | // TODO 288 | } 289 | 290 | inline int32x4& modulo_assignment_operator_ss( 291 | int32x4& lhs, const int32x4& rhs) noexcept 292 | { 293 | // TODO 294 | }*/ 295 | 296 | inline int32x4& bitwise_and_assignment_operator_ss( 297 | int32x4& lhs, const int32x4& rhs) noexcept 298 | { 299 | return lhs = _mm_and_si128(lhs, rhs); 300 | } 301 | 302 | inline int32x4& 
bitwise_or_assignment_operator_ss( 303 | int32x4& lhs, const int32x4& rhs) noexcept 304 | { 305 | return lhs = _mm_or_si128(lhs, rhs); 306 | } 307 | 308 | inline int32x4& bitwise_xor_assignment_operator_ss( 309 | int32x4& lhs, const int32x4& rhs) noexcept 310 | { 311 | return lhs = _mm_xor_si128(lhs, rhs); 312 | } 313 | 314 | inline int32x4& bitwise_shift_left_assignment_operator_si( 315 | int32x4& lhs, int rhs) noexcept 316 | { 317 | return lhs = _mm_slli_epi32(lhs, rhs); 318 | } 319 | 320 | inline int32x4& bitwise_shift_right_assignment_operator_si( 321 | int32x4& lhs, int rhs) noexcept 322 | { 323 | return lhs = _mm_srli_epi32(lhs, rhs); 324 | } 325 | 326 | inline bool equality_operator_ss( 327 | const int32x4& lhs, const int32x4& rhs) noexcept 328 | { 329 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; 330 | } 331 | 332 | inline bool inequality_operator_ss( 333 | const int32x4& lhs, const int32x4& rhs) noexcept 334 | { 335 | return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF; 336 | } 337 | 338 | inline int32x4 abs_s(const int32x4& s) noexcept 339 | { 340 | const auto nmask = _mm_cmplt_epi32(s, _mm_setzero_si128()); 341 | return _mm_or_si128( 342 | _mm_and_si128(nmask, unary_minus_operator_s(s)), 343 | _mm_andnot_si128(nmask, s)); 344 | } 345 | 346 | /*inline int32x4 min_ss( 347 | const int32x4& s1, const int32x4& s2) noexcept 348 | { 349 | // TODO 350 | } 351 | 352 | inline int32x4 max_ss( 353 | const int32x4& s1, const int32x4& s2) noexcept 354 | { 355 | // TODO 356 | }*/ 357 | 358 | inline int32x4 mask_ss( 359 | const bool32x4& conditions, 360 | const int32x4& values) noexcept 361 | { 362 | return _mm_and_si128(conditions, values); 363 | } 364 | 365 | inline int32x4 select_sss( 366 | const bool32x4& conditions, 367 | const int32x4& values, 368 | const int32x4& otherwise) noexcept 369 | { 370 | return _mm_or_si128( 371 | _mm_and_si128(conditions, values), 372 | _mm_andnot_si128(conditions, otherwise)); 373 | } 374 | 375 | inline 
bool32x4 less_ss( 376 | const int32x4& lhs, const int32x4& rhs) noexcept 377 | { 378 | return _mm_cmplt_epi32(lhs, rhs); 379 | } 380 | 381 | inline bool32x4 less_equal_ss( 382 | const int32x4& lhs, const int32x4& rhs) noexcept 383 | { 384 | return _mm_xor_si128( 385 | _mm_cmpgt_epi32(lhs, rhs), int32x4(0xFFFFFFFF)); 386 | } 387 | 388 | inline bool32x4 greater_ss( 389 | const int32x4& lhs, const int32x4& rhs) noexcept 390 | { 391 | return _mm_cmpgt_epi32(lhs, rhs); 392 | } 393 | 394 | inline bool32x4 greater_equal_ss( 395 | const int32x4& lhs, const int32x4& rhs) noexcept 396 | { 397 | return _mm_xor_si128( 398 | _mm_cmplt_epi32(lhs, rhs), int32x4(0xFFFFFFFF)); 399 | } 400 | 401 | inline bool32x4 equal_ss( 402 | const int32x4& lhs, const int32x4& rhs) noexcept 403 | { 404 | return _mm_cmpeq_epi32(lhs, rhs); 405 | } 406 | 407 | inline bool32x4 not_equal_ss( 408 | const int32x4& lhs, const int32x4& rhs) noexcept 409 | { 410 | return _mm_xor_si128( 411 | _mm_cmpeq_epi32(lhs, rhs), int32x4(0xFFFFFFFF)); 412 | } 413 | } 414 | } 415 | --------------------------------------------------------------------------------