├── LICENSE ├── README.md ├── aabb.hpp ├── cross.hpp ├── dot.hpp ├── matrix.cpp ├── matrix.hpp ├── matrix_determinant.hpp ├── matrix_inverse.hpp ├── minmax.hpp ├── onb.hpp ├── quat.cpp ├── quat.hpp ├── rotation.hpp ├── rsqrt.hpp ├── sinf.hpp ├── tanf.hpp ├── tr_aabb_aabb.hpp ├── tri.hpp ├── vector.cpp └── vector.hpp /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vxl 2 | vxl is a lightweight SIMD vector, quaternion and matrix library for vectors and matrices of small dimensions. The library supports SSE, NEON and the gcc/clang vector extensions. 3 | 4 | The test .cpp files should provide clues on how to use the library. A C++17 compliant compiler is needed to use the library and to compile the .cpp files. 5 | -------------------------------------------------------------------------------- /aabb.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_AABB_HPP 2 | # define VXL_AABB_HPP 3 | # pragma once 4 | 5 | #include 6 | 7 | #include "vector.hpp" 8 | 9 | namespace vxl 10 | { 11 | 12 | template 13 | using aabb_t = std::array, 2>; 14 | 15 | } 16 | 17 | #endif // VXL_AABB_HPP 18 | -------------------------------------------------------------------------------- /cross.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_CROSS_HPP 2 | # define VXL_CROSS_HPP 3 | # pragma once 4 | 5 | #include "vector.hpp" 6 | 7 | namespace vxl 8 | { 9 | 10 | // http://fastcpp.blogspot.si/2011/04/vector-cross-product-using-sse-code.html 11 | template 12 | //__attribute__ ((noinline)) 13 | inline constexpr auto cross(vector const& l, 14 | vector const& r) noexcept 15 | { 16 | /* 17 | return vector{ 18 | l.data_[1] * r.data_[2] - l.data_[2] * r.data_[1], 19 | l.data_[2] * r.data_[0] - l.data_[0] * r.data_[2], 20 | l.data_[0] * r.data_[1] - l.data_[1] * r.data_[0] 21 | }; 22 | */ 23 | return swizzled<1, 2, 0>( 24 | l * swizzled<1, 2, 0>(r) - 25 | r * swizzled<1, 2, 0>(l) 26 | ); 27 | } 28 | 29 | } 30 | 31 | #endif // VXL_CROSS_HPP 32 | -------------------------------------------------------------------------------- /dot.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef VXL_DOT_HPP 2 | # define VXL_DOT_HPP 3 | # pragma once 4 | 5 | #include "vector.hpp" 6 | 7 | namespace vxl 8 | { 9 | 10 | #if defined(__ARM_NEON) 11 | 12 | //__attribute__ ((noinline)) 13 | inline auto dot(vector const& l, vector const& r) noexcept 14 | { 15 | using vector_type = typename vector_traits::vector_type; 16 | 17 | auto const prod(float32x2_t(l.data_) * float32x2_t(r.data_)); 18 | // prod = l0*r0 l1*r1 19 | 20 | return vector{ 21 | vector_type(prod + vrev64_f32(prod)) 22 | }; 23 | //l0*r0+l1*r1 l1*r1+l0*r0 24 | } 25 | 26 | //__attribute__ ((noinline)) 27 | inline auto dot(vector const& l, vector const& r) noexcept 28 | { 29 | using vector_type = typename vector_traits::vector_type; 30 | 31 | auto prod(float32x4_t(l.data_) * float32x4_t(r.data_)); 32 | 33 | prod += vrev64q_f32(prod); 34 | 35 | return vector{ 36 | vector_type( 37 | prod + vcombine_f32(vget_high_f32(prod), vget_low_f32(prod)) 38 | ) 39 | }; 40 | } 41 | 42 | //__attribute__ ((noinline)) 43 | inline auto dot(vector const& l, vector const& r) noexcept 44 | { 45 | using vector_type = typename vector_traits::vector_type; 46 | 47 | auto prod(float32x4_t(l.data_) * float32x4_t(r.data_)); 48 | 49 | prod += vrev64q_f32(prod); 50 | // l0 l1 l2 l3 51 | // r0 r1 r2 r3 52 | // prod = l0*r0 l1*r1 l2*r2 l3*r3 53 | // prod = l0*r0+l1*r1 l1*r1+l0*r0 l2*r2+r3*r3 l3*r3+l2*r2 54 | 55 | return vector{ 56 | vector_type( 57 | prod + vcombine_f32(vget_high_f32(prod), vget_low_f32(prod)) 58 | ) 59 | }; 60 | // l0*r0+l1*r1+l2*r2+r3*r3 ... 
61 | } 62 | 63 | /* 64 | #elif defined(__SSE4_1__) 65 | 66 | //__attribute__ ((noinline)) 67 | inline auto dot(vector const& l, vector const& r) noexcept 68 | { 69 | return vector{ 70 | _mm_dp_ps(l.data_, r.data_, 0x3f) 71 | }; 72 | } 73 | 74 | //__attribute__ ((noinline)) 75 | inline auto dot(vector const& l, vector const& r) noexcept 76 | { 77 | return vector{ 78 | _mm_dp_ps(l.data_, r.data_, 0x7f) 79 | }; 80 | } 81 | 82 | //__attribute__ ((noinline)) 83 | inline auto dot(vector const& l, vector const& r) noexcept 84 | { 85 | return vector{ 86 | _mm_dp_ps(l.data_, r.data_, 0xff) 87 | }; 88 | } 89 | 90 | #elif defined(__SSE3__) 91 | 92 | //__attribute__ ((noinline)) 93 | inline auto dot(vector const& l, vector const& r) noexcept 94 | { 95 | auto prod(l.data_ * r.data_); 96 | 97 | prod = _mm_hadd_ps(prod, prod); 98 | 99 | return vector{ 100 | _mm_hadd_ps(prod, prod) 101 | }; 102 | } 103 | 104 | //__attribute__ ((noinline)) 105 | inline auto dot(vector const& l, vector const& r) noexcept 106 | { 107 | auto prod(l.data_ * r.data_); 108 | 109 | prod = _mm_hadd_ps(prod, prod); 110 | 111 | return vector{ 112 | _mm_hadd_ps(prod, prod) 113 | }; 114 | } 115 | 116 | //__attribute__ ((noinline)) 117 | inline auto dot(vector const& l, vector const& r) noexcept 118 | { 119 | auto prod(l.data_ * r.data_); 120 | 121 | prod = _mm_hadd_ps(prod, prod); 122 | 123 | return vector{ 124 | _mm_hadd_ps(prod, prod) 125 | }; 126 | } 127 | 128 | */ 129 | 130 | #endif 131 | 132 | namespace detail::vector 133 | { 134 | 135 | /* 136 | ** 1 2 3 4 137 | ** 12 23 34 14 138 | ** 1234 1234 1234 1234 139 | */ 140 | 141 | template 142 | //__attribute__ ((noinline)) 143 | constexpr auto dot(typename vector_traits::vector_type v, 144 | std::index_sequence) noexcept 145 | { 146 | ( 147 | // result depends on previous shuffles 148 | ( 149 | v += pow2_shuffler(v, 150 | std::make_index_sequence() 151 | ) 152 | ), 153 | ... 
154 | ); 155 | 156 | return v; 157 | } 158 | 159 | } 160 | 161 | // dot product 162 | template 163 | //__attribute__ ((noinline)) 164 | constexpr auto dot(vector const& l, 165 | vector const& r) noexcept 166 | { 167 | return vector{ 168 | detail::vector::dot(l.data_ * r.data_, 169 | std::make_index_sequence(N)>() 170 | ) 171 | }; 172 | } 173 | 174 | template 175 | constexpr auto norm2(vector const& v) noexcept 176 | { 177 | return dot(v, v); 178 | } 179 | 180 | } 181 | 182 | #endif // VXL_DOT_HPP 183 | -------------------------------------------------------------------------------- /matrix.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "matrix.hpp" 4 | 5 | #include "matrix_determinant.hpp" 6 | 7 | int main() 8 | { 9 | auto m2(vxl::make_matrix<2, 2>(1.0f, 2.0f, 3.0f, 6.0f)); 10 | auto n2(vxl::make_matrix<2, 2>(-2.0f, -2.0f, 1.0f, 1.0f)); 11 | 12 | std::cout << m2 << std::endl; 13 | std::cout << n2 << std::endl; 14 | std::cout << (m2 == m2) << std::endl; 15 | std::cout << (m2 == n2) << std::endl; 16 | std::cout << vxl::trans(m2) << std::endl; 17 | std::cout << (m2 * n2) << std::endl; 18 | std::cout << (m2 * vxl::vector{-1.f, 1.f}) << std::endl; 19 | std::cout << (vxl::vector{-1.f, 1.f} * m2) << std::endl; 20 | 21 | std::cout << vxl::det( 22 | vxl::make_matrix<2, 2>( 23 | 1.f, 2.f, 3.f, 4.f) 24 | ) << 25 | std::endl; 26 | 27 | std::cout << vxl::det( 28 | vxl::make_matrix<3, 3>( 29 | 6.f, 4.f, 7.f, 3.f, 8.f, 6.f, 4.f, 7.f, 3.f) 30 | ) << 31 | std::endl; 32 | 33 | std::cout << vxl::det( 34 | vxl::make_matrix<4, 4>( 35 | 1.f, 2.f, 3.f, 4.f, 36 | 2.f, 3.f, 4.f, 1.f, 37 | 1.f, 4.f, 2.f, 3.f, 38 | 3.f, 2.f, 4.f, 5.f 39 | ) 40 | ) << 41 | std::endl; 42 | 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /matrix.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_MATRIX_HPP 2 | # define VXL_MATRIX_HPP 
3 | # pragma once 4 | 5 | #include 6 | 7 | #include "dot.hpp" 8 | 9 | namespace vxl 10 | { 11 | 12 | namespace detail 13 | { 14 | 15 | namespace matrix 16 | { 17 | 18 | template 19 | inline constexpr void copy(D& dst, S const& src, std::index_sequence) 20 | { 21 | ((dst[Is][I] = src[Is]), ...); 22 | } 23 | 24 | template 25 | inline constexpr auto sample(S const& src, std::index_sequence) 26 | { 27 | return D{(src[Is][I])...}; 28 | } 29 | 30 | } 31 | 32 | } 33 | 34 | template 35 | struct matrix 36 | { 37 | static constexpr auto rows = M; 38 | static constexpr auto cols = N; 39 | 40 | #ifndef VXL_ROW_MAJOR 41 | typename vector_traits::vector_type data_[M]; 42 | #else 43 | typename vector_traits::vector_type data_[N]; 44 | #endif // VXL_ROW_MAJOR 45 | 46 | constexpr auto operator()(unsigned const i, unsigned const j) const noexcept 47 | { 48 | return get_element(i, j); 49 | } 50 | 51 | // assignment 52 | auto& operator=(std::initializer_list const l) noexcept 53 | { 54 | assert(M * N == l.size()); 55 | auto k(l.begin()); 56 | 57 | for (unsigned i{}; i != M; ++i) 58 | { 59 | for (unsigned j{}; j != N; ++j) 60 | { 61 | #ifndef VXL_ROW_MAJOR 62 | data_[j][i] = *k++; 63 | #else 64 | data_[i][j] = *k++; 65 | #endif // VXL_ROW_MAJOR 66 | } 67 | } 68 | 69 | return *this; 70 | } 71 | 72 | // element access 73 | constexpr auto get_element(unsigned const i, 74 | unsigned const j) const noexcept 75 | { 76 | #ifndef VXL_ROW_MAJOR 77 | return data_[j][i]; 78 | #else 79 | return data_[i][j]; 80 | #endif // VXL_ROW_MAJOR 81 | } 82 | 83 | constexpr void set_element(unsigned const i, unsigned const j, 84 | T const v) noexcept 85 | { 86 | #ifndef VXL_ROW_MAJOR 87 | data_[j][i] = v; 88 | #else 89 | data_[i][j] = v; 90 | #endif // VXL_ROW_MAJOR 91 | } 92 | 93 | template 94 | constexpr auto row() const noexcept 95 | { 96 | #ifndef VXL_ROW_MAJOR 97 | return detail::matrix::sample< 98 | typename vector_traits::vector_type, I 99 | >(data_, std::make_index_sequence()); 100 | #else 101 | 
return data_[i]; 102 | #endif // VXL_ROW_MAJOR 103 | } 104 | 105 | template 106 | constexpr void set_row( 107 | typename vector_traits::vector_type const& v) noexcept 108 | { 109 | #ifndef VXL_ROW_MAJOR 110 | detail::matrix::copy(data_, 111 | v, 112 | std::make_index_sequence() 113 | ); 114 | #else 115 | data_[i] = v; 116 | #endif // VXL_ROW_MAJOR 117 | } 118 | 119 | template 120 | constexpr void set_row(vector const& v) noexcept 121 | { 122 | #ifndef VXL_ROW_MAJOR 123 | detail::matrix::copy(data_, 124 | v.data_, 125 | std::make_index_sequence() 126 | ); 127 | #else 128 | data_[i] = v.data_; 129 | #endif // VXL_ROW_MAJOR 130 | } 131 | 132 | template >... 136 | >{} 137 | > 138 | > 139 | constexpr void set_row(A const... a) noexcept 140 | { 141 | set_row(typename vector_traits::vector_type{a...}); 142 | } 143 | 144 | template 145 | constexpr auto col() const noexcept 146 | { 147 | #ifndef VXL_ROW_MAJOR 148 | return data_[J]; 149 | #else 150 | return detail::matrix::sample< 151 | typename vector_traits::vector_type, J 152 | >(data_, std::make_index_sequence()); 153 | #endif // VXL_ROW_MAJOR 154 | } 155 | 156 | template 157 | constexpr void set_col( 158 | typename vector_traits::vector_type const& v) noexcept 159 | { 160 | #ifndef VXL_ROW_MAJOR 161 | data_[J] = v; 162 | #else 163 | detail::matrix::copy(data_, v, 164 | std::make_index_sequence() 165 | ); 166 | #endif // VXL_ROW_MAJOR 167 | } 168 | 169 | template 170 | constexpr void set_col(vector const& v) noexcept 171 | { 172 | #ifndef VXL_ROW_MAJOR 173 | data_[J] = v.data_; 174 | #else 175 | detail::matrix::copy(data_, v.data_, 176 | std::make_index_sequence() 177 | ); 178 | #endif // VXL_ROW_MAJOR 179 | } 180 | 181 | template >... 185 | >{} 186 | > 187 | > 188 | constexpr void set_col(A const... 
a) noexcept 189 | { 190 | set_col(typename vector_traits::vector_type{a...}); 191 | } 192 | 193 | // ref 194 | constexpr auto& ref() noexcept {return data_;} 195 | constexpr auto& ref() const noexcept {return data_;} 196 | }; 197 | 198 | template >, std::decay_t>... 202 | >{} && 203 | std::is_arithmetic>>{} 204 | > 205 | > 206 | inline constexpr auto make_matrix(A const ...a) noexcept 207 | { 208 | static_assert(M * N == sizeof...(A)); 209 | matrix>, M, N> r{}; 210 | 211 | unsigned i{}; 212 | 213 | ( 214 | ( 215 | r.set_element(i / M, i % N, a), 216 | ++i 217 | ), 218 | ... 219 | ); 220 | 221 | return r; 222 | } 223 | 224 | namespace detail 225 | { 226 | 227 | namespace matrix 228 | { 229 | 230 | template 231 | constexpr inline auto sample(vxl::matrix const& m, 232 | std::index_sequence const) noexcept -> 233 | typename vector_traits::vector_type 234 | { 235 | return typename vector_traits::vector_type{m.data_[Is][I]...}; 236 | } 237 | 238 | template 239 | constexpr inline auto sample(vxl::matrix const& m, 240 | unsigned const i, std::index_sequence const) noexcept -> 241 | typename vector_traits::vector_type 242 | { 243 | return typename vector_traits::vector_type{m.data_[Is][i]...}; 244 | } 245 | 246 | } 247 | 248 | } 249 | 250 | template 251 | constexpr inline vector row(matrix const& m) noexcept 252 | { 253 | #ifndef VXL_ROW_MAJOR 254 | return { 255 | detail::matrix::sample(m, I, std::make_index_sequence()) 256 | }; 257 | #else 258 | return { 259 | l.data_[I] 260 | }; 261 | #endif // VXL_ROW_MAJOR 262 | } 263 | 264 | template 265 | constexpr inline vector row(matrix const& m, 266 | unsigned const i) noexcept 267 | { 268 | #ifndef VXL_ROW_MAJOR 269 | return { 270 | detail::matrix::sample(m, i, std::make_index_sequence()) 271 | }; 272 | #else 273 | return { 274 | l.data_[i] 275 | }; 276 | #endif // VXL_ROW_MAJOR 277 | } 278 | 279 | template 280 | constexpr inline vector col(matrix const& m) noexcept 281 | { 282 | #ifndef VXL_ROW_MAJOR 283 | return { 284 | 
m.data_[J] 285 | }; 286 | #else 287 | return { 288 | detail::matrix::sample(m, std::make_index_sequence()) 289 | }; 290 | #endif // VXL_ROW_MAJOR 291 | } 292 | 293 | template 294 | constexpr inline vector col(matrix const& m, 295 | unsigned const j) noexcept 296 | { 297 | #ifndef VXL_ROW_MAJOR 298 | return { 299 | m.data_[j] 300 | }; 301 | #else 302 | return { 303 | detail::matrix::sample(m, j, std::make_index_sequence()) 304 | }; 305 | #endif // VXL_ROW_MAJOR 306 | } 307 | 308 | // arithmetic operations 309 | template 310 | //__attribute__ ((noinline)) 311 | inline auto operator+(matrix const& l, 312 | matrix const& r) noexcept 313 | { 314 | decltype(l + r) result; 315 | 316 | #ifndef VXL_ROW_MAJOR 317 | for (unsigned j{}; j != N; ++j) 318 | { 319 | result.data_[j] = l.data_[j] + r.data_[j]; 320 | } 321 | #else 322 | for (unsigned i{}; i != M; ++i) 323 | { 324 | result.data_[i] = l.data_[i] + r.data_[i]; 325 | } 326 | #endif // VXL_ROW_MAJOR 327 | 328 | return result; 329 | } 330 | 331 | template 332 | //__attribute__ ((noinline)) 333 | inline matrix operator-(matrix const& l, 334 | matrix const& r) noexcept 335 | { 336 | decltype(l - r) result; 337 | 338 | #ifndef VXL_ROW_MAJOR 339 | for (unsigned j{}; j != N; ++j) 340 | { 341 | result.data_[j] = l.data_[j] - r.data_[j]; 342 | } 343 | #else 344 | for (unsigned i{}; i != M; ++i) 345 | { 346 | result.data_[i] = l.data_[i] - r.data_[i]; 347 | } 348 | #endif // VXL_ROW_MAJOR 349 | 350 | return result; 351 | } 352 | 353 | template 354 | //__attribute__ ((noinline)) 355 | inline matrix operator*(matrix const& l, 356 | matrix const& r) noexcept 357 | { 358 | static_assert(N1 == M2); 359 | decltype(l * r) result; 360 | 361 | #ifndef VXL_ROW_MAJOR 362 | for (unsigned i{}; i != N2; ++i) 363 | { 364 | result.data_[i] = cvector(r.data_[i][0]) * l.data_[0]; 365 | 366 | for (unsigned j{1}; j != N1; ++j) 367 | { 368 | result.data_[i] += cvector(r.data_[i][j]) * l.data_[j]; 369 | } 370 | } 371 | #else 372 | for (unsigned i{}; i 
!= M1; ++i) 373 | { 374 | result.data_[i] = cvector(l.data_[i][0]) * r.data_[0]; 375 | 376 | for (unsigned j{1}; j != M2; ++j) 377 | { 378 | result.data_[i] += cvector(l.data_[i][j]) * r.data_[j]; 379 | } 380 | } 381 | #endif // VXL_ROW_MAJOR 382 | 383 | return result; 384 | } 385 | 386 | template 387 | //__attribute__ ((noinline)) 388 | inline vector operator*(matrix const& l, 389 | vector const& r) noexcept 390 | { 391 | static_assert(N1 == N, ""); 392 | 393 | #ifndef VXL_ROW_MAJOR 394 | decltype(l * r) result{cvector(r.data_[0]) * l.data_[0]}; 395 | 396 | for (unsigned j{1}; j != N; ++j) 397 | { 398 | result.data_ += cvector(r.data_[j]) * l.data_[j]; 399 | } 400 | #else 401 | decltype(l * r) result; 402 | 403 | for (unsigned i{}; i != M1; ++i) 404 | { 405 | result.data_[i] = dot({l.data_[i]}, r).data_[0]; 406 | } 407 | #endif // VXL_ROW_MAJOR 408 | 409 | return result; 410 | } 411 | 412 | template 413 | //__attribute__ ((noinline)) 414 | inline vector operator*(vector const& l, 415 | matrix const& r) noexcept 416 | { 417 | static_assert(N == M2); 418 | 419 | #ifndef VXL_ROW_MAJOR 420 | decltype(l * r) result; 421 | 422 | for (unsigned j{}; j != N2; ++j) 423 | { 424 | result.data_[j] = dot(l, {r.data_[j]}).data_[0]; 425 | } 426 | #else 427 | decltype(l * r) result{cvector(l.data_[0]) * r.data_[0]}; 428 | 429 | for (unsigned j{1}; j != M2; ++j) 430 | { 431 | result.data_ += cvector(l.data_[j]) * r.data_[j]; 432 | } 433 | #endif // VXL_ROW_MAJOR 434 | 435 | return result; 436 | } 437 | 438 | template 439 | //__attribute__ ((noinline)) 440 | inline matrix operator*(T const l, 441 | matrix const& r) noexcept 442 | { 443 | decltype(l * r) result; 444 | 445 | #ifndef VXL_ROW_MAJOR 446 | for (unsigned j{}; j != N; ++j) 447 | { 448 | result.data_[j] = vxl::cvector(l) * r.data_[j]; 449 | } 450 | #else 451 | for (unsigned i{}; i != M; ++i) 452 | { 453 | result.data_[i] = vxl::cvector(l) * r.data_[i]; 454 | } 455 | #endif // VXL_ROW_MAJOR 456 | 457 | return result; 458 | } 
459 | 460 | template 461 | //__attribute__ ((noinline)) 462 | inline matrix operator*(matrix const& l, 463 | T const r) noexcept 464 | { 465 | return operator*(r, l); 466 | } 467 | 468 | template 469 | //__attribute__ ((noinline)) 470 | inline matrix operator/(matrix const& l, 471 | T const r) noexcept 472 | { 473 | decltype(l * r) result; 474 | 475 | auto const invr(T(1) / r); 476 | 477 | #ifndef VXL_ROW_MAJOR 478 | for (unsigned j{}; j != N; ++j) 479 | { 480 | result.data_[j] = vxl::cvector(invr) * l.data_[j]; 481 | } 482 | #else 483 | for (unsigned i{}; i != M; ++i) 484 | { 485 | result.data_[i] = vxl::cvector(invr) * l.data_[i]; 486 | } 487 | #endif // VXL_ROW_MAJOR 488 | 489 | return result; 490 | } 491 | 492 | template 493 | //__attribute__ ((noinline)) 494 | inline matrix& operator*=(matrix& l, 495 | matrix const& r) noexcept 496 | { 497 | #ifndef VXL_ROW_MAJOR 498 | // iterate over all columns of r 499 | for (unsigned i{}; i != N; ++i) 500 | { 501 | // capture i-th column 502 | auto const tmp(r.data_[i]); 503 | 504 | l.data_[i] = cvector(tmp[0]) * l.data_[0]; 505 | 506 | for (unsigned j{1}; j != N; ++j) 507 | { 508 | l.data_[i] += cvector(tmp[j]) * l.data_[j]; 509 | } 510 | } 511 | #else 512 | // iterate over all rows of l 513 | for (unsigned i{}; i != M; ++i) 514 | { 515 | // capture i-th row 516 | auto const tmp(r.data_[i]); 517 | 518 | l.data_[i] = cvector(tmp[0]) * r.data_[0]; 519 | 520 | for (unsigned j{1}; j != M; ++j) 521 | { 522 | l.data_[i] += cvector(tmp[j]) * r.data_[j]; 523 | } 524 | } 525 | #endif // VXL_ROW_MAJOR 526 | 527 | return l; 528 | } 529 | 530 | template 531 | //__attribute__ ((noinline)) 532 | inline matrix& operator*=(matrix& l, T const r) noexcept 533 | { 534 | #ifndef VXL_ROW_MAJOR 535 | for (unsigned j{}; j != N; ++j) 536 | { 537 | l.data_[j] *= vxl::cvector(r); 538 | } 539 | #else 540 | for (unsigned i{}; i != M; ++i) 541 | { 542 | l.data_[i] *= vxl::cvector(r); 543 | } 544 | #endif // VXL_ROW_MAJOR 545 | 546 | return l; 547 | } 
548 | 549 | // comparison 550 | template 551 | //__attribute__ ((noinline)) 552 | inline bool operator==(matrix const& l, 553 | matrix const& r) noexcept 554 | { 555 | #ifndef VXL_ROW_MAJOR 556 | typename vector_traits::int_vector_type result( 557 | l.data_[0] != r.data_[0] 558 | ); 559 | 560 | for (unsigned j{1}; j != N; ++j) 561 | { 562 | result |= l.data_[j] != r.data_[j]; 563 | } 564 | 565 | return detail::vector::all_zeros(result, 566 | std::make_index_sequence() 567 | ); 568 | #else 569 | typename vector_traits::int_vector_type result( 570 | l.data_[0] != r.data_[0] 571 | ); 572 | 573 | for (unsigned i{1}; i != M; ++i) 574 | { 575 | result |= l.data_[i] != r.data_[i]; 576 | } 577 | 578 | return detail::vector::all_zeros(result, 579 | std::make_index_sequence() 580 | ); 581 | #endif // VXL_ROW_MAJOR 582 | } 583 | 584 | namespace detail 585 | { 586 | 587 | namespace matrix 588 | { 589 | 590 | template 591 | inline constexpr auto identity(std::index_sequence) noexcept 592 | { 593 | vxl::matrix r{}; 594 | 595 | ( 596 | r.set_element(Is, Is, T(1)), 597 | ... 598 | ); 599 | 600 | return r; 601 | } 602 | 603 | template 604 | inline constexpr auto diag(vxl::vector const& v, 605 | std::index_sequence) noexcept 606 | { 607 | vxl::matrix r{}; 608 | 609 | ( 610 | r.set_element(Is, Is, v(Is)), 611 | ... 
612 | ); 613 | 614 | return r; 615 | } 616 | 617 | } 618 | 619 | } 620 | 621 | // diagonal 622 | template 623 | inline constexpr auto diag(vxl::vector const& v) noexcept 624 | { 625 | return detail::matrix::diag(v, std::make_index_sequence()); 626 | } 627 | 628 | // identity 629 | template 630 | inline constexpr void identity(matrix& m) noexcept 631 | { 632 | static_assert(M == N); 633 | m = detail::matrix::identity(std::make_index_sequence()); 634 | } 635 | 636 | template 637 | inline constexpr auto identity() noexcept 638 | { 639 | static_assert(M == N); 640 | return detail::matrix::identity(std::make_index_sequence()); 641 | } 642 | 643 | // transposition 644 | template 645 | inline constexpr auto trans(matrix const& m) noexcept 646 | { 647 | matrix result{}; 648 | 649 | #ifndef VXL_ROW_MAJOR 650 | for (unsigned j{}; j != N; ++j) 651 | { 652 | for (unsigned i{}; i != M; ++i) 653 | { 654 | result.data_[i][j] = m.data_[j][i]; 655 | } 656 | } 657 | #else 658 | for (unsigned i{}; i != M; ++i) 659 | { 660 | for (unsigned j{}; j != N; ++j) 661 | { 662 | result.data_[j][i] = m.data_[i][j]; 663 | } 664 | } 665 | #endif // VXL_ROW_MAJOR 666 | 667 | return result; 668 | } 669 | 670 | // zero 671 | template 672 | inline constexpr auto zero() noexcept 673 | { 674 | static_assert(M == N); 675 | return matrix{}; 676 | } 677 | 678 | template 679 | inline constexpr void zero(matrix& m) noexcept 680 | { 681 | static_assert(M == N); 682 | m = zero(); 683 | } 684 | 685 | template 686 | std::ostream& operator<<(std::ostream& os, matrix const& m) 687 | { 688 | for (unsigned i{}; i != M - 1; ++i) 689 | { 690 | os << (i ? 
' ' : '['); 691 | 692 | for (unsigned j{}; j != N - 1; ++j) 693 | { 694 | os << m(i, j) << ", "; 695 | } 696 | 697 | os << m(i, N - 1) << '\n'; 698 | } 699 | 700 | os << ' '; 701 | 702 | for (unsigned j{}; j != N - 1; ++j) 703 | { 704 | os << m(M - 1, j) << ", "; 705 | } 706 | 707 | return os << m(M - 1, N - 1) << ']'; 708 | } 709 | 710 | } 711 | 712 | #endif // VXL_MATRIX_HPP 713 | -------------------------------------------------------------------------------- /matrix_determinant.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_DETERMINANT_HPP 2 | # define VXL_DETERMINANT_HPP 3 | # pragma once 4 | 5 | #include "cross.hpp" 6 | 7 | #include "dot.hpp" 8 | 9 | #include "matrix.hpp" 10 | 11 | #include "matrix_inverse.hpp" 12 | 13 | namespace vxl 14 | { 15 | 16 | // https://en.wikipedia.org/wiki/Determinant#Block_matrices 17 | ////////////////////////////////////////////////////////////////////////////// 18 | template 19 | //__attribute__ ((noinline)) 20 | constexpr auto det(vxl::matrix const& m) noexcept 21 | { 22 | //return 23 | // m(0, 0) * m(1, 1) - 24 | // m(0, 1) * m(1, 0); 25 | #ifndef VXL_ROW_MAJOR 26 | auto const a(col<0>(m)); 27 | auto const b(col<1>(m)); 28 | #else 29 | auto const a(row<0>(m)); 30 | auto const b(row<1>(m)); 31 | #endif // VXL_ROW_MAJOR 32 | 33 | auto const prod(a * swizzled<3, 2>(a, b)); 34 | 35 | return (prod - swizzled<1>(prod))(0); 36 | } 37 | 38 | ////////////////////////////////////////////////////////////////////////////// 39 | template 40 | //__attribute__ ((noinline)) 41 | constexpr auto det(vxl::matrix const& m) noexcept 42 | { 43 | // get row or col, whatever is more efficient as det(A) == det(A^T) 44 | #ifndef VXL_ROW_MAJOR 45 | return dot(col<0>(m), cross(col<1>(m), col<2>(m)))(0); 46 | #else 47 | return dot(row<0>(m), cross(row<1>(m), row<2>(m)))(0); 48 | #endif // VXL_ROW_MAJOR 49 | } 50 | 51 | ////////////////////////////////////////////////////////////////////////////// 52 
| template 53 | //__attribute__ ((noinline)) 54 | constexpr auto det(vxl::matrix const& m) noexcept 55 | { 56 | auto const A(make_matrix<2, 2>(m(0, 0), m(0, 1), m(1, 0), m(1, 1))); 57 | auto const B(make_matrix<2, 2>(m(0, 2), m(0, 3), m(1, 2), m(1, 3))); 58 | auto const C(make_matrix<2, 2>(m(2, 0), m(2, 1), m(3, 0), m(3, 1))); 59 | auto const D(make_matrix<2, 2>(m(2, 2), m(2, 3), m(3, 2), m(3, 3))); 60 | 61 | //return det(A) * det(D - C * inv(A) * B); 62 | return det(D) * det(A - B * inv(D) * C); 63 | } 64 | 65 | } 66 | 67 | #endif // VXL_DETERMINANT_HPP 68 | -------------------------------------------------------------------------------- /matrix_inverse.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_MATRIX_INVERSE_HPP 2 | # define VXL_MATRIX_INVERSE_HPP 3 | # pragma once 4 | 5 | #include "matrix_determinant.hpp" 6 | 7 | namespace vxl 8 | { 9 | 10 | ////////////////////////////////////////////////////////////////////////////// 11 | template 12 | //__attribute__ ((noinline)) 13 | inline matrix inv(matrix const& ma) noexcept 14 | { 15 | decltype(inv(ma)) mb; 16 | 17 | #ifndef VXL_ROW_MAJOR 18 | mb.template set_col<0>(vector{ma(1, 1), -ma(1, 0)}); 19 | mb.template set_col<1>(vector{-ma(0, 1), ma(0, 0)}); 20 | #else 21 | mb.template set_row<0>(vector{ma(1, 1), -ma(0, 1)}); 22 | mb.template set_row<1>(vector{-ma(1, 0), ma(0, 0)}); 23 | #endif // VXL_ROW_MAJOR 24 | 25 | return mb / det(ma); 26 | } 27 | 28 | ////////////////////////////////////////////////////////////////////////////// 29 | template 30 | inline matrix inv(matrix const& ma) noexcept 31 | { 32 | decltype(inv(ma)) mb; 33 | 34 | mb(0, 0, ma(1, 1) * ma(2, 2) - ma(1, 2) * ma(2, 1)); 35 | mb(0, 1, ma(0, 2) * ma(2, 1) - ma(0, 1) * ma(2, 2)); 36 | mb(0, 2, ma(0, 1) * ma(1, 2) - ma(0, 2) * ma(1, 1)); 37 | 38 | mb(1, 0, ma(1, 2) * ma(2, 0) - ma(1, 0) * ma(2, 2)); 39 | mb(1, 1, ma(0, 0) * ma(2, 2) - ma(0, 2) * ma(2, 0)); 40 | mb(1, 2, ma(0, 2) * ma(1, 0) - ma(0, 
0) * ma(1, 2)); 41 | 42 | mb(2, 0, ma(1, 0) * ma(2, 1) - ma(1, 1) * ma(2, 0)); 43 | mb(2, 1, ma(0, 1) * ma(2, 0) - ma(0, 0) * ma(2, 1)); 44 | mb(2, 2, ma(0, 0) * ma(1, 1) - ma(0, 1) * ma(1, 0)); 45 | 46 | return mb / det(ma); 47 | } 48 | 49 | ////////////////////////////////////////////////////////////////////////////// 50 | template 51 | inline matrix inv(matrix const& ma) noexcept 52 | { 53 | decltype(inv(ma)) mb; 54 | 55 | mb(0, 0, 56 | ma(1, 2) * ma(2, 3) * ma(3, 1) - ma(1, 3) * ma(2, 2) * ma(3, 1) + 57 | ma(1, 3) * ma(2, 1) * ma(3, 2) - ma(1, 1) * ma(2, 3) * ma(3, 2) - 58 | ma(1, 2) * ma(2, 1) * ma(3, 3) + ma(1, 1) * ma(2, 2) * ma(3, 3) 59 | ); 60 | mb(0, 1, 61 | ma(0, 3) * ma(2, 2) * ma(3, 1) - ma(0, 2) * ma(2, 3) * ma(3, 1) - 62 | ma(0, 3) * ma(2, 1) * ma(3, 2) + ma(0, 1) * ma(2, 3) * ma(3, 2) + 63 | ma(0, 2) * ma(2, 1) * ma(3, 3) - ma(0, 1) * ma(2, 2) * ma(3, 3) 64 | ); 65 | mb(0, 2, 66 | ma(0, 2) * ma(1, 3) * ma(3, 1) - ma(0, 3) * ma(1, 2) * ma(3, 1) + 67 | ma(0, 3) * ma(1, 1) * ma(3, 2) - ma(0, 1) * ma(1, 3) * ma(3, 2) - 68 | ma(0, 2) * ma(1, 1) * ma(3, 3) + ma(0, 1) * ma(1, 2) * ma(3, 3) 69 | ); 70 | mb(0, 3, 71 | ma(0, 3) * ma(1, 2) * ma(2, 1) - ma(0, 2) * ma(1, 3) * ma(2, 1) - 72 | ma(0, 3) * ma(1, 1) * ma(2, 2) + ma(0, 1) * ma(1, 3) * ma(2, 2) + 73 | ma(0, 2) * ma(1, 1) * ma(2, 3) - ma(0, 1) * ma(1, 2) * ma(2, 3) 74 | ); 75 | mb(1, 0, 76 | ma(1, 3) * ma(2, 2) * ma(3, 0) - ma(1, 2) * ma(2, 3) * ma(3, 0) - 77 | ma(1, 3) * ma(2, 0) * ma(3, 2) + ma(1, 0) * ma(2, 3) * ma(3, 2) + 78 | ma(1, 2) * ma(2, 0) * ma(3, 3) - ma(1, 0) * ma(2, 2) * ma(3, 3) 79 | ); 80 | mb(1, 1, 81 | ma(0, 2) * ma(2, 3) * ma(3, 0) - ma(0, 3) * ma(2, 2) * ma(3, 0) + 82 | ma(0, 3) * ma(2, 0) * ma(3, 2) - ma(0, 0) * ma(2, 3) * ma(3, 2) - 83 | ma(0, 2) * ma(2, 0) * ma(3, 3) + ma(0, 0) * ma(2, 2) * ma(3, 3) 84 | ); 85 | mb(1, 2, 86 | ma(0, 3) * ma(1, 2) * ma(3, 0) - ma(0, 2) * ma(1, 3) * ma(3, 0) - 87 | ma(0, 3) * ma(1, 0) * ma(3, 2) + ma(0, 0) * ma(1, 3) * ma(3, 2) + 88 | ma(0, 2) * 
ma(1, 0) * ma(3, 3) - ma(0, 0) * ma(1, 2) * ma(3, 3) 89 | ); 90 | mb(1, 3, 91 | ma(0, 2) * ma(1, 3) * ma(2, 0) - ma(0, 3) * ma(1, 2) * ma(2, 0) + 92 | ma(0, 3) * ma(1, 0) * ma(2, 2) - ma(0, 0) * ma(1, 3) * ma(2, 2) - 93 | ma(0, 2) * ma(1, 0) * ma(2, 3) + ma(0, 0) * ma(1, 2) * ma(2, 3) 94 | ); 95 | mb(2, 0, 96 | ma(1, 1) * ma(2, 3) * ma(3, 0) - ma(1, 3) * ma(2, 1) * ma(3, 0) + 97 | ma(1, 3) * ma(2, 0) * ma(3, 1) - ma(1, 0) * ma(2, 3) * ma(3, 1) - 98 | ma(1, 1) * ma(2, 0) * ma(3, 3) + ma(1, 0) * ma(2, 1) * ma(3, 3) 99 | ); 100 | mb(2, 1, 101 | ma(0, 3) * ma(2, 1) * ma(3, 0) - ma(0, 1) * ma(2, 3) * ma(3, 0) - 102 | ma(0, 3) * ma(2, 0) * ma(3, 1) + ma(0, 0) * ma(2, 3) * ma(3, 1) + 103 | ma(0, 1) * ma(2, 0) * ma(3, 3) - ma(0, 0) * ma(2, 1) * ma(3, 3) 104 | ); 105 | mb(2, 2, 106 | ma(0, 1) * ma(1, 3) * ma(3, 0) - ma(0, 3) * ma(1, 1) * ma(3, 0) + 107 | ma(0, 3) * ma(1, 0) * ma(3, 1) - ma(0, 0) * ma(1, 3) * ma(3, 1) - 108 | ma(0, 1) * ma(1, 0) * ma(3, 3) + ma(0, 0) * ma(1, 1) * ma(3, 3) 109 | ); 110 | mb(2, 3, 111 | ma(0, 3) * ma(1, 1) * ma(2, 0) - ma(0, 1) * ma(1, 3) * ma(2, 0) - 112 | ma(0, 3) * ma(1, 0) * ma(2, 1) + ma(0, 0) * ma(1, 3) * ma(2, 1) + 113 | ma(0, 1) * ma(1, 0) * ma(2, 3) - ma(0, 0) * ma(1, 1) * ma(2, 3) 114 | ); 115 | mb(3, 0, 116 | ma(1, 2) * ma(2, 1) * ma(3, 0) - ma(1, 1) * ma(2, 2) * ma(3, 0) - 117 | ma(1, 2) * ma(2, 0) * ma(3, 1) + ma(1, 0) * ma(2, 2) * ma(3, 1) + 118 | ma(1, 1) * ma(2, 0) * ma(3, 2) - ma(1, 0) * ma(2, 1) * ma(3, 2) 119 | ); 120 | mb(3, 1, 121 | ma(0, 1) * ma(2, 2) * ma(3, 0) - ma(0, 2) * ma(2, 1) * ma(3, 0) + 122 | ma(0, 2) * ma(2, 0) * ma(3, 1) - ma(0, 0) * ma(2, 2) * ma(3, 1) - 123 | ma(0, 1) * ma(2, 0) * ma(3, 2) + ma(0, 0) * ma(2, 1) * ma(3, 2) 124 | ); 125 | mb(3, 2, 126 | ma(0, 2) * ma(1, 1) * ma(3, 0) - ma(0, 1) * ma(1, 2) * ma(3, 0) - 127 | ma(0, 2) * ma(1, 0) * ma(3, 1) + ma(0, 0) * ma(1, 2) * ma(3, 1) + 128 | ma(0, 1) * ma(1, 0) * ma(3, 2) - ma(0, 0) * ma(1, 1) * ma(3, 2) 129 | ); 130 | mb(3, 3, 131 | ma(0, 1) * ma(1, 2) * 
ma(2, 0) - ma(0, 2) * ma(1, 1) * ma(2, 0) + 132 | ma(0, 2) * ma(1, 0) * ma(2, 1) - ma(0, 0) * ma(1, 2) * ma(2, 1) - 133 | ma(0, 1) * ma(1, 0) * ma(2, 2) + ma(0, 0) * ma(1, 1) * ma(2, 2) 134 | ); 135 | 136 | return mb / det(ma); 137 | } 138 | 139 | } 140 | 141 | #endif // VXL_MATRIX_INVERSE_HPP 142 | -------------------------------------------------------------------------------- /minmax.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_MINMAX_HPP 2 | # define VXL_MINMAX_HPP 3 | # pragma once 4 | 5 | #if defined(__ARM_NEON) 6 | 7 | #include "arm_neon.h" 8 | 9 | #endif // __ARM_NEON 10 | 11 | #include "vector.hpp" 12 | 13 | namespace vxl 14 | { 15 | 16 | #if defined(__ARM_NEON) 17 | 18 | //__attribute__ ((noinline)) 19 | inline auto min(vector const& v) noexcept 20 | { 21 | using vector_type = typename vector_traits::vector_type; 22 | 23 | auto const tmp(float32x2_t(v.data_)); 24 | 25 | return vector{ 26 | vector_type(vpmin_f32(tmp, tmp)) 27 | }; 28 | } 29 | 30 | //__attribute__ ((noinline)) 31 | inline auto max(vector const& v) noexcept 32 | { 33 | using vector_type = typename vector_traits::vector_type; 34 | 35 | auto const tmp(float32x2_t(v.data_)); 36 | 37 | return vector{ 38 | vector_type(vpmax_f32(tmp, tmp)) 39 | }; 40 | } 41 | 42 | //__attribute__ ((noinline)) 43 | inline auto min(vector const& v) noexcept 44 | { 45 | using vector_type = typename vector_traits::vector_type; 46 | 47 | auto tmp(vpmin_f32(vget_low_f32(float32x4_t(v.data_)), 48 | vdup_lane_f32(vget_high_f32(float32x4_t(v.data_)), 0))); 49 | 50 | tmp = vpmin_f32(tmp, tmp); 51 | 52 | return vector{ 53 | vector_type(vcombine_f32(tmp, tmp)) 54 | }; 55 | } 56 | 57 | //__attribute__ ((noinline)) 58 | inline auto max(vector const& v) noexcept 59 | { 60 | using vector_type = typename vector_traits::vector_type; 61 | 62 | auto tmp(vpmax_f32(vget_low_f32(float32x4_t(v.data_)), 63 | vdup_lane_f32(vget_high_f32(float32x4_t(v.data_)), 0))); 64 | 65 | tmp = 
vpmax_f32(tmp, tmp); 66 | 67 | return vector{ 68 | vector_type(vcombine_f32(tmp, tmp)) 69 | }; 70 | } 71 | 72 | //__attribute__ ((noinline)) 73 | inline auto min(vector const& v) noexcept 74 | { 75 | using vector_type = typename vector_traits::vector_type; 76 | 77 | auto tmp(vpmin_f32(vget_low_f32(float32x4_t(v.data_)), 78 | vget_high_f32(float32x4_t(v.data_)))); 79 | 80 | tmp = vpmin_f32(tmp, tmp); 81 | 82 | return vector{ 83 | vector_type(vcombine_f32(tmp, tmp)) 84 | }; 85 | } 86 | 87 | //__attribute__ ((noinline)) 88 | inline auto max(vector const& v) noexcept 89 | { 90 | using vector_type = typename vector_traits::vector_type; 91 | 92 | auto tmp(vpmax_f32(vget_low_f32(float32x4_t(v.data_)), 93 | vget_high_f32(float32x4_t(v.data_)))); 94 | 95 | tmp = vpmax_f32(tmp, tmp); 96 | 97 | return vector{ 98 | vector_type(vcombine_f32(tmp, tmp)) 99 | }; 100 | } 101 | 102 | #endif 103 | 104 | namespace detail 105 | { 106 | 107 | namespace minmax 108 | { 109 | 110 | // 1 2 3 4 111 | // m(1, 2) m(2, 3) m(3, 4) m(4, 1) 112 | // m(1, 2, 3, 4) m(2, 3, 4, 1) m(3, 4, 1, 2) m(4, 1, 2, 3) 113 | template 114 | inline constexpr auto minmax(typename vector_traits::vector_type v, 115 | std::index_sequence) noexcept 116 | { 117 | decltype(v) sr; 118 | 119 | if constexpr(op) 120 | ( 121 | ( 122 | sr = detail::vector::pow2_shuffler(v, 123 | std::make_index_sequence() 124 | ), 125 | v = select(v, sr, v < sr) 126 | ), 127 | ... 128 | ); 129 | else 130 | ( 131 | ( 132 | sr = detail::vector::pow2_shuffler(v, 133 | std::make_index_sequence() 134 | ), 135 | v = select(v, sr, v > sr) 136 | ), 137 | ... 
138 | ); 139 | 140 | return v; 141 | } 142 | 143 | } 144 | 145 | } 146 | 147 | // min 148 | template 149 | //__attribute__ ((noinline)) 150 | inline constexpr auto min(vector const& v) noexcept 151 | { 152 | return vector{ 153 | detail::minmax::minmax(v.data_, 154 | std::make_index_sequence(N)>() 155 | ) 156 | }; 157 | } 158 | 159 | // max 160 | template 161 | //__attribute__ ((noinline)) 162 | inline constexpr auto max(vector const& v) noexcept 163 | { 164 | return vector{ 165 | detail::minmax::minmax(v.data_, 166 | std::make_index_sequence(N)>() 167 | ) 168 | }; 169 | } 170 | 171 | } 172 | 173 | #endif // VXL_MINMAX_HPP 174 | -------------------------------------------------------------------------------- /onb.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_ONB_HPP 2 | # define VXL_ONB_HPP 3 | # pragma once 4 | 5 | #include "vector.hpp" 6 | 7 | namespace vxl 8 | { 9 | 10 | // Graphics Tools---The jgt Editors' Choice 11 | // Building an orthonormal basis from a unit vector 12 | // Building an Orthonormal Basis from a 3D Unit Vector Without Normalization 13 | // http://blog.selfshadow.com/2011/10/17/perp-vectors/ 14 | // http://lolengine.net/blog/2013/09/21/picking-orthogonal-vector-combing-coconuts 15 | ////////////////////////////////////////////////////////////////////////////// 16 | template 17 | //__attribute__ ((noinline)) 18 | constexpr auto ortho(vector const& v, default_tag = {}) noexcept 19 | { 20 | return vector{-v(1), v(0)}; 21 | } 22 | 23 | ////////////////////////////////////////////////////////////////////////////// 24 | template 25 | //__attribute__ ((noinline)) 26 | constexpr auto ortho(vector const& v, default_tag = {}) noexcept 27 | { 28 | using int_value_type = typename vector_traits::int_value_type; 29 | using vector_type = typename vector_traits::vector_type; 30 | 31 | auto const tmp(abs(v)); 32 | 33 | /* 34 | return tmp(0) > tmp(1) ? 35 | vector{-v(2), T(0), v(0)} ? 
36 | vector{T(0), -v(2), v(1)}; 37 | */ 38 | 39 | // if abs(v(0)) > abs(v(1)), then we keep v(0) and v(2), otherwise 40 | // v(1) and v(2), this way the maximum vector element by abs value is always 41 | // retained. We negate the comparison because of select(). 42 | return vector{ 43 | select( 44 | vector_type{-v(2), T(0), v(0)}, 45 | vector_type{T(0), -v(2), v(1)}, 46 | cvector(-(tmp(0) > tmp(1))) 47 | ) 48 | }; 49 | } 50 | 51 | namespace detail::onb 52 | { 53 | 54 | template 55 | constexpr auto min_element(typename vector_traits::vector_type v, 56 | std::index_sequence, std::index_sequence) noexcept 57 | { 58 | using int_value_type = typename vector_traits::int_value_type; 59 | using int_vector_type = typename vector_traits::int_vector_type; 60 | 61 | int_vector_type j{Js...}; 62 | int_vector_type sj; 63 | 64 | int_vector_type s; 65 | 66 | decltype(v) sv; 67 | 68 | ( 69 | ( 70 | sv = detail::vector::pow2_shuffler(v, 71 | std::make_index_sequence() 72 | ), 73 | s = v < sv, 74 | v = select(v, sv, s), 75 | sj = detail::vector::pow2_shuffler(j, 76 | std::make_index_sequence() 77 | ), 78 | j = select(j, sj, s) 79 | ), 80 | ... 81 | ); 82 | 83 | return j; 84 | } 85 | 86 | } 87 | 88 | ////////////////////////////////////////////////////////////////////////////// 89 | template 90 | //__attribute__ ((noinline)) 91 | constexpr auto ortho(vector const& v, canonical_tag) noexcept 92 | { 93 | // The canonical way is to find the minimum element by absolute value, 94 | // set it to zero and swap the other 2. This is much more involved. 
95 | auto const i(detail::onb::min_element(v.data_, 96 | std::make_index_sequence< 97 | vxl::detail::vector::log<2>(sizeof(v) / sizeof(T)) 98 | >(), 99 | std::make_index_sequence() 100 | )[0] 101 | ); 102 | 103 | vector r{}; 104 | 105 | switch (i) 106 | { 107 | case 0: 108 | r = swizzled<0, 2, 1>(v); 109 | break; 110 | 111 | case 1: 112 | r = swizzled<2, 1, 0>(v); 113 | break; 114 | 115 | case 2: 116 | r = swizzled<1, 0, 2>(v); 117 | break; 118 | 119 | default: 120 | assert(0); 121 | } 122 | 123 | r.set_element(i, T(0)); 124 | 125 | auto const j((i + 1) % 3); 126 | r.set_element(j, -r(j)); 127 | 128 | return r; 129 | } 130 | 131 | } 132 | 133 | #endif // VXL_ONB_HPP 134 | -------------------------------------------------------------------------------- /quat.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "rotation.hpp" 6 | 7 | int main() 8 | { 9 | vxl::quat a{0.f, 1.f, 0.f, 1.f}; 10 | vxl::quat b{.5f, .5f, .75f, 1.f}; 11 | 12 | std::cout << a * b << std::endl; 13 | //(.87 - .5k)(i + 2j)(.87 + .5k) = (2.25, .14, 0) 14 | std::cout << vxl::rotated(vxl::quat{.0f, .0f, -.5f, .87f}, 15 | vxl::vector{1.f, 2.f, 0.f}) << 16 | std::endl; 17 | std::cout << vxl::quat{.0f, .0f, -.5f, .87f} * 18 | vxl::quat{1.f, 2.f, 0.f} * 19 | vxl::quat{.0f, .0f, .5f, .87f} << 20 | std::endl; 21 | 22 | std::cout << 23 | vxl::to_quat(vxl::vector{1.f, .0f, .0f}, 2.f) << 24 | std::endl; 25 | 26 | std::cout << 27 | vxl::norm2(vxl::quat{1, 1, 1, 1}) << 28 | std::endl; 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /quat.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_QUAT_HPP 2 | # define VXL_QUAT_HPP 3 | # pragma once 4 | 5 | #include "matrix.hpp" 6 | 7 | namespace vxl 8 | { 9 | 10 | // elements in a quat are laid out as: (x, y, z, w) 11 | 12 | template 13 | struct quat 14 | { 15 | typename 
vector_traits::vector_type data_; 16 | 17 | // element access 18 | constexpr auto operator()(unsigned const i) const noexcept 19 | { 20 | return data_[i]; 21 | } 22 | 23 | constexpr void set_element(unsigned const i, T const v) noexcept 24 | { 25 | data_[i] = v; 26 | } 27 | 28 | // conversion 29 | constexpr auto& ref() noexcept { return data_; } 30 | constexpr auto& ref() const noexcept { return data_; } 31 | }; 32 | 33 | template 34 | inline constexpr quat operator+(quat const& a, 35 | quat const& b) noexcept 36 | { 37 | return { a.data_ + b.data_ }; 38 | } 39 | 40 | template 41 | inline constexpr quat operator-(quat const& a, 42 | quat const& b) noexcept 43 | { 44 | return { a.data_ - b.data_ }; 45 | } 46 | 47 | template 48 | //__attribute__ ((noinline)) 49 | inline constexpr auto operator*(quat const& l, quat const& r) noexcept 50 | { 51 | // l(0)r(3) + l(1)r(2) - l(2)r(1) + l(3)r(0) 52 | // -l(0)r(2) + l(1)r(3) + l(2)r(0) + l(3)r(1) 53 | // l(0)r(1) - l(1)r(0) + l(2)r(3) + l(3)r(2) 54 | // -l(0)r(0) - l(1)r(1) - l(2)r(2) + l(3)r(3) 55 | using int_vector_type = typename vector_traits::int_vector_type; 56 | using vector_type = typename vector_traits::vector_type; 57 | 58 | #if defined(__clang__) 59 | auto const t1( 60 | l.data_ * 61 | __builtin_shufflevector(r.data_, r.data_, 3, 3, 3, 3) 62 | ); 63 | auto const t2( 64 | __builtin_shufflevector(l.data_, l.data_, 1, 2, 0, 2) * 65 | __builtin_shufflevector(r.data_, r.data_, 2, 0, 1, 2) 66 | ); 67 | auto const t3( 68 | __builtin_shufflevector(l.data_, l.data_, 3, 3, 3, 1) * 69 | __builtin_shufflevector(r.data_, r.data_, 0, 1, 2, 1) 70 | ); 71 | auto const t4( 72 | __builtin_shufflevector(l.data_, l.data_, 2, 0, 1, 0) * 73 | __builtin_shufflevector(r.data_, r.data_, 1, 2, 0, 0) 74 | ); 75 | #else 76 | auto const t1( 77 | l.data_ * 78 | __builtin_shuffle(r.data_, int_vector_type{3, 3, 3, 3}) 79 | ); 80 | auto const t2( 81 | __builtin_shuffle(l.data_, int_vector_type{1, 2, 0, 2}) * 82 | __builtin_shuffle(r.data_, 
int_vector_type{2, 0, 1, 2}) 83 | ); 84 | auto const t3( 85 | __builtin_shuffle(l.data_, int_vector_type{3, 3, 3, 1}) * 86 | __builtin_shuffle(r.data_, int_vector_type{0, 1, 2, 1}) 87 | ); 88 | auto const t4( 89 | __builtin_shuffle(l.data_, int_vector_type{2, 0, 1, 0}) * 90 | __builtin_shuffle(r.data_, int_vector_type{1, 2, 0, 0}) 91 | ); 92 | #endif 93 | 94 | return quat{ 95 | t1 + 96 | decltype(t2)(int_vector_type(t2 + t3) ^ 97 | int_vector_type(vector_type{T(0), T(0), T(0), T(-.0)}) 98 | ) - 99 | t4 100 | }; 101 | } 102 | 103 | template 104 | inline constexpr auto& operator+=(quat& a, quat const& b) noexcept 105 | { 106 | return a.data_ += b.data_, a; 107 | } 108 | 109 | template 110 | inline constexpr auto& operator-=(quat& a, quat const& b) noexcept 111 | { 112 | return a.data_ -= b.data_, a; 113 | } 114 | 115 | // comparison 116 | template 117 | inline bool operator==(quat const& l, quat const& r) noexcept 118 | { 119 | return detail::vector::all_zeros(l.data_ != r.data_, 120 | std::make_index_sequence() 121 | ); 122 | } 123 | 124 | template 125 | inline constexpr bool operator!=(quat const& l, quat const& r) noexcept 126 | { 127 | return !operator==(l, r); 128 | } 129 | 130 | // scalar part 131 | template 132 | inline constexpr auto scalar(quat const& x) noexcept 133 | { 134 | return x.data_[3]; 135 | } 136 | 137 | template 138 | //__attribute__ ((noinline)) 139 | inline constexpr auto scalar_vector(quat const& x) noexcept 140 | { 141 | return cvector(scalar(x)); 142 | } 143 | 144 | // vector part 145 | template 146 | inline constexpr auto vec(quat const& x) noexcept 147 | { 148 | return vector{x.data_}; 149 | } 150 | 151 | // conjugation 152 | template 153 | inline constexpr void conjugate(quat& x) noexcept 154 | { 155 | using int_vector_type = typename vector_traits::int_vector_type; 156 | using vector_type = typename vector_traits::vector_type; 157 | 158 | x.data_ = vector_type( 159 | int_vector_type(x.data_) ^ 160 | int_vector_type(vector_type{T(-.0), 
T(-.0), T(-.0), T(.0)}) 161 | ); 162 | } 163 | 164 | template 165 | inline constexpr auto conjugated(quat const& x) noexcept 166 | { 167 | using int_vector_type = typename vector_traits::int_vector_type; 168 | using vector_type = typename vector_traits::vector_type; 169 | 170 | return quat{ 171 | vector_type( 172 | int_vector_type(x.data_) ^ 173 | int_vector_type(vector_type{T(-.0), T(-.0), T(-.0), T(0)}) 174 | ) 175 | }; 176 | } 177 | 178 | template 179 | inline constexpr auto norm2(quat const& x) noexcept 180 | { 181 | vector const q{x.data_}; 182 | 183 | return dot(q, q); 184 | } 185 | 186 | } 187 | 188 | // stream operators 189 | template 190 | std::ostream& operator<<(std::ostream& os, vxl::quat const& v) 191 | { 192 | os << '('; 193 | 194 | for (unsigned i{}; i != 3; ++i) 195 | { 196 | os << v.data_[i] << ", "; 197 | } 198 | 199 | return os << v.data_[3] << ')'; 200 | } 201 | 202 | #endif // VXL_QUAT_HPP 203 | -------------------------------------------------------------------------------- /rotation.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_ROTATION_HPP 2 | # define VXL_ROTATION_HPP 3 | # pragma once 4 | 5 | #include "cross.hpp" 6 | 7 | #include "quat.hpp" 8 | 9 | #include "sinf.hpp" 10 | 11 | namespace vxl 12 | { 13 | 14 | enum struct ea 15 | { 16 | XYZ, 17 | XZY, 18 | YXZ, 19 | YZX, 20 | ZXY, 21 | ZYX 22 | }; 23 | 24 | struct scale; 25 | 26 | namespace detail 27 | { 28 | 29 | template 30 | inline constexpr auto rot_x(std::pair, 31 | vxl::vector> const& sc) noexcept 32 | { 33 | return vxl::matrix{ 34 | T(1), T(0), T(0), T(0), 35 | T(0), sc.second(I), -sc.first(I), T(0), 36 | T(0), sc.first(I), sc.second(I), T(0), 37 | T(0), T(0), T(0), T(1) 38 | }; 39 | } 40 | 41 | template 42 | inline constexpr auto rot_y(std::pair, 43 | vxl::vector> const& sc) noexcept 44 | { 45 | return vxl::matrix{ 46 | sc.second(I), T(0), sc.first(I), T(0), 47 | T(0), T(1), T(0), T(0), 48 | -sc.first(I), T(0), sc.second(I), 
T(0), 49 | T(0), T(0), T(0), T(1) 50 | }; 51 | } 52 | 53 | template 54 | inline constexpr auto rot_z(std::pair, 55 | vxl::vector> const& sc) noexcept 56 | { 57 | return vxl::matrix{ 58 | sc.second(I), -sc.first(I), T(0), T(0), 59 | sc.first(I), sc.second(I), T(0), T(0), 60 | T(0), T(0), T(1), T(0), 61 | T(0), T(0), T(0), T(1) 62 | }; 63 | } 64 | 65 | } 66 | 67 | // Euler angle to rotation matrix conversions. 68 | ////////////////////////////////////////////////////////////////////////////// 69 | template 70 | inline constexpr std::enable_if_t> 71 | to_matrix(vxl::vector const& a) noexcept 72 | { 73 | auto const sc(sincos(a)); 74 | 75 | return detail::rot_z<2>(sc) * detail::rot_y<1>(sc) * detail::rot_x<0>(sc); 76 | } 77 | 78 | template 79 | inline constexpr std::enable_if_t> 80 | to_matrix(vxl::vector const& a) noexcept 81 | { 82 | auto const sc(sincos(a)); 83 | 84 | return detail::rot_y<2>(sc) * detail::rot_z<1>(sc) * detail::rot_x<0>(sc); 85 | } 86 | 87 | template 88 | inline constexpr std::enable_if_t> 89 | to_matrix(vxl::vector const& a) noexcept 90 | { 91 | auto const sc(sincos(a)); 92 | 93 | return detail::rot_z<2>(sc) * detail::rot_x<1>(sc) * detail::rot_y<0>(sc); 94 | } 95 | 96 | template 97 | inline constexpr std::enable_if_t> 98 | to_matrix(vxl::vector const& a) noexcept 99 | { 100 | auto const sc(sincos(a)); 101 | 102 | return detail::rot_x<2>(sc) * detail::rot_z<1>(sc) * detail::rot_y<0>(sc); 103 | } 104 | 105 | template 106 | inline constexpr std::enable_if_t> 107 | to_matrix(vxl::vector const& a) noexcept 108 | { 109 | auto const sc(sincos(a)); 110 | 111 | return detail::rot_y<2>(sc) * detail::rot_x<1>(sc) * detail::rot_z<0>(sc); 112 | } 113 | 114 | template 115 | inline constexpr std::enable_if_t> 116 | to_matrix(vxl::vector const& a) noexcept 117 | { 118 | auto const sc(sincos(a)); 119 | 120 | return detail::rot_x<2>(sc) * detail::rot_y<1>(sc) * detail::rot_z<0>(sc); 121 | } 122 | 123 | template 124 | inline constexpr std::enable_if_t{}, matrix> 125 
| to_matrix(vxl::vector const& s) noexcept 126 | { 127 | return vxl::diag(vxl::vector{s(0), s(1), s(2), T(1)}); 128 | } 129 | 130 | // convert axis angle to quaternion 131 | ////////////////////////////////////////////////////////////////////////////// 132 | template 133 | //__attribute__ ((noinline)) 134 | inline quat to_quat(vector const a, T const angle) noexcept 135 | { 136 | using vector_type = typename vector_traits::vector_type; 137 | 138 | auto const p(sincos(vector{T(.5) * angle})); 139 | 140 | return { 141 | vector_type{p.first.data_, p.first.data_, p.first.data_, p.second.data_} * 142 | vector_type{a.data_[0], a.data_[1], a.data_[2], T(1)} 143 | }; 144 | } 145 | 146 | ////////////////////////////////////////////////////////////////////////////// 147 | template 148 | //__attribute__ ((noinline)) 149 | inline vector rotated(quat const& q, vector const& v) noexcept 150 | { 151 | auto const vq(vec(q)); 152 | 153 | auto const t(cross(vq, v)); 154 | 155 | auto const u(scalar_vector(q) * t.data_ + cross(vq, t).data_); 156 | 157 | return {v.data_ + u + u}; 158 | } 159 | 160 | ////////////////////////////////////////////////////////////////////////////// 161 | template 162 | //__attribute__ ((noinline)) 163 | inline vector& rotate(quat const& q, vector& v) noexcept 164 | { 165 | auto const vq(vec(q)); 166 | 167 | auto const t(cross(vq, v)); 168 | 169 | auto const u(scalar_vector(q) * t.data_ + cross(vq, t).data_); 170 | 171 | v.data_ += u + u; 172 | 173 | return v; 174 | } 175 | 176 | } 177 | 178 | #endif // VXL_ROTATION_HPP 179 | -------------------------------------------------------------------------------- /rsqrt.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_RSQRT_HPP 2 | # define VXL_RSQRT_HPP 3 | # pragma once 4 | 5 | #include "vector.hpp" 6 | 7 | #if defined(__SSE__) 8 | 9 | # include "xmmintrin.h" 10 | 11 | #endif // __SSE__ 12 | 13 | #if defined(__SSE3__) 14 | 15 | #include "pmmintrin.h" 16 | 17 | 
#endif // __SSE3__ 18 | 19 | #if defined (__SSE4_1__) 20 | 21 | #include "smmintrin.h" 22 | 23 | #endif // __SSE4_1__ 24 | 25 | #if defined(__ARM_NEON) 26 | 27 | #include "arm_neon.h" 28 | 29 | #endif // __ARM_NEON 30 | 31 | namespace vxl 32 | { 33 | 34 | // reciprocal square root and square root 35 | // https://en.wikipedia.org/wiki/Fast_inverse_square_root 36 | 37 | #if defined(__ARM_NEON) 38 | 39 | //__attribute__ ((noinline)) 40 | inline auto rsqrt(vector const& xx) noexcept 41 | { 42 | using vector_type = typename vector_traits::vector_type; 43 | 44 | auto const x(float32x2_t(vector_type{xx.data_})); 45 | 46 | auto r(vrsqrte_f32(x)); 47 | 48 | r *= vrsqrts_f32(x, r * r); 49 | 50 | return vector{vector_type(r * (vrsqrts_f32(x, r * r)))[0]}; 51 | } 52 | 53 | //__attribute__ ((noinline)) 54 | inline auto rsqrt(vector const& x) noexcept 55 | { 56 | using vector_type = typename vector_traits::vector_type; 57 | 58 | auto r(vrsqrte_f32(float32x2_t(x.data_))); 59 | 60 | r *= vrsqrts_f32(float32x2_t(x.data_), r * r); 61 | 62 | return vector{ 63 | vector_type( 64 | r * (vrsqrts_f32(float32x2_t(x.data_), r * r)) 65 | ) 66 | }; 67 | } 68 | 69 | //__attribute__ ((noinline)) 70 | inline auto rsqrt(vector const& x) noexcept 71 | { 72 | using vector_type = typename vector_traits::vector_type; 73 | 74 | auto r(vrsqrteq_f32(float32x4_t(x.data_))); 75 | 76 | r *= vrsqrtsq_f32(float32x4_t(x.data_), r * r); 77 | 78 | return vector{ 79 | vector_type( 80 | r * (vrsqrtsq_f32(float32x4_t(x.data_), r * r)) 81 | ) 82 | }; 83 | } 84 | 85 | //__attribute__ ((noinline)) 86 | inline auto rsqrt(vector const& x) noexcept 87 | { 88 | using vector_type = typename vector_traits::vector_type; 89 | 90 | auto r(vrsqrteq_f32(float32x4_t(x.data_))); 91 | 92 | r *= vrsqrtsq_f32(float32x4_t(x.data_), r * r); 93 | 94 | return vector{ 95 | vector_type( 96 | r * (vrsqrtsq_f32(float32x4_t(x.data_), r * r)) 97 | ) 98 | }; 99 | } 100 | 101 | #elif defined(__SSE__) 102 | 103 | //__attribute__ ((noinline)) 104 
| inline auto rsqrt(vector const& xx) noexcept 105 | { 106 | using vector_type = typename vector_traits::vector_type; 107 | 108 | vector_type const x{xx.data_}; 109 | 110 | auto const r(_mm_rsqrt_ss(x)); 111 | 112 | return vector{ 113 | (r * (cvector(1.5f) - cvector(.5f) * x * r * r))[0] 114 | }; 115 | } 116 | 117 | //__attribute__ ((noinline)) 118 | inline auto rsqrt(vector const& x) noexcept 119 | { 120 | auto const r(_mm_rsqrt_ps(x.data_)); 121 | 122 | return vector{ 123 | r * (cvector(1.5f) - cvector(.5f) * x.data_ * r * r) 124 | }; 125 | } 126 | 127 | //__attribute__ ((noinline)) 128 | inline auto rsqrt(vector const& x) noexcept 129 | { 130 | auto const r(_mm_rsqrt_ps(x.data_)); 131 | 132 | return vector{ 133 | r * (cvector(1.5f) - cvector(.5f) * x.data_ * r * r) 134 | }; 135 | } 136 | 137 | //__attribute__ ((noinline)) 138 | inline auto rsqrt(vector const& x) noexcept 139 | { 140 | auto const r(_mm_rsqrt_ps(x.data_)); 141 | 142 | return vector{ 143 | r * (cvector(1.5f) - cvector(.5f) * x.data_ * r * r) 144 | }; 145 | } 146 | 147 | #endif 148 | 149 | namespace detail 150 | { 151 | 152 | namespace rsqrt 153 | { 154 | 155 | template 156 | inline constexpr T magic_constant; 157 | 158 | template <> 159 | inline constexpr auto magic_constant{0x5f375a86}; 160 | 161 | template <> 162 | inline constexpr auto magic_constant{0x5fe6eb50c7b537a9}; 163 | 164 | } 165 | 166 | } 167 | 168 | template 169 | //__attribute__ ((noinline)) 170 | inline constexpr auto rsqrt(vector const& xx) noexcept 171 | { 172 | using int_value_type = typename vector_traits::int_value_type; 173 | using int_vector_type = typename vector_traits::int_vector_type; 174 | using vector_type = typename vector_traits::vector_type; 175 | 176 | auto& x(xx.ref()); 177 | auto& xi((int_vector_type&)(x)); 178 | 179 | // magic step 180 | auto r(cvector(detail::rsqrt::magic_constant) - 181 | (xi >> cvector(1))); 182 | auto& rr((vector_type&)r); 183 | 184 | auto const xhalf(cvector(T(.5)) * x); 185 | 186 | 
constexpr auto c(cvector(T(1.5))); 187 | 188 | rr *= c - xhalf * rr * rr; 189 | rr *= c - xhalf * rr * rr; 190 | 191 | return vector{rr * (c - xhalf * rr * rr)}; 192 | } 193 | 194 | template 195 | //__attribute__ ((noinline)) 196 | inline constexpr auto sqrt(vector const& x) noexcept 197 | { 198 | return x * rsqrt(x); 199 | } 200 | 201 | } 202 | 203 | #endif // VXL_RSQRT_HPP 204 | -------------------------------------------------------------------------------- /sinf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_SINF_HPP 2 | # define VXL_SINF_HPP 3 | # pragma once 4 | 5 | #include 6 | 7 | #include "vector.hpp" 8 | 9 | namespace vxl 10 | { 11 | 12 | namespace detail 13 | { 14 | 15 | namespace constantsf 16 | { 17 | inline constexpr auto FOPI = 1.27323954473516f; 18 | 19 | inline constexpr auto DP1 = 0.78515625f; 20 | inline constexpr auto DP2 = 2.4187564849853515625e-4f; 21 | inline constexpr auto DP3 = 3.77489497744594108e-8f; 22 | } 23 | 24 | } 25 | 26 | template 27 | inline constexpr vector sin(vector xx) noexcept 28 | { 29 | using int_value_type = typename vector_traits::int_value_type; 30 | using int_vector_type = typename vector_traits::int_vector_type; 31 | 32 | auto& x(xx.ref()); 33 | auto& xi((int_vector_type&)(x)); 34 | 35 | auto sign(int_vector_type(x) & cvector(1 << 31)); 36 | 37 | xi &= cvector(~(1 << 31)); 38 | 39 | auto j(convert( 40 | cvector(detail::constantsf::FOPI) * x)); 41 | 42 | j += j & cvector(1); 43 | 44 | auto y(convert(j)); 45 | 46 | sign ^= (j & cvector(4)) << 47 | cvector(29); 48 | 49 | x = ((x - y * cvector(detail::constantsf::DP1)) - 50 | y * cvector(detail::constantsf::DP2)) - 51 | y * cvector(detail::constantsf::DP3); 52 | 53 | auto const z(x * x); 54 | 55 | auto const y1(((cvector(2.443315711809948e-5f) * z - 56 | cvector(1.388731625493765e-3f)) * z + 57 | cvector(4.166664568298827e-2f)) * z * z - 58 | cvector(.5f) * z + cvector(1.f)); 59 | 60 | auto const 
y2((((cvector(-1.9515295891e-4f) * z + 61 | cvector(8.3321608736e-3f)) * z - 62 | cvector(1.6666654611e-1f)) * z * x) + x); 63 | // use the cosine polynomial y1 whenever j is 2 mod 4 (j = 2, 6, 10, ...), not only when j == 2; j is never reduced mod 8 64 | auto r(select(y1, y2, int_vector_type(cvector(2) == (j & cvector(2))))); 65 | (int_vector_type&)(r) ^= sign; 66 | 67 | return {r}; 68 | } 69 | // cos(xx), lane by lane: clears the sign bit, reduces by multiples of pi/4 (j made even), evaluates both polynomials and picks one by j 70 | template 71 | inline constexpr vector cos(vector xx) noexcept 72 | { 73 | using int_value_type = typename vector_traits::int_value_type; 74 | using int_vector_type = typename vector_traits::int_vector_type; 75 | 76 | auto& x(xx.ref()); 77 | auto& xi((int_vector_type&)(x)); 78 | 79 | xi &= cvector(~(1 << 31)); 80 | 81 | auto j(convert( 82 | cvector(detail::constantsf::FOPI) * x)); 83 | 84 | j += j & cvector(1); 85 | 86 | auto y(convert(j)); 87 | 88 | auto const sign((~(j - cvector(2)) & 89 | cvector(4)) << cvector(29)); 90 | 91 | x = ((x - y * cvector(detail::constantsf::DP1)) - 92 | y * cvector(detail::constantsf::DP2)) - 93 | y * cvector(detail::constantsf::DP3); 94 | 95 | auto const z(x * x); 96 | 97 | auto const y1(((cvector(2.443315711809948e-5f) * z - 98 | cvector(1.388731625493765e-3f)) * z + 99 | cvector(4.166664568298827e-2f)) * z * z - 100 | cvector(.5f) * z + cvector(1.f)); 101 | 102 | auto const y2((((cvector(-1.9515295891e-4f) * z + 103 | cvector(8.3321608736e-3f)) * z - 104 | cvector(1.6666654611e-1f)) * z * x) + x); 105 | // sine polynomial y2 whenever j is 2 mod 4 (the test must be periodic, like the sign computation), cosine polynomial y1 otherwise 106 | auto r(select(y2, y1, int_vector_type(cvector(2) == (j & cvector(2))))); 107 | (int_vector_type&)(r) ^= sign; 108 | 109 | return {r}; 110 | } 111 | // sincos(xx): returns the pair {sin, cos}, lane by lane, computed from one shared range reduction 112 | template 113 | inline constexpr std::pair, vector > 114 | //__attribute__ ((noinline)) 115 | sincos(vector xx) noexcept 116 | { 117 | using int_value_type = typename vector_traits::int_value_type; 118 | using int_vector_type = typename vector_traits::int_vector_type; 119 | 120 | auto& x(xx.ref()); 121 | auto& xi((int_vector_type&)(x)); 122 | 123 | auto sign_sin(xi & cvector(1 << 31)); 124 | 125 | xi &= cvector(~(1 << 31)); 126 | 127 | auto j(convert( 128 | cvector(detail::constantsf::FOPI) * x)); 129 | 130 | j += j & cvector(1); 
131 | 132 | auto y(convert(j)); 133 | 134 | sign_sin ^= (j & cvector(4)) << 135 | cvector(29); 136 | 137 | // shift bit at index 2 into 1, if it is set and bit at index 1 is not set 138 | auto const sign_cos((~(j - cvector(2)) & 139 | cvector(4)) << cvector(29)); 140 | 141 | x = ((x - y * cvector(detail::constantsf::DP1)) - 142 | y * cvector(detail::constantsf::DP2)) - 143 | y * cvector(detail::constantsf::DP3); 144 | 145 | auto const z(x * x); 146 | 147 | auto const y1(((cvector(2.443315711809948e-5f) * z - 148 | cvector(1.388731625493765e-3f)) * z + 149 | cvector(4.166664568298827e-2f)) * z * z - 150 | cvector(.5f) * z + cvector(1.f)); 151 | 152 | auto const y2((((cvector(-1.9515295891e-4f) * z + 153 | cvector(8.3321608736e-3f)) * z - 154 | cvector(1.6666654611e-1f)) * z * x) + x); 155 | // sel is set where j is 2 mod 4 (j = 2, 6, 10, ...); j is never reduced mod 8, so comparing against 2 alone picks the wrong polynomial for larger arguments 156 | int_vector_type const sel(cvector(2) == (j & cvector(2))); 157 | 158 | auto p1(select(y1, y2, sel)); 159 | (int_vector_type&)(p1) ^= sign_sin; 160 | 161 | auto p2(select(y2, y1, sel)); 162 | (int_vector_type&)(p2) ^= sign_cos; 163 | 164 | return {{p1}, {p2}}; 165 | } 166 | 167 | } 168 | 169 | #endif // VXL_SINF_HPP 170 | -------------------------------------------------------------------------------- /tanf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_TANF_HPP 2 | # define VXL_TANF_HPP 3 | # pragma once 4 | 5 | #include "sinf.hpp" 6 | 7 | namespace vxl 8 | { 9 | 10 | template 11 | inline constexpr vector tan(vector xx) noexcept 12 | { 13 | using int_value_type = typename vector_traits::int_value_type; 14 | using int_vector_type = typename vector_traits::int_vector_type; 15 | 16 | auto& x(xx.ref()); 17 | auto& xi((int_vector_type&)(x)); 18 | 19 | auto sign(int_vector_type(x) & cvector(1 << 31)); 20 | 21 | xi &= cvector(~(1 << 31)); 22 | 23 | auto j(convert( 24 | cvector(detail::constantsf::FOPI) * x)); 25 | 26 | j += j & cvector(1); 27 | 28 | auto y(convert(j)); 29 | 30 | auto const z(((x - y * cvector(detail::constantsf::DP1)) - 31 | 
y * cvector(detail::constantsf::DP2)) - 32 | y * cvector(detail::constantsf::DP3)); 33 | 34 | auto const zz(z * z); 35 | 36 | y = select( 37 | (((((cvector(9.38540185543e-3f) * zz + 38 | cvector(3.11992232697e-3f)) * zz + 39 | cvector(2.44301354525e-2f)) * zz + 40 | cvector(5.34112807005e-2f)) * zz + 41 | cvector(1.33387994085e-1f)) * zz + 42 | cvector(3.33331568548e-1f)) * zz * z + z, 43 | z, 44 | int_vector_type(x > cvector(1e-4f)) 45 | ); 46 | 47 | auto r(select( 48 | cvector(-1.f) / y, 49 | y, 50 | int_vector_type(cvector(0) < 51 | (j & cvector(2)))) 52 | ); 53 | (int_vector_type&)(r) ^= sign; 54 | 55 | return {r}; 56 | } 57 | 58 | template 59 | inline constexpr vector cot(vector xx) noexcept 60 | { 61 | using int_value_type = typename vector_traits::int_value_type; 62 | using int_vector_type = typename vector_traits::int_vector_type; 63 | 64 | auto& x(xx.ref()); 65 | auto& xi((int_vector_type&)(x)); 66 | 67 | auto sign(int_vector_type(x) & cvector(1 << 31)); 68 | 69 | xi &= cvector(~(1 << 31)); 70 | 71 | auto j(convert( 72 | cvector(detail::constantsf::FOPI) * x)); 73 | 74 | j += j & cvector(1); 75 | 76 | auto y(convert(j)); 77 | 78 | auto const z(((x - y * cvector(detail::constantsf::DP1)) - 79 | y * cvector(detail::constantsf::DP2)) - 80 | y * cvector(detail::constantsf::DP3)); 81 | 82 | auto const zz(z * z); 83 | 84 | y = select( 85 | (((((cvector(9.38540185543e-3f) * zz + 86 | cvector(3.11992232697e-3f)) * zz + 87 | cvector(2.44301354525e-2f)) * zz + 88 | cvector(5.34112807005e-2f)) * zz + 89 | cvector(1.33387994085e-1f)) * zz + 90 | cvector(3.33331568548e-1f)) * zz * z + z, 91 | z, 92 | int_vector_type(x > cvector(1e-4f)) 93 | ); 94 | 95 | auto r(select( 96 | -y, 97 | cvector(1.f) / y, 98 | int_vector_type(cvector(0) < 99 | (j & cvector(2)))) 100 | ); 101 | (int_vector_type&)(r) ^= sign; 102 | 103 | return {r}; 104 | } 105 | 106 | } 107 | 108 | #endif // VXL_TANF_HPP 109 | -------------------------------------------------------------------------------- 
/tr_aabb_aabb.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_TR_AABB_AABB_HPP 2 | #define VXL_TR_AABB_AABB_HPP 3 | #pragma once 4 | 5 | #include "aabb.hpp" 6 | 7 | #include "matrix.hpp" 8 | 9 | namespace vxl 10 | { 11 | 12 | ////////////////////////////////////////////////////////////////////////////// 13 | // Returns the axis-aligned box enclosing box a after transformation by m: 14 | // both result corners start at the translation m(i, 3); then, for every 15 | // column, the lane-wise smaller of the two scaled columns is added to b[0] and 16 | // the larger to b[1], so b[0] never exceeds b[1], also for negative entries. 17 | template 18 | inline aabb_type tr_mm_aabb_aabb( 19 | matrix const& m, 20 | aabb_type const& a, 21 | default_tag const = {}) noexcept 22 | { 23 | aabb_type b{{}}; 24 | 25 | // seed the translation before accumulating, so no column term is overwritten 26 | for (unsigned i{}; i != 3; ++i) 27 | { 28 | b[0].data_[i] = b[1].data_[i] = m(i, 3); 29 | } 30 | 31 | for (unsigned i{}; i != 3; ++i) 32 | { 33 | auto const c(col(m, i)); 34 | 35 | auto const lo(::vxl::cvector(a[0](i)) * c.data_); 36 | auto const hi(::vxl::cvector(a[1](i)) * c.data_); 37 | 38 | // lane mask: set where the a[0] product is the smaller of the two 39 | auto const lt(lo < hi); 40 | 41 | b[0].data_ += select(lo, hi, lt); 42 | b[1].data_ += select(hi, lo, lt); 43 | } 44 | 45 | return b; 46 | } 47 | 48 | ////////////////////////////////////////////////////////////////////////////// 49 | // Same accumulation without the translation: only columns 0..2 of m are used. 50 | template 51 | inline aabb_type tr_mm_aabb_aabb( 52 | matrix const& m, 53 | aabb_type const& a, 54 | no_translation_tag const) noexcept 55 | { 56 | aabb_type b{{}}; 57 | 58 | for (unsigned i{}; i != 3; ++i) 59 | { 60 | auto const c(col(m, i)); 61 | 62 | auto const lo(::vxl::cvector(a[0](i)) * c.data_); 63 | auto const hi(::vxl::cvector(a[1](i)) * c.data_); 64 | 65 | // lane mask: set where the a[0] product is the smaller of the two 66 | auto const lt(lo < hi); 67 | 68 | b[0].data_ += select(lo, hi, lt); 69 | b[1].data_ += select(hi, lo, lt); 70 | } 71 | 72 | return b; 73 | } 74 | 75 | } 76 | 77 | #endif // VXL_TR_AABB_AABB_HPP 78 | -------------------------------------------------------------------------------- 
/tri.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_TRI_HPP 2 | # define VXL_TRI_HPP 3 | # pragma once 4 | 5 | namespace vxl 6 | { 7 | 8 | // triangle type, 3 vertices of arbitrary dimension 9 | template 10 | using triangle_t = std::array, 3>; 11 | 12 | } 13 | 14 | #endif // VXL_TRI_HPP 15 | -------------------------------------------------------------------------------- /vector.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "cross.hpp" 6 | 7 | #include "dot.hpp" 8 | 9 | #include "onb.hpp" 10 | 11 | #include "minmax.hpp" 12 | 13 | #include "rsqrt.hpp" 14 | 15 | #include "sinf.hpp" 16 | 17 | #include "tanf.hpp" 18 | 19 | int main() 20 | { 21 | auto a(vxl::make_vector(1.f, 2.f, 3.f)); 22 | vxl::vector b{1.f, 1.f, 1.f}; 23 | 24 | std::cout << (a == a) << std::endl; 25 | std::cout << (a == b) << std::endl; 26 | std::cout << (a >= b) << std::endl; 27 | std::cout << (a <= b) << std::endl; 28 | 29 | std::cout << vxl::all(a) << std::endl; 30 | std::cout << vxl::any(b) << std::endl; 31 | 32 | std::cout << vxl::dot(a, b).data_[0] << std::endl; 33 | std::cout << vxl::cross(a, b) << std::endl; 34 | std::cout << vxl::sqrt(a) << std::endl; 35 | 36 | vxl::vector c{1.f, 2.f, -3.f, 4.f}; 37 | 38 | { 39 | auto const sc(vxl::sincos(c)); 40 | std::cout << sc.first << std::endl; 41 | std::cout << sc.second << std::endl; 42 | } 43 | 44 | std::cout << vxl::sin(c) << std::endl; 45 | std::cout << vxl::cos(c) << std::endl; 46 | std::cout << vxl::tan(c) << std::endl; 47 | std::cout << 
vxl::cot(c) << std::endl; 48 | 49 | { 50 | vxl::vector d{1.f}; 51 | 52 | auto const sc(vxl::sincos(d)); 53 | std::cout << sc.first(0) << std::endl; 54 | std::cout << sc.second(0) << std::endl; 55 | } 56 | 57 | { 58 | vxl::vector d{3.}; 59 | std::cout << vxl::sqrt(d)(0) << std::endl; 60 | } 61 | 62 | std::cout << vxl::max(a) << std::endl; 63 | std::cout << vxl::min(a) << std::endl; 64 | 65 | vxl::vector const u{3.0f, 2.0f, 1.0f}; 66 | std::cout << vxl::ortho(u) << std::endl; 67 | std::cout << vxl::ortho(u, vxl::canonical_tag{}) << std::endl; 68 | 69 | vxl::vector e{1., 2., 3., 4.}; 70 | std::cout << (e == e) << std::endl; 71 | std::cout << vxl::norm2(e) << std::endl; 72 | 73 | std::cout << vxl::dot( 74 | vxl::vector{1, 2}, 75 | vxl::vector{3, 4} 76 | ).data_[0] << 77 | std::endl; 78 | 79 | return std::hash()(a); 80 | } 81 | -------------------------------------------------------------------------------- /vector.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VXL_VECTOR_HPP 2 | # define VXL_VECTOR_HPP 3 | # pragma once 4 | 5 | #if __cplusplus <= 201402L 6 | # error "You need a c++17 compiler" 7 | #endif // __cplusplus 8 | 9 | #if defined(__SSE__) 10 | # include "xmmintrin.h" 11 | #endif 12 | 13 | #if defined(__SSE2__) 14 | # include "emmintrin.h" 15 | #endif 16 | 17 | #if defined(__SSE4_1__) 18 | # include "smmintrin.h" 19 | #endif // __SSE4_1__ 20 | 21 | #if defined(__AVX__) 22 | # include "immintrin.h" 23 | #endif // __AVX__ 24 | 25 | #if defined(__ARM_NEON) 26 | # include "arm_neon.h" 27 | #endif // __ARM_NEON 28 | 29 | #include 30 | 31 | #include 32 | 33 | #include 34 | 35 | #include 36 | 37 | #include 38 | 39 | #include 40 | 41 | #include 42 | 43 | #include 44 | 45 | namespace vxl 46 | { 47 | 48 | using canonical_tag = struct { }; 49 | using default_tag = struct { }; 50 | using no_translation_tag = struct { }; 51 | 52 | template 53 | struct front 54 | { 55 | using type = A; 56 | }; 57 | 58 | template 59 | using 
front_t = typename front::type; 60 | 61 | template 62 | struct all_of : std::integral_constant{}> 63 | { 64 | }; 65 | 66 | template 67 | struct all_of : std::integral_constant 68 | { 69 | }; 70 | 71 | template struct vector_traits; 72 | 73 | template <> 74 | struct vector_traits 75 | { 76 | enum : unsigned { size = 1 }; 77 | 78 | using value_type = std::int32_t; 79 | 80 | using int_value_type = value_type; 81 | using uint_value_type = std::uint32_t; 82 | 83 | using vector_type = value_type; 84 | using int_vector_type = vector_type; 85 | }; 86 | 87 | template <> 88 | struct vector_traits 89 | { 90 | enum : unsigned { size = 2 }; 91 | 92 | using value_type = std::int32_t; 93 | 94 | using int_value_type = value_type; 95 | using uint_value_type = std::uint32_t; 96 | 97 | #if defined(__SSE__) 98 | using vector_type = value_type __attribute__((vector_size(16))); 99 | #else 100 | using vector_type = value_type __attribute__((vector_size(8))); 101 | #endif // __SSE__ 102 | using int_vector_type = vector_type; 103 | }; 104 | 105 | template <> 106 | struct vector_traits 107 | { 108 | enum : unsigned { size = 3 }; 109 | 110 | using value_type = std::int32_t; 111 | 112 | using int_value_type = value_type; 113 | using uint_value_type = std::uint32_t; 114 | 115 | using vector_type = value_type __attribute__((vector_size(16))); 116 | using int_vector_type = vector_type; 117 | }; 118 | 119 | template <> 120 | struct vector_traits 121 | { 122 | enum : unsigned { size = 4 }; 123 | 124 | using value_type = std::int32_t; 125 | 126 | using int_value_type = value_type; 127 | using uint_value_type = std::uint32_t; 128 | 129 | using vector_type = value_type __attribute__((vector_size(16))); 130 | using int_vector_type = vector_type; 131 | }; 132 | 133 | template <> 134 | struct vector_traits 135 | { 136 | enum : unsigned { size = 1 }; 137 | 138 | using value_type = std::uint32_t; 139 | 140 | using int_value_type = std::int32_t; 141 | using uint_value_type = std::uint32_t; 142 | 143 | 
using vector_type = value_type; 144 | using int_vector_type = vector_type; 145 | }; 146 | 147 | template <> 148 | struct vector_traits 149 | { 150 | enum : unsigned { size = 2 }; 151 | 152 | using value_type = std::uint32_t; 153 | 154 | using int_value_type = std::int32_t; 155 | using uint_value_type = std::uint32_t; 156 | 157 | #if defined(__SSE__) 158 | using vector_type = value_type __attribute__((vector_size(16))); 159 | #else 160 | using vector_type = value_type __attribute__((vector_size(8))); 161 | #endif // __SSE__ 162 | #if defined(__SSE__) 163 | using int_vector_type = int_value_type __attribute__((vector_size(16))); 164 | #else 165 | using int_vector_type = int_value_type __attribute__((vector_size(8))); 166 | #endif // __SSE__ 167 | }; 168 | 169 | template <> 170 | struct vector_traits 171 | { 172 | enum : unsigned { size = 3 }; 173 | 174 | using value_type = std::uint32_t; 175 | 176 | using int_value_type = std::int32_t; 177 | using uint_value_type = std::uint32_t; 178 | 179 | using vector_type = value_type __attribute__((vector_size(16))); 180 | using int_vector_type = int_value_type __attribute__((vector_size(16))); 181 | }; 182 | 183 | template <> 184 | struct vector_traits 185 | { 186 | enum : unsigned { size = 4 }; 187 | 188 | using value_type = std::uint32_t; 189 | 190 | using int_value_type = std::int32_t; 191 | using uint_value_type = std::uint32_t; 192 | 193 | using vector_type = value_type __attribute__((vector_size(16))); 194 | using int_vector_type = int_value_type __attribute__((vector_size(16))); 195 | }; 196 | 197 | template <> 198 | struct vector_traits 199 | { 200 | enum : unsigned { size = 1 }; 201 | 202 | using value_type = std::int64_t; 203 | 204 | using int_value_type = value_type; 205 | using uint_value_type = std::uint64_t; 206 | 207 | using vector_type = value_type; 208 | using int_vector_type = vector_type; 209 | }; 210 | 211 | template <> 212 | struct vector_traits 213 | { 214 | enum : unsigned { size = 2 }; 215 | 216 | using 
value_type = std::int64_t; 217 | 218 | using int_value_type = value_type; 219 | using uint_value_type = std::uint64_t; 220 | 221 | using vector_type = value_type __attribute__((vector_size(16))); 222 | using int_vector_type = vector_type; 223 | }; 224 | 225 | template <> 226 | struct vector_traits 227 | { 228 | enum : unsigned { size = 3 }; 229 | 230 | using value_type = std::int64_t; 231 | 232 | using int_value_type = value_type; 233 | using uint_value_type = std::uint64_t; 234 | 235 | using vector_type = value_type __attribute__((vector_size(32))); 236 | using int_vector_type = vector_type; 237 | }; 238 | 239 | template <> 240 | struct vector_traits 241 | { 242 | enum : unsigned { size = 4 }; 243 | 244 | using value_type = std::int64_t; 245 | 246 | using int_value_type = value_type; 247 | using uint_value_type = std::uint64_t; 248 | 249 | using vector_type = value_type __attribute__((vector_size(32))); 250 | using int_vector_type = vector_type; 251 | }; 252 | 253 | template <> 254 | struct vector_traits 255 | { 256 | enum : unsigned { size = 1 }; 257 | 258 | using value_type = float; 259 | 260 | using int_value_type = std::int32_t; 261 | using uint_value_type = std::uint32_t; 262 | 263 | using vector_type = value_type; 264 | using int_vector_type = int_value_type; 265 | }; 266 | 267 | template <> 268 | struct vector_traits 269 | { 270 | enum : unsigned { size = 2 }; 271 | 272 | using value_type = float; 273 | 274 | using int_value_type = std::int32_t; 275 | using uint_value_type = std::uint32_t; 276 | 277 | #if defined(__SSE__) 278 | using vector_type = value_type __attribute__((vector_size(16))); 279 | #else 280 | using vector_type = value_type __attribute__((vector_size(8))); 281 | #endif // __SSE__ 282 | #if defined(__SSE__) 283 | using int_vector_type = int_value_type __attribute__((vector_size(16))); 284 | #else 285 | using int_vector_type = int_value_type __attribute__((vector_size(8))); 286 | #endif // __SSE__ 287 | }; 288 | 289 | template <> 290 | struct 
vector_traits 291 | { 292 | enum : unsigned { size = 3 }; 293 | 294 | using value_type = float; 295 | 296 | using int_value_type = std::int32_t; 297 | using uint_value_type = std::uint32_t; 298 | 299 | using vector_type = value_type __attribute__((vector_size(16))); 300 | using int_vector_type = int_value_type __attribute__((vector_size(16))); 301 | }; 302 | 303 | template <> 304 | struct vector_traits 305 | { 306 | enum : unsigned { size = 4 }; 307 | 308 | using value_type = float; 309 | 310 | using int_value_type = std::int32_t; 311 | using uint_value_type = std::uint32_t; 312 | 313 | using vector_type = value_type __attribute__((vector_size(16))); 314 | using int_vector_type = int_value_type __attribute__((vector_size(16))); 315 | }; 316 | 317 | template <> 318 | struct vector_traits 319 | { 320 | enum : unsigned { size = 1 }; 321 | 322 | using value_type = double; 323 | 324 | using int_value_type = std::int64_t; 325 | using uint_value_type = std::uint64_t; 326 | 327 | using vector_type = value_type; 328 | using int_vector_type = int_value_type; 329 | }; 330 | 331 | template <> 332 | struct vector_traits 333 | { 334 | enum : unsigned { size = 2 }; 335 | 336 | using value_type = double; 337 | 338 | using int_value_type = std::int64_t; 339 | using uint_value_type = std::uint64_t; 340 | 341 | using vector_type = value_type __attribute__((vector_size(16))); 342 | using int_vector_type = int_value_type __attribute__((vector_size(16))); 343 | }; 344 | 345 | template <> 346 | struct vector_traits 347 | { 348 | enum : unsigned { size = 3 }; 349 | 350 | using value_type = double; 351 | 352 | using int_value_type = std::int64_t; 353 | using uint_value_type = std::uint64_t; 354 | 355 | using vector_type = value_type __attribute__((vector_size(32))); 356 | using int_vector_type = int_value_type __attribute__((vector_size(32))); 357 | }; 358 | 359 | template <> 360 | struct vector_traits 361 | { 362 | enum : unsigned { size = 4 }; 363 | 364 | using value_type = double; 365 | 
366 | using int_value_type = std::int64_t; 367 | using uint_value_type = std::uint64_t; 368 | 369 | using vector_type = value_type __attribute__((vector_size(32))); 370 | using int_vector_type = int_value_type __attribute__((vector_size(32))); 371 | }; 372 | 373 | template 374 | struct deduce 375 | { 376 | }; 377 | 378 | template 379 | struct deduce{} && 382 | !std::is_array{} && 383 | sizeof(std::declval()[0]) 384 | > 385 | > 386 | { 387 | using value_type = std::decay_t()[0])>; 388 | 389 | enum : unsigned 390 | { 391 | size = sizeof(T) / sizeof(value_type) 392 | }; 393 | 394 | using vector_type = 395 | typename vector_traits::vector_type; 396 | using int_vector_type = 397 | typename vector_traits::int_vector_type; 398 | }; 399 | 400 | template 401 | struct deduce{}> > 402 | { 403 | using value_type = std::decay_t())>; 404 | 405 | enum : unsigned 406 | { 407 | size = sizeof(T) / sizeof(value_type) 408 | }; 409 | 410 | using vector_type = 411 | typename vector_traits::vector_type; 412 | using int_vector_type = 413 | typename vector_traits::int_vector_type; 414 | }; 415 | 416 | template 417 | struct is_vector : std::false_type 418 | { 419 | }; 420 | 421 | template 422 | struct is_vector{} && 425 | !std::is_array{} && 426 | sizeof(std::declval()[0]) 427 | > 428 | > : std::true_type 429 | { 430 | }; 431 | 432 | #if defined(__clang__) 433 | 434 | namespace detail::vector 435 | { 436 | 437 | template 438 | //__attribute__ ((noinline)) 439 | constexpr std::enable_if_t< 440 | is_vector{} && is_vector{}, 441 | V 442 | > 443 | select(V const a, V const b, U const c, std::index_sequence) noexcept 444 | { 445 | return V{(c[Is] ? 
a[Is] : b[Is])...}; 446 | } 447 | 448 | } 449 | 450 | template 451 | //__attribute__ ((noinline)) 452 | constexpr std::enable_if_t< 453 | is_vector{} && is_vector{}, 454 | V 455 | > 456 | select(V const a, V const b, U const c) noexcept 457 | { 458 | static_assert(sizeof(U) == sizeof(V)); 459 | 460 | return detail::vector::select(a, 461 | b, 462 | c, 463 | std::make_index_sequence::size>() 464 | ); 465 | } 466 | #else 467 | template 468 | constexpr std::enable_if_t< 469 | is_vector{} && is_vector{}, 470 | V 471 | > 472 | select(V const a, V const b, U const c) noexcept 473 | { 474 | return c ? a : b; 475 | } 476 | #endif 477 | 478 | template 479 | //__attribute__ ((noinline)) 480 | constexpr std::enable_if_t< 481 | !is_vector{} && !is_vector{}, 482 | V 483 | > 484 | select(V const a, V const b, U const c) noexcept 485 | { 486 | static_assert(sizeof(U) == sizeof(V)); 487 | 488 | // https://markplusplus.wordpress.com/2007/03/14/fast-sse-select-operation/ 489 | // https://markplusplus.wordpress.com/2008/04/08/revisiting-fast-sse-select/ 490 | 491 | // ((a ^ b) & c) ^ b 492 | // auto const r((((U&)(b) ^ (U&)(a)) & c) ^ (U&)(a)); 493 | 494 | // (c & a) | (~c & b) 495 | // auto const r((c & (U&)(a)) | (~c & (U&)(b))); 496 | 497 | // return (V&)(r); 498 | 499 | return c ? 
a : b; 500 | } 501 | 502 | // convert 503 | namespace detail::vector 504 | { 505 | 506 | template 508 | constexpr auto convert(C const& v, 509 | std::index_sequence const) noexcept 510 | { 511 | return typename vxl::vector_traits::vector_type{R(v[Is])...}; 512 | } 513 | 514 | } 515 | 516 | template 517 | constexpr std::enable_if_t< 518 | !is_vector{} && std::is_arithmetic{}, 519 | typename vxl::vector_traits::vector_type 520 | > 521 | convert(V const v) noexcept 522 | { 523 | return v; 524 | } 525 | 526 | template 527 | constexpr std::enable_if_t< 528 | is_vector{}, 529 | typename vxl::vector_traits::vector_type 530 | > 531 | convert(V const& v) noexcept 532 | { 533 | return detail::vector::convert::value_type, M>( 534 | v, 535 | std::make_index_sequence::size))>() 536 | ); 537 | } 538 | 539 | template 540 | struct vector 541 | { 542 | enum : unsigned { dimension = vector_traits::size }; 543 | 544 | using traits_type = vector_traits; 545 | 546 | using int_vector_type = typename vector_traits::int_vector_type; 547 | using vector_type = typename vector_traits::vector_type; 548 | using value_type = typename vector_traits::value_type; 549 | 550 | vector_type data_; 551 | 552 | // element access 553 | #ifndef NDEBUG 554 | template 555 | constexpr std::enable_if_t<1 == M, T> operator()( 556 | unsigned const i) const noexcept 557 | { 558 | assert(!i); 559 | return data_; 560 | } 561 | #else 562 | template 563 | constexpr std::enable_if_t<1 == M, T> operator()(unsigned) const noexcept 564 | { 565 | return data_; 566 | } 567 | #endif // NDEBUG 568 | 569 | template 570 | constexpr std::enable_if_t<(M > 1), T> operator()( 571 | unsigned const i) const noexcept 572 | { 573 | return data_[i]; 574 | } 575 | 576 | template 577 | constexpr std::enable_if_t<1 == M> 578 | set_element(unsigned, T const v) noexcept 579 | { 580 | data_ = v; 581 | } 582 | 583 | template 584 | constexpr std::enable_if_t<(M > 1)> 585 | set_element(unsigned const i, T const v) noexcept 586 | { 587 | 
data_[i] = v; 588 | } 589 | 590 | // ref 591 | constexpr auto& ref() const noexcept {return data_;} 592 | constexpr auto& ref() noexcept {return data_;} 593 | }; 594 | 595 | // convert another vector 596 | template 597 | constexpr auto convert(vector const& v) noexcept 598 | { 599 | return vxl::vector{ 600 | detail::vector::convert( 601 | v.data_, 602 | std::make_index_sequence() 603 | ) 604 | }; 605 | } 606 | 607 | // convert container into vector 608 | template 609 | constexpr auto make_vector(C const& c) noexcept -> 610 | decltype(std::size(C()), typename C::value_type(), 611 | vector()) 612 | { 613 | return { 614 | detail::vector::convert())>( 616 | c, 617 | std::make_index_sequence() 618 | ) 619 | }; 620 | } 621 | 622 | // convert container into vector 623 | template 624 | constexpr auto make_vector(C const& c) noexcept -> 625 | decltype(std::size(C()), typename C::value_type(), 626 | vector()) 627 | { 628 | return { 629 | detail::vector::convert( 631 | c, 632 | std::make_index_sequence() 633 | ) 634 | }; 635 | } 636 | 637 | template >, std::decay_t>... 641 | >{} && 642 | std::is_arithmetic>>{} 643 | > 644 | > 645 | constexpr auto make_vector(A const ...a) noexcept 646 | { 647 | vector>, sizeof...(A)> r{}; 648 | 649 | unsigned i{}; 650 | 651 | ( 652 | ( 653 | r.data_[i++] = a 654 | ), 655 | ... 
656 | ); 657 | 658 | return r; 659 | } 660 | 661 | template 662 | constexpr vector operator*(T const a, vector const& b) noexcept 663 | { 664 | return { a * b.data_ }; 665 | } 666 | 667 | template 668 | constexpr vector operator*(vector const& a, T const b) noexcept 669 | { 670 | return { b * a.data_ }; 671 | } 672 | 673 | template 674 | constexpr vector operator+(vector const& a) noexcept 675 | { 676 | return a; 677 | } 678 | 679 | template 680 | constexpr vector operator-(vector const& a) noexcept 681 | { 682 | return { -a.data_ }; 683 | } 684 | 685 | template 686 | constexpr vector operator+(vector const& a, 687 | vector const& b) noexcept 688 | { 689 | return { a.data_ + b.data_ }; 690 | } 691 | 692 | template 693 | constexpr vector operator-(vector const& a, 694 | vector const& b) noexcept 695 | { 696 | return { a.data_ - b.data_ }; 697 | } 698 | 699 | template 700 | constexpr vector operator*(vector const& a, 701 | vector const& b) noexcept 702 | { 703 | return { a.data_ * b.data_ }; 704 | } 705 | 706 | template 707 | constexpr vector operator/(vector const& a, 708 | vector const& b) noexcept 709 | { 710 | return { a.data_ / b.data_ }; 711 | } 712 | 713 | template 714 | constexpr vector& operator+=(vector& a, 715 | vector const& b) noexcept 716 | { 717 | return a.data_ += b.data_, a; 718 | } 719 | 720 | template 721 | constexpr vector& operator-=(vector& a, 722 | vector const& b) noexcept 723 | { 724 | return a.data_ -= b.data_, a; 725 | } 726 | 727 | template 728 | constexpr vector& operator*=(vector& a, 729 | vector const& b) noexcept 730 | { 731 | return a.data_ *= b.data_, a; 732 | } 733 | 734 | template 735 | constexpr vector& operator/=(vector& a, 736 | vector const& b) noexcept 737 | { 738 | return a.data_ /= b.data_, a; 739 | } 740 | 741 | // assignment 742 | namespace detail::vector 743 | { 744 | 745 | template 746 | constexpr typename vxl::vector_traits::vector_type 747 | cvector(T const c, std::index_sequence) noexcept 748 | { 749 | return typename 
vxl::vector_traits::vector_type{(c + T(Is - Is))...}; 750 | } 751 | 752 | } 753 | 754 | template 755 | constexpr auto cvector(T const c) noexcept 756 | { 757 | return detail::vector::cvector(c, 758 | std::make_index_sequence() 759 | ); 760 | } 761 | 762 | namespace detail::vector 763 | { 764 | 765 | template 766 | struct swizzle_indices : 767 | std::conditional_t< 768 | sizeof...(I) < N, 769 | swizzle_indices, 770 | std::integer_sequence 771 | > 772 | { 773 | static_assert(sizeof...(I) <= N); 774 | }; 775 | 776 | template 777 | constexpr auto clang_swizzle(T&& a, T&& b, 778 | std::integer_sequence const) noexcept 779 | { 780 | return __builtin_shufflevector(a, b, I...); 781 | } 782 | 783 | } 784 | 785 | // swizzle 786 | template 787 | constexpr void swizzle(vector& v) 788 | { 789 | #if defined(__clang__) 790 | v.data_ = detail::vector::clang_swizzle(v.data_, 791 | v.data_, 792 | detail::vector::swizzle_indices() 795 | ); 796 | #else 797 | using int_vector_type = typename vector_traits::int_vector_type; 798 | 799 | v.data_ = __builtin_shuffle(v.data_, v.data_, int_vector_type{I...}); 800 | #endif 801 | } 802 | 803 | template 804 | constexpr vector swizzled(vector const& v) noexcept 805 | { 806 | #if defined(__clang__) 807 | return { 808 | detail::vector::clang_swizzle(v.data_, 809 | v.data_, 810 | detail::vector::swizzle_indices() 813 | ) 814 | }; 815 | #else 816 | using int_vector_type = typename vector_traits::int_vector_type; 817 | 818 | return { 819 | __builtin_shuffle(v.data_, int_vector_type{I...}) 820 | }; 821 | #endif 822 | } 823 | 824 | template 825 | constexpr vector swizzled(vector const& a, 826 | vector const& b) noexcept 827 | { 828 | #if defined(__clang__) 829 | return { 830 | detail::vector::clang_swizzle(a.data_, 831 | b.data_, 832 | detail::vector::swizzle_indices() 835 | ) 836 | }; 837 | #else 838 | using int_vector_type = typename vector_traits::int_vector_type; 839 | 840 | return { 841 | __builtin_shuffle(a.data_, 842 | b.data_, 843 | 
int_vector_type{(I < N ? I : I - N + sizeof(a.data_) / sizeof(T))...} 844 | ) 845 | }; 846 | #endif 847 | } 848 | 849 | namespace detail::vector 850 | { 851 | 852 | template 853 | constexpr T pow(T const e, T const x = B) noexcept 854 | { 855 | return !e ? 1 : 1 == e ? x : (e % 2 ? x : 1) * pow(e / 2, x * x); 856 | } 857 | 858 | template 859 | constexpr T log(T const n, T const e = 0) noexcept 860 | { 861 | return pow(e) >= n ? e : log(n, e + 1); 862 | } 863 | 864 | template 865 | constexpr typename vxl::vector_traits::int_vector_type 866 | abs_mask(std::index_sequence) noexcept 867 | { 868 | using int_vector_type = typename vector_traits::int_vector_type; 869 | using vector_type = typename vector_traits::vector_type; 870 | 871 | return ~int_vector_type(vector_type{(T(-.0) + T(Is - Is))...}); 872 | } 873 | 874 | } 875 | 876 | // abs 877 | #if defined(__ARM_NEON) 878 | inline auto abs(vector const& v) noexcept 879 | { 880 | using vector_type = typename vector_traits::vector_type; 881 | 882 | return vector{ 883 | vector_type(vabs_f32(float32x2_t(v.data_))) 884 | }; 885 | } 886 | 887 | inline auto abs(vector const& v) noexcept 888 | { 889 | using vector_type = typename vector_traits::vector_type; 890 | 891 | return vector{ 892 | vector_type(vabsq_f32(float32x4_t(v.data_))) 893 | }; 894 | } 895 | 896 | inline auto abs(vector const& v) noexcept 897 | { 898 | using vector_type = typename vector_traits::vector_type; 899 | 900 | return vector{ 901 | vector_type(vabsq_f32(float32x4_t(v.data_))) 902 | }; 903 | } 904 | 905 | #endif 906 | 907 | template 908 | constexpr auto abs(vector const& v) noexcept 909 | { 910 | using int_vector_type = typename vector_traits::int_vector_type; 911 | using vector_type = typename vector_traits::vector_type; 912 | 913 | return vector{ 914 | vector_type(int_vector_type(v.data_) & 915 | detail::vector::abs_mask(std::make_index_sequence()) 916 | ) 917 | }; 918 | } 919 | 920 | template 921 | constexpr void mabs(vector& v) noexcept 922 | { 923 | 
using int_vector_type = typename vector_traits::int_vector_type; 924 | using vector_type = typename vector_traits::vector_type; 925 | 926 | v.data_ = vector_type(int_vector_type(v.data_) & 927 | detail::vector::abs_mask(std::make_index_sequence()) 928 | ); 929 | } 930 | 931 | namespace detail::vector 932 | { 933 | 934 | template 935 | //__attribute__ ((noinline)) 936 | constexpr auto 937 | lin_shuffler(typename vector_traits::vector_type const& v, 938 | std::index_sequence) noexcept 939 | { 940 | #if defined(__clang__) 941 | return __builtin_shufflevector(v, v, ((I + Is) % N)...); 942 | #else 943 | using int_vector_type = typename vector_traits::int_vector_type; 944 | 945 | return __builtin_shuffle(v, int_vector_type{((I + Is) % N)...}); 946 | #endif 947 | } 948 | 949 | template 950 | //__attribute__ ((noinline)) 951 | constexpr auto 952 | pow2_shuffler(typename vector_traits::vector_type const& v, 953 | std::index_sequence) noexcept 954 | { 955 | #if defined(__clang__) 956 | return __builtin_shufflevector(v, v, ((pow<2>(I) + Is) % N)...); 957 | #else 958 | using int_vector_type = typename vector_traits::int_vector_type; 959 | 960 | return __builtin_shuffle(v, int_vector_type{((pow<2>(I) + Is) % N)...}); 961 | #endif 962 | } 963 | 964 | template 965 | constexpr typename vxl::vector_traits::int_vector_type 966 | than_mask(std::index_sequence const) noexcept 967 | { 968 | // generate mask for each Is 969 | return typename vxl::vector_traits::int_vector_type{ 970 | ( 971 | Is < N - 1 - I ? 0 : ~0 972 | )... 973 | }; 974 | } 975 | 976 | // l = a < b = {a[i] < b[i] ? ~0 : 0} 977 | // e = a == b = {a[i] == b[i] ? 
~0 : 0} 978 | // a < b == l[3] v e[3]l[2] v e[3]e[2]l[1] v e[3]e[2]e[1]l[0] 979 | template 980 | constexpr bool than(typename vector_traits::int_vector_type v, 981 | typename vector_traits::int_vector_type e, 982 | std::index_sequence const) noexcept 983 | { 984 | ( 985 | ( 986 | // calculate terms 987 | v &= lin_shuffler< 988 | typename vector_traits::int_value_type, N, Is + 1>( 989 | e, 990 | std::make_index_sequence() 991 | ) | 992 | than_mask< 993 | typename vector_traits::int_value_type, N, Is>( 994 | std::make_index_sequence() 995 | ) // some of the es are masked at each iteration 996 | ), 997 | ... 998 | ); 999 | 1000 | // ORing all terms together 1001 | ( 1002 | ( 1003 | v |= pow2_shuffler::int_value_type, N, Is>( 1004 | v, 1005 | std::make_index_sequence() 1006 | ) 1007 | ), 1008 | ... 1009 | ); 1010 | 1011 | return v[0]; 1012 | } 1013 | /* SSE2 all_zeros overloads: each returns true when the movemask of v has no bit set in the bytes that belong to real lanes (0xff = 2 x 4 bytes, 0xfff = 3 x 4 bytes, unmasked = the full 16 bytes). Exactly one overload is enabled per (N, sizeof(T)) pair. */ 1014 | #if defined(__SSE2__) 1015 | template 1016 | constexpr std::enable_if_t<(2 == N) && (4 == sizeof(T)), bool> 1017 | all_zeros(typename vector_traits::int_vector_type const v, 1018 | std::index_sequence) noexcept 1019 | { 1020 | return !(_mm_movemask_epi8(__m128i(v)) & 0xff); 1021 | } 1022 | 1023 | template 1024 | constexpr std::enable_if_t<(3 == N) && (4 == sizeof(T)), bool> 1025 | all_zeros(typename vector_traits::int_vector_type const v, 1026 | std::index_sequence) noexcept 1027 | { 1028 | return !(_mm_movemask_epi8(__m128i(v)) & 0xfff); 1029 | } 1030 | /* 4 x 32-bit lanes only: the 2 x 64-bit case is served by the _mm_movemask_pd overload below, and enabling both made that call ambiguous */ 1031 | template 1032 | constexpr std::enable_if_t< 1033 | (4 == N) && (4 == sizeof(T)), 1034 | bool 1035 | > 1036 | all_zeros(typename vector_traits::int_vector_type const v, 1037 | std::index_sequence) noexcept 1038 | { 1039 | return !_mm_movemask_epi8(__m128i(v)); 1040 | } 1041 | /* 2 x 64-bit lanes: tests the top bit of each lane, which is sufficient for the all-ones / all-zeros lanes produced by vector comparisons (presumably the only inputs; confirm against callers) */ 1042 | template 1043 | constexpr std::enable_if_t<(2 == N) && (8 == sizeof(T)), bool> 1044 | all_zeros(typename vector_traits::int_vector_type const v, 1045 | std::index_sequence) noexcept 1046 | { 1047 | return !(_mm_movemask_pd(__m128d(v))); 1048 | } 
1049 | 1050 | template 1051 | constexpr std::enable_if_t< 1052 | (16 < sizeof(typename vector_traits::int_vector_type)), 1053 | bool 1054 | > 1055 | all_ones(typename vector_traits::int_vector_type v, 1056 | std::index_sequence) noexcept 1057 | { 1058 | return !~( 1059 | ( 1060 | v &= pow2_shuffler::int_value_type, N, Is>( 1061 | v, 1062 | std::make_index_sequence() 1063 | ) 1064 | ), 1065 | ... 1066 | )[0]; 1067 | } 1068 | 1069 | template 1070 | constexpr std::enable_if_t< 1071 | (16 < sizeof(typename vector_traits::int_vector_type)), 1072 | bool 1073 | > 1074 | all_zeros(typename vector_traits::int_vector_type v, 1075 | std::index_sequence) noexcept 1076 | { 1077 | return !( 1078 | ( 1079 | v |= pow2_shuffler::int_value_type, N, Is>( 1080 | v, 1081 | std::make_index_sequence() 1082 | ) 1083 | ), 1084 | ... 1085 | )[0]; 1086 | } 1087 | #elif defined(__SSE__) 1088 | template 1089 | constexpr std::enable_if_t<(2 == N) && (4 == sizeof(T)), bool> 1090 | all_zeros(typename vector_traits::int_vector_type const v, 1091 | std::index_sequence) noexcept 1092 | { 1093 | return !(_mm_movemask_ps(__m128(v)) & 0x3); 1094 | } 1095 | 1096 | template 1097 | constexpr std::enable_if_t<(3 == N) && (4 == sizeof(T)), bool> 1098 | all_zeros(typename vector_traits::int_vector_type const v, 1099 | std::index_sequence) noexcept 1100 | { 1101 | return !(_mm_movemask_ps(__m128(v)) & 0x7); 1102 | } 1103 | 1104 | template 1105 | constexpr std::enable_if_t< 1106 | ((4 == N) && (4 == sizeof(T))) || ((2 == N) && (8 == sizeof(T))), 1107 | bool 1108 | > 1109 | all_zeros(typename vector_traits::int_vector_type const v, 1110 | std::index_sequence) noexcept 1111 | { 1112 | return !_mm_movemask_ps(__m128(v)); 1113 | } 1114 | 1115 | template 1116 | constexpr std::enable_if_t< 1117 | (16 < sizeof(typename vector_traits::int_vector_type)), 1118 | bool 1119 | > 1120 | all_ones(typename vector_traits::int_vector_type v, 1121 | std::index_sequence) noexcept 1122 | { 1123 | return !~( 1124 | ( 1125 | v &= 
pow2_shuffler::int_value_type, N, Is>( 1126 | v, 1127 | std::make_index_sequence() 1128 | ) 1129 | ), 1130 | ... 1131 | )[0]; 1132 | } 1133 | 1134 | template 1135 | constexpr std::enable_if_t< 1136 | (16 < sizeof(typename vector_traits::int_vector_type)), 1137 | bool 1138 | > 1139 | all_zeros(typename vector_traits::int_vector_type v, 1140 | std::index_sequence) noexcept 1141 | { 1142 | return !( 1143 | ( 1144 | v |= pow2_shuffler::int_value_type, N, Is>( 1145 | v, 1146 | std::make_index_sequence() 1147 | ) 1148 | ), 1149 | ... 1150 | )[0]; 1151 | } 1152 | #elif defined(__ARM_NEON) 1153 | // just one byte out of each vector needs to be tested for 0 1154 | template 1155 | //__attribute__ ((noinline)) 1156 | constexpr std::enable_if_t< 1157 | (2 == N) && (4 == sizeof(T)), 1158 | bool 1159 | > 1160 | all_zeros(typename vector_traits::int_vector_type const v, 1161 | std::index_sequence) noexcept 1162 | { 1163 | return !int16x4_t( 1164 | vtbl1_s8( 1165 | int8x8_t(int32x2_t(v)), 1166 | int8x8_t{0, 4} 1167 | ) 1168 | )[0]; 1169 | } 1170 | 1171 | template 1172 | //__attribute__ ((noinline)) 1173 | constexpr std::enable_if_t< 1174 | (3 == N) && (4 == sizeof(T)), 1175 | bool 1176 | > 1177 | all_zeros(typename vector_traits::int_vector_type const v, 1178 | std::index_sequence) noexcept 1179 | { 1180 | return !int32x2_t( 1181 | vtbl2_s8( 1182 | int8x8x2_t{ 1183 | int8x8_t(vget_low_s32(int32x4_t(v))), 1184 | int8x8_t(vget_high_s32(int32x4_t(v))) 1185 | }, 1186 | int8x8_t{0, 4, 8} 1187 | ) 1188 | )[0]; 1189 | } 1190 | 1191 | template 1192 | //__attribute__ ((noinline)) 1193 | constexpr std::enable_if_t< 1194 | (4 == N) && (4 == sizeof(T)), 1195 | bool 1196 | > 1197 | all_zeros(typename vector_traits::int_vector_type const v, 1198 | std::index_sequence) noexcept 1199 | { 1200 | return !int32x2_t( 1201 | vtbl2_s8( 1202 | int8x8x2_t{ 1203 | int8x8_t(vget_low_s32(int32x4_t(v))), 1204 | int8x8_t(vget_high_s32(int32x4_t(v))) 1205 | }, 1206 | int8x8_t{0, 4, 8, 12} 1207 | ) 1208 | )[0]; 
1209 | } 1210 | 1211 | template 1212 | //__attribute__ ((noinline)) 1213 | constexpr std::enable_if_t< 1214 | (2 == N) && (8 == sizeof(T)), 1215 | bool 1216 | > 1217 | all_zeros(typename vector_traits::int_vector_type const v, 1218 | std::index_sequence) noexcept 1219 | { 1220 | return !int16x4_t( 1221 | vtbl2_s8( 1222 | int8x8x2_t{ 1223 | int8x8_t(vget_low_s64(int64x2_t(v))), 1224 | int8x8_t(vget_high_s64(int64x2_t(v))) 1225 | }, 1226 | int8x8_t{0, 8} 1227 | ) 1228 | )[0]; 1229 | } 1230 | 1231 | template 1232 | constexpr std::enable_if_t< 1233 | (16 < sizeof(typename vector_traits::int_vector_type)), 1234 | bool 1235 | > 1236 | all_ones(typename vector_traits::int_vector_type v, 1237 | std::index_sequence) noexcept 1238 | { 1239 | return !~( 1240 | ( 1241 | v &= pow2_shuffler::int_value_type, N, Is>( 1242 | v, 1243 | std::make_index_sequence() 1244 | ) 1245 | ), 1246 | ... 1247 | )[0]; 1248 | } 1249 | 1250 | template 1251 | constexpr std::enable_if_t< 1252 | (16 < sizeof(typename vector_traits::int_vector_type)), 1253 | bool 1254 | > 1255 | all_zeros(typename vector_traits::int_vector_type v, 1256 | std::index_sequence) noexcept 1257 | { 1258 | return !( 1259 | ( 1260 | v |= pow2_shuffler::int_value_type, N, Is>( 1261 | v, 1262 | std::make_index_sequence() 1263 | ) 1264 | ), 1265 | ... 1266 | )[0]; 1267 | } 1268 | #else 1269 | template 1270 | constexpr bool all_ones( 1271 | typename vector_traits::int_vector_type v, 1272 | std::index_sequence) noexcept 1273 | { 1274 | return !~( 1275 | ( 1276 | v &= pow2_shuffler::int_value_type, N, Is>( 1277 | v, 1278 | std::make_index_sequence() 1279 | ) 1280 | ), 1281 | ... 1282 | )[0]; 1283 | } 1284 | 1285 | template 1286 | constexpr bool all_zeros( 1287 | typename vector_traits::int_vector_type v, 1288 | std::index_sequence) noexcept 1289 | { 1290 | return !( 1291 | ( 1292 | v |= pow2_shuffler::int_value_type, N, Is>( 1293 | v, 1294 | std::make_index_sequence() 1295 | ) 1296 | ), 1297 | ... 
1298 | )[0]; 1299 | } 1300 | #endif 1301 | 1302 | } 1303 | 1304 | // comparison 1305 | // the only reason for the existence of comparison operators are the 1306 | // associative containers, for which they are a requirement 1307 | template 1308 | //__attribute__ ((noinline)) 1309 | constexpr bool operator==(vector const& l, 1310 | vector const& r) noexcept 1311 | { 1312 | return detail::vector::all_zeros(l.data_ != r.data_, 1313 | std::make_index_sequence(N)>() 1314 | ); 1315 | } 1316 | 1317 | template 1318 | constexpr bool operator!=(vector const& l, 1319 | vector const& r) noexcept 1320 | { 1321 | return !operator==(l, r); 1322 | } 1323 | 1324 | template 1325 | constexpr bool operator<(vector const& l, 1326 | vector const& r) noexcept 1327 | { 1328 | return detail::vector::than( 1329 | l.data_ < r.data_, 1330 | l.data_ == r.data_, 1331 | std::make_index_sequence() 1332 | ); 1333 | } 1334 | 1335 | template 1336 | constexpr bool operator>(vector const& l, 1337 | vector const& r) noexcept 1338 | { 1339 | return r < l; 1340 | } 1341 | 1342 | template 1343 | constexpr bool operator<=(vector const& l, 1344 | vector const& r) noexcept 1345 | { 1346 | return !(r < l); 1347 | } 1348 | 1349 | template 1350 | constexpr bool operator>=(vector const& l, 1351 | vector const& r) noexcept 1352 | { 1353 | return !(l < r); 1354 | } 1355 | /* all(l): true when no element of l compares equal to zero, i.e. the == mask against a zero vector has no bit set. noexcept like any() and the comparison operators: it only calls noexcept helpers. */ 1356 | template 1357 | //__attribute__ ((noinline)) 1358 | constexpr bool all(vector const& l) noexcept 1359 | { 1360 | return detail::vector::all_zeros( 1361 | l.data_ == cvector(T(0)), 1362 | std::make_index_sequence(N)>() 1363 | ); 1364 | } 1365 | /* any(l): true when at least one element of l differs from zero. */ 1366 | template 1367 | //__attribute__ ((noinline)) 1368 | constexpr bool any(vector const& l) noexcept 1369 | { 1370 | return !detail::vector::all_zeros( 1371 | l.data_ != cvector(T(0)), 1372 | std::make_index_sequence(N)>() 1373 | ); 1374 | } 1375 | 1376 | // zero 1377 | template 1378 | constexpr void zero(vector& v) noexcept 1379 | { 1380 | v.data_ = decltype(v.data_){}; 1381 | } 1382 | 1383 | template 1384 
| static constexpr auto zero_vector() noexcept 1385 | { 1386 | return vector{}; 1387 | } 1388 | /* Writes v as "(e0, e1, ..., eN-1)". Elements are read through v(i) rather than v.data_[i], so one-element vectors, whose data_ is a plain scalar, can be streamed as well. */ 1389 | // stream operator 1390 | template 1391 | std::ostream& operator<<(std::ostream& os, vector const& v) 1392 | { 1393 | os << '('; 1394 | 1395 | for (unsigned i{}; i != N - 1; ++i) 1396 | { 1397 | os << v(i) << ", "; 1398 | } 1399 | 1400 | return os << v(N - 1) << ')'; 1401 | } 1402 | 1403 | } 1404 | 1405 | namespace std 1406 | { 1407 | /* std::hash for vxl::vector: hashes element 0, then folds every further element into the running value with the 0x9e3779b9 shift-xor mix in make_hash. */ 1408 | template 1409 | struct hash> 1410 | { 1411 | template 1412 | static constexpr auto make_hash(vxl::vector const& v, 1413 | std::index_sequence) 1414 | noexcept(noexcept(std::declval>()(std::declval()))) 1415 | { 1416 | auto s{std::hash()(v(0))}; 1417 | 1418 | return ((s ^= std::hash()(v(I + 1)) + 0x9e3779b9 + 1419 | (s << 6) + (s >> 2)), ...), s; 1420 | } 1421 | 1422 | constexpr auto operator()(vxl::vector const& v) const 1423 | noexcept(noexcept(std::declval>()(std::declval()))) 1424 | { 1425 | return make_hash(v, std::make_index_sequence()); 1426 | } 1427 | }; 1428 | 1429 | } 1430 | 1431 | #endif // VXL_VECTOR_HPP 1432 | --------------------------------------------------------------------------------