├── README.md
├── MIT.txt
└── Inc
    ├── XColors.h
    ├── XCollision.h
    ├── XPackedVector.h
    ├── XMathConvert.inl
    └── XMathMisc.inl


/README.md:
--------------------------------------------------------------------------------
 1 | # XMath
 2 | Modified DirectXMath for cross-platform compiling
 3 | 
 4 | Tested on MSVC LinuxGCC OSXClang IOSClang AndroidGCC
 5 | 
 6 | ##Todo
 7 | Completed column order matrix mathematics
 8 | 
 9 | ##Sample predefines for including:
10 | 
11 | ```cpp
12 | #if defined(BUILD_ARCH_ARM)
13 | #	if defined(__ARM_NEON) && BUILD_INTRINSICS_LEVEL > 0
14 | #		define _XM_ARM_NEON_INTRINSICS_
15 | #	else
16 | #		define _XM_NO_INTRINSICS_
17 | #	endif
18 | #else
19 | #	if BUILD_INTRINSICS_LEVEL > 0
20 | #		define _XM_SSE_INTRINSICS_
21 | #	endif
22 | #	if BUILD_INTRINSICS_LEVEL > 1
23 | #		define _XM_SSE3_INTRINSICS_
24 | #		define _XM_SSE4_INTRINSICS_
25 | #		define _XM_AVX_INTRINSICS_
26 | #	endif
27 | #	if BUILD_INTRINSICS_LEVEL > 2
28 | #		define _XM_F16C_INTRINSICS_
29 | #	endif
30 | #endif
31 | #if defined(VE_COMPILER_GCC) || defined(BUILD_PLATFORM_IOS)
32 | #	define _XM_NO_CALL_CONVENTION_
33 | #endif
34 | #if defined(BUILD_PLATFORM_IOS) || defined(BUILD_PLATFORM_ANDROID)
35 | #	define _XM_ARM_NEON_NO_ALIGN_
36 | #endif
37 | //#define _XM_NO_INTRINSICS_
38 | #include "XMath/XMath.h"
39 | ```
40 | 


--------------------------------------------------------------------------------
/MIT.txt:
--------------------------------------------------------------------------------
 1 |                                The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Microsoft Corp
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 6 | software and associated documentation files (the "Software"), to deal in the Software 
 7 | without restriction, including without limitation the rights to use, copy, modify, 
 8 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
 9 | permit persons to whom the Software is furnished to do so, subject to the following 
10 | conditions: 
11 | 
12 | The above copyright notice and this permission notice shall be included in all copies 
13 | or substantial portions of the Software.  
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
16 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
17 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
18 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
19 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
20 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 
22 | 


--------------------------------------------------------------------------------
/Inc/XColors.h:
--------------------------------------------------------------------------------
  1 | //-------------------------------------------------------------------------------------
  2 | // DirectXColors.h -- C++ Color Math library
  3 | //
  4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  7 | // PARTICULAR PURPOSE.
  8 | //  
  9 | // Copyright (c) Microsoft Corporation. All rights reserved.
 10 | //-------------------------------------------------------------------------------------
 11 | 
 12 | #pragma once
 13 | 
 14 | #include "XMath.h"
 15 | 
 16 | namespace XMath
 17 | {
 18 | 
 19 | namespace Colors
 20 | {
 21 |     // Standard colors (Red/Green/Blue/Alpha)
 22 |     XMGLOBALCONST XMVECTORF32 AliceBlue            = {0.941176534f, 0.972549081f, 1.000000000f, 1.000000000f};
 23 |     XMGLOBALCONST XMVECTORF32 AntiqueWhite         = {0.980392218f, 0.921568692f, 0.843137324f, 1.000000000f};
 24 |     XMGLOBALCONST XMVECTORF32 Aqua                 = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
 25 |     XMGLOBALCONST XMVECTORF32 Aquamarine           = {0.498039246f, 1.000000000f, 0.831372619f, 1.000000000f};
 26 |     XMGLOBALCONST XMVECTORF32 Azure                = {0.941176534f, 1.000000000f, 1.000000000f, 1.000000000f};
 27 |     XMGLOBALCONST XMVECTORF32 Beige                = {0.960784376f, 0.960784376f, 0.862745166f, 1.000000000f};
 28 |     XMGLOBALCONST XMVECTORF32 Bisque               = {1.000000000f, 0.894117713f, 0.768627524f, 1.000000000f};
 29 |     XMGLOBALCONST XMVECTORF32 Black                = {0.000000000f, 0.000000000f, 0.000000000f, 1.000000000f};
 30 |     XMGLOBALCONST XMVECTORF32 BlanchedAlmond       = {1.000000000f, 0.921568692f, 0.803921640f, 1.000000000f};
 31 |     XMGLOBALCONST XMVECTORF32 Blue                 = {0.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
 32 |     XMGLOBALCONST XMVECTORF32 BlueViolet           = {0.541176498f, 0.168627456f, 0.886274576f, 1.000000000f};
 33 |     XMGLOBALCONST XMVECTORF32 Brown                = {0.647058845f, 0.164705887f, 0.164705887f, 1.000000000f};
 34 |     XMGLOBALCONST XMVECTORF32 BurlyWood            = {0.870588303f, 0.721568644f, 0.529411793f, 1.000000000f};
 35 |     XMGLOBALCONST XMVECTORF32 CadetBlue            = {0.372549027f, 0.619607866f, 0.627451003f, 1.000000000f};
 36 |     XMGLOBALCONST XMVECTORF32 Chartreuse           = {0.498039246f, 1.000000000f, 0.000000000f, 1.000000000f};
 37 |     XMGLOBALCONST XMVECTORF32 Chocolate            = {0.823529482f, 0.411764741f, 0.117647067f, 1.000000000f};
 38 |     XMGLOBALCONST XMVECTORF32 Coral                = {1.000000000f, 0.498039246f, 0.313725501f, 1.000000000f};
 39 |     XMGLOBALCONST XMVECTORF32 CornflowerBlue       = {0.392156899f, 0.584313750f, 0.929411829f, 1.000000000f};
 40 |     XMGLOBALCONST XMVECTORF32 Cornsilk             = {1.000000000f, 0.972549081f, 0.862745166f, 1.000000000f};
 41 |     XMGLOBALCONST XMVECTORF32 Crimson              = {0.862745166f, 0.078431375f, 0.235294133f, 1.000000000f};
 42 |     XMGLOBALCONST XMVECTORF32 Cyan                 = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
 43 |     XMGLOBALCONST XMVECTORF32 DarkBlue             = {0.000000000f, 0.000000000f, 0.545098066f, 1.000000000f};
 44 |     XMGLOBALCONST XMVECTORF32 DarkCyan             = {0.000000000f, 0.545098066f, 0.545098066f, 1.000000000f};
 45 |     XMGLOBALCONST XMVECTORF32 DarkGoldenrod        = {0.721568644f, 0.525490224f, 0.043137256f, 1.000000000f};
 46 |     XMGLOBALCONST XMVECTORF32 DarkGray             = {0.662745118f, 0.662745118f, 0.662745118f, 1.000000000f};
 47 |     XMGLOBALCONST XMVECTORF32 DarkGreen            = {0.000000000f, 0.392156899f, 0.000000000f, 1.000000000f};
 48 |     XMGLOBALCONST XMVECTORF32 DarkKhaki            = {0.741176486f, 0.717647076f, 0.419607878f, 1.000000000f};
 49 |     XMGLOBALCONST XMVECTORF32 DarkMagenta          = {0.545098066f, 0.000000000f, 0.545098066f, 1.000000000f};
 50 |     XMGLOBALCONST XMVECTORF32 DarkOliveGreen       = {0.333333343f, 0.419607878f, 0.184313729f, 1.000000000f};
 51 |     XMGLOBALCONST XMVECTORF32 DarkOrange           = {1.000000000f, 0.549019635f, 0.000000000f, 1.000000000f};
 52 |     XMGLOBALCONST XMVECTORF32 DarkOrchid           = {0.600000024f, 0.196078449f, 0.800000072f, 1.000000000f};
 53 |     XMGLOBALCONST XMVECTORF32 DarkRed              = {0.545098066f, 0.000000000f, 0.000000000f, 1.000000000f};
 54 |     XMGLOBALCONST XMVECTORF32 DarkSalmon           = {0.913725555f, 0.588235319f, 0.478431404f, 1.000000000f};
 55 |     XMGLOBALCONST XMVECTORF32 DarkSeaGreen         = {0.560784340f, 0.737254918f, 0.545098066f, 1.000000000f};
 56 |     XMGLOBALCONST XMVECTORF32 DarkSlateBlue        = {0.282352954f, 0.239215702f, 0.545098066f, 1.000000000f};
 57 |     XMGLOBALCONST XMVECTORF32 DarkSlateGray        = {0.184313729f, 0.309803933f, 0.309803933f, 1.000000000f};
 58 |     XMGLOBALCONST XMVECTORF32 DarkTurquoise        = {0.000000000f, 0.807843208f, 0.819607913f, 1.000000000f};
 59 |     XMGLOBALCONST XMVECTORF32 DarkViolet           = {0.580392182f, 0.000000000f, 0.827451050f, 1.000000000f};
 60 |     XMGLOBALCONST XMVECTORF32 DeepPink             = {1.000000000f, 0.078431375f, 0.576470613f, 1.000000000f};
 61 |     XMGLOBALCONST XMVECTORF32 DeepSkyBlue          = {0.000000000f, 0.749019623f, 1.000000000f, 1.000000000f};
 62 |     XMGLOBALCONST XMVECTORF32 DimGray              = {0.411764741f, 0.411764741f, 0.411764741f, 1.000000000f};
 63 |     XMGLOBALCONST XMVECTORF32 DodgerBlue           = {0.117647067f, 0.564705908f, 1.000000000f, 1.000000000f};
 64 |     XMGLOBALCONST XMVECTORF32 Firebrick            = {0.698039234f, 0.133333340f, 0.133333340f, 1.000000000f};
 65 |     XMGLOBALCONST XMVECTORF32 FloralWhite          = {1.000000000f, 0.980392218f, 0.941176534f, 1.000000000f};
 66 |     XMGLOBALCONST XMVECTORF32 ForestGreen          = {0.133333340f, 0.545098066f, 0.133333340f, 1.000000000f};
 67 |     XMGLOBALCONST XMVECTORF32 Fuchsia              = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
 68 |     XMGLOBALCONST XMVECTORF32 Gainsboro            = {0.862745166f, 0.862745166f, 0.862745166f, 1.000000000f};
 69 |     XMGLOBALCONST XMVECTORF32 GhostWhite           = {0.972549081f, 0.972549081f, 1.000000000f, 1.000000000f};
 70 |     XMGLOBALCONST XMVECTORF32 Gold                 = {1.000000000f, 0.843137324f, 0.000000000f, 1.000000000f};
 71 |     XMGLOBALCONST XMVECTORF32 Goldenrod            = {0.854902029f, 0.647058845f, 0.125490203f, 1.000000000f};
 72 |     XMGLOBALCONST XMVECTORF32 Gray                 = {0.501960814f, 0.501960814f, 0.501960814f, 1.000000000f};
 73 |     XMGLOBALCONST XMVECTORF32 Green                = {0.000000000f, 0.501960814f, 0.000000000f, 1.000000000f};
 74 |     XMGLOBALCONST XMVECTORF32 GreenYellow          = {0.678431392f, 1.000000000f, 0.184313729f, 1.000000000f};
 75 |     XMGLOBALCONST XMVECTORF32 Honeydew             = {0.941176534f, 1.000000000f, 0.941176534f, 1.000000000f};
 76 |     XMGLOBALCONST XMVECTORF32 HotPink              = {1.000000000f, 0.411764741f, 0.705882370f, 1.000000000f};
 77 |     XMGLOBALCONST XMVECTORF32 IndianRed            = {0.803921640f, 0.360784322f, 0.360784322f, 1.000000000f};
 78 |     XMGLOBALCONST XMVECTORF32 Indigo               = {0.294117659f, 0.000000000f, 0.509803951f, 1.000000000f};
 79 |     XMGLOBALCONST XMVECTORF32 Ivory                = {1.000000000f, 1.000000000f, 0.941176534f, 1.000000000f};
 80 |     XMGLOBALCONST XMVECTORF32 Khaki                = {0.941176534f, 0.901960850f, 0.549019635f, 1.000000000f};
 81 |     XMGLOBALCONST XMVECTORF32 Lavender             = {0.901960850f, 0.901960850f, 0.980392218f, 1.000000000f};
 82 |     XMGLOBALCONST XMVECTORF32 LavenderBlush        = {1.000000000f, 0.941176534f, 0.960784376f, 1.000000000f};
 83 |     XMGLOBALCONST XMVECTORF32 LawnGreen            = {0.486274540f, 0.988235354f, 0.000000000f, 1.000000000f};
 84 |     XMGLOBALCONST XMVECTORF32 LemonChiffon         = {1.000000000f, 0.980392218f, 0.803921640f, 1.000000000f};
 85 |     XMGLOBALCONST XMVECTORF32 LightBlue            = {0.678431392f, 0.847058892f, 0.901960850f, 1.000000000f};
 86 |     XMGLOBALCONST XMVECTORF32 LightCoral           = {0.941176534f, 0.501960814f, 0.501960814f, 1.000000000f};
 87 |     XMGLOBALCONST XMVECTORF32 LightCyan            = {0.878431439f, 1.000000000f, 1.000000000f, 1.000000000f};
 88 |     XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = {0.980392218f, 0.980392218f, 0.823529482f, 1.000000000f};
 89 |     XMGLOBALCONST XMVECTORF32 LightGreen           = {0.564705908f, 0.933333397f, 0.564705908f, 1.000000000f};
 90 |     XMGLOBALCONST XMVECTORF32 LightGray            = {0.827451050f, 0.827451050f, 0.827451050f, 1.000000000f};
 91 |     XMGLOBALCONST XMVECTORF32 LightPink            = {1.000000000f, 0.713725507f, 0.756862819f, 1.000000000f};
 92 |     XMGLOBALCONST XMVECTORF32 LightSalmon          = {1.000000000f, 0.627451003f, 0.478431404f, 1.000000000f};
 93 |     XMGLOBALCONST XMVECTORF32 LightSeaGreen        = {0.125490203f, 0.698039234f, 0.666666687f, 1.000000000f};
 94 |     XMGLOBALCONST XMVECTORF32 LightSkyBlue         = {0.529411793f, 0.807843208f, 0.980392218f, 1.000000000f};
 95 |     XMGLOBALCONST XMVECTORF32 LightSlateGray       = {0.466666698f, 0.533333361f, 0.600000024f, 1.000000000f};
 96 |     XMGLOBALCONST XMVECTORF32 LightSteelBlue       = {0.690196097f, 0.768627524f, 0.870588303f, 1.000000000f};
 97 |     XMGLOBALCONST XMVECTORF32 LightYellow          = {1.000000000f, 1.000000000f, 0.878431439f, 1.000000000f};
 98 |     XMGLOBALCONST XMVECTORF32 Lime                 = {0.000000000f, 1.000000000f, 0.000000000f, 1.000000000f};
 99 |     XMGLOBALCONST XMVECTORF32 LimeGreen            = {0.196078449f, 0.803921640f, 0.196078449f, 1.000000000f};
100 |     XMGLOBALCONST XMVECTORF32 Linen                = {0.980392218f, 0.941176534f, 0.901960850f, 1.000000000f};
101 |     XMGLOBALCONST XMVECTORF32 Magenta              = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
102 |     XMGLOBALCONST XMVECTORF32 Maroon               = {0.501960814f, 0.000000000f, 0.000000000f, 1.000000000f};
103 |     XMGLOBALCONST XMVECTORF32 MediumAquamarine     = {0.400000036f, 0.803921640f, 0.666666687f, 1.000000000f};
104 |     XMGLOBALCONST XMVECTORF32 MediumBlue           = {0.000000000f, 0.000000000f, 0.803921640f, 1.000000000f};
105 |     XMGLOBALCONST XMVECTORF32 MediumOrchid         = {0.729411781f, 0.333333343f, 0.827451050f, 1.000000000f};
106 |     XMGLOBALCONST XMVECTORF32 MediumPurple         = {0.576470613f, 0.439215720f, 0.858823597f, 1.000000000f};
107 |     XMGLOBALCONST XMVECTORF32 MediumSeaGreen       = {0.235294133f, 0.701960802f, 0.443137288f, 1.000000000f};
108 |     XMGLOBALCONST XMVECTORF32 MediumSlateBlue      = {0.482352972f, 0.407843173f, 0.933333397f, 1.000000000f};
109 |     XMGLOBALCONST XMVECTORF32 MediumSpringGreen    = {0.000000000f, 0.980392218f, 0.603921592f, 1.000000000f};
110 |     XMGLOBALCONST XMVECTORF32 MediumTurquoise      = {0.282352954f, 0.819607913f, 0.800000072f, 1.000000000f};
111 |     XMGLOBALCONST XMVECTORF32 MediumVioletRed      = {0.780392230f, 0.082352944f, 0.521568656f, 1.000000000f};
112 |     XMGLOBALCONST XMVECTORF32 MidnightBlue         = {0.098039225f, 0.098039225f, 0.439215720f, 1.000000000f};
113 |     XMGLOBALCONST XMVECTORF32 MintCream            = {0.960784376f, 1.000000000f, 0.980392218f, 1.000000000f};
114 |     XMGLOBALCONST XMVECTORF32 MistyRose            = {1.000000000f, 0.894117713f, 0.882353008f, 1.000000000f};
115 |     XMGLOBALCONST XMVECTORF32 Moccasin             = {1.000000000f, 0.894117713f, 0.709803939f, 1.000000000f};
116 |     XMGLOBALCONST XMVECTORF32 NavajoWhite          = {1.000000000f, 0.870588303f, 0.678431392f, 1.000000000f};
117 |     XMGLOBALCONST XMVECTORF32 Navy                 = {0.000000000f, 0.000000000f, 0.501960814f, 1.000000000f};
118 |     XMGLOBALCONST XMVECTORF32 OldLace              = {0.992156923f, 0.960784376f, 0.901960850f, 1.000000000f};
119 |     XMGLOBALCONST XMVECTORF32 Olive                = {0.501960814f, 0.501960814f, 0.000000000f, 1.000000000f};
120 |     XMGLOBALCONST XMVECTORF32 OliveDrab            = {0.419607878f, 0.556862772f, 0.137254909f, 1.000000000f};
121 |     XMGLOBALCONST XMVECTORF32 Orange               = {1.000000000f, 0.647058845f, 0.000000000f, 1.000000000f};
122 |     XMGLOBALCONST XMVECTORF32 OrangeRed            = {1.000000000f, 0.270588249f, 0.000000000f, 1.000000000f};
123 |     XMGLOBALCONST XMVECTORF32 Orchid               = {0.854902029f, 0.439215720f, 0.839215755f, 1.000000000f};
124 |     XMGLOBALCONST XMVECTORF32 PaleGoldenrod        = {0.933333397f, 0.909803987f, 0.666666687f, 1.000000000f};
125 |     XMGLOBALCONST XMVECTORF32 PaleGreen            = {0.596078455f, 0.984313786f, 0.596078455f, 1.000000000f};
126 |     XMGLOBALCONST XMVECTORF32 PaleTurquoise        = {0.686274529f, 0.933333397f, 0.933333397f, 1.000000000f};
127 |     XMGLOBALCONST XMVECTORF32 PaleVioletRed        = {0.858823597f, 0.439215720f, 0.576470613f, 1.000000000f};
128 |     XMGLOBALCONST XMVECTORF32 PapayaWhip           = {1.000000000f, 0.937254965f, 0.835294187f, 1.000000000f};
129 |     XMGLOBALCONST XMVECTORF32 PeachPuff            = {1.000000000f, 0.854902029f, 0.725490212f, 1.000000000f};
130 |     XMGLOBALCONST XMVECTORF32 Peru                 = {0.803921640f, 0.521568656f, 0.247058839f, 1.000000000f};
131 |     XMGLOBALCONST XMVECTORF32 Pink                 = {1.000000000f, 0.752941251f, 0.796078503f, 1.000000000f};
132 |     XMGLOBALCONST XMVECTORF32 Plum                 = {0.866666734f, 0.627451003f, 0.866666734f, 1.000000000f};
133 |     XMGLOBALCONST XMVECTORF32 PowderBlue           = {0.690196097f, 0.878431439f, 0.901960850f, 1.000000000f};
134 |     XMGLOBALCONST XMVECTORF32 Purple               = {0.501960814f, 0.000000000f, 0.501960814f, 1.000000000f};
135 |     XMGLOBALCONST XMVECTORF32 Red                  = {1.000000000f, 0.000000000f, 0.000000000f, 1.000000000f};
136 |     XMGLOBALCONST XMVECTORF32 RosyBrown            = {0.737254918f, 0.560784340f, 0.560784340f, 1.000000000f};
137 |     XMGLOBALCONST XMVECTORF32 RoyalBlue            = {0.254901975f, 0.411764741f, 0.882353008f, 1.000000000f};
138 |     XMGLOBALCONST XMVECTORF32 SaddleBrown          = {0.545098066f, 0.270588249f, 0.074509807f, 1.000000000f};
139 |     XMGLOBALCONST XMVECTORF32 Salmon               = {0.980392218f, 0.501960814f, 0.447058856f, 1.000000000f};
140 |     XMGLOBALCONST XMVECTORF32 SandyBrown           = {0.956862807f, 0.643137276f, 0.376470625f, 1.000000000f};
141 |     XMGLOBALCONST XMVECTORF32 SeaGreen             = {0.180392161f, 0.545098066f, 0.341176480f, 1.000000000f};
142 |     XMGLOBALCONST XMVECTORF32 SeaShell             = {1.000000000f, 0.960784376f, 0.933333397f, 1.000000000f};
143 |     XMGLOBALCONST XMVECTORF32 Sienna               = {0.627451003f, 0.321568638f, 0.176470593f, 1.000000000f};
144 |     XMGLOBALCONST XMVECTORF32 Silver               = {0.752941251f, 0.752941251f, 0.752941251f, 1.000000000f};
145 |     XMGLOBALCONST XMVECTORF32 SkyBlue              = {0.529411793f, 0.807843208f, 0.921568692f, 1.000000000f};
146 |     XMGLOBALCONST XMVECTORF32 SlateBlue            = {0.415686309f, 0.352941185f, 0.803921640f, 1.000000000f};
147 |     XMGLOBALCONST XMVECTORF32 SlateGray            = {0.439215720f, 0.501960814f, 0.564705908f, 1.000000000f};
148 |     XMGLOBALCONST XMVECTORF32 Snow                 = {1.000000000f, 0.980392218f, 0.980392218f, 1.000000000f};
149 |     XMGLOBALCONST XMVECTORF32 SpringGreen          = {0.000000000f, 1.000000000f, 0.498039246f, 1.000000000f};
150 |     XMGLOBALCONST XMVECTORF32 SteelBlue            = {0.274509817f, 0.509803951f, 0.705882370f, 1.000000000f};
151 |     XMGLOBALCONST XMVECTORF32 Tan                  = {0.823529482f, 0.705882370f, 0.549019635f, 1.000000000f};
152 |     XMGLOBALCONST XMVECTORF32 Teal                 = {0.000000000f, 0.501960814f, 0.501960814f, 1.000000000f};
153 |     XMGLOBALCONST XMVECTORF32 Thistle              = {0.847058892f, 0.749019623f, 0.847058892f, 1.000000000f};
154 |     XMGLOBALCONST XMVECTORF32 Tomato               = {1.000000000f, 0.388235331f, 0.278431386f, 1.000000000f};
155 |     XMGLOBALCONST XMVECTORF32 Transparent          = {0.000000000f, 0.000000000f, 0.000000000f, 0.000000000f};
156 |     XMGLOBALCONST XMVECTORF32 Turquoise            = {0.250980407f, 0.878431439f, 0.815686345f, 1.000000000f};
157 |     XMGLOBALCONST XMVECTORF32 Violet               = {0.933333397f, 0.509803951f, 0.933333397f, 1.000000000f};
158 |     XMGLOBALCONST XMVECTORF32 Wheat                = {0.960784376f, 0.870588303f, 0.701960802f, 1.000000000f};
159 |     XMGLOBALCONST XMVECTORF32 White                = {1.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
160 |     XMGLOBALCONST XMVECTORF32 WhiteSmoke           = {0.960784376f, 0.960784376f, 0.960784376f, 1.000000000f};
161 |     XMGLOBALCONST XMVECTORF32 Yellow               = {1.000000000f, 1.000000000f, 0.000000000f, 1.000000000f};
162 |     XMGLOBALCONST XMVECTORF32 YellowGreen          = {0.603921592f, 0.803921640f, 0.196078449f, 1.000000000f};
163 | 
164 | }; // namespace Colors
165 | 
166 | }; // namespace XMath
167 | 


--------------------------------------------------------------------------------
/Inc/XCollision.h:
--------------------------------------------------------------------------------
  1 | //-------------------------------------------------------------------------------------
  2 | // XCollision.h -- C++ Collision Math library
  3 | //
  4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  7 | // PARTICULAR PURPOSE.
  8 | //  
  9 | // Copyright (c) Microsoft Corporation. All rights reserved.
 10 | //-------------------------------------------------------------------------------------
 11 | 
 12 | #pragma once
 13 | 
 14 | #include "XMath.h"
 15 | 
 16 | namespace XMath
 17 | {
 18 | 
 19 | enum ContainmentType
 20 | {
 21 | 	DISJOINT = 0,
 22 | 	INTERSECTS = 1,
 23 | 	CONTAINS = 2,
 24 | };
 25 | 
 26 | enum PlaneIntersectionType
 27 | {
 28 | 	FRONT = 0,
 29 | 	INTERSECTING = 1,
 30 | 	BACK = 2,
 31 | };
 32 | 
 33 | struct BoundingBox;
 34 | struct BoundingOrientedBox;
 35 | struct BoundingFrustum;
 36 | 
 37 | #ifdef _MSC_VER
 38 | #	pragma warning(push)
 39 | #	pragma warning(disable:4324 4820)
 40 | //	C4324: alignment padding warnings
 41 | //	C4820: Off by default noise
 42 | #endif
 43 | 
 44 | //-------------------------------------------------------------------------------------
 45 | // Bounding sphere
 46 | //-------------------------------------------------------------------------------------
 47 | struct BoundingSphere
 48 | {
 49 | 	XMFLOAT3 Center;            // Center of the sphere.
 50 | 	float Radius;               // Radius of the sphere.
 51 | 
 52 | 								// Creators
 53 | 	BoundingSphere() : Center(0, 0, 0), Radius(1.f) {}
 54 | 	XM_CONSTEXPR BoundingSphere(_In_ const XMFLOAT3& center, _In_ float radius)
 55 | 		: Center(center), Radius(radius) {}
 56 | 	BoundingSphere(_In_ const BoundingSphere& sp)
 57 | 		: Center(sp.Center), Radius(sp.Radius) {}
 58 | 
 59 | 	// Methods
 60 | 	BoundingSphere& operator=(_In_ const BoundingSphere& sp) { Center = sp.Center; Radius = sp.Radius; return *this; }
 61 | 
 62 | 	void    XM_CALLCONV     Transform(_Out_ BoundingSphere& Out, _In_ FXMMATRIX M) const;
 63 | 	void    XM_CALLCONV     Transform(_Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const;
 64 | 	// Transform the sphere
 65 | 
 66 | 	ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR Point) const;
 67 | 	ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const;
 68 | 	ContainmentType Contains(_In_ const BoundingSphere& sh) const;
 69 | 	ContainmentType Contains(_In_ const BoundingBox& box) const;
 70 | 	ContainmentType Contains(_In_ const BoundingOrientedBox& box) const;
 71 | 	ContainmentType Contains(_In_ const BoundingFrustum& fr) const;
 72 | 
 73 | 	bool Intersects(_In_ const BoundingSphere& sh) const;
 74 | 	bool Intersects(_In_ const BoundingBox& box) const;
 75 | 	bool Intersects(_In_ const BoundingOrientedBox& box) const;
 76 | 	bool Intersects(_In_ const BoundingFrustum& fr) const;
 77 | 
 78 | 	bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const;
 79 | 	// Triangle-sphere test
 80 | 
 81 | 	PlaneIntersectionType    XM_CALLCONV     Intersects(_In_ FXMVECTOR Plane) const;
 82 | 	// Plane-sphere test
 83 | 
 84 | 	bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const;
 85 | 	// Ray-sphere test
 86 | 
 87 | 	ContainmentType     XM_CALLCONV     ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
 88 | 		_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const;
 89 | 	// Test sphere against six planes (see BoundingFrustum::GetPlanes)
 90 | 
 91 | 	// Static methods
 92 | 	static void CreateMerged(_Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2);
 93 | 
 94 | 	static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingBox& box);
 95 | 	static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box);
 96 | 
 97 | 	static void CreateFromPoints(_Out_ BoundingSphere& Out, _In_ size_t Count,
 98 | 		_In_reads_bytes_(sizeof(XMFLOAT3) + Stride*(Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride);
 99 | 
100 | 	static void CreateFromFrustum(_Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr);
101 | };
102 | 
103 | //-------------------------------------------------------------------------------------
104 | // Axis-aligned bounding box
105 | //-------------------------------------------------------------------------------------
106 | struct BoundingBox
107 | {
108 | 	static const size_t CORNER_COUNT = 8;
109 | 
110 | 	XMFLOAT3 Center;            // Center of the box.
111 | 	XMFLOAT3 Extents;           // Distance from the center to each side.
112 | 
113 | 								// Creators
114 | 	BoundingBox() : Center(0, 0, 0), Extents(1.f, 1.f, 1.f) {}
115 | 	XM_CONSTEXPR BoundingBox(_In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents)
116 | 		: Center(center), Extents(extents) {}
117 | 	BoundingBox(_In_ const BoundingBox& box) : Center(box.Center), Extents(box.Extents) {}
118 | 
119 | 	// Methods
120 | 	BoundingBox& operator=(_In_ const BoundingBox& box) { Center = box.Center; Extents = box.Extents; return *this; }
121 | 
122 | 	void    XM_CALLCONV     Transform(_Out_ BoundingBox& Out, _In_ FXMMATRIX M) const;
123 | 	void    XM_CALLCONV     Transform(_Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const;
124 | 
125 | 	void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const;
126 | 	// Gets the 8 corners of the box
127 | 
128 | 	ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR Point) const;
129 | 	ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const;
130 | 	ContainmentType Contains(_In_ const BoundingSphere& sh) const;
131 | 	ContainmentType Contains(_In_ const BoundingBox& box) const;
132 | 	ContainmentType Contains(_In_ const BoundingOrientedBox& box) const;
133 | 	ContainmentType Contains(_In_ const BoundingFrustum& fr) const;
134 | 
135 | 	bool Intersects(_In_ const BoundingSphere& sh) const;
136 | 	bool Intersects(_In_ const BoundingBox& box) const;
137 | 	bool Intersects(_In_ const BoundingOrientedBox& box) const;
138 | 	bool Intersects(_In_ const BoundingFrustum& fr) const;
139 | 
140 | 	bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const;
141 | 	// Triangle-Box test
142 | 
143 | 	PlaneIntersectionType    XM_CALLCONV     Intersects(_In_ FXMVECTOR Plane) const;
144 | 	// Plane-box test
145 | 
146 | 	bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const;
147 | 	// Ray-Box test
148 | 
149 | 	ContainmentType     XM_CALLCONV     ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
150 | 		_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const;
151 | 	// Test box against six planes (see BoundingFrustum::GetPlanes)
152 | 
153 | 	// Static methods
154 | 	static void CreateMerged(_Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2);
155 | 
156 | 	static void CreateFromSphere(_Out_ BoundingBox& Out, _In_ const BoundingSphere& sh);
157 | 
158 | 	static void    XM_CALLCONV     CreateFromPoints(_Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2);
159 | 	static void CreateFromPoints(_Out_ BoundingBox& Out, _In_ size_t Count,
160 | 		_In_reads_bytes_(sizeof(XMFLOAT3) + Stride*(Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride);
161 | };
162 | 
163 | //-------------------------------------------------------------------------------------
164 | // Oriented bounding box
165 | //-------------------------------------------------------------------------------------
166 | struct BoundingOrientedBox
167 | {
168 | 	static const size_t CORNER_COUNT = 8;
169 | 
170 | 	XMFLOAT3 Center;            // Center of the box.
171 | 	XMFLOAT3 Extents;           // Distance from the center to each side.
172 | 	XMFLOAT4 Orientation;       // Unit quaternion representing rotation (box -> world).
173 | 
174 | 								// Creators
175 | 	BoundingOrientedBox() : Center(0, 0, 0), Extents(1.f, 1.f, 1.f), Orientation(0, 0, 0, 1.f) {}
176 | 	XM_CONSTEXPR BoundingOrientedBox(_In_ const XMFLOAT3& _Center, _In_ const XMFLOAT3& _Extents, _In_ const XMFLOAT4& _Orientation)
177 | 		: Center(_Center), Extents(_Extents), Orientation(_Orientation) {}
178 | 	BoundingOrientedBox(_In_ const BoundingOrientedBox& box)
179 | 		: Center(box.Center), Extents(box.Extents), Orientation(box.Orientation) {}
180 | 
181 | 	// Methods
182 | 	BoundingOrientedBox& operator=(_In_ const BoundingOrientedBox& box) { Center = box.Center; Extents = box.Extents; Orientation = box.Orientation; return *this; }
183 | 
184 | 	void    XM_CALLCONV     Transform(_Out_ BoundingOrientedBox& Out, _In_ FXMMATRIX M) const;
185 | 	void    XM_CALLCONV     Transform(_Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const;
186 | 
187 | 	void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const;
188 | 	// Gets the 8 corners of the box
189 | 
190 | 	ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR Point) const;
191 | 	ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const;
192 | 	ContainmentType Contains(_In_ const BoundingSphere& sh) const;
193 | 	ContainmentType Contains(_In_ const BoundingBox& box) const;
194 | 	ContainmentType Contains(_In_ const BoundingOrientedBox& box) const;
195 | 	ContainmentType Contains(_In_ const BoundingFrustum& fr) const;
196 | 
197 | 	bool Intersects(_In_ const BoundingSphere& sh) const;
198 | 	bool Intersects(_In_ const BoundingBox& box) const;
199 | 	bool Intersects(_In_ const BoundingOrientedBox& box) const;
200 | 	bool Intersects(_In_ const BoundingFrustum& fr) const;
201 | 
202 | 	bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const;
203 | 	// Triangle-OrientedBox test
204 | 
205 | 	PlaneIntersectionType    XM_CALLCONV     Intersects(_In_ FXMVECTOR Plane) const;
206 | 	// Plane-OrientedBox test
207 | 
208 | 	bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const;
209 | 	// Ray-OrientedBox test
210 | 
211 | 	ContainmentType     XM_CALLCONV     ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
212 | 		_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const;
213 | 	// Test OrientedBox against six planes (see BoundingFrustum::GetPlanes)
214 | 
215 | 	// Static methods
216 | 	static void CreateFromBoundingBox(_Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box);
217 | 
218 | 	static void CreateFromPoints(_Out_ BoundingOrientedBox& Out, _In_ size_t Count,
219 | 		_In_reads_bytes_(sizeof(XMFLOAT3) + Stride*(Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride);
220 | };
221 | 
222 | //-------------------------------------------------------------------------------------
223 | // Bounding frustum
224 | //-------------------------------------------------------------------------------------
225 | struct BoundingFrustum
226 | {
227 | 	static const size_t CORNER_COUNT = 8;
228 | 
229 | 	XMFLOAT3 Origin;            // Origin of the frustum (and projection).
230 | 	XMFLOAT4 Orientation;       // Quaternion representing rotation.
231 | 
232 | 	float RightSlope;           // Positive X slope (X/Z).
233 | 	float LeftSlope;            // Negative X slope.
234 | 	float TopSlope;             // Positive Y slope (Y/Z).
235 | 	float BottomSlope;          // Negative Y slope.
236 | 	float Near, Far;            // Z of the near plane and far plane.
237 | 
238 | 								// Creators
239 | 	BoundingFrustum() : Origin(0, 0, 0), Orientation(0, 0, 0, 1.f), RightSlope(1.f), LeftSlope(-1.f),
240 | 		TopSlope(1.f), BottomSlope(-1.f), Near(0), Far(1.f) {}
241 | 	XM_CONSTEXPR BoundingFrustum(_In_ const XMFLOAT3& _Origin, _In_ const XMFLOAT4& _Orientation,
242 | 		_In_ float _RightSlope, _In_ float _LeftSlope, _In_ float _TopSlope, _In_ float _BottomSlope,
243 | 		_In_ float _Near, _In_ float _Far)
244 | 		: Origin(_Origin), Orientation(_Orientation),
245 | 		RightSlope(_RightSlope), LeftSlope(_LeftSlope), TopSlope(_TopSlope), BottomSlope(_BottomSlope),
246 | 		Near(_Near), Far(_Far) {}
247 | 	BoundingFrustum(_In_ const BoundingFrustum& fr)
248 | 		: Origin(fr.Origin), Orientation(fr.Orientation), RightSlope(fr.RightSlope), LeftSlope(fr.LeftSlope),
249 | 		TopSlope(fr.TopSlope), BottomSlope(fr.BottomSlope), Near(fr.Near), Far(fr.Far) {}
250 | 	BoundingFrustum(_In_ CXMMATRIX Projection) { CreateFromMatrix(*this, Projection); }
251 | 
252 | 	// Methods
253 | 	BoundingFrustum& operator=(_In_ const BoundingFrustum& fr) {
254 | 		Origin = fr.Origin; Orientation = fr.Orientation;
255 | 		RightSlope = fr.RightSlope; LeftSlope = fr.LeftSlope;
256 | 		TopSlope = fr.TopSlope; BottomSlope = fr.BottomSlope;
257 | 		Near = fr.Near; Far = fr.Far; return *this;
258 | 	}
259 | 
260 | 	void    XM_CALLCONV     Transform(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX M) const;
261 | 	void    XM_CALLCONV     Transform(_Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const;
262 | 
263 | 	void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const;
264 | 	// Gets the 8 corners of the frustum
265 | 
266 | 	ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR Point) const;
267 | 	ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const;
268 | 	ContainmentType Contains(_In_ const BoundingSphere& sp) const;
269 | 	ContainmentType Contains(_In_ const BoundingBox& box) const;
270 | 	ContainmentType Contains(_In_ const BoundingOrientedBox& box) const;
271 | 	ContainmentType Contains(_In_ const BoundingFrustum& fr) const;
272 | 	// Frustum-Frustum test
273 | 
274 | 	bool Intersects(_In_ const BoundingSphere& sh) const;
275 | 	bool Intersects(_In_ const BoundingBox& box) const;
276 | 	bool Intersects(_In_ const BoundingOrientedBox& box) const;
277 | 	bool Intersects(_In_ const BoundingFrustum& fr) const;
278 | 
279 | 	bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const;
280 | 	// Triangle-Frustum test
281 | 
282 | 	PlaneIntersectionType    XM_CALLCONV     Intersects(_In_ FXMVECTOR Plane) const;
283 | 	// Plane-Frustum test
284 | 
285 | 	bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const;
286 | 	// Ray-Frustum test
287 | 
288 | 	ContainmentType     XM_CALLCONV     ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
289 | 		_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const;
290 | 	// Test frustum against six planes (see BoundingFrustum::GetPlanes)
291 | 
292 | 	void GetPlanes(_Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane,
293 | 		_Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane) const;
294 | 	// Create 6 Planes representation of Frustum
295 | 
296 | 	// Static methods
297 | 	static void     XM_CALLCONV     CreateFromMatrix(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX Projection);
298 | };
299 | 
300 | //-----------------------------------------------------------------------------
301 | // Triangle intersection testing routines.
302 | //-----------------------------------------------------------------------------
303 | namespace TriangleTests
304 | {
305 | 	bool                    XM_CALLCONV     Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ HXMVECTOR V2, _Out_ float& Dist);
306 | 	// Ray-Triangle
307 | 
308 | 	bool                    XM_CALLCONV     Intersects(_In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ HXMVECTOR B1, _In_ HXMVECTOR B2);
309 | 	// Triangle-Triangle
310 | 
311 | 	PlaneIntersectionType   XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane);
312 | 	// Plane-Triangle
313 | 
314 | 	ContainmentType         XM_CALLCONV     ContainedBy(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2,
315 | 		_In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2,
316 | 		_In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5);
317 | 	// Test a triangle against six planes at once (see BoundingFrustum::GetPlanes)
318 | };
319 | 
320 | #ifdef _MSC_VER
321 | #	pragma warning(pop)
322 | #endif
323 | 
324 | /****************************************************************************
325 | *
326 | * Implementation
327 | *
328 | ****************************************************************************/
329 | 
330 | #ifdef _MSC_VER
331 | #	pragma warning(push)
332 | #	pragma warning(disable : 4068 4365 4616 6001)
333 | //	C4068/4616: ignore unknown pragmas
334 | //	C4365: Off by default noise
335 | //	C6001: False positives
336 | #	pragma prefast(push)
337 | #	pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
338 | #endif
339 | 
340 | #include "XCollision.inl"
341 | 
342 | #ifdef _MSC_VER
343 | #	pragma prefast(pop)
344 | #	pragma warning(pop)
345 | #endif
346 | 
347 | }; // namespace XMath
348 | 


--------------------------------------------------------------------------------
/Inc/XPackedVector.h:
--------------------------------------------------------------------------------
   1 | //-------------------------------------------------------------------------------------
   2 | // XPackedVector.h -- SIMD C++ Math library
   3 | //
   4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
   5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
   6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
   7 | // PARTICULAR PURPOSE.
   8 | //  
   9 | // Copyright (c) Microsoft Corporation. All rights reserved.
  10 | //-------------------------------------------------------------------------------------
  11 | 
  12 | #pragma once
  13 | 
  14 | #include "XMath.h"
  15 | 
  16 | namespace XMath
  17 | {
  18 | 
  19 | #ifdef _MSC_VER
  20 | #	pragma warning(push)
  21 | #	pragma warning(disable:4201 4365 4324)
  22 | //	C4201: nonstandard extension used
  23 | //	C4365: Off by default noise
  24 | //	C4324: alignment padding warnings
  25 | #endif
  26 | 
  27 | //------------------------------------------------------------------------------
  28 | // ARGB Color; 8-8-8-8 bit unsigned normalized integer components packed into
  29 | // a 32 bit integer.  The normalized color is packed into 32 bits using 8 bit
  30 | // unsigned, normalized integers for the alpha, red, green, and blue components.
  31 | // The alpha component is stored in the most significant bits and the blue
  32 | // component in the least significant bits (A8R8G8B8):
  33 | // [32] aaaaaaaa rrrrrrrr gggggggg bbbbbbbb [0]
  34 | struct XMCOLOR
  35 | {
  36 | 	union
  37 | 	{
  38 | 		struct
  39 | 		{
  40 | 			uint8_t b;  // Blue:    0/255 to 255/255
  41 | 			uint8_t g;  // Green:   0/255 to 255/255
  42 | 			uint8_t r;  // Red:     0/255 to 255/255
  43 | 			uint8_t a;  // Alpha:   0/255 to 255/255
  44 | 		};
  45 | 		uint32_t c;
  46 | 	};
  47 | 
  48 | 	XMCOLOR() XM_CTOR_DEFAULT
  49 | 		XM_CONSTEXPR XMCOLOR(uint32_t Color) : c(Color) {}
  50 | 	XMCOLOR(float _r, float _g, float _b, float _a);
  51 | 	explicit XMCOLOR(_In_reads_(4) const float *pArray);
  52 | 
  53 | 	operator uint32_t () const { return c; }
  54 | 
  55 | 	XMCOLOR& operator= (const XMCOLOR& Color) { c = Color.c; return *this; }
  56 | 	XMCOLOR& operator= (const uint32_t Color) { c = Color; return *this; }
  57 | };
  58 | 
  59 | //------------------------------------------------------------------------------
  60 | // 16 bit floating point number consisting of a sign bit, a 5 bit biased 
  61 | // exponent, and a 10 bit mantissa
  62 | typedef uint16_t HALF;
  63 | 
  64 | //------------------------------------------------------------------------------
  65 | // 2D Vector; 16 bit floating point components
  66 | struct XMHALF2
  67 | {
  68 | 	union
  69 | 	{
  70 | 		struct
  71 | 		{
  72 | 			HALF x;
  73 | 			HALF y;
  74 | 		};
  75 | 		uint32_t v;
  76 | 	};
  77 | 
  78 | 	XMHALF2() XM_CTOR_DEFAULT
  79 | 		explicit XM_CONSTEXPR XMHALF2(uint32_t Packed) : v(Packed) {}
  80 | 	XM_CONSTEXPR XMHALF2(HALF _x, HALF _y) : x(_x), y(_y) {}
  81 | 	explicit XMHALF2(_In_reads_(2) const HALF *pArray) : x(pArray[0]), y(pArray[1]) {}
  82 | 	XMHALF2(float _x, float _y);
  83 | 	explicit XMHALF2(_In_reads_(2) const float *pArray);
  84 | 
  85 | 	XMHALF2& operator= (const XMHALF2& Half2) { x = Half2.x; y = Half2.y; return *this; }
  86 | 	XMHALF2& operator= (uint32_t Packed) { v = Packed; return *this; }
  87 | };
  88 | 
  89 | //------------------------------------------------------------------------------
  90 | // 2D Vector; 16 bit signed normalized integer components
  91 | struct XMSHORTN2
  92 | {
  93 | 	union
  94 | 	{
  95 | 		struct
  96 | 		{
  97 | 			int16_t x;
  98 | 			int16_t y;
  99 | 		};
 100 | 		uint32_t v;
 101 | 	};
 102 | 
 103 | 	XMSHORTN2() XM_CTOR_DEFAULT
 104 | 		explicit XM_CONSTEXPR XMSHORTN2(uint32_t Packed) : v(Packed) {}
 105 | 	XM_CONSTEXPR XMSHORTN2(int16_t _x, int16_t _y) : x(_x), y(_y) {}
 106 | 	explicit XMSHORTN2(_In_reads_(2) const int16_t *pArray) : x(pArray[0]), y(pArray[1]) {}
 107 | 	XMSHORTN2(float _x, float _y);
 108 | 	explicit XMSHORTN2(_In_reads_(2) const float *pArray);
 109 | 
 110 | 	XMSHORTN2& operator= (const XMSHORTN2& ShortN2) { x = ShortN2.x; y = ShortN2.y; return *this; }
 111 | 	XMSHORTN2& operator= (uint32_t Packed) { v = Packed; return *this; }
 112 | };
 113 | 
 114 | // 2D Vector; 16 bit signed integer components
 115 | struct XMSHORT2
 116 | {
 117 | 	union
 118 | 	{
 119 | 		struct
 120 | 		{
 121 | 			int16_t x;
 122 | 			int16_t y;
 123 | 		};
 124 | 		uint32_t v;
 125 | 	};
 126 | 
 127 | 	XMSHORT2() XM_CTOR_DEFAULT
 128 | 		explicit XM_CONSTEXPR XMSHORT2(uint32_t Packed) : v(Packed) {}
 129 | 	XM_CONSTEXPR XMSHORT2(int16_t _x, int16_t _y) : x(_x), y(_y) {}
 130 | 	explicit XMSHORT2(_In_reads_(2) const int16_t *pArray) : x(pArray[0]), y(pArray[1]) {}
 131 | 	XMSHORT2(float _x, float _y);
 132 | 	explicit XMSHORT2(_In_reads_(2) const float *pArray);
 133 | 
 134 | 	XMSHORT2& operator= (const XMSHORT2& Short2) { x = Short2.x; y = Short2.y; return *this; }
 135 | 	XMSHORT2& operator= (uint32_t Packed) { v = Packed; return *this; }
 136 | };
 137 | 
 138 | // 2D Vector; 16 bit unsigned normalized integer components
 139 | struct XMUSHORTN2
 140 | {
 141 | 	union
 142 | 	{
 143 | 		struct
 144 | 		{
 145 | 			uint16_t x;
 146 | 			uint16_t y;
 147 | 		};
 148 | 		uint32_t v;
 149 | 	};
 150 | 
 151 | 	XMUSHORTN2() XM_CTOR_DEFAULT
 152 | 		explicit XM_CONSTEXPR XMUSHORTN2(uint32_t Packed) : v(Packed) {}
 153 | 	XM_CONSTEXPR XMUSHORTN2(uint16_t _x, uint16_t _y) : x(_x), y(_y) {}
 154 | 	explicit XMUSHORTN2(_In_reads_(2) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]) {}
 155 | 	XMUSHORTN2(float _x, float _y);
 156 | 	explicit XMUSHORTN2(_In_reads_(2) const float *pArray);
 157 | 
 158 | 	XMUSHORTN2& operator= (const XMUSHORTN2& UShortN2) { x = UShortN2.x; y = UShortN2.y; return *this; }
 159 | 	XMUSHORTN2& operator= (uint32_t Packed) { v = Packed; return *this; }
 160 | };
 161 | 
 162 | // 2D Vector; 16 bit unsigned integer components
 163 | struct XMUSHORT2
 164 | {
 165 | 	union
 166 | 	{
 167 | 		struct
 168 | 		{
 169 | 			uint16_t x;
 170 | 			uint16_t y;
 171 | 		};
 172 | 		uint32_t v;
 173 | 	};
 174 | 
 175 | 	XMUSHORT2() XM_CTOR_DEFAULT
 176 | 		explicit XM_CONSTEXPR XMUSHORT2(uint32_t Packed) : v(Packed) {}
 177 | 	XM_CONSTEXPR XMUSHORT2(uint16_t _x, uint16_t _y) : x(_x), y(_y) {}
 178 | 	explicit XMUSHORT2(_In_reads_(2) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]) {}
 179 | 	XMUSHORT2(float _x, float _y);
 180 | 	explicit XMUSHORT2(_In_reads_(2) const float *pArray);
 181 | 
 182 | 	XMUSHORT2& operator= (const XMUSHORT2& UShort2) { x = UShort2.x; y = UShort2.y; return *this; }
 183 | 	XMUSHORT2& operator= (uint32_t Packed) { v = Packed; return *this; }
 184 | };
 185 | 
 186 | //------------------------------------------------------------------------------
 187 | // 2D Vector; 8 bit signed normalized integer components
 188 | struct XMBYTEN2
 189 | {
 190 | 	union
 191 | 	{
 192 | 		struct
 193 | 		{
 194 | 			int8_t x;
 195 | 			int8_t y;
 196 | 		};
 197 | 		uint16_t v;
 198 | 	};
 199 | 
 200 | 	XMBYTEN2() XM_CTOR_DEFAULT
 201 | 		explicit XM_CONSTEXPR XMBYTEN2(uint16_t Packed) : v(Packed) {}
 202 | 	XM_CONSTEXPR XMBYTEN2(int8_t _x, int8_t _y) : x(_x), y(_y) {}
 203 | 	explicit XMBYTEN2(_In_reads_(2) const int8_t *pArray) : x(pArray[0]), y(pArray[1]) {}
 204 | 	XMBYTEN2(float _x, float _y);
 205 | 	explicit XMBYTEN2(_In_reads_(2) const float *pArray);
 206 | 
 207 | 	XMBYTEN2& operator= (const XMBYTEN2& ByteN2) { x = ByteN2.x; y = ByteN2.y; return *this; }
 208 | 	XMBYTEN2& operator= (uint16_t Packed) { v = Packed; return *this; }
 209 | };
 210 | 
 211 | // 2D Vector; 8 bit signed integer components
 212 | struct XMBYTE2
 213 | {
 214 | 	union
 215 | 	{
 216 | 		struct
 217 | 		{
 218 | 			int8_t x;
 219 | 			int8_t y;
 220 | 		};
 221 | 		uint16_t v;
 222 | 	};
 223 | 
 224 | 	XMBYTE2() XM_CTOR_DEFAULT
 225 | 		explicit XM_CONSTEXPR XMBYTE2(uint16_t Packed) : v(Packed) {}
 226 | 	XM_CONSTEXPR XMBYTE2(int8_t _x, int8_t _y) : x(_x), y(_y) {}
 227 | 	explicit XMBYTE2(_In_reads_(2) const int8_t *pArray) : x(pArray[0]), y(pArray[1]) {}
 228 | 	XMBYTE2(float _x, float _y);
 229 | 	explicit XMBYTE2(_In_reads_(2) const float *pArray);
 230 | 
 231 | 	XMBYTE2& operator= (const XMBYTE2& Byte2) { x = Byte2.x; y = Byte2.y; return *this; }
 232 | 	XMBYTE2& operator= (uint16_t Packed) { v = Packed; return *this; }
 233 | };
 234 | 
 235 | // 2D Vector; 8 bit unsigned normalized integer components
 236 | struct XMUBYTEN2
 237 | {
 238 | 	union
 239 | 	{
 240 | 		struct
 241 | 		{
 242 | 			uint8_t x;
 243 | 			uint8_t y;
 244 | 		};
 245 | 		uint16_t v;
 246 | 	};
 247 | 
 248 | 	XMUBYTEN2() XM_CTOR_DEFAULT
 249 | 		explicit XM_CONSTEXPR XMUBYTEN2(uint16_t Packed) : v(Packed) {}
 250 | 	XM_CONSTEXPR XMUBYTEN2(uint8_t _x, uint8_t _y) : x(_x), y(_y) {}
 251 | 	explicit XMUBYTEN2(_In_reads_(2) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]) {}
 252 | 	XMUBYTEN2(float _x, float _y);
 253 | 	explicit XMUBYTEN2(_In_reads_(2) const float *pArray);
 254 | 
 255 | 	XMUBYTEN2& operator= (const XMUBYTEN2& UByteN2) { x = UByteN2.x; y = UByteN2.y; return *this; }
 256 | 	XMUBYTEN2& operator= (uint16_t Packed) { v = Packed; return *this; }
 257 | };
 258 | 
 259 | // 2D Vector; 8 bit unsigned integer components
 260 | struct XMUBYTE2
 261 | {
 262 | 	union
 263 | 	{
 264 | 		struct
 265 | 		{
 266 | 			uint8_t x;
 267 | 			uint8_t y;
 268 | 		};
 269 | 		uint16_t v;
 270 | 	};
 271 | 
 272 | 	XMUBYTE2() XM_CTOR_DEFAULT
 273 | 		explicit XM_CONSTEXPR XMUBYTE2(uint16_t Packed) : v(Packed) {}
 274 | 	XM_CONSTEXPR XMUBYTE2(uint8_t _x, uint8_t _y) : x(_x), y(_y) {}
 275 | 	explicit XMUBYTE2(_In_reads_(2) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]) {}
 276 | 	XMUBYTE2(float _x, float _y);
 277 | 	explicit XMUBYTE2(_In_reads_(2) const float *pArray);
 278 | 
 279 | 	XMUBYTE2& operator= (const XMUBYTE2& UByte2) { x = UByte2.x; y = UByte2.y; return *this; }
 280 | 	XMUBYTE2& operator= (uint16_t Packed) { v = Packed; return *this; }
 281 | };
 282 | 
 283 | //------------------------------------------------------------------------------
 284 | // 3D vector: 5/6/5 unsigned integer components
 285 | struct XMU565
 286 | {
 287 | 	union
 288 | 	{
 289 | 		struct
 290 | 		{
 291 | 			uint16_t x : 5;    // 0 to 31
 292 | 			uint16_t y : 6;    // 0 to 63
 293 | 			uint16_t z : 5;    // 0 to 31
 294 | 		};
 295 | 		uint16_t v;
 296 | 	};
 297 | 
 298 | 	XMU565() XM_CTOR_DEFAULT
 299 | 		explicit XM_CONSTEXPR XMU565(uint16_t Packed) : v(Packed) {}
 300 | 	XM_CONSTEXPR XMU565(uint8_t _x, uint8_t _y, uint8_t _z) : x(_x), y(_y), z(_z) {}
 301 | 	explicit XMU565(_In_reads_(3) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}
 302 | 	XMU565(float _x, float _y, float _z);
 303 | 	explicit XMU565(_In_reads_(3) const float *pArray);
 304 | 
 305 | 	operator uint16_t () const { return v; }
 306 | 
 307 | 	XMU565& operator= (const XMU565& U565) { v = U565.v; return *this; }
 308 | 	XMU565& operator= (uint16_t Packed) { v = Packed; return *this; }
 309 | };
 310 | 
 311 | //------------------------------------------------------------------------------
 312 | // 3D vector: 11/11/10 floating-point components
 313 | // The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent
 314 | // and 6-bit mantissa for x component, a 5-bit biased exponent and
 315 | // 6-bit mantissa for y component, a 5-bit biased exponent and a 5-bit
 316 | // mantissa for z. The z component is stored in the most significant bits
 317 | // and the x component in the least significant bits. No sign bits so
 318 | // all partial-precision numbers are positive.
 319 | // (Z10Y11X11): [32] ZZZZZzzz zzzYYYYY yyyyyyXX XXXxxxxx [0]
 320 | struct XMFLOAT3PK
 321 | {
 322 | 	union
 323 | 	{
 324 | 		struct
 325 | 		{
 326 | 			uint32_t xm : 6; // x-mantissa
 327 | 			uint32_t xe : 5; // x-exponent
 328 | 			uint32_t ym : 6; // y-mantissa
 329 | 			uint32_t ye : 5; // y-exponent
 330 | 			uint32_t zm : 5; // z-mantissa
 331 | 			uint32_t ze : 5; // z-exponent
 332 | 		};
 333 | 		uint32_t v;
 334 | 	};
 335 | 
 336 | 	XMFLOAT3PK() XM_CTOR_DEFAULT
 337 | 		explicit XM_CONSTEXPR XMFLOAT3PK(uint32_t Packed) : v(Packed) {}
 338 | 	XMFLOAT3PK(float _x, float _y, float _z);
 339 | 	explicit XMFLOAT3PK(_In_reads_(3) const float *pArray);
 340 | 
 341 | 	operator uint32_t () const { return v; }
 342 | 
 343 | 	XMFLOAT3PK& operator= (const XMFLOAT3PK& float3pk) { v = float3pk.v; return *this; }
 344 | 	XMFLOAT3PK& operator= (uint32_t Packed) { v = Packed; return *this; }
 345 | };
 346 | 
 347 | //------------------------------------------------------------------------------
 348 | // 3D vector: 9/9/9 floating-point components with shared 5-bit exponent
 349 | // The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent
 350 | // with 9-bit mantissa for the x, y, and z component. The shared exponent
 351 | // is stored in the most significant bits and the x component mantissa is in
 352 | // the least significant bits. No sign bits so all partial-precision numbers
 353 | // are positive.
 354 | // (E5Z9Y9X9): [32] EEEEEzzz zzzzzzyy yyyyyyyx xxxxxxxx [0]
 355 | struct XMFLOAT3SE
 356 | {
 357 | 	union
 358 | 	{
 359 | 		struct
 360 | 		{
 361 | 			uint32_t xm : 9; // x-mantissa
 362 | 			uint32_t ym : 9; // y-mantissa
 363 | 			uint32_t zm : 9; // z-mantissa
 364 | 			uint32_t e : 5; // shared exponent
 365 | 		};
 366 | 		uint32_t v;
 367 | 	};
 368 | 
 369 | 	XMFLOAT3SE() XM_CTOR_DEFAULT
 370 | 		explicit XM_CONSTEXPR XMFLOAT3SE(uint32_t Packed) : v(Packed) {}
 371 | 	XMFLOAT3SE(float _x, float _y, float _z);
 372 | 	explicit XMFLOAT3SE(_In_reads_(3) const float *pArray);
 373 | 
 374 | 	operator uint32_t () const { return v; }
 375 | 
 376 | 	XMFLOAT3SE& operator= (const XMFLOAT3SE& float3se) { v = float3se.v; return *this; }
 377 | 	XMFLOAT3SE& operator= (uint32_t Packed) { v = Packed; return *this; }
 378 | };
 379 | 
 380 | //------------------------------------------------------------------------------
 381 | // 4D Vector; 16 bit floating point components
 382 | struct XMHALF4
 383 | {
 384 | 	union
 385 | 	{
 386 | 		struct
 387 | 		{
 388 | 			HALF x;
 389 | 			HALF y;
 390 | 			HALF z;
 391 | 			HALF w;
 392 | 		};
 393 | 		uint64_t v;
 394 | 	};
 395 | 
 396 | 	XMHALF4() XM_CTOR_DEFAULT
 397 | 		explicit XM_CONSTEXPR XMHALF4(uint64_t Packed) : v(Packed) {}
 398 | 	XM_CONSTEXPR XMHALF4(HALF _x, HALF _y, HALF _z, HALF _w) : x(_x), y(_y), z(_z), w(_w) {}
 399 | 	explicit XMHALF4(_In_reads_(4) const HALF *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 400 | 	XMHALF4(float _x, float _y, float _z, float _w);
 401 | 	explicit XMHALF4(_In_reads_(4) const float *pArray);
 402 | 
 403 | 	XMHALF4& operator= (const XMHALF4& Half4) { x = Half4.x; y = Half4.y; z = Half4.z; w = Half4.w; return *this; }
 404 | 	XMHALF4& operator= (uint64_t Packed) { v = Packed; return *this; }
 405 | };
 406 | 
 407 | //------------------------------------------------------------------------------
 408 | // 4D Vector; 16 bit signed normalized integer components
 409 | struct XMSHORTN4
 410 | {
 411 | 	union
 412 | 	{
 413 | 		struct
 414 | 		{
 415 | 			int16_t x;
 416 | 			int16_t y;
 417 | 			int16_t z;
 418 | 			int16_t w;
 419 | 		};
 420 | 		uint64_t v;
 421 | 	};
 422 | 
 423 | 	XMSHORTN4() XM_CTOR_DEFAULT
 424 | 		explicit XM_CONSTEXPR XMSHORTN4(uint64_t Packed) : v(Packed) {}
 425 | 	XM_CONSTEXPR XMSHORTN4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 426 | 	explicit XMSHORTN4(_In_reads_(4) const int16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 427 | 	XMSHORTN4(float _x, float _y, float _z, float _w);
 428 | 	explicit XMSHORTN4(_In_reads_(4) const float *pArray);
 429 | 
 430 | 	XMSHORTN4& operator= (const XMSHORTN4& ShortN4) { x = ShortN4.x; y = ShortN4.y; z = ShortN4.z; w = ShortN4.w; return *this; }
 431 | 	XMSHORTN4& operator= (uint64_t Packed) { v = Packed; return *this; }
 432 | };
 433 | 
 434 | // 4D Vector; 16 bit signed integer components
 435 | struct XMSHORT4
 436 | {
 437 | 	union
 438 | 	{
 439 | 		struct
 440 | 		{
 441 | 			int16_t x;
 442 | 			int16_t y;
 443 | 			int16_t z;
 444 | 			int16_t w;
 445 | 		};
 446 | 		uint64_t v;
 447 | 	};
 448 | 
 449 | 	XMSHORT4() XM_CTOR_DEFAULT
 450 | 		explicit XM_CONSTEXPR XMSHORT4(uint64_t Packed) : v(Packed) {}
 451 | 	XM_CONSTEXPR XMSHORT4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 452 | 	explicit XMSHORT4(_In_reads_(4) const int16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 453 | 	XMSHORT4(float _x, float _y, float _z, float _w);
 454 | 	explicit XMSHORT4(_In_reads_(4) const float *pArray);
 455 | 
 456 | 	XMSHORT4& operator= (const XMSHORT4& Short4) { x = Short4.x; y = Short4.y; z = Short4.z; w = Short4.w; return *this; }
 457 | 	XMSHORT4& operator= (uint64_t Packed) { v = Packed; return *this; }
 458 | };
 459 | 
 460 | // 4D Vector; 16 bit unsigned normalized integer components
 461 | struct XMUSHORTN4
 462 | {
 463 | 	union
 464 | 	{
 465 | 		struct
 466 | 		{
 467 | 			uint16_t x;
 468 | 			uint16_t y;
 469 | 			uint16_t z;
 470 | 			uint16_t w;
 471 | 		};
 472 | 		uint64_t v;
 473 | 	};
 474 | 
 475 | 	XMUSHORTN4() XM_CTOR_DEFAULT
 476 | 		explicit XM_CONSTEXPR XMUSHORTN4(uint64_t Packed) : v(Packed) {}
 477 | 	XM_CONSTEXPR XMUSHORTN4(uint16_t _x, uint16_t _y, uint16_t _z, uint16_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 478 | 	explicit XMUSHORTN4(_In_reads_(4) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 479 | 	XMUSHORTN4(float _x, float _y, float _z, float _w);
 480 | 	explicit XMUSHORTN4(_In_reads_(4) const float *pArray);
 481 | 
 482 | 	XMUSHORTN4& operator= (const XMUSHORTN4& UShortN4) { x = UShortN4.x; y = UShortN4.y; z = UShortN4.z; w = UShortN4.w; return *this; }
 483 | 	XMUSHORTN4& operator= (uint64_t Packed) { v = Packed; return *this; }
 484 | };
 485 | 
 486 | // 4D Vector; 16 bit unsigned integer components
 487 | struct XMUSHORT4
 488 | {
 489 | 	union
 490 | 	{
 491 | 		struct
 492 | 		{
 493 | 			uint16_t x;
 494 | 			uint16_t y;
 495 | 			uint16_t z;
 496 | 			uint16_t w;
 497 | 		};
 498 | 		uint64_t v;
 499 | 	};
 500 | 
 501 | 	XMUSHORT4() XM_CTOR_DEFAULT
 502 | 		explicit XM_CONSTEXPR XMUSHORT4(uint64_t Packed) : v(Packed) {}
 503 | 	XM_CONSTEXPR XMUSHORT4(uint16_t _x, uint16_t _y, uint16_t _z, uint16_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 504 | 	explicit XMUSHORT4(_In_reads_(4) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 505 | 	XMUSHORT4(float _x, float _y, float _z, float _w);
 506 | 	explicit XMUSHORT4(_In_reads_(4) const float *pArray);
 507 | 
 508 | 	XMUSHORT4& operator= (const XMUSHORT4& UShort4) { x = UShort4.x; y = UShort4.y; z = UShort4.z; w = UShort4.w; return *this; }
 509 | 	XMUSHORT4& operator= (uint32_t Packed) { v = Packed; return *this; }
 510 | };
 511 | 
 512 | //------------------------------------------------------------------------------
 513 | // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer
 514 | // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, 
 515 | // normalized integer for the w component and 10 bit signed, normalized 
 516 | // integers for the z, y, and x components.  The w component is stored in the 
 517 | // most significant bits and the x component in the least significant bits
 518 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
 519 | struct XMXDECN4
 520 | {
 521 | 	union
 522 | 	{
 523 | 		struct
 524 | 		{
 525 | 			int32_t x : 10;    // -511/511 to 511/511
 526 | 			int32_t y : 10;    // -511/511 to 511/511
 527 | 			int32_t z : 10;    // -511/511 to 511/511
 528 | 			uint32_t w : 2;     //      0/3 to     3/3
 529 | 		};
 530 | 		uint32_t v;
 531 | 	};
 532 | 
 533 | 	XMXDECN4() XM_CTOR_DEFAULT
 534 | 		explicit XM_CONSTEXPR XMXDECN4(uint32_t Packed) : v(Packed) {}
 535 | 	XMXDECN4(float _x, float _y, float _z, float _w);
 536 | 	explicit XMXDECN4(_In_reads_(4) const float *pArray);
 537 | 
 538 | 	operator uint32_t () const { return v; }
 539 | 
 540 | 	XMXDECN4& operator= (const XMXDECN4& XDecN4) { v = XDecN4.v; return *this; }
 541 | 	XMXDECN4& operator= (uint32_t Packed) { v = Packed; return *this; }
 542 | };
 543 | 
 544 | // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer
 545 | // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned
 546 | // integer for the w component and 10 bit signed integers for the 
 547 | // z, y, and x components.  The w component is stored in the 
 548 | // most significant bits and the x component in the least significant bits
 549 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
 550 | struct XM_DEPRECATED XMXDEC4
 551 | {
 552 | 	union
 553 | 	{
 554 | 		struct
 555 | 		{
 556 | 			int32_t x : 10;    // -511 to 511
 557 | 			int32_t y : 10;    // -511 to 511
 558 | 			int32_t z : 10;    // -511 to 511
 559 | 			uint32_t w : 2;     // 0 to 3
 560 | 		};
 561 | 		uint32_t v;
 562 | 	};
 563 | 
 564 | 	XMXDEC4() XM_CTOR_DEFAULT
 565 | 		explicit XM_CONSTEXPR XMXDEC4(uint32_t Packed) : v(Packed) {}
 566 | 	XMXDEC4(float _x, float _y, float _z, float _w);
 567 | 	explicit XMXDEC4(_In_reads_(4) const float *pArray);
 568 | 
 569 | 	operator uint32_t () const { return v; }
 570 | 
 571 | 	XMXDEC4& operator= (const XMXDEC4& XDec4) { v = XDec4.v; return *this; }
 572 | 	XMXDEC4& operator= (uint32_t Packed) { v = Packed; return *this; }
 573 | };
 574 | 
 575 | // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer
 576 | // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit signed, 
 577 | // normalized integer for the w component and 10 bit signed, normalized 
 578 | // integers for the z, y, and x components.  The w component is stored in the 
 579 | // most significant bits and the x component in the least significant bits
 580 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
 581 | struct XM_DEPRECATED XMDECN4
 582 | {
 583 | 	union
 584 | 	{
 585 | 		struct
 586 | 		{
 587 | 			int32_t x : 10;    // -511/511 to 511/511
 588 | 			int32_t y : 10;    // -511/511 to 511/511
 589 | 			int32_t z : 10;    // -511/511 to 511/511
 590 | 			int32_t w : 2;     //     -1/1 to     1/1
 591 | 		};
 592 | 		uint32_t v;
 593 | 	};
 594 | 
 595 | 	XMDECN4() XM_CTOR_DEFAULT
 596 | 		explicit XM_CONSTEXPR XMDECN4(uint32_t Packed) : v(Packed) {}
 597 | 	XMDECN4(float _x, float _y, float _z, float _w);
 598 | 	explicit XMDECN4(_In_reads_(4) const float *pArray);
 599 | 
 600 | 	operator uint32_t () const { return v; }
 601 | 
 602 | 	XMDECN4& operator= (const XMDECN4& DecN4) { v = DecN4.v; return *this; }
 603 | 	XMDECN4& operator= (uint32_t Packed) { v = Packed; return *this; }
 604 | };
 605 | 
 606 | // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer
 607 | // The 4D Vector is packed into 32 bits as follows: a 2 bit signed, 
 608 | // integer for the w component and 10 bit signed integers for the 
 609 | // z, y, and x components.  The w component is stored in the 
 610 | // most significant bits and the x component in the least significant bits
 611 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
 612 | struct XM_DEPRECATED XMDEC4
 613 | {
 614 | 	union
 615 | 	{
 616 | 		struct
 617 | 		{
 618 | 			int32_t  x : 10;    // -511 to 511
 619 | 			int32_t  y : 10;    // -511 to 511
 620 | 			int32_t  z : 10;    // -511 to 511
 621 | 			int32_t  w : 2;     //   -1 to   1
 622 | 		};
 623 | 		uint32_t v;
 624 | 	};
 625 | 
 626 | 	XMDEC4() XM_CTOR_DEFAULT
 627 | 		explicit XM_CONSTEXPR XMDEC4(uint32_t Packed) : v(Packed) {}
 628 | 	XMDEC4(float _x, float _y, float _z, float _w);
 629 | 	explicit XMDEC4(_In_reads_(4) const float *pArray);
 630 | 
 631 | 	operator uint32_t () const { return v; }
 632 | 
 633 | 	XMDEC4& operator= (const XMDEC4& Dec4) { v = Dec4.v; return *this; }
 634 | 	XMDEC4& operator= (uint32_t Packed) { v = Packed; return *this; }
 635 | };
 636 | 
 637 | // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer
 638 | // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, 
 639 | // normalized integer for the w component and 10 bit unsigned, normalized 
 640 | // integers for the z, y, and x components.  The w component is stored in the 
 641 | // most significant bits and the x component in the least significant bits
 642 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
 643 | struct XMUDECN4
 644 | {
 645 | 	union
 646 | 	{
 647 | 		struct
 648 | 		{
 649 | 			uint32_t x : 10;    // 0/1023 to 1023/1023
 650 | 			uint32_t y : 10;    // 0/1023 to 1023/1023
 651 | 			uint32_t z : 10;    // 0/1023 to 1023/1023
 652 | 			uint32_t w : 2;     //    0/3 to       3/3
 653 | 		};
 654 | 		uint32_t v;
 655 | 	};
 656 | 
 657 | 	XMUDECN4() XM_CTOR_DEFAULT
 658 | 		explicit XM_CONSTEXPR XMUDECN4(uint32_t Packed) : v(Packed) {}
 659 | 	XMUDECN4(float _x, float _y, float _z, float _w);
 660 | 	explicit XMUDECN4(_In_reads_(4) const float *pArray);
 661 | 
 662 | 	operator uint32_t () const { return v; }
 663 | 
 664 | 	XMUDECN4& operator= (const XMUDECN4& UDecN4) { v = UDecN4.v; return *this; }
 665 | 	XMUDECN4& operator= (uint32_t Packed) { v = Packed; return *this; }
 666 | };
 667 | 
 668 | // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer
 669 | // The 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, 
 670 | // integer for the w component and 10 bit unsigned integers 
 671 | // for the z, y, and x components.  The w component is stored in the 
 672 | // most significant bits and the x component in the least significant bits
 673 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
 674 | struct XMUDEC4
 675 | {
 676 | 	union
 677 | 	{
 678 | 		struct
 679 | 		{
 680 | 			uint32_t x : 10;    // 0 to 1023
 681 | 			uint32_t y : 10;    // 0 to 1023
 682 | 			uint32_t z : 10;    // 0 to 1023
 683 | 			uint32_t w : 2;     // 0 to    3
 684 | 		};
 685 | 		uint32_t v;
 686 | 	};
 687 | 
 688 | 	XMUDEC4() XM_CTOR_DEFAULT
 689 | 		explicit XM_CONSTEXPR XMUDEC4(uint32_t Packed) : v(Packed) {}
 690 | 	XMUDEC4(float _x, float _y, float _z, float _w);
 691 | 	explicit XMUDEC4(_In_reads_(4) const float *pArray);
 692 | 
 693 | 	operator uint32_t () const { return v; }
 694 | 
 695 | 	XMUDEC4& operator= (const XMUDEC4& UDec4) { v = UDec4.v; return *this; }
 696 | 	XMUDEC4& operator= (uint32_t Packed) { v = Packed; return *this; }
 697 | };
 698 | 
 699 | //------------------------------------------------------------------------------
 700 | // 4D Vector; 8 bit signed normalized integer components
 701 | struct XMBYTEN4
 702 | {
 703 | 	union
 704 | 	{
 705 | 		struct
 706 | 		{
 707 | 			int8_t x;
 708 | 			int8_t y;
 709 | 			int8_t z;
 710 | 			int8_t w;
 711 | 		};
 712 | 		uint32_t v;
 713 | 	};
 714 | 
 715 | 	XMBYTEN4() XM_CTOR_DEFAULT
 716 | 		XM_CONSTEXPR XMBYTEN4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 717 | 	explicit XM_CONSTEXPR XMBYTEN4(uint32_t Packed) : v(Packed) {}
 718 | 	explicit XMBYTEN4(_In_reads_(4) const int8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 719 | 	XMBYTEN4(float _x, float _y, float _z, float _w);
 720 | 	explicit XMBYTEN4(_In_reads_(4) const float *pArray);
 721 | 
 722 | 	XMBYTEN4& operator= (const XMBYTEN4& ByteN4) { x = ByteN4.x; y = ByteN4.y; z = ByteN4.z; w = ByteN4.w; return *this; }
 723 | 	XMBYTEN4& operator= (uint32_t Packed) { v = Packed; return *this; }
 724 | };
 725 | 
 726 | // 4D Vector; 8 bit signed integer components
 727 | struct XMBYTE4
 728 | {
 729 | 	union
 730 | 	{
 731 | 		struct
 732 | 		{
 733 | 			int8_t x;
 734 | 			int8_t y;
 735 | 			int8_t z;
 736 | 			int8_t w;
 737 | 		};
 738 | 		uint32_t v;
 739 | 	};
 740 | 
 741 | 	XMBYTE4() XM_CTOR_DEFAULT
 742 | 		XM_CONSTEXPR XMBYTE4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 743 | 	explicit XM_CONSTEXPR XMBYTE4(uint32_t Packed) : v(Packed) {}
 744 | 	explicit XMBYTE4(_In_reads_(4) const int8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 745 | 	XMBYTE4(float _x, float _y, float _z, float _w);
 746 | 	explicit XMBYTE4(_In_reads_(4) const float *pArray);
 747 | 
 748 | 	XMBYTE4& operator= (const XMBYTE4& Byte4) { x = Byte4.x; y = Byte4.y; z = Byte4.z; w = Byte4.w; return *this; }
 749 | 	XMBYTE4& operator= (uint32_t Packed) { v = Packed; return *this; }
 750 | };
 751 | 
 752 | // 4D Vector; 8 bit unsigned normalized integer components
 753 | struct XMUBYTEN4
 754 | {
 755 | 	union
 756 | 	{
 757 | 		struct
 758 | 		{
 759 | 			uint8_t x;
 760 | 			uint8_t y;
 761 | 			uint8_t z;
 762 | 			uint8_t w;
 763 | 		};
 764 | 		uint32_t v;
 765 | 	};
 766 | 
 767 | 	XMUBYTEN4() XM_CTOR_DEFAULT
 768 | 		XM_CONSTEXPR XMUBYTEN4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 769 | 	explicit XM_CONSTEXPR XMUBYTEN4(uint32_t Packed) : v(Packed) {}
 770 | 	explicit XMUBYTEN4(_In_reads_(4) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 771 | 	XMUBYTEN4(float _x, float _y, float _z, float _w);
 772 | 	explicit XMUBYTEN4(_In_reads_(4) const float *pArray);
 773 | 
 774 | 	XMUBYTEN4& operator= (const XMUBYTEN4& UByteN4) { x = UByteN4.x; y = UByteN4.y; z = UByteN4.z; w = UByteN4.w; return *this; }
 775 | 	XMUBYTEN4& operator= (uint32_t Packed) { v = Packed; return *this; }
 776 | };
 777 | 
 778 | // 4D Vector; 8 bit unsigned integer components
 779 | struct XMUBYTE4
 780 | {
 781 | 	union
 782 | 	{
 783 | 		struct
 784 | 		{
 785 | 			uint8_t x;
 786 | 			uint8_t y;
 787 | 			uint8_t z;
 788 | 			uint8_t w;
 789 | 		};
 790 | 		uint32_t v;
 791 | 	};
 792 | 
 793 | 	XMUBYTE4() XM_CTOR_DEFAULT
 794 | 		XM_CONSTEXPR XMUBYTE4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 795 | 	explicit XM_CONSTEXPR XMUBYTE4(uint32_t Packed) : v(Packed) {}
 796 | 	explicit XMUBYTE4(_In_reads_(4) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 797 | 	XMUBYTE4(float _x, float _y, float _z, float _w);
 798 | 	explicit XMUBYTE4(_In_reads_(4) const float *pArray);
 799 | 
 800 | 	XMUBYTE4& operator= (const XMUBYTE4& UByte4) { x = UByte4.x; y = UByte4.y; z = UByte4.z; w = UByte4.w; return *this; }
 801 | 	XMUBYTE4& operator= (uint32_t Packed) { v = Packed; return *this; }
 802 | };
 803 | 
 804 | //------------------------------------------------------------------------------
 805 | // 4D vector; 4 bit unsigned integer components
 806 | struct XMUNIBBLE4
 807 | {
 808 | 	union
 809 | 	{
 810 | 		struct
 811 | 		{
 812 | 			uint16_t x : 4;    // 0 to 15
 813 | 			uint16_t y : 4;    // 0 to 15
 814 | 			uint16_t z : 4;    // 0 to 15
 815 | 			uint16_t w : 4;    // 0 to 15
 816 | 		};
 817 | 		uint16_t v;
 818 | 	};
 819 | 
 820 | 	XMUNIBBLE4() XM_CTOR_DEFAULT
 821 | 		explicit XM_CONSTEXPR XMUNIBBLE4(uint16_t Packed) : v(Packed) {}
 822 | 	XM_CONSTEXPR XMUNIBBLE4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
 823 | 	explicit XMUNIBBLE4(_In_reads_(4) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
 824 | 	XMUNIBBLE4(float _x, float _y, float _z, float _w);
 825 | 	explicit XMUNIBBLE4(_In_reads_(4) const float *pArray);
 826 | 
 827 | 	operator uint16_t () const { return v; }
 828 | 
 829 | 	XMUNIBBLE4& operator= (const XMUNIBBLE4& UNibble4) { v = UNibble4.v; return *this; }
 830 | 	XMUNIBBLE4& operator= (uint16_t Packed) { v = Packed; return *this; }
 831 | };
 832 | 
 833 | //------------------------------------------------------------------------------
 834 | // 4D vector: 5/5/5/1 unsigned integer components
 835 | struct XMU555
 836 | {
 837 | 	union
 838 | 	{
 839 | 		struct
 840 | 		{
 841 | 			uint16_t x : 5;    // 0 to 31
 842 | 			uint16_t y : 5;    // 0 to 31
 843 | 			uint16_t z : 5;    // 0 to 31
 844 | 			uint16_t w : 1;    // 0 or 1
 845 | 		};
 846 | 		uint16_t v;
 847 | 	};
 848 | 
 849 | 	XMU555() XM_CTOR_DEFAULT
 850 | 		explicit XM_CONSTEXPR XMU555(uint16_t Packed) : v(Packed) {}
 851 | 	XM_CONSTEXPR XMU555(uint8_t _x, uint8_t _y, uint8_t _z, bool _w) : x(_x), y(_y), z(_z), w(_w ? 0x1 : 0) {}
 852 | 	XMU555(_In_reads_(3) const uint8_t *pArray, _In_ bool _w) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(_w ? 0x1 : 0) {}
 853 | 	XMU555(float _x, float _y, float _z, bool _w);
 854 | 	XMU555(_In_reads_(3) const float *pArray, _In_ bool _w);
 855 | 
 856 | 	operator uint16_t () const { return v; }
 857 | 
 858 | 	XMU555& operator= (const XMU555& U555) { v = U555.v; return *this; }
 859 | 	XMU555& operator= (uint16_t Packed) { v = Packed; return *this; }
 860 | };
 861 | 
 862 | #ifdef _MSC_VER
 863 | #	pragma warning(pop)
 864 | #endif
 865 | 
 866 | /****************************************************************************
 867 | *
 868 | * Data conversion operations
 869 | *
 870 | ****************************************************************************/
 871 | 
 872 | float           XMConvertHalfToFloat(HALF Value);
 873 | float*          XMConvertHalfToFloatStream(_Out_writes_bytes_(sizeof(float) + OutputStride*(HalfCount - 1)) float* pOutputStream,
 874 | 	_In_ size_t OutputStride,
 875 | 	_In_reads_bytes_(sizeof(HALF) + InputStride*(HalfCount - 1)) const HALF* pInputStream,
 876 | 	_In_ size_t InputStride, _In_ size_t HalfCount);
 877 | HALF            XMConvertFloatToHalf(float Value);
 878 | HALF*           XMConvertFloatToHalfStream(_Out_writes_bytes_(sizeof(HALF) + OutputStride*(FloatCount - 1)) HALF* pOutputStream,
 879 | 	_In_ size_t OutputStride,
 880 | 	_In_reads_bytes_(sizeof(float) + InputStride*(FloatCount - 1)) const float* pInputStream,
 881 | 	_In_ size_t InputStride, _In_ size_t FloatCount);
 882 | 
 883 | /****************************************************************************
 884 | *
 885 | * Load operations
 886 | *
 887 | ****************************************************************************/
 888 | 
 889 | XMVECTOR    XM_CALLCONV     XMLoadColor(_In_ const XMCOLOR* pSource);
 890 | 
 891 | XMVECTOR    XM_CALLCONV     XMLoadHalf2(_In_ const XMHALF2* pSource);
 892 | XMVECTOR    XM_CALLCONV     XMLoadShortN2(_In_ const XMSHORTN2* pSource);
 893 | XMVECTOR    XM_CALLCONV     XMLoadShort2(_In_ const XMSHORT2* pSource);
 894 | XMVECTOR    XM_CALLCONV     XMLoadUShortN2(_In_ const XMUSHORTN2* pSource);
 895 | XMVECTOR    XM_CALLCONV     XMLoadUShort2(_In_ const XMUSHORT2* pSource);
 896 | XMVECTOR    XM_CALLCONV     XMLoadByteN2(_In_ const XMBYTEN2* pSource);
 897 | XMVECTOR    XM_CALLCONV     XMLoadByte2(_In_ const XMBYTE2* pSource);
 898 | XMVECTOR    XM_CALLCONV     XMLoadUByteN2(_In_ const XMUBYTEN2* pSource);
 899 | XMVECTOR    XM_CALLCONV     XMLoadUByte2(_In_ const XMUBYTE2* pSource);
 900 | 
 901 | XMVECTOR    XM_CALLCONV     XMLoadU565(_In_ const XMU565* pSource);
 902 | XMVECTOR    XM_CALLCONV     XMLoadFloat3PK(_In_ const XMFLOAT3PK* pSource);
 903 | XMVECTOR    XM_CALLCONV     XMLoadFloat3SE(_In_ const XMFLOAT3SE* pSource);
 904 | 
 905 | XMVECTOR    XM_CALLCONV     XMLoadHalf4(_In_ const XMHALF4* pSource);
 906 | XMVECTOR    XM_CALLCONV     XMLoadShortN4(_In_ const XMSHORTN4* pSource);
 907 | XMVECTOR    XM_CALLCONV     XMLoadShort4(_In_ const XMSHORT4* pSource);
 908 | XMVECTOR    XM_CALLCONV     XMLoadUShortN4(_In_ const XMUSHORTN4* pSource);
 909 | XMVECTOR    XM_CALLCONV     XMLoadUShort4(_In_ const XMUSHORT4* pSource);
 910 | XMVECTOR    XM_CALLCONV     XMLoadXDecN4(_In_ const XMXDECN4* pSource);
 911 | XMVECTOR    XM_CALLCONV     XMLoadUDecN4(_In_ const XMUDECN4* pSource);
 912 | XMVECTOR    XM_CALLCONV     XMLoadUDecN4_XR(_In_ const XMUDECN4* pSource);
 913 | XMVECTOR    XM_CALLCONV     XMLoadUDec4(_In_ const XMUDEC4* pSource);
 914 | XMVECTOR    XM_CALLCONV     XMLoadByteN4(_In_ const XMBYTEN4* pSource);
 915 | XMVECTOR    XM_CALLCONV     XMLoadByte4(_In_ const XMBYTE4* pSource);
 916 | XMVECTOR    XM_CALLCONV     XMLoadUByteN4(_In_ const XMUBYTEN4* pSource);
 917 | XMVECTOR    XM_CALLCONV     XMLoadUByte4(_In_ const XMUBYTE4* pSource);
 918 | XMVECTOR    XM_CALLCONV     XMLoadUNibble4(_In_ const XMUNIBBLE4* pSource);
 919 | XMVECTOR    XM_CALLCONV     XMLoadU555(_In_ const XMU555* pSource);
 920 | 
 921 | #ifdef _MSC_VER
 922 | #	pragma warning(push)
 923 | #	pragma warning(disable : 4996)
 924 | //	C4996: ignore deprecation warning
 925 | #endif
 926 | 
 927 | XMVECTOR    XM_DEPRECATED XM_CALLCONV XMLoadDecN4(_In_ const XMDECN4* pSource);
 928 | XMVECTOR    XM_DEPRECATED XM_CALLCONV XMLoadDec4(_In_ const XMDEC4* pSource);
 929 | XMVECTOR    XM_DEPRECATED XM_CALLCONV XMLoadXDec4(_In_ const XMXDEC4* pSource);
 930 | 
 931 | #ifdef _MSC_VER
 932 | #	pragma warning(pop)
 933 | #endif
 934 | 
 935 | /****************************************************************************
 936 | *
 937 | * Store operations
 938 | *
 939 | ****************************************************************************/
 940 | 
 941 | void    XM_CALLCONV     XMStoreColor(_Out_ XMCOLOR* pDestination, _In_ FXMVECTOR V);
 942 | 
 943 | void    XM_CALLCONV     XMStoreHalf2(_Out_ XMHALF2* pDestination, _In_ FXMVECTOR V);
 944 | void    XM_CALLCONV     XMStoreShortN2(_Out_ XMSHORTN2* pDestination, _In_ FXMVECTOR V);
 945 | void    XM_CALLCONV     XMStoreShort2(_Out_ XMSHORT2* pDestination, _In_ FXMVECTOR V);
 946 | void    XM_CALLCONV     XMStoreUShortN2(_Out_ XMUSHORTN2* pDestination, _In_ FXMVECTOR V);
 947 | void    XM_CALLCONV     XMStoreUShort2(_Out_ XMUSHORT2* pDestination, _In_ FXMVECTOR V);
 948 | void    XM_CALLCONV     XMStoreByteN2(_Out_ XMBYTEN2* pDestination, _In_ FXMVECTOR V);
 949 | void    XM_CALLCONV     XMStoreByte2(_Out_ XMBYTE2* pDestination, _In_ FXMVECTOR V);
 950 | void    XM_CALLCONV     XMStoreUByteN2(_Out_ XMUBYTEN2* pDestination, _In_ FXMVECTOR V);
 951 | void    XM_CALLCONV     XMStoreUByte2(_Out_ XMUBYTE2* pDestination, _In_ FXMVECTOR V);
 952 | 
 953 | void    XM_CALLCONV     XMStoreU565(_Out_ XMU565* pDestination, _In_ FXMVECTOR V);
 954 | void    XM_CALLCONV     XMStoreFloat3PK(_Out_ XMFLOAT3PK* pDestination, _In_ FXMVECTOR V);
 955 | void    XM_CALLCONV     XMStoreFloat3SE(_Out_ XMFLOAT3SE* pDestination, _In_ FXMVECTOR V);
 956 | 
 957 | void    XM_CALLCONV     XMStoreHalf4(_Out_ XMHALF4* pDestination, _In_ FXMVECTOR V);
 958 | void    XM_CALLCONV     XMStoreShortN4(_Out_ XMSHORTN4* pDestination, _In_ FXMVECTOR V);
 959 | void    XM_CALLCONV     XMStoreShort4(_Out_ XMSHORT4* pDestination, _In_ FXMVECTOR V);
 960 | void    XM_CALLCONV     XMStoreUShortN4(_Out_ XMUSHORTN4* pDestination, _In_ FXMVECTOR V);
 961 | void    XM_CALLCONV     XMStoreUShort4(_Out_ XMUSHORT4* pDestination, _In_ FXMVECTOR V);
 962 | void    XM_CALLCONV     XMStoreXDecN4(_Out_ XMXDECN4* pDestination, _In_ FXMVECTOR V);
 963 | void    XM_CALLCONV     XMStoreUDecN4(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V);
 964 | void    XM_CALLCONV     XMStoreUDecN4_XR(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V);
 965 | void    XM_CALLCONV     XMStoreUDec4(_Out_ XMUDEC4* pDestination, _In_ FXMVECTOR V);
 966 | void    XM_CALLCONV     XMStoreByteN4(_Out_ XMBYTEN4* pDestination, _In_ FXMVECTOR V);
 967 | void    XM_CALLCONV     XMStoreByte4(_Out_ XMBYTE4* pDestination, _In_ FXMVECTOR V);
 968 | void    XM_CALLCONV     XMStoreUByteN4(_Out_ XMUBYTEN4* pDestination, _In_ FXMVECTOR V);
 969 | void    XM_CALLCONV     XMStoreUByte4(_Out_ XMUBYTE4* pDestination, _In_ FXMVECTOR V);
 970 | void    XM_CALLCONV     XMStoreUNibble4(_Out_ XMUNIBBLE4* pDestination, _In_ FXMVECTOR V);
 971 | void    XM_CALLCONV     XMStoreU555(_Out_ XMU555* pDestination, _In_ FXMVECTOR V);
 972 | 
 973 | #ifdef _MSC_VER
 974 | #	pragma warning(push)
 975 | #	pragma warning(disable : 4996)
 976 | //	C4996: ignore deprecation warning
 977 | #endif
 978 | 
 979 | void    XM_DEPRECATED XM_CALLCONV XMStoreDecN4(_Out_ XMDECN4* pDestination, _In_ FXMVECTOR V);
 980 | void    XM_DEPRECATED XM_CALLCONV XMStoreDec4(_Out_ XMDEC4* pDestination, _In_ FXMVECTOR V);
 981 | void    XM_DEPRECATED XM_CALLCONV XMStoreXDec4(_Out_ XMXDEC4* pDestination, _In_ FXMVECTOR V);
 982 | 
 983 | #ifdef _MSC_VER
 984 | #	pragma warning(pop)
 985 | #endif
 986 | 
 987 | /****************************************************************************
 988 | *
 989 | * Implementation
 990 | *
 991 | ****************************************************************************/
 992 | 
 993 | #ifdef _MSC_VER
 994 | #	pragma warning(push)
 995 | #	pragma warning(disable:4068 4214 4204 4365 4616 6001 6101)
 996 | //	C4068/4616: ignore unknown pragmas
 997 | //	C4214/4204: nonstandard extension used
 998 | //	C4365: Off by default noise
 999 | //	C6001/6101: False positives
1000 | #	pragma prefast(push)
1001 | #	pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
1002 | #endif
1003 | 
1004 | #include "XPackedVector.inl"
1005 | 
1006 | #ifdef _MSC_VER
1007 | #	pragma prefast(pop)
1008 | #	pragma warning(pop)
1009 | #endif
1010 | 
1011 | }; // namespace XMath
1012 | 


--------------------------------------------------------------------------------
/Inc/XMathConvert.inl:
--------------------------------------------------------------------------------
   1 | //-------------------------------------------------------------------------------------
   2 | // XMathConvert.inl -- SIMD C++ Math library
   3 | //
   4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
   5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
   6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
   7 | // PARTICULAR PURPOSE.
   8 | //  
   9 | // Copyright (c) Microsoft Corporation. All rights reserved.
  10 | //-------------------------------------------------------------------------------------
  11 | 
  12 | #pragma once
  13 | 
  14 | /****************************************************************************
  15 | *
  16 | * Data conversion
  17 | *
  18 | ****************************************************************************/
  19 | 
  20 | //------------------------------------------------------------------------------
  21 | 
  22 | #ifdef _MSC_VER
  23 | #	pragma warning(push)
  24 | #	pragma warning(disable:4701)
  25 | //	C4701: false positives
  26 | #endif
  27 | 
  28 | inline XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat
  29 | (
  30 | 	FXMVECTOR    VInt,
  31 | 	uint32_t     DivExponent
  32 | )
  33 | {
  34 | 	assert(DivExponent < 32);
  35 | #if defined(_XM_NO_INTRINSICS_)
  36 | 	float fScale = 1.0f / (float)(1U << DivExponent);
  37 | 	uint32_t ElementIndex = 0;
  38 | 	XMVECTOR Result;
  39 | 	do {
  40 | 		int32_t iTemp = (int32_t)VInt.vector4_u32[ElementIndex];
  41 | 		Result.vector4_f32[ElementIndex] = ((float)iTemp) * fScale;
  42 | 	} while (++ElementIndex < 4);
  43 | 	return Result;
  44 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
  45 | 	float fScale = 1.0f / (float)(1U << DivExponent);
  46 | 	float32x4_t vResult = vcvtq_f32_s32(VInt);
  47 | 	return vmulq_n_f32(vResult, fScale);
  48 | #else // _XM_SSE_INTRINSICS_
  49 | 	// Convert to floats
  50 | 	XMVECTOR vResult = _mm_cvtepi32_ps(_mm_castps_si128(VInt));
  51 | 	// Convert DivExponent into 1.0f/(1<<DivExponent)
  52 | 	uint32_t uScale = 0x3F800000U - (DivExponent << 23);
  53 | 	// Splat the scalar value
  54 | 	__m128i vScale = _mm_set1_epi32(uScale);
  55 | 	vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(vScale));
  56 | 	return vResult;
  57 | #endif
  58 | }
  59 | 
  60 | //------------------------------------------------------------------------------
  61 | 
  62 | inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt
  63 | (
  64 | 	FXMVECTOR    VFloat,
  65 | 	uint32_t     MulExponent
  66 | )
  67 | {
  68 | 	assert(MulExponent < 32);
  69 | #if defined(_XM_NO_INTRINSICS_)
  70 | 	// Get the scalar factor.
  71 | 	float fScale = (float)(1U << MulExponent);
  72 | 	uint32_t ElementIndex = 0;
  73 | 	XMVECTOR Result;
  74 | 	do {
  75 | 		int32_t iResult;
  76 | 		float fTemp = VFloat.vector4_f32[ElementIndex] * fScale;
  77 | 		if (fTemp <= -(65536.0f*32768.0f)) {
  78 | 			iResult = (-0x7FFFFFFF) - 1;
  79 | 		}
  80 | 		else if (fTemp > (65536.0f*32768.0f) - 128.0f) {
  81 | 			iResult = 0x7FFFFFFF;
  82 | 		}
  83 | 		else {
  84 | 			iResult = (int32_t)fTemp;
  85 | 		}
  86 | 		Result.vector4_u32[ElementIndex] = (uint32_t)iResult;
  87 | 	} while (++ElementIndex < 4);
  88 | 	return Result;
  89 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
  90 | 	float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));
  91 | 	// In case of positive overflow, detect it
  92 | 	uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxInt);
  93 | 	// Float to int conversion
  94 | 	int32x4_t vResulti = vcvtq_s32_f32(vResult);
  95 | 	// If there was positive overflow, set to 0x7FFFFFFF
  96 | 	vResult = vandq_u32(vOverflow, g_XMAbsMask);
  97 | 	vOverflow = vbicq_u32(vResulti, vOverflow);
  98 | 	vOverflow = vorrq_u32(vOverflow, vResult);
  99 | 	return vOverflow;
 100 | #else // _XM_SSE_INTRINSICS_
 101 | 	XMVECTOR vResult = _mm_set_ps1((float)(1U << MulExponent));
 102 | 	vResult = _mm_mul_ps(vResult, VFloat);
 103 | 	// In case of positive overflow, detect it
 104 | 	XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxInt);
 105 | 	// Float to int conversion
 106 | 	__m128i vResulti = _mm_cvttps_epi32(vResult);
 107 | 	// If there was positive overflow, set to 0x7FFFFFFF
 108 | 	vResult = _mm_and_ps(vOverflow, g_XMAbsMask);
 109 | 	vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti));
 110 | 	vOverflow = _mm_or_ps(vOverflow, vResult);
 111 | 	return vOverflow;
 112 | #endif
 113 | }
 114 | 
 115 | //------------------------------------------------------------------------------
 116 | 
 117 | inline XMVECTOR XM_CALLCONV XMConvertVectorUIntToFloat
 118 | (
 119 | 	FXMVECTOR     VUInt,
 120 | 	uint32_t      DivExponent
 121 | )
 122 | {
 123 | 	assert(DivExponent < 32);
 124 | #if defined(_XM_NO_INTRINSICS_)
 125 | 	float fScale = 1.0f / (float)(1U << DivExponent);
 126 | 	uint32_t ElementIndex = 0;
 127 | 	XMVECTOR Result;
 128 | 	do {
 129 | 		Result.vector4_f32[ElementIndex] = (float)VUInt.vector4_u32[ElementIndex] * fScale;
 130 | 	} while (++ElementIndex < 4);
 131 | 	return Result;
 132 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 133 | 	float fScale = 1.0f / (float)(1U << DivExponent);
 134 | 	float32x4_t vResult = vcvtq_f32_u32(VUInt);
 135 | 	return vmulq_n_f32(vResult, fScale);
 136 | #else // _XM_SSE_INTRINSICS_
 137 | 	// For the values that are higher than 0x7FFFFFFF, a fixup is needed
 138 | 	// Determine which ones need the fix.
 139 | 	XMVECTOR vMask = _mm_and_ps(VUInt, g_XMNegativeZero);
 140 | 	// Force all values positive
 141 | 	XMVECTOR vResult = _mm_xor_ps(VUInt, vMask);
 142 | 	// Convert to floats
 143 | 	vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
 144 | 	// Convert 0x80000000 -> 0xFFFFFFFF
 145 | 	__m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31);
 146 | 	// For only the ones that are too big, add the fixup
 147 | 	vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned);
 148 | 	vResult = _mm_add_ps(vResult, vMask);
 149 | 	// Convert DivExponent into 1.0f/(1<<DivExponent)
 150 | 	uint32_t uScale = 0x3F800000U - (DivExponent << 23);
 151 | 	// Splat
 152 | 	iMask = _mm_set1_epi32(uScale);
 153 | 	vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(iMask));
 154 | 	return vResult;
 155 | #endif
 156 | }
 157 | 
 158 | //------------------------------------------------------------------------------
 159 | 
 160 | inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt
 161 | (
 162 | 	FXMVECTOR     VFloat,
 163 | 	uint32_t      MulExponent
 164 | )
 165 | {
 166 | 	assert(MulExponent < 32);
 167 | #if defined(_XM_NO_INTRINSICS_)
 168 | 	// Get the scalar factor.
 169 | 	float fScale = (float)(1U << MulExponent);
 170 | 	uint32_t ElementIndex = 0;
 171 | 	XMVECTOR Result;
 172 | 	do {
 173 | 		uint32_t uResult;
 174 | 		float fTemp = VFloat.vector4_f32[ElementIndex] * fScale;
 175 | 		if (fTemp <= 0.0f) {
 176 | 			uResult = 0;
 177 | 		}
 178 | 		else if (fTemp >= (65536.0f*65536.0f)) {
 179 | 			uResult = 0xFFFFFFFFU;
 180 | 		}
 181 | 		else {
 182 | 			uResult = (uint32_t)fTemp;
 183 | 		}
 184 | 		Result.vector4_u32[ElementIndex] = uResult;
 185 | 	} while (++ElementIndex < 4);
 186 | 	return Result;
 187 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 188 | 	float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));
 189 | 	// In case of overflow, detect it
 190 | 	uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxUInt);
 191 | 	// Float to int conversion
 192 | 	uint32x4_t vResulti = vcvtq_u32_f32(vResult);
 193 | 	// If there was overflow, set to 0xFFFFFFFFU
 194 | 	vResult = vbicq_u32(vResulti, vOverflow);
 195 | 	vOverflow = vorrq_u32(vOverflow, vResult);
 196 | 	return vOverflow;
 197 | #else // _XM_SSE_INTRINSICS_
 198 | 	XMVECTOR vResult = _mm_set_ps1(static_cast<float>(1U << MulExponent));
 199 | 	vResult = _mm_mul_ps(vResult, VFloat);
 200 | 	// Clamp to >=0
 201 | 	vResult = _mm_max_ps(vResult, g_XMZero);
 202 | 	// Any numbers that are too big, set to 0xFFFFFFFFU
 203 | 	XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt);
 204 | 	XMVECTOR vValue = g_XMUnsignedFix;
 205 | 	// Too large for a signed integer?
 206 | 	XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue);
 207 | 	// Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise
 208 | 	vValue = _mm_and_ps(vValue, vMask);
 209 | 	// Perform fixup only on numbers too large (Keeps low bit precision)
 210 | 	vResult = _mm_sub_ps(vResult, vValue);
 211 | 	__m128i vResulti = _mm_cvttps_epi32(vResult);
 212 | 	// Convert from signed to unsigned pnly if greater than 0x80000000
 213 | 	vMask = _mm_and_ps(vMask, g_XMNegativeZero);
 214 | 	vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask);
 215 | 	// On those that are too large, set to 0xFFFFFFFF
 216 | 	vResult = _mm_or_ps(vResult, vOverflow);
 217 | 	return vResult;
 218 | #endif
 219 | }
 220 | 
 221 | #ifdef _MSC_VER
 222 | #	pragma warning(pop)
 223 | #endif
 224 | 
 225 | /****************************************************************************
 226 | *
 227 | * Vector and matrix load operations
 228 | *
 229 | ****************************************************************************/
 230 | 
 231 | //------------------------------------------------------------------------------
 232 | _Use_decl_annotations_
 233 | inline XMVECTOR XM_CALLCONV XMLoadInt(const uint32_t* pSource)
 234 | {
 235 | 	assert(pSource);
 236 | #if defined(_XM_NO_INTRINSICS_)
 237 | 	XMVECTOR V;
 238 | 	V.vector4_u32[0] = *pSource;
 239 | 	V.vector4_u32[1] = 0;
 240 | 	V.vector4_u32[2] = 0;
 241 | 	V.vector4_u32[3] = 0;
 242 | 	return V;
 243 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 244 | 	uint32x4_t zero = vdupq_n_u32(0);
 245 | 	return vld1q_lane_u32(pSource, zero, 0);
 246 | #elif defined(_XM_SSE_INTRINSICS_)
 247 | 	return _mm_load_ss(reinterpret_cast<const float*>(pSource));
 248 | #endif
 249 | }
 250 | 
 251 | //------------------------------------------------------------------------------
 252 | _Use_decl_annotations_
 253 | inline XMVECTOR XM_CALLCONV XMLoadFloat(const float* pSource)
 254 | {
 255 | 	assert(pSource);
 256 | #if defined(_XM_NO_INTRINSICS_)
 257 | 	XMVECTOR V;
 258 | 	V.vector4_f32[0] = *pSource;
 259 | 	V.vector4_f32[1] = 0.f;
 260 | 	V.vector4_f32[2] = 0.f;
 261 | 	V.vector4_f32[3] = 0.f;
 262 | 	return V;
 263 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 264 | 	float32x4_t zero = vdupq_n_f32(0);
 265 | 	return vld1q_lane_f32(pSource, zero, 0);
 266 | #elif defined(_XM_SSE_INTRINSICS_)
 267 | 	return _mm_load_ss(pSource);
 268 | #endif
 269 | }
 270 | 
 271 | //------------------------------------------------------------------------------
 272 | _Use_decl_annotations_
 273 | inline XMVECTOR XM_CALLCONV XMLoadInt2
 274 | (
 275 | 	const uint32_t* pSource
 276 | )
 277 | {
 278 | 	assert(pSource);
 279 | #if defined(_XM_NO_INTRINSICS_)
 280 | 	XMVECTOR V;
 281 | 	V.vector4_u32[0] = pSource[0];
 282 | 	V.vector4_u32[1] = pSource[1];
 283 | 	V.vector4_u32[2] = 0;
 284 | 	V.vector4_u32[3] = 0;
 285 | 	return V;
 286 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 287 | 	uint32x2_t x = vld1_u32(pSource);
 288 | 	uint32x2_t zero = vdup_n_u32(0);
 289 | 	return vcombine_u32(x, zero);
 290 | #elif defined(_XM_SSE_INTRINSICS_)
 291 | 	__m128 x = _mm_load_ss(reinterpret_cast<const float*>(pSource));
 292 | 	__m128 y = _mm_load_ss(reinterpret_cast<const float*>(pSource + 1));
 293 | 	return _mm_unpacklo_ps(x, y);
 294 | #endif
 295 | }
 296 | 
 297 | //------------------------------------------------------------------------------
 298 | _Use_decl_annotations_
 299 | inline XMVECTOR XM_CALLCONV XMLoadInt2A
 300 | (
 301 | 	const uint32_t* pSource
 302 | )
 303 | {
 304 | 	assert(pSource);
 305 | 	assert(((uintptr_t)pSource & 0xF) == 0);
 306 | #if defined(_XM_NO_INTRINSICS_)
 307 | 	XMVECTOR V;
 308 | 	V.vector4_u32[0] = pSource[0];
 309 | 	V.vector4_u32[1] = pSource[1];
 310 | 	V.vector4_u32[2] = 0;
 311 | 	V.vector4_u32[3] = 0;
 312 | 	return V;
 313 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 314 | 	uint32x2_t x = vld1_u32_ex(pSource, 64);
 315 | 	uint32x2_t zero = vdup_n_u32(0);
 316 | 	return vcombine_u32(x, zero);
 317 | #elif defined(_XM_SSE_INTRINSICS_)
 318 | 	__m128i V = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pSource));
 319 | 	return _mm_castsi128_ps(V);
 320 | #endif
 321 | }
 322 | 
 323 | //------------------------------------------------------------------------------
 324 | _Use_decl_annotations_
 325 | inline XMVECTOR XM_CALLCONV XMLoadFloat2
 326 | (
 327 | 	const XMFLOAT2* pSource
 328 | )
 329 | {
 330 | 	assert(pSource);
 331 | #if defined(_XM_NO_INTRINSICS_)
 332 | 	XMVECTOR V;
 333 | 	V.vector4_f32[0] = pSource->x;
 334 | 	V.vector4_f32[1] = pSource->y;
 335 | 	V.vector4_f32[2] = 0.f;
 336 | 	V.vector4_f32[3] = 0.f;
 337 | 	return V;
 338 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 339 | 	float32x2_t x = vld1_f32(reinterpret_cast<const float*>(pSource));
 340 | 	float32x2_t zero = vdup_n_f32(0);
 341 | 	return vcombine_f32(x, zero);
 342 | #elif defined(_XM_SSE_INTRINSICS_)
 343 | 	__m128 x = _mm_load_ss(&pSource->x);
 344 | 	__m128 y = _mm_load_ss(&pSource->y);
 345 | 	return _mm_unpacklo_ps(x, y);
 346 | #endif
 347 | }
 348 | 
 349 | //------------------------------------------------------------------------------
 350 | _Use_decl_annotations_
 351 | inline XMVECTOR XM_CALLCONV XMLoadFloat2A
 352 | (
 353 | 	const XMFLOAT2A* pSource
 354 | )
 355 | {
 356 | 	assert(pSource);
 357 | 	assert(((uintptr_t)pSource & 0xF) == 0);
 358 | #if defined(_XM_NO_INTRINSICS_)
 359 | 	XMVECTOR V;
 360 | 	V.vector4_f32[0] = pSource->x;
 361 | 	V.vector4_f32[1] = pSource->y;
 362 | 	V.vector4_f32[2] = 0.f;
 363 | 	V.vector4_f32[3] = 0.f;
 364 | 	return V;
 365 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 366 | 	float32x2_t x = vld1_f32_ex(reinterpret_cast<const float*>(pSource), 64);
 367 | 	float32x2_t zero = vdup_n_f32(0);
 368 | 	return vcombine_f32(x, zero);
 369 | #elif defined(_XM_SSE_INTRINSICS_)
 370 | 	__m128i V = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pSource));
 371 | 	return _mm_castsi128_ps(V);
 372 | #endif
 373 | }
 374 | 
 375 | //------------------------------------------------------------------------------
 376 | _Use_decl_annotations_
 377 | inline XMVECTOR XM_CALLCONV XMLoadSInt2
 378 | (
 379 | 	const XMINT2* pSource
 380 | )
 381 | {
 382 | 	assert(pSource);
 383 | #if defined(_XM_NO_INTRINSICS_)
 384 | 	XMVECTOR V;
 385 | 	V.vector4_f32[0] = (float)pSource->x;
 386 | 	V.vector4_f32[1] = (float)pSource->y;
 387 | 	V.vector4_f32[2] = 0.f;
 388 | 	V.vector4_f32[3] = 0.f;
 389 | 	return V;
 390 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 391 | 	int32x2_t x = vld1_s32(reinterpret_cast<const int32_t*>(pSource));
 392 | 	float32x2_t v = vcvt_f32_s32(x);
 393 | 	float32x2_t zero = vdup_n_f32(0);
 394 | 	return vcombine_f32(v, zero);
 395 | #elif defined(_XM_SSE_INTRINSICS_)
 396 | 	__m128 x = _mm_load_ss(reinterpret_cast<const float*>(&pSource->x));
 397 | 	__m128 y = _mm_load_ss(reinterpret_cast<const float*>(&pSource->y));
 398 | 	__m128 V = _mm_unpacklo_ps(x, y);
 399 | 	return _mm_cvtepi32_ps(_mm_castps_si128(V));
 400 | #endif
 401 | }
 402 | 
 403 | //------------------------------------------------------------------------------
 404 | _Use_decl_annotations_
 405 | inline XMVECTOR XM_CALLCONV XMLoadUInt2
 406 | (
 407 | 	const XMUINT2* pSource
 408 | )
 409 | {
 410 | 	assert(pSource);
 411 | #if defined(_XM_NO_INTRINSICS_)
 412 | 	XMVECTOR V;
 413 | 	V.vector4_f32[0] = (float)pSource->x;
 414 | 	V.vector4_f32[1] = (float)pSource->y;
 415 | 	V.vector4_f32[2] = 0.f;
 416 | 	V.vector4_f32[3] = 0.f;
 417 | 	return V;
 418 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 419 | 	uint32x2_t x = vld1_u32(reinterpret_cast<const uint32_t*>(pSource));
 420 | 	float32x2_t v = vcvt_f32_u32(x);
 421 | 	float32x2_t zero = vdup_n_f32(0);
 422 | 	return vcombine_f32(v, zero);
 423 | #elif defined(_XM_SSE_INTRINSICS_)
 424 | 	__m128 x = _mm_load_ss(reinterpret_cast<const float*>(&pSource->x));
 425 | 	__m128 y = _mm_load_ss(reinterpret_cast<const float*>(&pSource->y));
 426 | 	__m128 V = _mm_unpacklo_ps(x, y);
 427 | 	// For the values that are higher than 0x7FFFFFFF, a fixup is needed
 428 | 	// Determine which ones need the fix.
 429 | 	XMVECTOR vMask = _mm_and_ps(V, g_XMNegativeZero);
 430 | 	// Force all values positive
 431 | 	XMVECTOR vResult = _mm_xor_ps(V, vMask);
 432 | 	// Convert to floats
 433 | 	vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
 434 | 	// Convert 0x80000000 -> 0xFFFFFFFF
 435 | 	__m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31);
 436 | 	// For only the ones that are too big, add the fixup
 437 | 	vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned);
 438 | 	vResult = _mm_add_ps(vResult, vMask);
 439 | 	return vResult;
 440 | #endif
 441 | }
 442 | 
 443 | //------------------------------------------------------------------------------
 444 | _Use_decl_annotations_
 445 | inline XMVECTOR XM_CALLCONV XMLoadInt3
 446 | (
 447 | 	const uint32_t* pSource
 448 | )
 449 | {
 450 | 	assert(pSource);
 451 | #if defined(_XM_NO_INTRINSICS_)
 452 | 	XMVECTOR V;
 453 | 	V.vector4_u32[0] = pSource[0];
 454 | 	V.vector4_u32[1] = pSource[1];
 455 | 	V.vector4_u32[2] = pSource[2];
 456 | 	V.vector4_u32[3] = 0;
 457 | 	return V;
 458 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 459 | 	uint32x2_t x = vld1_u32(pSource);
 460 | 	uint32x2_t zero = vdup_n_u32(0);
 461 | 	uint32x2_t y = vld1_lane_u32(pSource + 2, zero, 0);
 462 | 	return vcombine_u32(x, y);
 463 | #elif defined(_XM_SSE_INTRINSICS_)
 464 | 	__m128 x = _mm_load_ss(reinterpret_cast<const float*>(pSource));
 465 | 	__m128 y = _mm_load_ss(reinterpret_cast<const float*>(pSource + 1));
 466 | 	__m128 z = _mm_load_ss(reinterpret_cast<const float*>(pSource + 2));
 467 | 	__m128 xy = _mm_unpacklo_ps(x, y);
 468 | 	return _mm_movelh_ps(xy, z);
 469 | #endif
 470 | }
 471 | 
 472 | //------------------------------------------------------------------------------
 473 | _Use_decl_annotations_
 474 | inline XMVECTOR XM_CALLCONV XMLoadInt3A
 475 | (
 476 | 	const uint32_t* pSource
 477 | )
 478 | {
 479 | 	assert(pSource);
 480 | 	assert(((uintptr_t)pSource & 0xF) == 0);
 481 | #if defined(_XM_NO_INTRINSICS_)
 482 | 	XMVECTOR V;
 483 | 	V.vector4_u32[0] = pSource[0];
 484 | 	V.vector4_u32[1] = pSource[1];
 485 | 	V.vector4_u32[2] = pSource[2];
 486 | 	V.vector4_u32[3] = 0;
 487 | 	return V;
 488 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 489 | 	// Reads an extra integer which is zero'd
 490 | 	uint32x4_t V = vld1q_u32_ex(pSource, 128);
 491 | 	return vsetq_lane_u32(0, V, 3);
 492 | #elif defined(_XM_SSE_INTRINSICS_)
 493 | 	// Reads an extra integer which is zero'd
 494 | 	__m128i V = _mm_load_si128(reinterpret_cast<const __m128i*>(pSource));
 495 | 	V = _mm_and_si128(V, g_XMMask3);
 496 | 	return _mm_castsi128_ps(V);
 497 | #endif
 498 | }
 499 | 
 500 | //------------------------------------------------------------------------------
 501 | _Use_decl_annotations_
 502 | inline XMVECTOR XM_CALLCONV XMLoadFloat3
 503 | (
 504 | 	const XMFLOAT3* pSource
 505 | )
 506 | {
 507 | 	assert(pSource);
 508 | #if defined(_XM_NO_INTRINSICS_)
 509 | 	XMVECTOR V;
 510 | 	V.vector4_f32[0] = pSource->x;
 511 | 	V.vector4_f32[1] = pSource->y;
 512 | 	V.vector4_f32[2] = pSource->z;
 513 | 	V.vector4_f32[3] = 0.f;
 514 | 	return V;
 515 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 516 | 	float32x2_t x = vld1_f32(reinterpret_cast<const float*>(pSource));
 517 | 	float32x2_t zero = vdup_n_f32(0);
 518 | 	float32x2_t y = vld1_lane_f32(reinterpret_cast<const float*>(pSource) + 2, zero, 0);
 519 | 	return vcombine_f32(x, y);
 520 | #elif defined(_XM_SSE_INTRINSICS_)
 521 | 	__m128 x = _mm_load_ss(&pSource->x);
 522 | 	__m128 y = _mm_load_ss(&pSource->y);
 523 | 	__m128 z = _mm_load_ss(&pSource->z);
 524 | 	__m128 xy = _mm_unpacklo_ps(x, y);
 525 | 	return _mm_movelh_ps(xy, z);
 526 | #endif
 527 | }
 528 | 
 529 | //------------------------------------------------------------------------------
 530 | _Use_decl_annotations_
 531 | inline XMVECTOR XM_CALLCONV XMLoadFloat3A
 532 | (
 533 | 	const XMFLOAT3A* pSource
 534 | )
 535 | {
 536 | 	assert(pSource);
 537 | 	assert(((uintptr_t)pSource & 0xF) == 0);
 538 | #if defined(_XM_NO_INTRINSICS_)
 539 | 	XMVECTOR V;
 540 | 	V.vector4_f32[0] = pSource->x;
 541 | 	V.vector4_f32[1] = pSource->y;
 542 | 	V.vector4_f32[2] = pSource->z;
 543 | 	V.vector4_f32[3] = 0.f;
 544 | 	return V;
 545 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 546 | 	// Reads an extra float which is zero'd
 547 | 	float32x4_t V = vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
 548 | 	return vsetq_lane_f32(0, V, 3);
 549 | #elif defined(_XM_SSE_INTRINSICS_)
 550 | 	// Reads an extra float which is zero'd
 551 | 	__m128 V = _mm_load_ps(&pSource->x);
 552 | 	return _mm_and_ps(V, g_XMMask3);
 553 | #endif
 554 | }
 555 | 
 556 | //------------------------------------------------------------------------------
 557 | _Use_decl_annotations_
 558 | inline XMVECTOR XM_CALLCONV XMLoadSInt3
 559 | (
 560 | 	const XMINT3* pSource
 561 | )
 562 | {
 563 | 	assert(pSource);
 564 | #if defined(_XM_NO_INTRINSICS_)
 565 | 
 566 | 	XMVECTOR V;
 567 | 	V.vector4_f32[0] = (float)pSource->x;
 568 | 	V.vector4_f32[1] = (float)pSource->y;
 569 | 	V.vector4_f32[2] = (float)pSource->z;
 570 | 	V.vector4_f32[3] = 0.f;
 571 | 	return V;
 572 | 
 573 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 574 | 	int32x2_t x = vld1_s32(reinterpret_cast<const int32_t*>(pSource));
 575 | 	int32x2_t zero = vdup_n_s32(0);
 576 | 	int32x2_t y = vld1_lane_s32(reinterpret_cast<const int32_t*>(pSource) + 2, zero, 0);
 577 | 	int32x4_t v = vcombine_s32(x, y);
 578 | 	return vcvtq_f32_s32(v);
 579 | #elif defined(_XM_SSE_INTRINSICS_)
 580 | 	__m128 x = _mm_load_ss(reinterpret_cast<const float*>(&pSource->x));
 581 | 	__m128 y = _mm_load_ss(reinterpret_cast<const float*>(&pSource->y));
 582 | 	__m128 z = _mm_load_ss(reinterpret_cast<const float*>(&pSource->z));
 583 | 	__m128 xy = _mm_unpacklo_ps(x, y);
 584 | 	__m128 V = _mm_movelh_ps(xy, z);
 585 | 	return _mm_cvtepi32_ps(_mm_castps_si128(V));
 586 | #endif
 587 | }
 588 | 
 589 | //------------------------------------------------------------------------------
 590 | _Use_decl_annotations_
 591 | inline XMVECTOR XM_CALLCONV XMLoadUInt3
 592 | (
 593 | 	const XMUINT3* pSource
 594 | )
 595 | {
 596 | 	assert(pSource);
 597 | #if defined(_XM_NO_INTRINSICS_)
 598 | 	XMVECTOR V;
 599 | 	V.vector4_f32[0] = (float)pSource->x;
 600 | 	V.vector4_f32[1] = (float)pSource->y;
 601 | 	V.vector4_f32[2] = (float)pSource->z;
 602 | 	V.vector4_f32[3] = 0.f;
 603 | 	return V;
 604 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 605 | 	uint32x2_t x = vld1_u32(reinterpret_cast<const uint32_t*>(pSource));
 606 | 	uint32x2_t zero = vdup_n_u32(0);
 607 | 	uint32x2_t y = vld1_lane_u32(reinterpret_cast<const uint32_t*>(pSource) + 2, zero, 0);
 608 | 	uint32x4_t v = vcombine_u32(x, y);
 609 | 	return vcvtq_f32_u32(v);
 610 | #elif defined(_XM_SSE_INTRINSICS_)
 611 | 	__m128 x = _mm_load_ss(reinterpret_cast<const float*>(&pSource->x));
 612 | 	__m128 y = _mm_load_ss(reinterpret_cast<const float*>(&pSource->y));
 613 | 	__m128 z = _mm_load_ss(reinterpret_cast<const float*>(&pSource->z));
 614 | 	__m128 xy = _mm_unpacklo_ps(x, y);
 615 | 	__m128 V = _mm_movelh_ps(xy, z);
 616 | 	// For the values that are higher than 0x7FFFFFFF, a fixup is needed
 617 | 	// Determine which ones need the fix.
 618 | 	XMVECTOR vMask = _mm_and_ps(V, g_XMNegativeZero);
 619 | 	// Force all values positive
 620 | 	XMVECTOR vResult = _mm_xor_ps(V, vMask);
 621 | 	// Convert to floats
 622 | 	vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
 623 | 	// Convert 0x80000000 -> 0xFFFFFFFF
 624 | 	__m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31);
 625 | 	// For only the ones that are too big, add the fixup
 626 | 	vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned);
 627 | 	vResult = _mm_add_ps(vResult, vMask);
 628 | 	return vResult;
 629 | #endif
 630 | }
 631 | 
 632 | //------------------------------------------------------------------------------
 633 | _Use_decl_annotations_
 634 | inline XMVECTOR XM_CALLCONV XMLoadInt4
 635 | (
 636 | 	const uint32_t* pSource
 637 | )
 638 | {
 639 | 	assert(pSource);
 640 | 
 641 | #if defined(_XM_NO_INTRINSICS_)
 642 | 	XMVECTOR V;
 643 | 	V.vector4_u32[0] = pSource[0];
 644 | 	V.vector4_u32[1] = pSource[1];
 645 | 	V.vector4_u32[2] = pSource[2];
 646 | 	V.vector4_u32[3] = pSource[3];
 647 | 	return V;
 648 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 649 | 	return vld1q_u32(pSource);
 650 | #elif defined(_XM_SSE_INTRINSICS_)
 651 | 	__m128i V = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pSource));
 652 | 	return _mm_castsi128_ps(V);
 653 | #endif
 654 | }
 655 | 
 656 | //------------------------------------------------------------------------------
 657 | _Use_decl_annotations_
 658 | inline XMVECTOR XM_CALLCONV XMLoadInt4A
 659 | (
 660 | 	const uint32_t* pSource
 661 | )
 662 | {
 663 | 	assert(pSource);
 664 | 	assert(((uintptr_t)pSource & 0xF) == 0);
 665 | #if defined(_XM_NO_INTRINSICS_)
 666 | 	XMVECTOR V;
 667 | 	V.vector4_u32[0] = pSource[0];
 668 | 	V.vector4_u32[1] = pSource[1];
 669 | 	V.vector4_u32[2] = pSource[2];
 670 | 	V.vector4_u32[3] = pSource[3];
 671 | 	return V;
 672 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 673 | 	return vld1q_u32_ex(pSource, 128);
 674 | #elif defined(_XM_SSE_INTRINSICS_)
 675 | 	__m128i V = _mm_load_si128(reinterpret_cast<const __m128i*>(pSource));
 676 | 	return _mm_castsi128_ps(V);
 677 | #endif
 678 | }
 679 | 
 680 | //------------------------------------------------------------------------------
 681 | _Use_decl_annotations_
 682 | inline XMVECTOR XM_CALLCONV XMLoadFloat4
 683 | (
 684 | 	const XMFLOAT4* pSource
 685 | )
 686 | {
 687 | 	assert(pSource);
 688 | #if defined(_XM_NO_INTRINSICS_)
 689 | 	XMVECTOR V;
 690 | 	V.vector4_f32[0] = pSource->x;
 691 | 	V.vector4_f32[1] = pSource->y;
 692 | 	V.vector4_f32[2] = pSource->z;
 693 | 	V.vector4_f32[3] = pSource->w;
 694 | 	return V;
 695 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 696 | 	return vld1q_f32(reinterpret_cast<const float*>(pSource));
 697 | #elif defined(_XM_SSE_INTRINSICS_)
 698 | 	return _mm_loadu_ps(&pSource->x);
 699 | #endif
 700 | }
 701 | 
 702 | //------------------------------------------------------------------------------
 703 | _Use_decl_annotations_
 704 | inline XMVECTOR XM_CALLCONV XMLoadFloat4A
 705 | (
 706 | 	const XMFLOAT4A* pSource
 707 | )
 708 | {
 709 | 	assert(pSource);
 710 | 	assert(((uintptr_t)pSource & 0xF) == 0);
 711 | #if defined(_XM_NO_INTRINSICS_)
 712 | 	XMVECTOR V;
 713 | 	V.vector4_f32[0] = pSource->x;
 714 | 	V.vector4_f32[1] = pSource->y;
 715 | 	V.vector4_f32[2] = pSource->z;
 716 | 	V.vector4_f32[3] = pSource->w;
 717 | 	return V;
 718 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 719 | 	return vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
 720 | #elif defined(_XM_SSE_INTRINSICS_)
 721 | 	return _mm_load_ps(&pSource->x);
 722 | #endif
 723 | }
 724 | 
 725 | //------------------------------------------------------------------------------
 726 | _Use_decl_annotations_
 727 | inline XMVECTOR XM_CALLCONV XMLoadSInt4
 728 | (
 729 | 	const XMINT4* pSource
 730 | )
 731 | {
 732 | 	assert(pSource);
 733 | #if defined(_XM_NO_INTRINSICS_)
 734 | 
 735 | 	XMVECTOR V;
 736 | 	V.vector4_f32[0] = (float)pSource->x;
 737 | 	V.vector4_f32[1] = (float)pSource->y;
 738 | 	V.vector4_f32[2] = (float)pSource->z;
 739 | 	V.vector4_f32[3] = (float)pSource->w;
 740 | 	return V;
 741 | 
 742 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 743 | 	int32x4_t v = vld1q_s32(reinterpret_cast<const int32_t*>(pSource));
 744 | 	return vcvtq_f32_s32(v);
 745 | #elif defined(_XM_SSE_INTRINSICS_)
 746 | 	__m128i V = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pSource));
 747 | 	return _mm_cvtepi32_ps(V);
 748 | #endif
 749 | }
 750 | 
 751 | //------------------------------------------------------------------------------
 752 | _Use_decl_annotations_
 753 | inline XMVECTOR XM_CALLCONV XMLoadUInt4
 754 | (
 755 | 	const XMUINT4* pSource
 756 | )
 757 | {
 758 | 	assert(pSource);
 759 | #if defined(_XM_NO_INTRINSICS_)
 760 | 	XMVECTOR V;
 761 | 	V.vector4_f32[0] = (float)pSource->x;
 762 | 	V.vector4_f32[1] = (float)pSource->y;
 763 | 	V.vector4_f32[2] = (float)pSource->z;
 764 | 	V.vector4_f32[3] = (float)pSource->w;
 765 | 	return V;
 766 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 767 | 	uint32x4_t v = vld1q_u32(reinterpret_cast<const uint32_t*>(pSource));
 768 | 	return vcvtq_f32_u32(v);
 769 | #elif defined(_XM_SSE_INTRINSICS_)
 770 | 	__m128i V = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pSource));
 771 | 	// For the values that are higher than 0x7FFFFFFF, a fixup is needed
 772 | 	// Determine which ones need the fix.
 773 | 	XMVECTOR vMask = _mm_and_ps(_mm_castsi128_ps(V), g_XMNegativeZero);
 774 | 	// Force all values positive
 775 | 	XMVECTOR vResult = _mm_xor_ps(_mm_castsi128_ps(V), vMask);
 776 | 	// Convert to floats
 777 | 	vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
 778 | 	// Convert 0x80000000 -> 0xFFFFFFFF
 779 | 	__m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31);
 780 | 	// For only the ones that are too big, add the fixup
 781 | 	vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned);
 782 | 	vResult = _mm_add_ps(vResult, vMask);
 783 | 	return vResult;
 784 | #endif
 785 | }
 786 | 
 787 | //------------------------------------------------------------------------------
 788 | _Use_decl_annotations_
 789 | inline XMMATRIX XM_CALLCONV XMLoadFloat3x3
 790 | (
 791 | 	const XMFLOAT3X3* pSource
 792 | )
 793 | {
 794 | 	assert(pSource);
 795 | #if defined(_XM_NO_INTRINSICS_)
 796 | 
 797 | 	XMMATRIX M;
 798 | 	M.c[0].vector4_f32[0] = pSource->m[0][0];
 799 | 	M.c[0].vector4_f32[1] = pSource->m[0][1];
 800 | 	M.c[0].vector4_f32[2] = pSource->m[0][2];
 801 | 	M.c[0].vector4_f32[3] = 0.0f;
 802 | 
 803 | 	M.c[1].vector4_f32[0] = pSource->m[1][0];
 804 | 	M.c[1].vector4_f32[1] = pSource->m[1][1];
 805 | 	M.c[1].vector4_f32[2] = pSource->m[1][2];
 806 | 	M.c[1].vector4_f32[3] = 0.0f;
 807 | 
 808 | 	M.c[2].vector4_f32[0] = pSource->m[2][0];
 809 | 	M.c[2].vector4_f32[1] = pSource->m[2][1];
 810 | 	M.c[2].vector4_f32[2] = pSource->m[2][2];
 811 | 	M.c[2].vector4_f32[3] = 0.0f;
 812 | 	M.c[3].vector4_f32[0] = 0.0f;
 813 | 	M.c[3].vector4_f32[1] = 0.0f;
 814 | 	M.c[3].vector4_f32[2] = 0.0f;
 815 | 	M.c[3].vector4_f32[3] = 1.0f;
 816 | 	return M;
 817 | 
 818 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 819 | 	float32x4_t v0 = vld1q_f32(&pSource->m[0][0]);
 820 | 	float32x4_t v1 = vld1q_f32(&pSource->m[1][1]);
 821 | 	float32x2_t v2 = vcreate_f32((uint64_t)*(const uint32_t*)&pSource->m[2][2]);
 822 | 	float32x4_t T = vextq_f32(v0, v1, 3);
 823 | 
 824 | 	XMMATRIX M;
 825 | 	M.c[0] = vandq_u32(v0, g_XMMask3);
 826 | 	M.c[1] = vandq_u32(T, g_XMMask3);
 827 | 	M.c[2] = vcombine_f32(vget_high_f32(v1), v2);
 828 | 	M.c[3] = g_XMIdentityR3;
 829 | 	return M;
 830 | #elif defined(_XM_SSE_INTRINSICS_)
 831 | 	__m128 Z = _mm_setzero_ps();
 832 | 
 833 | 	__m128 V1 = _mm_loadu_ps(&pSource->m[0][0]);
 834 | 	__m128 V2 = _mm_loadu_ps(&pSource->m[1][1]);
 835 | 	__m128 V3 = _mm_load_ss(&pSource->m[2][2]);
 836 | 
 837 | 	__m128 T1 = _mm_unpackhi_ps(V1, Z);
 838 | 	__m128 T2 = _mm_unpacklo_ps(V2, Z);
 839 | 	__m128 T3 = _mm_shuffle_ps(V3, T2, _MM_SHUFFLE(0, 1, 0, 0));
 840 | 	__m128 T4 = _mm_movehl_ps(T2, T3);
 841 | 	__m128 T5 = _mm_movehl_ps(Z, T1);
 842 | 
 843 | 	XMMATRIX M;
 844 | 	M.c[0] = _mm_movelh_ps(V1, T1);
 845 | 	M.c[1] = _mm_add_ps(T4, T5);
 846 | 	M.c[2] = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 3, 2));
 847 | 	M.c[3] = g_XMIdentityR3;
 848 | 	return M;
 849 | #endif
 850 | }
 851 | 
 852 | //------------------------------------------------------------------------------
 853 | _Use_decl_annotations_
 854 | inline XMMATRIX XM_CALLCONV XMLoadFloat4x3
 855 | (
 856 | 	const XMFLOAT4X3* pSource
 857 | )
 858 | {
 859 | 	assert(pSource);
 860 | #if defined(_XM_NO_INTRINSICS_)
 861 | 
 862 | 	XMMATRIX M;
 863 | 	M.c[0].vector4_f32[0] = pSource->m[0][0];
 864 | 	M.c[0].vector4_f32[1] = pSource->m[0][1];
 865 | 	M.c[0].vector4_f32[2] = pSource->m[0][2];
 866 | 	M.c[0].vector4_f32[3] = pSource->m[0][3];
 867 | 
 868 | 	M.c[1].vector4_f32[0] = pSource->m[1][0];
 869 | 	M.c[1].vector4_f32[1] = pSource->m[1][1];
 870 | 	M.c[1].vector4_f32[2] = pSource->m[1][2];
 871 | 	M.c[1].vector4_f32[3] = pSource->m[1][3];
 872 | 
 873 | 	M.c[2].vector4_f32[0] = pSource->m[2][0];
 874 | 	M.c[2].vector4_f32[1] = pSource->m[2][1];
 875 | 	M.c[2].vector4_f32[2] = pSource->m[2][2];
 876 | 	M.c[2].vector4_f32[3] = pSource->m[2][3];
 877 | 
 878 | 	M.c[3] = g_XMIdentityR3;
 879 | 	return M;
 880 | 
 881 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 882 | 	XMMATRIX M;
 883 | 	M.c[0] = vld1q_f32(reinterpret_cast<const float*>(&pSource->_11));
 884 | 	M.c[1] = vld1q_f32(reinterpret_cast<const float*>(&pSource->_12));
 885 | 	M.c[2] = vld1q_f32(reinterpret_cast<const float*>(&pSource->_13));
 886 | 	M.c[3] = g_XMIdentityR3;
 887 | 	return M;
 888 | #elif defined(_XM_SSE_INTRINSICS_)
 889 | 	XMMATRIX M;
 890 | 	M.c[0] = _mm_loadu_ps(&pSource->_11);
 891 | 	M.c[1] = _mm_loadu_ps(&pSource->_12);
 892 | 	M.c[2] = _mm_loadu_ps(&pSource->_13);
 893 | 	M.c[3] = g_XMIdentityR3;
 894 | 	return M;
 895 | #endif
 896 | }
 897 | 
 898 | //------------------------------------------------------------------------------
 899 | _Use_decl_annotations_
 900 | inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A
 901 | (
 902 | 	const XMFLOAT4X3A* pSource
 903 | )
 904 | {
 905 | 	assert(pSource);
 906 | 	assert(((uintptr_t)pSource & 0xF) == 0);
 907 | #if defined(_XM_NO_INTRINSICS_)
 908 | 
 909 | 	XMMATRIX M;
 910 | 	M.c[0].vector4_f32[0] = pSource->m[0][0];
 911 | 	M.c[0].vector4_f32[1] = pSource->m[0][1];
 912 | 	M.c[0].vector4_f32[2] = pSource->m[0][2];
 913 | 	M.c[0].vector4_f32[3] = pSource->m[0][3];
 914 | 
 915 | 	M.c[1].vector4_f32[0] = pSource->m[1][0];
 916 | 	M.c[1].vector4_f32[1] = pSource->m[1][1];
 917 | 	M.c[1].vector4_f32[2] = pSource->m[1][2];
 918 | 	M.c[1].vector4_f32[3] = pSource->m[1][3];
 919 | 
 920 | 	M.c[2].vector4_f32[0] = pSource->m[2][0];
 921 | 	M.c[2].vector4_f32[1] = pSource->m[2][1];
 922 | 	M.c[2].vector4_f32[2] = pSource->m[2][2];
 923 | 	M.c[2].vector4_f32[3] = pSource->m[2][3];
 924 | 
 925 | 	M.c[3] = g_XMIdentityR3;
 926 | 	return M;
 927 | 
 928 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 929 | 	XMMATRIX M;
 930 | 	M.c[0] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_11), 128);
 931 | 	M.c[1] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_12), 128);
 932 | 	M.c[2] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_13), 128);
 933 | 	M.c[3] = g_XMIdentityR3;
 934 | 	return M;
 935 | #elif defined(_XM_SSE_INTRINSICS_)
 936 | 	XMMATRIX M;
 937 | 	M.c[0] = _mm_load_ps(&pSource->_11);
 938 | 	M.c[1] = _mm_load_ps(&pSource->_12);
 939 | 	M.c[2] = _mm_load_ps(&pSource->_13);
 940 | 	M.c[3] = g_XMIdentityR3;
 941 | 	return M;
 942 | #endif
 943 | }
 944 | 
 945 | //------------------------------------------------------------------------------
 946 | _Use_decl_annotations_
 947 | inline XMMATRIX XM_CALLCONV XMLoadFloat4x4
 948 | (
 949 | 	const XMFLOAT4X4* pSource
 950 | )
 951 | {
 952 | 	assert(pSource);
 953 | #if defined(_XM_NO_INTRINSICS_)
 954 | 
 955 | 	XMMATRIX M;
 956 | 	M.c[0].vector4_f32[0] = pSource->m[0][0];
 957 | 	M.c[0].vector4_f32[1] = pSource->m[0][1];
 958 | 	M.c[0].vector4_f32[2] = pSource->m[0][2];
 959 | 	M.c[0].vector4_f32[3] = pSource->m[0][3];
 960 | 
 961 | 	M.c[1].vector4_f32[0] = pSource->m[1][0];
 962 | 	M.c[1].vector4_f32[1] = pSource->m[1][1];
 963 | 	M.c[1].vector4_f32[2] = pSource->m[1][2];
 964 | 	M.c[1].vector4_f32[3] = pSource->m[1][3];
 965 | 
 966 | 	M.c[2].vector4_f32[0] = pSource->m[2][0];
 967 | 	M.c[2].vector4_f32[1] = pSource->m[2][1];
 968 | 	M.c[2].vector4_f32[2] = pSource->m[2][2];
 969 | 	M.c[2].vector4_f32[3] = pSource->m[2][3];
 970 | 
 971 | 	M.c[3].vector4_f32[0] = pSource->m[3][0];
 972 | 	M.c[3].vector4_f32[1] = pSource->m[3][1];
 973 | 	M.c[3].vector4_f32[2] = pSource->m[3][2];
 974 | 	M.c[3].vector4_f32[3] = pSource->m[3][3];
 975 | 	return M;
 976 | 
 977 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 978 | 	XMMATRIX M;
 979 | 	M.c[0] = vld1q_f32(reinterpret_cast<const float*>(&pSource->_11));
 980 | 	M.c[1] = vld1q_f32(reinterpret_cast<const float*>(&pSource->_12));
 981 | 	M.c[2] = vld1q_f32(reinterpret_cast<const float*>(&pSource->_13));
 982 | 	M.c[3] = vld1q_f32(reinterpret_cast<const float*>(&pSource->_14));
 983 | 	return M;
 984 | #elif defined(_XM_SSE_INTRINSICS_)
 985 | 	XMMATRIX M;
 986 | 	M.c[0] = _mm_loadu_ps(&pSource->_11);
 987 | 	M.c[1] = _mm_loadu_ps(&pSource->_12);
 988 | 	M.c[2] = _mm_loadu_ps(&pSource->_13);
 989 | 	M.c[3] = _mm_loadu_ps(&pSource->_14);
 990 | 	return M;
 991 | #endif
 992 | }
 993 | 
 994 | //------------------------------------------------------------------------------
 995 | _Use_decl_annotations_
 996 | inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A
 997 | (
 998 | 	const XMFLOAT4X4A* pSource
 999 | )
1000 | {
1001 | 	assert(pSource);
1002 | 	assert(((uintptr_t)pSource & 0xF) == 0);
1003 | #if defined(_XM_NO_INTRINSICS_)
1004 | 
1005 | 	XMMATRIX M;
1006 | 	M.c[0].vector4_f32[0] = pSource->m[0][0];
1007 | 	M.c[0].vector4_f32[1] = pSource->m[0][1];
1008 | 	M.c[0].vector4_f32[2] = pSource->m[0][2];
1009 | 	M.c[0].vector4_f32[3] = pSource->m[0][3];
1010 | 
1011 | 	M.c[1].vector4_f32[0] = pSource->m[1][0];
1012 | 	M.c[1].vector4_f32[1] = pSource->m[1][1];
1013 | 	M.c[1].vector4_f32[2] = pSource->m[1][2];
1014 | 	M.c[1].vector4_f32[3] = pSource->m[1][3];
1015 | 
1016 | 	M.c[2].vector4_f32[0] = pSource->m[2][0];
1017 | 	M.c[2].vector4_f32[1] = pSource->m[2][1];
1018 | 	M.c[2].vector4_f32[2] = pSource->m[2][2];
1019 | 	M.c[2].vector4_f32[3] = pSource->m[2][3];
1020 | 
1021 | 	M.c[3].vector4_f32[0] = pSource->m[3][0];
1022 | 	M.c[3].vector4_f32[1] = pSource->m[3][1];
1023 | 	M.c[3].vector4_f32[2] = pSource->m[3][2];
1024 | 	M.c[3].vector4_f32[3] = pSource->m[3][3];
1025 | 	return M;
1026 | 
1027 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1028 | 	XMMATRIX M;
1029 | 	M.c[0] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_11), 128);
1030 | 	M.c[1] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_12), 128);
1031 | 	M.c[2] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_13), 128);
1032 | 	M.c[3] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_14), 128);
1033 | 	return M;
1034 | #elif defined(_XM_SSE_INTRINSICS_)
1035 | 	XMMATRIX M;
1036 | 	M.c[0] = _mm_load_ps(&pSource->_11);
1037 | 	M.c[1] = _mm_load_ps(&pSource->_12);
1038 | 	M.c[2] = _mm_load_ps(&pSource->_13);
1039 | 	M.c[3] = _mm_load_ps(&pSource->_14);
1040 | 	return M;
1041 | #endif
1042 | }
1043 | 
1044 | /****************************************************************************
1045 | *
1046 | * Vector and matrix store operations
1047 | *
1048 | ****************************************************************************/
1049 | _Use_decl_annotations_
1050 | inline void XM_CALLCONV XMStoreInt
1051 | (
1052 | 	uint32_t*    pDestination,
1053 | 	FXMVECTOR V
1054 | )
1055 | {
1056 | 	assert(pDestination);
1057 | #if defined(_XM_NO_INTRINSICS_)
1058 | 	*pDestination = XMVectorGetIntX(V);
1059 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1060 | 	vst1q_lane_u32(pDestination, *reinterpret_cast<const uint32x4_t*>(&V), 0);
1061 | #elif defined(_XM_SSE_INTRINSICS_)
1062 | 	_mm_store_ss(reinterpret_cast<float*>(pDestination), V);
1063 | #endif
1064 | }
1065 | 
1066 | //------------------------------------------------------------------------------
1067 | _Use_decl_annotations_
1068 | inline void XM_CALLCONV XMStoreFloat
1069 | (
1070 | 	float*    pDestination,
1071 | 	FXMVECTOR V
1072 | )
1073 | {
1074 | 	assert(pDestination);
1075 | #if defined(_XM_NO_INTRINSICS_)
1076 | 	*pDestination = XMVectorGetX(V);
1077 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1078 | 	vst1q_lane_f32(pDestination, V, 0);
1079 | #elif defined(_XM_SSE_INTRINSICS_)
1080 | 	_mm_store_ss(pDestination, V);
1081 | #endif
1082 | }
1083 | 
1084 | //------------------------------------------------------------------------------
1085 | _Use_decl_annotations_
1086 | inline void XM_CALLCONV XMStoreInt2
1087 | (
1088 | 	uint32_t*    pDestination,
1089 | 	FXMVECTOR V
1090 | )
1091 | {
1092 | 	assert(pDestination);
1093 | #if defined(_XM_NO_INTRINSICS_)
1094 | 	pDestination[0] = V.vector4_u32[0];
1095 | 	pDestination[1] = V.vector4_u32[1];
1096 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1097 | 	uint32x2_t VL = vget_low_u32(V);
1098 | 	vst1_u32(pDestination, VL);
1099 | #elif defined(_XM_SSE_INTRINSICS_)
1100 | 	XMVECTOR T = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1));
1101 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination[0]), V);
1102 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination[1]), T);
1103 | #endif
1104 | }
1105 | 
1106 | //------------------------------------------------------------------------------
1107 | _Use_decl_annotations_
1108 | inline void XM_CALLCONV XMStoreInt2A
1109 | (
1110 | 	uint32_t*    pDestination,
1111 | 	FXMVECTOR V
1112 | )
1113 | {
1114 | 	assert(pDestination);
1115 | 	assert(((uintptr_t)pDestination & 0xF) == 0);
1116 | #if defined(_XM_NO_INTRINSICS_)
1117 | 	pDestination[0] = V.vector4_u32[0];
1118 | 	pDestination[1] = V.vector4_u32[1];
1119 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1120 | 	uint32x2_t VL = vget_low_u32(V);
1121 | 	vst1_u32_ex(pDestination, VL, 64);
1122 | #elif defined(_XM_SSE_INTRINSICS_)
1123 | 	_mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V));
1124 | #endif
1125 | }
1126 | 
1127 | //------------------------------------------------------------------------------
1128 | _Use_decl_annotations_
1129 | inline void XM_CALLCONV XMStoreFloat2
1130 | (
1131 | 	XMFLOAT2* pDestination,
1132 | 	FXMVECTOR  V
1133 | )
1134 | {
1135 | 	assert(pDestination);
1136 | #if defined(_XM_NO_INTRINSICS_)
1137 | 	pDestination->x = V.vector4_f32[0];
1138 | 	pDestination->y = V.vector4_f32[1];
1139 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1140 | 	float32x2_t VL = vget_low_f32(V);
1141 | 	vst1_f32(reinterpret_cast<float*>(pDestination), VL);
1142 | #elif defined(_XM_SSE_INTRINSICS_)
1143 | 	XMVECTOR T = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1));
1144 | 	_mm_store_ss(&pDestination->x, V);
1145 | 	_mm_store_ss(&pDestination->y, T);
1146 | #endif
1147 | }
1148 | 
1149 | //------------------------------------------------------------------------------
1150 | _Use_decl_annotations_
1151 | inline void XM_CALLCONV XMStoreFloat2A
1152 | (
1153 | 	XMFLOAT2A*   pDestination,
1154 | 	FXMVECTOR     V
1155 | )
1156 | {
1157 | 	assert(pDestination);
1158 | 	assert(((uintptr_t)pDestination & 0xF) == 0);
1159 | #if defined(_XM_NO_INTRINSICS_)
1160 | 	pDestination->x = V.vector4_f32[0];
1161 | 	pDestination->y = V.vector4_f32[1];
1162 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1163 | 	float32x2_t VL = vget_low_f32(V);
1164 | 	vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
1165 | #elif defined(_XM_SSE_INTRINSICS_)
1166 | 	_mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V));
1167 | #endif
1168 | }
1169 | 
1170 | //------------------------------------------------------------------------------
1171 | _Use_decl_annotations_
1172 | inline void XM_CALLCONV XMStoreSInt2
1173 | (
1174 | 	XMINT2* pDestination,
1175 | 	FXMVECTOR V
1176 | )
1177 | {
1178 | 	assert(pDestination);
1179 | #if defined(_XM_NO_INTRINSICS_)
1180 | 	pDestination->x = (int32_t)V.vector4_f32[0];
1181 | 	pDestination->y = (int32_t)V.vector4_f32[1];
1182 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1183 | 	int32x2_t v = vget_low_s32(V);
1184 | 	v = vcvt_s32_f32(v);
1185 | 	vst1_s32(reinterpret_cast<int32_t*>(pDestination), v);
1186 | #elif defined(_XM_SSE_INTRINSICS_)
1187 | 	// In case of positive overflow, detect it
1188 | 	XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt);
1189 | 	// Float to int conversion
1190 | 	__m128i vResulti = _mm_cvttps_epi32(V);
1191 | 	// If there was positive overflow, set to 0x7FFFFFFF
1192 | 	XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask);
1193 | 	vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti));
1194 | 	vOverflow = _mm_or_ps(vOverflow, vResult);
1195 | 	// Write two ints
1196 | 	XMVECTOR T = XM_PERMUTE_PS(vOverflow, _MM_SHUFFLE(1, 1, 1, 1));
1197 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->x), vOverflow);
1198 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->y), T);
1199 | #endif
1200 | }
1201 | 
1202 | //------------------------------------------------------------------------------
1203 | _Use_decl_annotations_
1204 | inline void XM_CALLCONV XMStoreUInt2
1205 | (
1206 | 	XMUINT2* pDestination,
1207 | 	FXMVECTOR V
1208 | )
1209 | {
1210 | 	assert(pDestination);
1211 | #if defined(_XM_NO_INTRINSICS_)
1212 | 	pDestination->x = (uint32_t)V.vector4_f32[0];
1213 | 	pDestination->y = (uint32_t)V.vector4_f32[1];
1214 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1215 | 	float32x2_t v = vget_low_f32(V);
1216 | 	uint32x2_t iv = vcvt_u32_f32(v);
1217 | 	vst1_u32(reinterpret_cast<uint32_t*>(pDestination), iv);
1218 | #elif defined(_XM_SSE_INTRINSICS_)
1219 | 	// Clamp to >=0
1220 | 	XMVECTOR vResult = _mm_max_ps(V, g_XMZero);
1221 | 	// Any numbers that are too big, set to 0xFFFFFFFFU
1222 | 	XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt);
1223 | 	XMVECTOR vValue = g_XMUnsignedFix;
1224 | 	// Too large for a signed integer?
1225 | 	XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue);
1226 | 	// Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise
1227 | 	vValue = _mm_and_ps(vValue, vMask);
1228 | 	// Perform fixup only on numbers too large (Keeps low bit precision)
1229 | 	vResult = _mm_sub_ps(vResult, vValue);
1230 | 	__m128i vResulti = _mm_cvttps_epi32(vResult);
1231 | 	// Convert from signed to unsigned pnly if greater than 0x80000000
1232 | 	vMask = _mm_and_ps(vMask, g_XMNegativeZero);
1233 | 	vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask);
1234 | 	// On those that are too large, set to 0xFFFFFFFF
1235 | 	vResult = _mm_or_ps(vResult, vOverflow);
1236 | 	// Write two uints
1237 | 	XMVECTOR T = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(1, 1, 1, 1));
1238 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->x), vResult);
1239 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->y), T);
1240 | #endif
1241 | }
1242 | 
1243 | //------------------------------------------------------------------------------
1244 | _Use_decl_annotations_
1245 | inline void XM_CALLCONV XMStoreInt3
1246 | (
1247 | 	uint32_t*    pDestination,
1248 | 	FXMVECTOR V
1249 | )
1250 | {
1251 | 	assert(pDestination);
1252 | #if defined(_XM_NO_INTRINSICS_)
1253 | 	pDestination[0] = V.vector4_u32[0];
1254 | 	pDestination[1] = V.vector4_u32[1];
1255 | 	pDestination[2] = V.vector4_u32[2];
1256 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1257 | 	uint32x2_t VL = vget_low_u32(V);
1258 | 	vst1_u32(pDestination, VL);
1259 | 	vst1q_lane_u32(pDestination + 2, *reinterpret_cast<const uint32x4_t*>(&V), 2);
1260 | #elif defined(_XM_SSE_INTRINSICS_)
1261 | 	XMVECTOR T1 = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1));
1262 | 	XMVECTOR T2 = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2));
1263 | 	_mm_store_ss(reinterpret_cast<float*>(pDestination), V);
1264 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination[1]), T1);
1265 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination[2]), T2);
1266 | #endif
1267 | }
1268 | 
1269 | //------------------------------------------------------------------------------
1270 | _Use_decl_annotations_
1271 | inline void XM_CALLCONV XMStoreInt3A
1272 | (
1273 | 	uint32_t*    pDestination,
1274 | 	FXMVECTOR V
1275 | )
1276 | {
1277 | 	assert(pDestination);
1278 | 	assert(((uintptr_t)pDestination & 0xF) == 0);
1279 | #if defined(_XM_NO_INTRINSICS_)
1280 | 	pDestination[0] = V.vector4_u32[0];
1281 | 	pDestination[1] = V.vector4_u32[1];
1282 | 	pDestination[2] = V.vector4_u32[2];
1283 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1284 | 	uint32x2_t VL = vget_low_u32(V);
1285 | 	vst1_u32_ex(pDestination, VL, 64);
1286 | 	vst1q_lane_u32(pDestination + 2, *reinterpret_cast<const uint32x4_t*>(&V), 2);
1287 | #elif defined(_XM_SSE_INTRINSICS_)
1288 | 	XMVECTOR T = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2));
1289 | 	_mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V));
1290 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination[2]), T);
1291 | #endif
1292 | }
1293 | 
1294 | //------------------------------------------------------------------------------
1295 | _Use_decl_annotations_
1296 | inline void XM_CALLCONV XMStoreFloat3
1297 | (
1298 | 	XMFLOAT3* pDestination,
1299 | 	FXMVECTOR V
1300 | )
1301 | {
1302 | 	assert(pDestination);
1303 | #if defined(_XM_NO_INTRINSICS_)
1304 | 	pDestination->x = V.vector4_f32[0];
1305 | 	pDestination->y = V.vector4_f32[1];
1306 | 	pDestination->z = V.vector4_f32[2];
1307 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1308 | 	float32x2_t VL = vget_low_f32(V);
1309 | 	vst1_f32(reinterpret_cast<float*>(pDestination), VL);
1310 | 	vst1q_lane_f32(reinterpret_cast<float*>(pDestination) + 2, V, 2);
1311 | #elif defined(_XM_SSE_INTRINSICS_)
1312 | 	XMVECTOR T1 = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1));
1313 | 	XMVECTOR T2 = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2));
1314 | 	_mm_store_ss(&pDestination->x, V);
1315 | 	_mm_store_ss(&pDestination->y, T1);
1316 | 	_mm_store_ss(&pDestination->z, T2);
1317 | #endif
1318 | }
1319 | 
1320 | //------------------------------------------------------------------------------
1321 | _Use_decl_annotations_
1322 | inline void XM_CALLCONV XMStoreFloat3A
1323 | (
1324 | 	XMFLOAT3A*   pDestination,
1325 | 	FXMVECTOR     V
1326 | )
1327 | {
1328 | 	assert(pDestination);
1329 | 	assert(((uintptr_t)pDestination & 0xF) == 0);
1330 | #if defined(_XM_NO_INTRINSICS_)
1331 | 	pDestination->x = V.vector4_f32[0];
1332 | 	pDestination->y = V.vector4_f32[1];
1333 | 	pDestination->z = V.vector4_f32[2];
1334 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1335 | 	float32x2_t VL = vget_low_f32(V);
1336 | 	vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
1337 | 	vst1q_lane_f32(reinterpret_cast<float*>(pDestination) + 2, V, 2);
1338 | #elif defined(_XM_SSE_INTRINSICS_)
1339 | 	XMVECTOR T = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2));
1340 | 	_mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V));
1341 | 	_mm_store_ss(&pDestination->z, T);
1342 | #endif
1343 | }
1344 | 
1345 | //------------------------------------------------------------------------------
1346 | _Use_decl_annotations_
1347 | inline void XM_CALLCONV XMStoreSInt3
1348 | (
1349 | 	XMINT3* pDestination,
1350 | 	FXMVECTOR V
1351 | )
1352 | {
1353 | 	assert(pDestination);
1354 | #if defined(_XM_NO_INTRINSICS_)
1355 | 	pDestination->x = (int32_t)V.vector4_f32[0];
1356 | 	pDestination->y = (int32_t)V.vector4_f32[1];
1357 | 	pDestination->z = (int32_t)V.vector4_f32[2];
1358 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1359 | 	int32x4_t v = vcvtq_s32_f32(V);
1360 | 	int32x2_t vL = vget_low_s32(v);
1361 | 	vst1_s32(reinterpret_cast<int32_t*>(pDestination), vL);
1362 | 	vst1q_lane_s32(reinterpret_cast<int32_t*>(pDestination) + 2, v, 2);
1363 | #elif defined(_XM_SSE_INTRINSICS_)
1364 | 	// In case of positive overflow, detect it
1365 | 	XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt);
1366 | 	// Float to int conversion
1367 | 	__m128i vResulti = _mm_cvttps_epi32(V);
1368 | 	// If there was positive overflow, set to 0x7FFFFFFF
1369 | 	XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask);
1370 | 	vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti));
1371 | 	vOverflow = _mm_or_ps(vOverflow, vResult);
1372 | 	// Write 3 uints
1373 | 	XMVECTOR T1 = XM_PERMUTE_PS(vOverflow, _MM_SHUFFLE(1, 1, 1, 1));
1374 | 	XMVECTOR T2 = XM_PERMUTE_PS(vOverflow, _MM_SHUFFLE(2, 2, 2, 2));
1375 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->x), vOverflow);
1376 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->y), T1);
1377 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->z), T2);
1378 | #endif
1379 | }
1380 | 
1381 | //------------------------------------------------------------------------------
1382 | _Use_decl_annotations_
1383 | inline void XM_CALLCONV XMStoreUInt3
1384 | (
1385 | 	XMUINT3* pDestination,
1386 | 	FXMVECTOR V
1387 | )
1388 | {
1389 | 	assert(pDestination);
1390 | #if defined(_XM_NO_INTRINSICS_)
1391 | 	pDestination->x = (uint32_t)V.vector4_f32[0];
1392 | 	pDestination->y = (uint32_t)V.vector4_f32[1];
1393 | 	pDestination->z = (uint32_t)V.vector4_f32[2];
1394 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1395 | 	uint32x4_t v = vcvtq_u32_f32(V);
1396 | 	uint32x2_t vL = vget_low_u32(v);
1397 | 	vst1_u32(reinterpret_cast<uint32_t*>(pDestination), vL);
1398 | 	vst1q_lane_u32(reinterpret_cast<uint32_t*>(pDestination) + 2, v, 2);
1399 | #elif defined(_XM_SSE_INTRINSICS_)
1400 | 	// Clamp to >=0
1401 | 	XMVECTOR vResult = _mm_max_ps(V, g_XMZero);
1402 | 	// Any numbers that are too big, set to 0xFFFFFFFFU
1403 | 	XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt);
1404 | 	XMVECTOR vValue = g_XMUnsignedFix;
1405 | 	// Too large for a signed integer?
1406 | 	XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue);
1407 | 	// Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise
1408 | 	vValue = _mm_and_ps(vValue, vMask);
1409 | 	// Perform fixup only on numbers too large (Keeps low bit precision)
1410 | 	vResult = _mm_sub_ps(vResult, vValue);
1411 | 	__m128i vResulti = _mm_cvttps_epi32(vResult);
1412 | 	// Convert from signed to unsigned pnly if greater than 0x80000000
1413 | 	vMask = _mm_and_ps(vMask, g_XMNegativeZero);
1414 | 	vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask);
1415 | 	// On those that are too large, set to 0xFFFFFFFF
1416 | 	vResult = _mm_or_ps(vResult, vOverflow);
1417 | 	// Write 3 uints
1418 | 	XMVECTOR T1 = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(1, 1, 1, 1));
1419 | 	XMVECTOR T2 = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(2, 2, 2, 2));
1420 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->x), vResult);
1421 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->y), T1);
1422 | 	_mm_store_ss(reinterpret_cast<float*>(&pDestination->z), T2);
1423 | #endif
1424 | }
1425 | 
1426 | //------------------------------------------------------------------------------
1427 | _Use_decl_annotations_
1428 | inline void XM_CALLCONV XMStoreInt4
1429 | (
1430 | 	uint32_t*    pDestination,
1431 | 	FXMVECTOR V
1432 | )
1433 | {
1434 | 	assert(pDestination);
1435 | #if defined(_XM_NO_INTRINSICS_)
1436 | 	pDestination[0] = V.vector4_u32[0];
1437 | 	pDestination[1] = V.vector4_u32[1];
1438 | 	pDestination[2] = V.vector4_u32[2];
1439 | 	pDestination[3] = V.vector4_u32[3];
1440 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1441 | 	vst1q_u32(pDestination, V);
1442 | #elif defined(_XM_SSE_INTRINSICS_)
1443 | 	_mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V));
1444 | #endif
1445 | }
1446 | 
1447 | //------------------------------------------------------------------------------
1448 | _Use_decl_annotations_
1449 | inline void XM_CALLCONV XMStoreInt4A
1450 | (
1451 | 	uint32_t*    pDestination,
1452 | 	FXMVECTOR V
1453 | )
1454 | {
1455 | 	assert(pDestination);
1456 | 	assert(((uintptr_t)pDestination & 0xF) == 0);
1457 | #if defined(_XM_NO_INTRINSICS_)
1458 | 	pDestination[0] = V.vector4_u32[0];
1459 | 	pDestination[1] = V.vector4_u32[1];
1460 | 	pDestination[2] = V.vector4_u32[2];
1461 | 	pDestination[3] = V.vector4_u32[3];
1462 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1463 | 	vst1q_u32_ex(pDestination, V, 128);
1464 | #elif defined(_XM_SSE_INTRINSICS_)
1465 | 	_mm_store_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V));
1466 | #endif
1467 | }
1468 | 
1469 | //------------------------------------------------------------------------------
1470 | _Use_decl_annotations_
1471 | inline void XM_CALLCONV XMStoreFloat4
1472 | (
1473 | 	XMFLOAT4* pDestination,
1474 | 	FXMVECTOR  V
1475 | )
1476 | {
1477 | 	assert(pDestination);
1478 | #if defined(_XM_NO_INTRINSICS_)
1479 | 	pDestination->x = V.vector4_f32[0];
1480 | 	pDestination->y = V.vector4_f32[1];
1481 | 	pDestination->z = V.vector4_f32[2];
1482 | 	pDestination->w = V.vector4_f32[3];
1483 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1484 | 	vst1q_f32(reinterpret_cast<float*>(pDestination), V);
1485 | #elif defined(_XM_SSE_INTRINSICS_)
1486 | 	_mm_storeu_ps(&pDestination->x, V);
1487 | #endif
1488 | }
1489 | 
1490 | //------------------------------------------------------------------------------
1491 | _Use_decl_annotations_
1492 | inline void XM_CALLCONV XMStoreFloat4A
1493 | (
1494 | 	XMFLOAT4A*   pDestination,
1495 | 	FXMVECTOR     V
1496 | )
1497 | {
1498 | 	assert(pDestination);
1499 | 	assert(((uintptr_t)pDestination & 0xF) == 0);
1500 | #if defined(_XM_NO_INTRINSICS_)
1501 | 	pDestination->x = V.vector4_f32[0];
1502 | 	pDestination->y = V.vector4_f32[1];
1503 | 	pDestination->z = V.vector4_f32[2];
1504 | 	pDestination->w = V.vector4_f32[3];
1505 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1506 | 	vst1q_f32_ex(reinterpret_cast<float*>(pDestination), V, 128);
1507 | #elif defined(_XM_SSE_INTRINSICS_)
1508 | 	_mm_store_ps(&pDestination->x, V);
1509 | #endif
1510 | }
1511 | 
1512 | //------------------------------------------------------------------------------
1513 | _Use_decl_annotations_
1514 | inline void XM_CALLCONV XMStoreSInt4
1515 | (
1516 | 	XMINT4* pDestination,
1517 | 	FXMVECTOR V
1518 | )
1519 | {
1520 | 	assert(pDestination);
1521 | #if defined(_XM_NO_INTRINSICS_)
1522 | 	pDestination->x = (int32_t)V.vector4_f32[0];
1523 | 	pDestination->y = (int32_t)V.vector4_f32[1];
1524 | 	pDestination->z = (int32_t)V.vector4_f32[2];
1525 | 	pDestination->w = (int32_t)V.vector4_f32[3];
1526 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1527 | 	int32x4_t v = vcvtq_s32_f32(V);
1528 | 	vst1q_s32(reinterpret_cast<int32_t*>(pDestination), v);
1529 | #elif defined(_XM_SSE_INTRINSICS_)
1530 | 	// In case of positive overflow, detect it
1531 | 	XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt);
1532 | 	// Float to int conversion
1533 | 	__m128i vResulti = _mm_cvttps_epi32(V);
1534 | 	// If there was positive overflow, set to 0x7FFFFFFF
1535 | 	XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask);
1536 | 	vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti));
1537 | 	vOverflow = _mm_or_ps(vOverflow, vResult);
1538 | 	_mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vOverflow));
1539 | #endif
1540 | }
1541 | 
1542 | //------------------------------------------------------------------------------
1543 | _Use_decl_annotations_
1544 | inline void XM_CALLCONV XMStoreUInt4
1545 | (
1546 | 	XMUINT4* pDestination,
1547 | 	FXMVECTOR V
1548 | )
1549 | {
1550 | 	assert(pDestination);
1551 | #if defined(_XM_NO_INTRINSICS_)
1552 | 	pDestination->x = (uint32_t)V.vector4_f32[0];
1553 | 	pDestination->y = (uint32_t)V.vector4_f32[1];
1554 | 	pDestination->z = (uint32_t)V.vector4_f32[2];
1555 | 	pDestination->w = (uint32_t)V.vector4_f32[3];
1556 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1557 | 	uint32x4_t v = vcvtq_u32_f32(V);
1558 | 	vst1q_u32(reinterpret_cast<uint32_t*>(pDestination), v);
1559 | #elif defined(_XM_SSE_INTRINSICS_)
1560 | 	// Clamp to >=0
1561 | 	XMVECTOR vResult = _mm_max_ps(V, g_XMZero);
1562 | 	// Any numbers that are too big, set to 0xFFFFFFFFU
1563 | 	XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt);
1564 | 	XMVECTOR vValue = g_XMUnsignedFix;
1565 | 	// Too large for a signed integer?
1566 | 	XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue);
1567 | 	// Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise
1568 | 	vValue = _mm_and_ps(vValue, vMask);
1569 | 	// Perform fixup only on numbers too large (Keeps low bit precision)
1570 | 	vResult = _mm_sub_ps(vResult, vValue);
1571 | 	__m128i vResulti = _mm_cvttps_epi32(vResult);
1572 | 	// Convert from signed to unsigned pnly if greater than 0x80000000
1573 | 	vMask = _mm_and_ps(vMask, g_XMNegativeZero);
1574 | 	vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask);
1575 | 	// On those that are too large, set to 0xFFFFFFFF
1576 | 	vResult = _mm_or_ps(vResult, vOverflow);
1577 | 	_mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vResult));
1578 | #endif
1579 | }
1580 | 
1581 | //------------------------------------------------------------------------------
1582 | _Use_decl_annotations_
1583 | inline void XM_CALLCONV XMStoreFloat3x3
1584 | (
1585 | 	XMFLOAT3X3*	pDestination,
1586 | 	FXMMATRIX	M
1587 | )
1588 | {
1589 | 	assert(pDestination);
1590 | #if defined(_XM_NO_INTRINSICS_)
1591 | 
1592 | 	pDestination->m[0][0] = M.c[0].vector4_f32[0];
1593 | 	pDestination->m[0][1] = M.c[0].vector4_f32[1];
1594 | 	pDestination->m[0][2] = M.c[0].vector4_f32[2];
1595 | 
1596 | 	pDestination->m[1][0] = M.c[1].vector4_f32[0];
1597 | 	pDestination->m[1][1] = M.c[1].vector4_f32[1];
1598 | 	pDestination->m[1][2] = M.c[1].vector4_f32[2];
1599 | 
1600 | 	pDestination->m[2][0] = M.c[2].vector4_f32[0];
1601 | 	pDestination->m[2][1] = M.c[2].vector4_f32[1];
1602 | 	pDestination->m[2][2] = M.c[2].vector4_f32[2];
1603 | 
1604 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1605 | 	float32x4_t T1 = vextq_f32(M.c[0], M.c[1], 1);
1606 | 	float32x4_t T2 = vbslq_f32(g_XMMask3, M.c[0], T1);
1607 | 	vst1q_f32(&pDestination->m[0][0], T2);
1608 | 
1609 | 	T1 = vextq_f32(M.c[1], M.c[1], 1);
1610 | 	T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.c[2]));
1611 | 	vst1q_f32(&pDestination->m[1][1], T2);
1612 | 
1613 | 	vst1q_lane_f32(&pDestination->m[2][2], M.c[2], 2);
1614 | #elif defined(_XM_SSE_INTRINSICS_)
1615 | 	XMVECTOR vTemp1 = M.c[0];
1616 | 	XMVECTOR vTemp2 = M.c[1];
1617 | 	XMVECTOR vTemp3 = M.c[2];
1618 | 	XMVECTOR vWork = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(0, 0, 2, 2));
1619 | 	vTemp1 = _mm_shuffle_ps(vTemp1, vWork, _MM_SHUFFLE(2, 0, 1, 0));
1620 | 	_mm_storeu_ps(&pDestination->m[0][0], vTemp1);
1621 | 	vTemp2 = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1));
1622 | 	_mm_storeu_ps(&pDestination->m[1][1], vTemp2);
1623 | 	vTemp3 = XM_PERMUTE_PS(vTemp3, _MM_SHUFFLE(2, 2, 2, 2));
1624 | 	_mm_store_ss(&pDestination->m[2][2], vTemp3);
1625 | #endif
1626 | }
1627 | 
1628 | //------------------------------------------------------------------------------
1629 | _Use_decl_annotations_
1630 | inline void XM_CALLCONV XMStoreFloat4x3
1631 | (
1632 | 	XMFLOAT4X3* pDestination,
1633 | 	FXMMATRIX M
1634 | )
1635 | {
1636 | 	assert(pDestination);
1637 | #if defined(_XM_NO_INTRINSICS_)
1638 | 
1639 | 	pDestination->m[0][0] = M.c[0].vector4_f32[0];
1640 | 	pDestination->m[0][1] = M.c[0].vector4_f32[1];
1641 | 	pDestination->m[0][2] = M.c[0].vector4_f32[2];
1642 | 	pDestination->m[0][3] = M.c[0].vector4_f32[3];
1643 | 
1644 | 	pDestination->m[1][0] = M.c[1].vector4_f32[0];
1645 | 	pDestination->m[1][1] = M.c[1].vector4_f32[1];
1646 | 	pDestination->m[1][2] = M.c[1].vector4_f32[2];
1647 | 	pDestination->m[1][3] = M.c[1].vector4_f32[3];
1648 | 
1649 | 	pDestination->m[2][0] = M.c[2].vector4_f32[0];
1650 | 	pDestination->m[2][1] = M.c[2].vector4_f32[1];
1651 | 	pDestination->m[2][2] = M.c[2].vector4_f32[2];
1652 | 	pDestination->m[2][3] = M.c[2].vector4_f32[3];
1653 | 
1654 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1655 | 	vst1q_f32(reinterpret_cast<float*>(&pDestination->_11), M.c[0]);
1656 | 	vst1q_f32(reinterpret_cast<float*>(&pDestination->_12), M.c[1]);
1657 | 	vst1q_f32(reinterpret_cast<float*>(&pDestination->_13), M.c[2]);
1658 | #elif defined(_XM_SSE_INTRINSICS_)
1659 | 	_mm_storeu_ps(&pDestination->_11, M.c[0]);
1660 | 	_mm_storeu_ps(&pDestination->_12, M.c[1]);
1661 | 	_mm_storeu_ps(&pDestination->_13, M.c[2]);
1662 | #endif
1663 | }
1664 | 
1665 | //------------------------------------------------------------------------------
1666 | _Use_decl_annotations_
1667 | inline void XM_CALLCONV XMStoreFloat4x3A
1668 | (
1669 | 	XMFLOAT4X3A*	pDestination,
1670 | 	FXMMATRIX		M
1671 | )
1672 | {
1673 | 	assert(pDestination);
1674 | 	assert(((uintptr_t)pDestination & 0xF) == 0);
1675 | #if defined(_XM_NO_INTRINSICS_)
1676 | 
1677 | 	pDestination->m[0][0] = M.c[0].vector4_f32[0];
1678 | 	pDestination->m[0][1] = M.c[0].vector4_f32[1];
1679 | 	pDestination->m[0][2] = M.c[0].vector4_f32[2];
1680 | 	pDestination->m[0][3] = M.c[0].vector4_f32[3];
1681 | 
1682 | 	pDestination->m[1][0] = M.c[1].vector4_f32[0];
1683 | 	pDestination->m[1][1] = M.c[1].vector4_f32[1];
1684 | 	pDestination->m[1][2] = M.c[1].vector4_f32[2];
1685 | 	pDestination->m[1][3] = M.c[1].vector4_f32[3];
1686 | 
1687 | 	pDestination->m[2][0] = M.c[2].vector4_f32[0];
1688 | 	pDestination->m[2][1] = M.c[2].vector4_f32[1];
1689 | 	pDestination->m[2][2] = M.c[2].vector4_f32[2];
1690 | 	pDestination->m[2][3] = M.c[2].vector4_f32[3];
1691 | 
1692 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1693 | 	vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.c[0], 128);
1694 | 	vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_12), M.c[1], 128);
1695 | 	vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_13), M.c[2], 128);
1696 | #elif defined(_XM_SSE_INTRINSICS_)
1697 | 	_mm_store_ps(&pDestination->_11, M.c[0]);
1698 | 	_mm_store_ps(&pDestination->_12, M.c[1]);
1699 | 	_mm_store_ps(&pDestination->_13, M.c[2]);
1700 | #endif
1701 | }
1702 | 
1703 | //------------------------------------------------------------------------------
1704 | _Use_decl_annotations_
1705 | inline void XM_CALLCONV XMStoreFloat4x4
1706 | (
1707 | 	XMFLOAT4X4* pDestination,
1708 | 	FXMMATRIX M
1709 | )
1710 | {
1711 | 	assert(pDestination);
1712 | #if defined(_XM_NO_INTRINSICS_)
1713 | 
1714 | 	pDestination->m[0][0] = M.c[0].vector4_f32[0];
1715 | 	pDestination->m[0][1] = M.c[0].vector4_f32[1];
1716 | 	pDestination->m[0][2] = M.c[0].vector4_f32[2];
1717 | 	pDestination->m[0][3] = M.c[0].vector4_f32[3];
1718 | 
1719 | 	pDestination->m[1][0] = M.c[1].vector4_f32[0];
1720 | 	pDestination->m[1][1] = M.c[1].vector4_f32[1];
1721 | 	pDestination->m[1][2] = M.c[1].vector4_f32[2];
1722 | 	pDestination->m[1][3] = M.c[1].vector4_f32[3];
1723 | 
1724 | 	pDestination->m[2][0] = M.c[2].vector4_f32[0];
1725 | 	pDestination->m[2][1] = M.c[2].vector4_f32[1];
1726 | 	pDestination->m[2][2] = M.c[2].vector4_f32[2];
1727 | 	pDestination->m[2][3] = M.c[2].vector4_f32[3];
1728 | 
1729 | 	pDestination->m[3][0] = M.c[3].vector4_f32[0];
1730 | 	pDestination->m[3][1] = M.c[3].vector4_f32[1];
1731 | 	pDestination->m[3][2] = M.c[3].vector4_f32[2];
1732 | 	pDestination->m[3][3] = M.c[3].vector4_f32[3];
1733 | 
1734 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1735 | 	vst1q_f32(reinterpret_cast<float*>(&pDestination->_11), M.c[0]);
1736 | 	vst1q_f32(reinterpret_cast<float*>(&pDestination->_12), M.c[1]);
1737 | 	vst1q_f32(reinterpret_cast<float*>(&pDestination->_13), M.c[2]);
1738 | 	vst1q_f32(reinterpret_cast<float*>(&pDestination->_14), M.c[3]);
1739 | #elif defined(_XM_SSE_INTRINSICS_)
1740 | 	_mm_storeu_ps(&pDestination->_11, M.c[0]);
1741 | 	_mm_storeu_ps(&pDestination->_12, M.c[1]);
1742 | 	_mm_storeu_ps(&pDestination->_13, M.c[2]);
1743 | 	_mm_storeu_ps(&pDestination->_14, M.c[3]);
1744 | #endif
1745 | }
1746 | 
1747 | //------------------------------------------------------------------------------
1748 | _Use_decl_annotations_
1749 | inline void XM_CALLCONV XMStoreFloat4x4A
1750 | (
1751 | 	XMFLOAT4X4A*	pDestination,
1752 | 	FXMMATRIX		M
1753 | )
1754 | {
1755 | 	assert(pDestination);
1756 | 	assert(((uintptr_t)pDestination & 0xF) == 0);
1757 | #if defined(_XM_NO_INTRINSICS_)
1758 | 
1759 | 	pDestination->m[0][0] = M.c[0].vector4_f32[0];
1760 | 	pDestination->m[0][1] = M.c[0].vector4_f32[1];
1761 | 	pDestination->m[0][2] = M.c[0].vector4_f32[2];
1762 | 	pDestination->m[0][3] = M.c[0].vector4_f32[3];
1763 | 
1764 | 	pDestination->m[1][0] = M.c[1].vector4_f32[0];
1765 | 	pDestination->m[1][1] = M.c[1].vector4_f32[1];
1766 | 	pDestination->m[1][2] = M.c[1].vector4_f32[2];
1767 | 	pDestination->m[1][3] = M.c[1].vector4_f32[3];
1768 | 
1769 | 	pDestination->m[2][0] = M.c[2].vector4_f32[0];
1770 | 	pDestination->m[2][1] = M.c[2].vector4_f32[1];
1771 | 	pDestination->m[2][2] = M.c[2].vector4_f32[2];
1772 | 	pDestination->m[2][3] = M.c[2].vector4_f32[3];
1773 | 
1774 | 	pDestination->m[3][0] = M.c[3].vector4_f32[0];
1775 | 	pDestination->m[3][1] = M.c[3].vector4_f32[1];
1776 | 	pDestination->m[3][2] = M.c[3].vector4_f32[2];
1777 | 	pDestination->m[3][3] = M.c[3].vector4_f32[3];
1778 | 
1779 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1780 | 	vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.c[0], 128);
1781 | 	vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_12), M.c[1], 128);
1782 | 	vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_13), M.c[2], 128);
1783 | 	vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_14), M.c[3], 128);
1784 | #elif defined(_XM_SSE_INTRINSICS_)
1785 | 	_mm_store_ps(&pDestination->_11, M.c[0]);
1786 | 	_mm_store_ps(&pDestination->_12, M.c[1]);
1787 | 	_mm_store_ps(&pDestination->_13, M.c[2]);
1788 | 	_mm_store_ps(&pDestination->_14, M.c[3]);
1789 | #endif
1790 | }
1791 | 


--------------------------------------------------------------------------------
/Inc/XMathMisc.inl:
--------------------------------------------------------------------------------
   1 | //-------------------------------------------------------------------------------------
   2 | // DirectXMathMisc.inl -- SIMD C++ Math library
   3 | //
   4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
   5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
   6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
   7 | // PARTICULAR PURPOSE.
   8 | //  
   9 | // Copyright (c) Microsoft Corporation. All rights reserved.
  10 | //-------------------------------------------------------------------------------------
  11 | 
  12 | #pragma once
  13 | 
  14 | /****************************************************************************
  15 | *
  16 | * Quaternion
  17 | *
  18 | ****************************************************************************/
  19 | 
  20 | //------------------------------------------------------------------------------
  21 | // Comparison operations
  22 | //------------------------------------------------------------------------------
  23 | 
  24 | //------------------------------------------------------------------------------
  25 | 
  26 | inline bool XM_CALLCONV XMQuaternionEqual
  27 | (
  28 | 	FXMVECTOR Q1,
  29 | 	FXMVECTOR Q2
  30 | )
  31 | {
  32 | 	return XMVector4Equal(Q1, Q2);
  33 | }
  34 | 
  35 | //------------------------------------------------------------------------------
  36 | 
  37 | inline bool XM_CALLCONV XMQuaternionNotEqual
  38 | (
  39 | 	FXMVECTOR Q1,
  40 | 	FXMVECTOR Q2
  41 | )
  42 | {
  43 | 	return XMVector4NotEqual(Q1, Q2);
  44 | }
  45 | 
  46 | //------------------------------------------------------------------------------
  47 | 
  48 | inline bool XM_CALLCONV XMQuaternionIsNaN
  49 | (
  50 | 	FXMVECTOR Q
  51 | )
  52 | {
  53 | 	return XMVector4IsNaN(Q);
  54 | }
  55 | 
  56 | //------------------------------------------------------------------------------
  57 | 
  58 | inline bool XM_CALLCONV XMQuaternionIsInfinite
  59 | (
  60 | 	FXMVECTOR Q
  61 | )
  62 | {
  63 | 	return XMVector4IsInfinite(Q);
  64 | }
  65 | 
  66 | //------------------------------------------------------------------------------
  67 | 
  68 | inline bool XM_CALLCONV XMQuaternionIsIdentity
  69 | (
  70 | 	FXMVECTOR Q
  71 | )
  72 | {
  73 | 	return XMVector4Equal(Q, g_XMIdentityR3.v);
  74 | }
  75 | 
  76 | //------------------------------------------------------------------------------
  77 | // Computation operations
  78 | //------------------------------------------------------------------------------
  79 | 
  80 | //------------------------------------------------------------------------------
  81 | 
  82 | inline XMVECTOR XM_CALLCONV XMQuaternionDot
  83 | (
  84 | 	FXMVECTOR Q1,
  85 | 	FXMVECTOR Q2
  86 | )
  87 | {
  88 | 	return XMVector4Dot(Q1, Q2);
  89 | }
  90 | 
  91 | //------------------------------------------------------------------------------
  92 | 
  93 | inline XMVECTOR XM_CALLCONV XMQuaternionMultiply
  94 | (
  95 | 	FXMVECTOR Q1,
  96 | 	FXMVECTOR Q2
  97 | )
  98 | {
  99 | 	// Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2)
 100 | 
 101 | 	// [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y),
 102 | 	//   (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x),
 103 | 	//   (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w),
 104 | 	//   (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ]
 105 | 
 106 | #if defined(_XM_NO_INTRINSICS_)
 107 | 	XMVECTOR Result = {
 108 | 		(Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]),
 109 | 		(Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]),
 110 | 		(Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]),
 111 | 		(Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) };
 112 | 	return Result;
 113 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 114 | 	static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f };
 115 | 	static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f };
 116 | 	static const XMVECTORF32 ControlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f };
 117 | 
 118 | 	float32x2_t Q2L = vget_low_f32(Q2);
 119 | 	float32x2_t Q2H = vget_high_f32(Q2);
 120 | 
 121 | 	float32x4_t Q2X = vdupq_lane_f32(Q2L, 0);
 122 | 	float32x4_t Q2Y = vdupq_lane_f32(Q2L, 1);
 123 | 	float32x4_t Q2Z = vdupq_lane_f32(Q2H, 0);
 124 | 	XMVECTOR vResult = vmulq_lane_f32(Q1, Q2H, 1);
 125 | 
 126 | 	// Mul by Q1WZYX
 127 | 	float32x4_t vTemp = vrev64q_f32(Q1);
 128 | 	vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp));
 129 | 	Q2X = vmulq_f32(Q2X, vTemp);
 130 | 	vResult = vmlaq_f32(vResult, Q2X, ControlWZYX);
 131 | 
 132 | 	// Mul by Q1ZWXY
 133 | 	vTemp = vrev64q_u32(vTemp);
 134 | 	Q2Y = vmulq_f32(Q2Y, vTemp);
 135 | 	vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY);
 136 | 
 137 | 	// Mul by Q1YXWZ
 138 | 	vTemp = vrev64q_u32(vTemp);
 139 | 	vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp));
 140 | 	Q2Z = vmulq_f32(Q2Z, vTemp);
 141 | 	vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ);
 142 | 	return vResult;
 143 | #elif defined(_XM_SSE_INTRINSICS_)
 144 | 	static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f };
 145 | 	static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f };
 146 | 	static const XMVECTORF32 ControlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f };
 147 | 	// Copy to SSE registers and use as few as possible for x86
 148 | 	XMVECTOR Q2X = Q2;
 149 | 	XMVECTOR Q2Y = Q2;
 150 | 	XMVECTOR Q2Z = Q2;
 151 | 	XMVECTOR vResult = Q2;
 152 | 	// Splat with one instruction
 153 | 	vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 3, 3, 3));
 154 | 	Q2X = XM_PERMUTE_PS(Q2X, _MM_SHUFFLE(0, 0, 0, 0));
 155 | 	Q2Y = XM_PERMUTE_PS(Q2Y, _MM_SHUFFLE(1, 1, 1, 1));
 156 | 	Q2Z = XM_PERMUTE_PS(Q2Z, _MM_SHUFFLE(2, 2, 2, 2));
 157 | 	// Retire Q1 and perform Q1*Q2W
 158 | 	vResult = _mm_mul_ps(vResult, Q1);
 159 | 	XMVECTOR Q1Shuffle = Q1;
 160 | 	// Shuffle the copies of Q1
 161 | 	Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3));
 162 | 	// Mul by Q1WZYX
 163 | 	Q2X = _mm_mul_ps(Q2X, Q1Shuffle);
 164 | 	Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1));
 165 | 	// Flip the signs on y and z
 166 | 	Q2X = _mm_mul_ps(Q2X, ControlWZYX);
 167 | 	// Mul by Q1ZWXY
 168 | 	Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle);
 169 | 	Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3));
 170 | 	// Flip the signs on z and w
 171 | 	Q2Y = _mm_mul_ps(Q2Y, ControlZWXY);
 172 | 	// Mul by Q1YXWZ
 173 | 	Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle);
 174 | 	vResult = _mm_add_ps(vResult, Q2X);
 175 | 	// Flip the signs on x and w
 176 | 	Q2Z = _mm_mul_ps(Q2Z, ControlYXWZ);
 177 | 	Q2Y = _mm_add_ps(Q2Y, Q2Z);
 178 | 	vResult = _mm_add_ps(vResult, Q2Y);
 179 | 	return vResult;
 180 | #endif
 181 | }
 182 | 
 183 | //------------------------------------------------------------------------------
 184 | 
 185 | inline XMVECTOR XM_CALLCONV XMQuaternionLengthSq
 186 | (
 187 | 	FXMVECTOR Q
 188 | )
 189 | {
 190 | 	return XMVector4LengthSq(Q);
 191 | }
 192 | 
 193 | //------------------------------------------------------------------------------
 194 | 
 195 | inline XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength
 196 | (
 197 | 	FXMVECTOR Q
 198 | )
 199 | {
 200 | 	return XMVector4ReciprocalLength(Q);
 201 | }
 202 | 
 203 | //------------------------------------------------------------------------------
 204 | 
 205 | inline XMVECTOR XM_CALLCONV XMQuaternionLength
 206 | (
 207 | 	FXMVECTOR Q
 208 | )
 209 | {
 210 | 	return XMVector4Length(Q);
 211 | }
 212 | 
 213 | //------------------------------------------------------------------------------
 214 | 
 215 | inline XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst
 216 | (
 217 | 	FXMVECTOR Q
 218 | )
 219 | {
 220 | 	return XMVector4NormalizeEst(Q);
 221 | }
 222 | 
 223 | //------------------------------------------------------------------------------
 224 | 
 225 | inline XMVECTOR XM_CALLCONV XMQuaternionNormalize
 226 | (
 227 | 	FXMVECTOR Q
 228 | )
 229 | {
 230 | 	return XMVector4Normalize(Q);
 231 | }
 232 | 
 233 | //------------------------------------------------------------------------------
 234 | 
 235 | inline XMVECTOR XM_CALLCONV XMQuaternionConjugate
 236 | (
 237 | 	FXMVECTOR Q
 238 | )
 239 | {
 240 | #if defined(_XM_NO_INTRINSICS_)
 241 | 	XMVECTOR Result = {
 242 | 		-Q.vector4_f32[0],
 243 | 		-Q.vector4_f32[1],
 244 | 		-Q.vector4_f32[2],
 245 | 		Q.vector4_f32[3]
 246 | 	};
 247 | 	return Result;
 248 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 249 | 	static const XMVECTORF32 NegativeOne3 = { -1.0f,-1.0f,-1.0f,1.0f };
 250 | 	return vmulq_f32(Q, NegativeOne3.v);
 251 | #elif defined(_XM_SSE_INTRINSICS_)
 252 | 	static const XMVECTORF32 NegativeOne3 = { -1.0f,-1.0f,-1.0f,1.0f };
 253 | 	return _mm_mul_ps(Q, NegativeOne3);
 254 | #endif
 255 | }
 256 | 
 257 | //------------------------------------------------------------------------------
 258 | 
 259 | inline XMVECTOR XM_CALLCONV XMQuaternionInverse
 260 | (
 261 | 	FXMVECTOR Q
 262 | )
 263 | {
 264 | 	const XMVECTOR  Zero = XMVectorZero();
 265 | 
 266 | 	XMVECTOR L = XMVector4LengthSq(Q);
 267 | 	XMVECTOR Conjugate = XMQuaternionConjugate(Q);
 268 | 
 269 | 	XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
 270 | 
 271 | 	XMVECTOR Result = XMVectorDivide(Conjugate, L);
 272 | 
 273 | 	Result = XMVectorSelect(Result, Zero, Control);
 274 | 
 275 | 	return Result;
 276 | }
 277 | 
 278 | //------------------------------------------------------------------------------
 279 | 
 280 | inline XMVECTOR XM_CALLCONV XMQuaternionLn
 281 | (
 282 | 	FXMVECTOR Q
 283 | )
 284 | {
 285 | 	static const XMVECTORF32 OneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f };
 286 | 
 287 | 	XMVECTOR QW = XMVectorSplatW(Q);
 288 | 	XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
 289 | 
 290 | 	XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v);
 291 | 
 292 | 	XMVECTOR Theta = XMVectorACos(QW);
 293 | 	XMVECTOR SinTheta = XMVectorSin(Theta);
 294 | 
 295 | 	XMVECTOR S = XMVectorDivide(Theta, SinTheta);
 296 | 
 297 | 	XMVECTOR Result = XMVectorMultiply(Q0, S);
 298 | 	Result = XMVectorSelect(Q0, Result, ControlW);
 299 | 
 300 | 	return Result;
 301 | }
 302 | 
 303 | //------------------------------------------------------------------------------
 304 | 
 305 | inline XMVECTOR XM_CALLCONV XMQuaternionExp
 306 | (
 307 | 	FXMVECTOR Q
 308 | )
 309 | {
 310 | 	XMVECTOR Theta = XMVector3Length(Q);
 311 | 
 312 | 	XMVECTOR SinTheta, CosTheta;
 313 | 	XMVectorSinCos(&SinTheta, &CosTheta, Theta);
 314 | 
 315 | 	XMVECTOR S = XMVectorDivide(SinTheta, Theta);
 316 | 
 317 | 	XMVECTOR Result = XMVectorMultiply(Q, S);
 318 | 
 319 | 	const XMVECTOR Zero = XMVectorZero();
 320 | 	XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
 321 | 	Result = XMVectorSelect(Result, Q, Control);
 322 | 
 323 | 	Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
 324 | 
 325 | 	return Result;
 326 | }
 327 | 
 328 | //------------------------------------------------------------------------------
 329 | 
 330 | inline XMVECTOR XM_CALLCONV XMQuaternionSlerp
 331 | (
 332 | 	FXMVECTOR Q0,
 333 | 	FXMVECTOR Q1,
 334 | 	float    t
 335 | )
 336 | {
 337 | 	XMVECTOR T = XMVectorReplicate(t);
 338 | 	return XMQuaternionSlerpV(Q0, Q1, T);
 339 | }
 340 | 
 341 | //------------------------------------------------------------------------------
 342 | 
 343 | inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV
 344 | (
 345 | 	FXMVECTOR Q0,
 346 | 	FXMVECTOR Q1,
 347 | 	FXMVECTOR T
 348 | )
 349 | {
 350 | 	assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
 351 | 
 352 | 	// Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
 353 | 
 354 | #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
 355 | 
 356 | 	const XMVECTORF32 OneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f };
 357 | 
 358 | 	XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
 359 | 
 360 | 	const XMVECTOR Zero = XMVectorZero();
 361 | 	XMVECTOR Control = XMVectorLess(CosOmega, Zero);
 362 | 	XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
 363 | 
 364 | 	CosOmega = XMVectorMultiply(CosOmega, Sign);
 365 | 
 366 | 	Control = XMVectorLess(CosOmega, OneMinusEpsilon);
 367 | 
 368 | 	XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
 369 | 	SinOmega = XMVectorSqrt(SinOmega);
 370 | 
 371 | 	XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
 372 | 
 373 | 	XMVECTOR SignMask = XMVectorSplatSignMask();
 374 | 	XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2);
 375 | 	SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
 376 | 	V01 = XMVectorXorInt(V01, SignMask);
 377 | 	V01 = XMVectorAdd(g_XMIdentityR0.v, V01);
 378 | 
 379 | 	XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega);
 380 | 
 381 | 	XMVECTOR S0 = XMVectorMultiply(V01, Omega);
 382 | 	S0 = XMVectorSin(S0);
 383 | 	S0 = XMVectorMultiply(S0, InvSinOmega);
 384 | 
 385 | 	S0 = XMVectorSelect(V01, S0, Control);
 386 | 
 387 | 	XMVECTOR S1 = XMVectorSplatY(S0);
 388 | 	S0 = XMVectorSplatX(S0);
 389 | 
 390 | 	S1 = XMVectorMultiply(S1, Sign);
 391 | 
 392 | 	XMVECTOR Result = XMVectorMultiply(Q0, S0);
 393 | 	Result = XMVectorMultiplyAdd(Q1, S1, Result);
 394 | 
 395 | 	return Result;
 396 | 
 397 | #elif defined(_XM_SSE_INTRINSICS_)
 398 | 	static const XMVECTORF32 OneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f };
 399 | 	static const XMVECTORU32 SignMask2 = { 0x80000000,0x00000000,0x00000000,0x00000000 };
 400 | 
 401 | 	XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
 402 | 
 403 | 	const XMVECTOR Zero = XMVectorZero();
 404 | 	XMVECTOR Control = XMVectorLess(CosOmega, Zero);
 405 | 	XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
 406 | 
 407 | 	CosOmega = _mm_mul_ps(CosOmega, Sign);
 408 | 
 409 | 	Control = XMVectorLess(CosOmega, OneMinusEpsilon);
 410 | 
 411 | 	XMVECTOR SinOmega = _mm_mul_ps(CosOmega, CosOmega);
 412 | 	SinOmega = _mm_sub_ps(g_XMOne, SinOmega);
 413 | 	SinOmega = _mm_sqrt_ps(SinOmega);
 414 | 
 415 | 	XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
 416 | 
 417 | 	XMVECTOR V01 = XM_PERMUTE_PS(T, _MM_SHUFFLE(2, 3, 0, 1));
 418 | 	V01 = _mm_and_ps(V01, g_XMMaskXY);
 419 | 	V01 = _mm_xor_ps(V01, SignMask2);
 420 | 	V01 = _mm_add_ps(g_XMIdentityR0, V01);
 421 | 
 422 | 	XMVECTOR S0 = _mm_mul_ps(V01, Omega);
 423 | 	S0 = XMVectorSin(S0);
 424 | 	S0 = _mm_div_ps(S0, SinOmega);
 425 | 
 426 | 	S0 = XMVectorSelect(V01, S0, Control);
 427 | 
 428 | 	XMVECTOR S1 = XMVectorSplatY(S0);
 429 | 	S0 = XMVectorSplatX(S0);
 430 | 
 431 | 	S1 = _mm_mul_ps(S1, Sign);
 432 | 	XMVECTOR Result = _mm_mul_ps(Q0, S0);
 433 | 	S1 = _mm_mul_ps(S1, Q1);
 434 | 	Result = _mm_add_ps(Result, S1);
 435 | 	return Result;
 436 | #endif
 437 | }
 438 | 
 439 | //------------------------------------------------------------------------------
 440 | 
 441 | inline XMVECTOR XM_CALLCONV XMQuaternionSquad
 442 | (
 443 | 	FXMVECTOR Q0,
 444 | 	FXMVECTOR Q1,
 445 | 	FXMVECTOR Q2,
 446 | 	GXMVECTOR Q3,
 447 | 	float    t
 448 | )
 449 | {
 450 | 	XMVECTOR T = XMVectorReplicate(t);
 451 | 	return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
 452 | }
 453 | 
 454 | //------------------------------------------------------------------------------
 455 | 
 456 | inline XMVECTOR XM_CALLCONV XMQuaternionSquadV
 457 | (
 458 | 	FXMVECTOR Q0,
 459 | 	FXMVECTOR Q1,
 460 | 	FXMVECTOR Q2,
 461 | 	GXMVECTOR Q3,
 462 | 	HXMVECTOR T
 463 | )
 464 | {
 465 | 	assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
 466 | 
 467 | 	XMVECTOR TP = T;
 468 | 	const XMVECTOR Two = XMVectorSplatConstant(2, 0);
 469 | 
 470 | 	XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T);
 471 | 	XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T);
 472 | 
 473 | 	TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
 474 | 	TP = XMVectorMultiply(TP, Two);
 475 | 
 476 | 	XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP);
 477 | 
 478 | 	return Result;
 479 | }
 480 | 
 481 | //------------------------------------------------------------------------------
 482 | _Use_decl_annotations_
 483 | inline void XM_CALLCONV XMQuaternionSquadSetup
 484 | (
 485 | 	XMVECTOR* pA,
 486 | 	XMVECTOR* pB,
 487 | 	XMVECTOR* pC,
 488 | 	FXMVECTOR  Q0,
 489 | 	FXMVECTOR  Q1,
 490 | 	FXMVECTOR  Q2,
 491 | 	GXMVECTOR  Q3
 492 | )
 493 | {
 494 | 	assert(pA);
 495 | 	assert(pB);
 496 | 	assert(pC);
 497 | 
 498 | 	XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
 499 | 	XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
 500 | 	XMVECTOR SQ2 = XMVectorNegate(Q2);
 501 | 
 502 | 	XMVECTOR Control1 = XMVectorLess(LS12, LD12);
 503 | 	SQ2 = XMVectorSelect(Q2, SQ2, Control1);
 504 | 
 505 | 	XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
 506 | 	XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
 507 | 	XMVECTOR SQ0 = XMVectorNegate(Q0);
 508 | 
 509 | 	XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
 510 | 	XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
 511 | 	XMVECTOR SQ3 = XMVectorNegate(Q3);
 512 | 
 513 | 	XMVECTOR Control0 = XMVectorLess(LS01, LD01);
 514 | 	XMVECTOR Control2 = XMVectorLess(LS23, LD23);
 515 | 
 516 | 	SQ0 = XMVectorSelect(Q0, SQ0, Control0);
 517 | 	SQ3 = XMVectorSelect(Q3, SQ3, Control2);
 518 | 
 519 | 	XMVECTOR InvQ1 = XMQuaternionInverse(Q1);
 520 | 	XMVECTOR InvQ2 = XMQuaternionInverse(SQ2);
 521 | 
 522 | 	XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
 523 | 	XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
 524 | 	XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
 525 | 	XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
 526 | 
 527 | 	const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
 528 | 
 529 | 	XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
 530 | 	XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
 531 | 	ExpQ02 = XMQuaternionExp(ExpQ02);
 532 | 	ExpQ13 = XMQuaternionExp(ExpQ13);
 533 | 
 534 | 	*pA = XMQuaternionMultiply(Q1, ExpQ02);
 535 | 	*pB = XMQuaternionMultiply(SQ2, ExpQ13);
 536 | 	*pC = SQ2;
 537 | }
 538 | 
 539 | //------------------------------------------------------------------------------
 540 | 
 541 | inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentric
 542 | (
 543 | 	FXMVECTOR Q0,
 544 | 	FXMVECTOR Q1,
 545 | 	FXMVECTOR Q2,
 546 | 	float    f,
 547 | 	float    g
 548 | )
 549 | {
 550 | 	float s = f + g;
 551 | 
 552 | 	XMVECTOR Result;
 553 | 	if ((s < 0.00001f) && (s > -0.00001f))
 554 | 	{
 555 | 		Result = Q0;
 556 | 	}
 557 | 	else
 558 | 	{
 559 | 		XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s);
 560 | 		XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s);
 561 | 
 562 | 		Result = XMQuaternionSlerp(Q01, Q02, g / s);
 563 | 	}
 564 | 
 565 | 	return Result;
 566 | }
 567 | 
 568 | //------------------------------------------------------------------------------
 569 | 
 570 | inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV
 571 | (
 572 | 	FXMVECTOR Q0,
 573 | 	FXMVECTOR Q1,
 574 | 	FXMVECTOR Q2,
 575 | 	GXMVECTOR F,
 576 | 	HXMVECTOR G
 577 | )
 578 | {
 579 | 	assert((XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)));
 580 | 	assert((XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)));
 581 | 
 582 | 	const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16);
 583 | 
 584 | 	XMVECTOR S = XMVectorAdd(F, G);
 585 | 
 586 | 	XMVECTOR Result;
 587 | 	if (XMVector4InBounds(S, Epsilon))
 588 | 	{
 589 | 		Result = Q0;
 590 | 	}
 591 | 	else
 592 | 	{
 593 | 		XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S);
 594 | 		XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S);
 595 | 		XMVECTOR GS = XMVectorReciprocal(S);
 596 | 		GS = XMVectorMultiply(G, GS);
 597 | 
 598 | 		Result = XMQuaternionSlerpV(Q01, Q02, GS);
 599 | 	}
 600 | 
 601 | 	return Result;
 602 | }
 603 | 
 604 | //------------------------------------------------------------------------------
 605 | // Transformation operations
 606 | //------------------------------------------------------------------------------
 607 | 
 608 | //------------------------------------------------------------------------------
 609 | 
 610 | inline XMVECTOR XM_CALLCONV XMQuaternionIdentity()
 611 | {
 612 | 	return g_XMIdentityR3.v;
 613 | }
 614 | 
 615 | //------------------------------------------------------------------------------
 616 | 
 617 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYaw
 618 | (
 619 | 	float Pitch,
 620 | 	float Yaw,
 621 | 	float Roll
 622 | )
 623 | {
 624 | 	XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
 625 | 	XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
 626 | 	return Q;
 627 | }
 628 | 
 629 | //------------------------------------------------------------------------------
 630 | 
 631 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector
 632 | (
 633 | 	FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
 634 | )
 635 | {
 636 | 	static const XMVECTORF32  Sign = { 1.0f, -1.0f, -1.0f, 1.0f };
 637 | 
 638 | 	XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
 639 | 
 640 | 	XMVECTOR SinAngles, CosAngles;
 641 | 	XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
 642 | 
 643 | 	XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles);
 644 | 	XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles);
 645 | 	XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles);
 646 | 	XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles);
 647 | 	XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles);
 648 | 	XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles);
 649 | 
 650 | 	XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v);
 651 | 	XMVECTOR Q0 = XMVectorMultiply(P0, Y0);
 652 | 	Q1 = XMVectorMultiply(Q1, Y1);
 653 | 	Q0 = XMVectorMultiply(Q0, R0);
 654 | 	XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0);
 655 | 
 656 | 	return Q;
 657 | }
 658 | 
 659 | //------------------------------------------------------------------------------
 660 | 
 661 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationNormal
 662 | (
 663 | 	FXMVECTOR NormalAxis,
 664 | 	float    Angle
 665 | )
 666 | {
 667 | #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
 668 | 
 669 | 	XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
 670 | 
 671 | 	float SinV, CosV;
 672 | 	XMScalarSinCos(&SinV, &CosV, 0.5f * Angle);
 673 | 
 674 | 	XMVECTOR Scale = XMVectorSet(SinV, SinV, SinV, CosV);
 675 | 	return XMVectorMultiply(N, Scale);
 676 | #elif defined(_XM_SSE_INTRINSICS_)
 677 | 	XMVECTOR N = _mm_and_ps(NormalAxis, g_XMMask3);
 678 | 	N = _mm_or_ps(N, g_XMIdentityR3);
 679 | 	XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
 680 | 	XMVECTOR vSine;
 681 | 	XMVECTOR vCosine;
 682 | 	XMVectorSinCos(&vSine, &vCosine, Scale);
 683 | 	Scale = _mm_and_ps(vSine, g_XMMask3);
 684 | 	vCosine = _mm_and_ps(vCosine, g_XMMaskW);
 685 | 	Scale = _mm_or_ps(Scale, vCosine);
 686 | 	N = _mm_mul_ps(N, Scale);
 687 | 	return N;
 688 | #endif
 689 | }
 690 | 
 691 | //------------------------------------------------------------------------------
 692 | 
 693 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationAxis
 694 | (
 695 | 	FXMVECTOR Axis,
 696 | 	float    Angle
 697 | )
 698 | {
 699 | 	assert(!XMVector3Equal(Axis, XMVectorZero()));
 700 | 	assert(!XMVector3IsInfinite(Axis));
 701 | 
 702 | 	XMVECTOR Normal = XMVector3Normalize(Axis);
 703 | 	XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle);
 704 | 	return Q;
 705 | }
 706 | 
 707 | //------------------------------------------------------------------------------
 708 | 
 709 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix
 710 | (
 711 | 	FXMMATRIX M
 712 | )
 713 | {
 714 | #if defined(_XM_NO_INTRINSICS_)
 715 | 
 716 | 	XMVECTORF32 q;
 717 | 	float r22 = M.m[2][2];
 718 | 	if (r22 <= 0.f)  // x^2 + y^2 >= z^2 + w^2
 719 | 	{
 720 | 		float dif10 = M.m[1][1] - M.m[0][0];
 721 | 		float omr22 = 1.f - r22;
 722 | 		if (dif10 <= 0.f)  // x^2 >= y^2
 723 | 		{
 724 | 			float fourXSqr = omr22 - dif10;
 725 | 			float inv4x = 0.5f / sqrtf(fourXSqr);
 726 | 			q.f[0] = fourXSqr*inv4x;
 727 | 			q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x;
 728 | 			q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x;
 729 | 			q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x;
 730 | 		}
 731 | 		else  // y^2 >= x^2
 732 | 		{
 733 | 			float fourYSqr = omr22 + dif10;
 734 | 			float inv4y = 0.5f / sqrtf(fourYSqr);
 735 | 			q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y;
 736 | 			q.f[1] = fourYSqr*inv4y;
 737 | 			q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y;
 738 | 			q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y;
 739 | 		}
 740 | 	}
 741 | 	else  // z^2 + w^2 >= x^2 + y^2
 742 | 	{
 743 | 		float sum10 = M.m[1][1] + M.m[0][0];
 744 | 		float opr22 = 1.f + r22;
 745 | 		if (sum10 <= 0.f)  // z^2 >= w^2
 746 | 		{
 747 | 			float fourZSqr = opr22 - sum10;
 748 | 			float inv4z = 0.5f / sqrtf(fourZSqr);
 749 | 			q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z;
 750 | 			q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z;
 751 | 			q.f[2] = fourZSqr*inv4z;
 752 | 			q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z;
 753 | 		}
 754 | 		else  // w^2 >= z^2
 755 | 		{
 756 | 			float fourWSqr = opr22 + sum10;
 757 | 			float inv4w = 0.5f / sqrtf(fourWSqr);
 758 | 			q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w;
 759 | 			q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w;
 760 | 			q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w;
 761 | 			q.f[3] = fourWSqr*inv4w;
 762 | 		}
 763 | 	}
 764 | 	return q.v;
 765 | 
 766 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
 767 | 	static const XMVECTORF32 XMPMMP = { +1.0f, -1.0f, -1.0f, +1.0f };
 768 | 	static const XMVECTORF32 XMMPMP = { -1.0f, +1.0f, -1.0f, +1.0f };
 769 | 	static const XMVECTORF32 XMMMPP = { -1.0f, -1.0f, +1.0f, +1.0f };
 770 | 	static const XMVECTORU32 Select0110 = { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 };
 771 | 	static const XMVECTORU32 Select0010 = { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 };
 772 | 
 773 | 	XMVECTOR r0 = M.c[0];
 774 | 	XMVECTOR r1 = M.c[1];
 775 | 	XMVECTOR r2 = M.c[2];
 776 | 
 777 | 	XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0);
 778 | 	XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1);
 779 | 	XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0);
 780 | 
 781 | 	// x^2 >= y^2 equivalent to r11 - r00 <= 0
 782 | 	XMVECTOR r11mr00 = vsubq_f32(r11, r00);
 783 | 	XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero);
 784 | 
 785 | 	// z^2 >= w^2 equivalent to r11 + r00 <= 0
 786 | 	XMVECTOR r11pr00 = vaddq_f32(r11, r00);
 787 | 	XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero);
 788 | 
 789 | 	// x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
 790 | 	XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero);
 791 | 
 792 | 	// (4*x^2, 4*y^2, 4*z^2, 4*w^2)
 793 | 	XMVECTOR t0 = vmulq_f32(XMPMMP, r00);
 794 | 	XMVECTOR x2y2z2w2 = vmlaq_f32(t0, XMMPMP, r11);
 795 | 	x2y2z2w2 = vmlaq_f32(x2y2z2w2, XMMMPP, r22);
 796 | 	x2y2z2w2 = vaddq_f32(x2y2z2w2, g_XMOne);
 797 | 
 798 | 	// (r01, r02, r12, r11)
 799 | 	t0 = vextq_f32(r0, r0, 1);
 800 | 	XMVECTOR t1 = vextq_f32(r1, r1, 1);
 801 | 	t0 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_low_f32(t1)));
 802 | 
 803 | 	// (r10, r20, r21, r10)
 804 | 	t1 = vextq_f32(r2, r2, 3);
 805 | 	XMVECTOR r10 = vdupq_lane_f32(vget_low_f32(r1), 0);
 806 | 	t1 = vbslq_f32(Select0110, t1, r10);
 807 | 
 808 | 	// (4*x*y, 4*x*z, 4*y*z, unused)
 809 | 	XMVECTOR xyxzyz = vaddq_f32(t0, t1);
 810 | 
 811 | 	// (r21, r20, r10, r10)
 812 | 	t0 = vcombine_f32(vrev64_f32(vget_low_f32(r2)), vget_low_f32(r10));
 813 | 
 814 | 	// (r12, r02, r01, r12)
 815 | 	XMVECTOR t2 = vcombine_f32(vrev64_f32(vget_high_f32(r0)), vrev64_f32(vget_low_f32(r0)));
 816 | 	XMVECTOR t3 = vdupq_lane_f32(vget_high_f32(r1), 0);
 817 | 	t1 = vbslq_f32(Select0110, t2, t3);
 818 | 
 819 | 	// (4*x*w, 4*y*w, 4*z*w, unused)
 820 | 	XMVECTOR xwywzw = vsubq_f32(t0, t1);
 821 | 	xwywzw = vmulq_f32(XMMPMP, xwywzw);
 822 | 
 823 | 	// (4*x*x, 4*x*y, 4*x*z, 4*x*w)
 824 | 	t0 = vextq_f32(xyxzyz, xyxzyz, 3);
 825 | 	t1 = vbslq_f32(Select0110, t0, x2y2z2w2);
 826 | 	t2 = vdupq_lane_f32(vget_low_f32(xwywzw), 0);
 827 | 	XMVECTOR tensor0 = vbslq_f32(g_XMSelect1110, t1, t2);
 828 | 
 829 | 	// (4*y*x, 4*y*y, 4*y*z, 4*y*w)
 830 | 	t0 = vbslq_f32(g_XMSelect1011, xyxzyz, x2y2z2w2);
 831 | 	t1 = vdupq_lane_f32(vget_low_f32(xwywzw), 1);
 832 | 	XMVECTOR tensor1 = vbslq_f32(g_XMSelect1110, t0, t1);
 833 | 
 834 | 	// (4*z*x, 4*z*y, 4*z*z, 4*z*w)
 835 | 	t0 = vextq_f32(xyxzyz, xyxzyz, 1);
 836 | 	t1 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_high_f32(xwywzw)));
 837 | 	XMVECTOR tensor2 = vbslq_f32(Select0010, x2y2z2w2, t1);
 838 | 
 839 | 	// (4*w*x, 4*w*y, 4*w*z, 4*w*w)
 840 | 	XMVECTOR tensor3 = vbslq_f32(g_XMSelect1110, xwywzw, x2y2z2w2);
 841 | 
 842 | 	// Select the row of the tensor-product matrix that has the largest
 843 | 	// magnitude.
 844 | 	t0 = vbslq_f32(x2gey2, tensor0, tensor1);
 845 | 	t1 = vbslq_f32(z2gew2, tensor2, tensor3);
 846 | 	t2 = vbslq_f32(x2py2gez2pw2, t0, t1);
 847 | 
 848 | 	// Normalize the row.  No division by zero is possible because the
 849 | 	// quaternion is unit-length (and the row is a nonzero multiple of
 850 | 	// the quaternion).
 851 | 	t0 = XMVector4Length(t2);
 852 | 	return XMVectorDivide(t2, t0);
 853 | #elif defined(_XM_SSE_INTRINSICS_)
 854 | 	static const XMVECTORF32 XMPMMP = { +1.0f, -1.0f, -1.0f, +1.0f };
 855 | 	static const XMVECTORF32 XMMPMP = { -1.0f, +1.0f, -1.0f, +1.0f };
 856 | 	static const XMVECTORF32 XMMMPP = { -1.0f, -1.0f, +1.0f, +1.0f };
 857 | 
 858 | 	XMVECTOR r0 = M.c[0];  // (r00, r01, r02, 0)
 859 | 	XMVECTOR r1 = M.c[1];  // (r10, r11, r12, 0)
 860 | 	XMVECTOR r2 = M.c[2];  // (r20, r21, r22, 0)
 861 | 
 862 | 						   // (r00, r00, r00, r00)
 863 | 	XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0, 0, 0, 0));
 864 | 	// (r11, r11, r11, r11)
 865 | 	XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1, 1, 1, 1));
 866 | 	// (r22, r22, r22, r22)
 867 | 	XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2, 2, 2, 2));
 868 | 
 869 | 	// x^2 >= y^2 equivalent to r11 - r00 <= 0
 870 | 	// (r11 - r00, r11 - r00, r11 - r00, r11 - r00)
 871 | 	XMVECTOR r11mr00 = _mm_sub_ps(r11, r00);
 872 | 	XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero);
 873 | 
 874 | 	// z^2 >= w^2 equivalent to r11 + r00 <= 0
 875 | 	// (r11 + r00, r11 + r00, r11 + r00, r11 + r00)
 876 | 	XMVECTOR r11pr00 = _mm_add_ps(r11, r00);
 877 | 	XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero);
 878 | 
 879 | 	// x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
 880 | 	XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero);
 881 | 
 882 | 	// (+r00, -r00, -r00, +r00)
 883 | 	XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00);
 884 | 
 885 | 	// (-r11, +r11, -r11, +r11)
 886 | 	XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11);
 887 | 
 888 | 	// (-r22, -r22, +r22, +r22)
 889 | 	XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22);
 890 | 
 891 | 	// (4*x^2, 4*y^2, 4*z^2, 4*w^2)
 892 | 	XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1);
 893 | 	x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2);
 894 | 	x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne);
 895 | 
 896 | 	// (r01, r02, r12, r11)
 897 | 	t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1, 2, 2, 1));
 898 | 	// (r10, r10, r20, r21)
 899 | 	t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1, 0, 0, 0));
 900 | 	// (r10, r20, r21, r10)
 901 | 	t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0));
 902 | 	// (4*x*y, 4*x*z, 4*y*z, unused)
 903 | 	XMVECTOR xyxzyz = _mm_add_ps(t0, t1);
 904 | 
 905 | 	// (r21, r20, r10, r10)
 906 | 	t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 1));
 907 | 	// (r12, r12, r02, r01)
 908 | 	t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1, 2, 2, 2));
 909 | 	// (r12, r02, r01, r12)
 910 | 	t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0));
 911 | 	// (4*x*w, 4*y*w, 4*z*w, unused)
 912 | 	XMVECTOR xwywzw = _mm_sub_ps(t0, t1);
 913 | 	xwywzw = _mm_mul_ps(XMMPMP, xwywzw);
 914 | 
 915 | 	// (4*x^2, 4*y^2, 4*x*y, unused)
 916 | 	t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0, 0, 1, 0));
 917 | 	// (4*z^2, 4*w^2, 4*z*w, unused)
 918 | 	t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0, 2, 3, 2));
 919 | 	// (4*x*z, 4*y*z, 4*x*w, 4*y*w)
 920 | 	t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1, 0, 2, 1));
 921 | 
 922 | 	// (4*x*x, 4*x*y, 4*x*z, 4*x*w)
 923 | 	XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2, 0, 2, 0));
 924 | 	// (4*y*x, 4*y*y, 4*y*z, 4*y*w)
 925 | 	XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 1, 1, 2));
 926 | 	// (4*z*x, 4*z*y, 4*z*z, 4*z*w)
 927 | 	XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2, 0, 1, 0));
 928 | 	// (4*w*x, 4*w*y, 4*w*z, 4*w*w)
 929 | 	XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1, 2, 3, 2));
 930 | 
 931 | 	// Select the row of the tensor-product matrix that has the largest
 932 | 	// magnitude.
 933 | 	t0 = _mm_and_ps(x2gey2, tensor0);
 934 | 	t1 = _mm_andnot_ps(x2gey2, tensor1);
 935 | 	t0 = _mm_or_ps(t0, t1);
 936 | 	t1 = _mm_and_ps(z2gew2, tensor2);
 937 | 	t2 = _mm_andnot_ps(z2gew2, tensor3);
 938 | 	t1 = _mm_or_ps(t1, t2);
 939 | 	t0 = _mm_and_ps(x2py2gez2pw2, t0);
 940 | 	t1 = _mm_andnot_ps(x2py2gez2pw2, t1);
 941 | 	t2 = _mm_or_ps(t0, t1);
 942 | 
 943 | 	// Normalize the row.  No division by zero is possible because the
 944 | 	// quaternion is unit-length (and the row is a nonzero multiple of
 945 | 	// the quaternion).
 946 | 	t0 = XMVector4Length(t2);
 947 | 	return _mm_div_ps(t2, t0);
 948 | #endif
 949 | }
 950 | 
 951 | //------------------------------------------------------------------------------
 952 | // Conversion operations
 953 | //------------------------------------------------------------------------------
 954 | 
 955 | //------------------------------------------------------------------------------
 956 | _Use_decl_annotations_
 957 | inline void XM_CALLCONV XMQuaternionToAxisAngle
 958 | (
 959 | 	XMVECTOR* pAxis,
 960 | 	float*    pAngle,
 961 | 	FXMVECTOR  Q
 962 | )
 963 | {
 964 | 	assert(pAxis);
 965 | 	assert(pAngle);
 966 | 
 967 | 	*pAxis = Q;
 968 | 
 969 | 	*pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
 970 | }
 971 | 
 972 | /****************************************************************************
 973 | *
 974 | * Plane
 975 | *
 976 | ****************************************************************************/
 977 | 
 978 | //------------------------------------------------------------------------------
 979 | // Comparison operations
 980 | //------------------------------------------------------------------------------
 981 | 
 982 | //------------------------------------------------------------------------------
 983 | 
 984 | inline bool XM_CALLCONV XMPlaneEqual
 985 | (
 986 | 	FXMVECTOR P1,
 987 | 	FXMVECTOR P2
 988 | )
 989 | {
 990 | 	return XMVector4Equal(P1, P2);
 991 | }
 992 | 
 993 | //------------------------------------------------------------------------------
 994 | 
 995 | inline bool XM_CALLCONV XMPlaneNearEqual
 996 | (
 997 | 	FXMVECTOR P1,
 998 | 	FXMVECTOR P2,
 999 | 	FXMVECTOR Epsilon
1000 | )
1001 | {
1002 | 	XMVECTOR NP1 = XMPlaneNormalize(P1);
1003 | 	XMVECTOR NP2 = XMPlaneNormalize(P2);
1004 | 	return XMVector4NearEqual(NP1, NP2, Epsilon);
1005 | }
1006 | 
1007 | //------------------------------------------------------------------------------
1008 | 
1009 | inline bool XM_CALLCONV XMPlaneNotEqual
1010 | (
1011 | 	FXMVECTOR P1,
1012 | 	FXMVECTOR P2
1013 | )
1014 | {
1015 | 	return XMVector4NotEqual(P1, P2);
1016 | }
1017 | 
1018 | //------------------------------------------------------------------------------
1019 | 
1020 | inline bool XM_CALLCONV XMPlaneIsNaN
1021 | (
1022 | 	FXMVECTOR P
1023 | )
1024 | {
1025 | 	return XMVector4IsNaN(P);
1026 | }
1027 | 
1028 | //------------------------------------------------------------------------------
1029 | 
1030 | inline bool XM_CALLCONV XMPlaneIsInfinite
1031 | (
1032 | 	FXMVECTOR P
1033 | )
1034 | {
1035 | 	return XMVector4IsInfinite(P);
1036 | }
1037 | 
1038 | //------------------------------------------------------------------------------
1039 | // Computation operations
1040 | //------------------------------------------------------------------------------
1041 | 
1042 | //------------------------------------------------------------------------------
1043 | 
1044 | inline XMVECTOR XM_CALLCONV XMPlaneDot
1045 | (
1046 | 	FXMVECTOR P,
1047 | 	FXMVECTOR V
1048 | )
1049 | {
1050 | 	return XMVector4Dot(P, V);
1051 | }
1052 | 
1053 | //------------------------------------------------------------------------------
1054 | 
1055 | inline XMVECTOR XM_CALLCONV XMPlaneDotCoord
1056 | (
1057 | 	FXMVECTOR P,
1058 | 	FXMVECTOR V
1059 | )
1060 | {
1061 | 	// Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
1062 | 
1063 | 	XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
1064 | 	XMVECTOR Result = XMVector4Dot(P, V3);
1065 | 	return Result;
1066 | }
1067 | 
1068 | //------------------------------------------------------------------------------
1069 | 
1070 | inline XMVECTOR XM_CALLCONV XMPlaneDotNormal
1071 | (
1072 | 	FXMVECTOR P,
1073 | 	FXMVECTOR V
1074 | )
1075 | {
1076 | 	return XMVector3Dot(P, V);
1077 | }
1078 | 
1079 | //------------------------------------------------------------------------------
1080 | // XMPlaneNormalizeEst uses a reciprocal estimate and
1081 | // returns QNaN on zero and infinite vectors.
1082 | 
1083 | inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst
1084 | (
1085 | 	FXMVECTOR P
1086 | )
1087 | {
1088 | #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1089 | 
1090 | 	XMVECTOR Result = XMVector3ReciprocalLengthEst(P);
1091 | 	return XMVectorMultiply(P, Result);
1092 | 
1093 | #elif defined(_XM_SSE4_INTRINSICS_)
1094 | 	XMVECTOR vTemp = _mm_dp_ps(P, P, 0x7f);
1095 | 	XMVECTOR vResult = _mm_rsqrt_ps(vTemp);
1096 | 	return _mm_mul_ps(vResult, P);
1097 | #elif defined(_XM_SSE_INTRINSICS_)
1098 | 	// Perform the dot product
1099 | 	XMVECTOR vDot = _mm_mul_ps(P, P);
1100 | 	// x=Dot.y, y=Dot.z
1101 | 	XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1));
1102 | 	// Result.x = x+y
1103 | 	vDot = _mm_add_ss(vDot, vTemp);
1104 | 	// x=Dot.z
1105 | 	vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1));
1106 | 	// Result.x = (x+y)+z
1107 | 	vDot = _mm_add_ss(vDot, vTemp);
1108 | 	// Splat x
1109 | 	vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0));
1110 | 	// Get the reciprocal
1111 | 	vDot = _mm_rsqrt_ps(vDot);
1112 | 	// Get the reciprocal
1113 | 	vDot = _mm_mul_ps(vDot, P);
1114 | 	return vDot;
1115 | #endif
1116 | }
1117 | 
1118 | //------------------------------------------------------------------------------
1119 | 
1120 | inline XMVECTOR XM_CALLCONV XMPlaneNormalize
1121 | (
1122 | 	FXMVECTOR P
1123 | )
1124 | {
1125 | #if defined(_XM_NO_INTRINSICS_)
1126 | 	float fLengthSq = sqrtf((P.vector4_f32[0] * P.vector4_f32[0]) + (P.vector4_f32[1] * P.vector4_f32[1]) + (P.vector4_f32[2] * P.vector4_f32[2]));
1127 | 	// Prevent divide by zero
1128 | 	if (fLengthSq) {
1129 | 		fLengthSq = 1.0f / fLengthSq;
1130 | 	}
1131 | 	{
1132 | 		XMVECTOR vResult = {
1133 | 			P.vector4_f32[0] * fLengthSq,
1134 | 			P.vector4_f32[1] * fLengthSq,
1135 | 			P.vector4_f32[2] * fLengthSq,
1136 | 			P.vector4_f32[3] * fLengthSq
1137 | 		};
1138 | 		return vResult;
1139 | 	}
1140 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1141 | 	XMVECTOR vLength = XMVector3ReciprocalLength(P);
1142 | 	return XMVectorMultiply(P, vLength);
1143 | #elif defined(_XM_SSE4_INTRINSICS_)
1144 | 	XMVECTOR vLengthSq = _mm_dp_ps(P, P, 0x7f);
1145 | 	// Prepare for the division
1146 | 	XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
1147 | 	// Failsafe on zero (Or epsilon) length planes
1148 | 	// If the length is infinity, set the elements to zero
1149 | 	vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity);
1150 | 	// Reciprocal mul to perform the normalization
1151 | 	vResult = _mm_div_ps(P, vResult);
1152 | 	// Any that are infinity, set to zero
1153 | 	vResult = _mm_and_ps(vResult, vLengthSq);
1154 | 	return vResult;
1155 | #elif defined(_XM_SSE_INTRINSICS_)
1156 | 	// Perform the dot product on x,y and z only
1157 | 	XMVECTOR vLengthSq = _mm_mul_ps(P, P);
1158 | 	XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1));
1159 | 	vLengthSq = _mm_add_ss(vLengthSq, vTemp);
1160 | 	vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1));
1161 | 	vLengthSq = _mm_add_ss(vLengthSq, vTemp);
1162 | 	vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
1163 | 	// Prepare for the division
1164 | 	XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
1165 | 	// Failsafe on zero (Or epsilon) length planes
1166 | 	// If the length is infinity, set the elements to zero
1167 | 	vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity);
1168 | 	// Reciprocal mul to perform the normalization
1169 | 	vResult = _mm_div_ps(P, vResult);
1170 | 	// Any that are infinity, set to zero
1171 | 	vResult = _mm_and_ps(vResult, vLengthSq);
1172 | 	return vResult;
1173 | #endif
1174 | }
1175 | 
1176 | //------------------------------------------------------------------------------
1177 | 
1178 | inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine
1179 | (
1180 | 	FXMVECTOR P,
1181 | 	FXMVECTOR LinePoint1,
1182 | 	FXMVECTOR LinePoint2
1183 | )
1184 | {
1185 | 	XMVECTOR V1 = XMVector3Dot(P, LinePoint1);
1186 | 	XMVECTOR V2 = XMVector3Dot(P, LinePoint2);
1187 | 	XMVECTOR D = XMVectorSubtract(V1, V2);
1188 | 
1189 | 	XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1);
1190 | 	VT = XMVectorDivide(VT, D);
1191 | 
1192 | 	XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1);
1193 | 	Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
1194 | 
1195 | 	const XMVECTOR Zero = XMVectorZero();
1196 | 	XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
1197 | 
1198 | 	return XMVectorSelect(Point, g_XMQNaN.v, Control);
1199 | }
1200 | 
1201 | //------------------------------------------------------------------------------
1202 | _Use_decl_annotations_
1203 | inline void XM_CALLCONV XMPlaneIntersectPlane
1204 | (
1205 | 	XMVECTOR* pLinePoint1,
1206 | 	XMVECTOR* pLinePoint2,
1207 | 	FXMVECTOR  P1,
1208 | 	FXMVECTOR  P2
1209 | )
1210 | {
1211 | 	assert(pLinePoint1);
1212 | 	assert(pLinePoint2);
1213 | 
1214 | 	XMVECTOR V1 = XMVector3Cross(P2, P1);
1215 | 
1216 | 	XMVECTOR LengthSq = XMVector3LengthSq(V1);
1217 | 
1218 | 	XMVECTOR V2 = XMVector3Cross(P2, V1);
1219 | 
1220 | 	XMVECTOR P1W = XMVectorSplatW(P1);
1221 | 	XMVECTOR Point = XMVectorMultiply(V2, P1W);
1222 | 
1223 | 	XMVECTOR V3 = XMVector3Cross(V1, P1);
1224 | 
1225 | 	XMVECTOR P2W = XMVectorSplatW(P2);
1226 | 	Point = XMVectorMultiplyAdd(V3, P2W, Point);
1227 | 
1228 | 	XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq);
1229 | 
1230 | 	XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1);
1231 | 
1232 | 	XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
1233 | 	*pLinePoint1 = XMVectorSelect(LinePoint1, g_XMQNaN.v, Control);
1234 | 	*pLinePoint2 = XMVectorSelect(LinePoint2, g_XMQNaN.v, Control);
1235 | }
1236 | 
1237 | //------------------------------------------------------------------------------
1238 | 
1239 | inline XMVECTOR XM_CALLCONV XMPlaneTransform
1240 | (
1241 | 	FXMVECTOR P,
1242 | 	FXMMATRIX M
1243 | )
1244 | {
1245 | 	XMVECTOR W = XMVectorSplatW(P);
1246 | 	XMVECTOR Z = XMVectorSplatZ(P);
1247 | 	XMVECTOR Y = XMVectorSplatY(P);
1248 | 	XMVECTOR X = XMVectorSplatX(P);
1249 | 
1250 | 	XMVECTOR Result = XMVectorMultiply(W, M.c[3]);
1251 | 	Result = XMVectorMultiplyAdd(Z, M.c[2], Result);
1252 | 	Result = XMVectorMultiplyAdd(Y, M.c[1], Result);
1253 | 	Result = XMVectorMultiplyAdd(X, M.c[0], Result);
1254 | 	return Result;
1255 | }
1256 | 
1257 | //------------------------------------------------------------------------------
1258 | _Use_decl_annotations_
1259 | inline XMFLOAT4* XM_CALLCONV XMPlaneTransformStream
1260 | (
1261 | 	XMFLOAT4*       pOutputStream,
1262 | 	size_t          OutputStride,
1263 | 	const XMFLOAT4* pInputStream,
1264 | 	size_t          InputStride,
1265 | 	size_t          PlaneCount,
1266 | 	FXMMATRIX       M
1267 | )
1268 | {
1269 | 	return XMVector4TransformStream(pOutputStream,
1270 | 		OutputStride,
1271 | 		pInputStream,
1272 | 		InputStride,
1273 | 		PlaneCount,
1274 | 		M);
1275 | }
1276 | 
1277 | //------------------------------------------------------------------------------
1278 | // Conversion operations
1279 | //------------------------------------------------------------------------------
1280 | 
1281 | //------------------------------------------------------------------------------
1282 | 
1283 | inline XMVECTOR XM_CALLCONV XMPlaneFromPointNormal
1284 | (
1285 | 	FXMVECTOR Point,
1286 | 	FXMVECTOR Normal
1287 | )
1288 | {
1289 | 	XMVECTOR W = XMVector3Dot(Point, Normal);
1290 | 	W = XMVectorNegate(W);
1291 | 	return XMVectorSelect(W, Normal, g_XMSelect1110.v);
1292 | }
1293 | 
1294 | //------------------------------------------------------------------------------
1295 | 
1296 | inline XMVECTOR XM_CALLCONV XMPlaneFromPoints
1297 | (
1298 | 	FXMVECTOR Point1,
1299 | 	FXMVECTOR Point2,
1300 | 	FXMVECTOR Point3
1301 | )
1302 | {
1303 | 	XMVECTOR V21 = XMVectorSubtract(Point1, Point2);
1304 | 	XMVECTOR V31 = XMVectorSubtract(Point1, Point3);
1305 | 
1306 | 	XMVECTOR N = XMVector3Cross(V21, V31);
1307 | 	N = XMVector3Normalize(N);
1308 | 
1309 | 	XMVECTOR D = XMPlaneDotNormal(N, Point1);
1310 | 	D = XMVectorNegate(D);
1311 | 
1312 | 	XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v);
1313 | 
1314 | 	return Result;
1315 | }
1316 | 
1317 | /****************************************************************************
1318 | *
1319 | * Color
1320 | *
1321 | ****************************************************************************/
1322 | 
1323 | //------------------------------------------------------------------------------
1324 | // Comparison operations
1325 | //------------------------------------------------------------------------------
1326 | 
1327 | //------------------------------------------------------------------------------
1328 | 
1329 | inline bool XM_CALLCONV XMColorEqual
1330 | (
1331 | 	FXMVECTOR C1,
1332 | 	FXMVECTOR C2
1333 | )
1334 | {
1335 | 	return XMVector4Equal(C1, C2);
1336 | }
1337 | 
1338 | //------------------------------------------------------------------------------
1339 | 
1340 | inline bool XM_CALLCONV XMColorNotEqual
1341 | (
1342 | 	FXMVECTOR C1,
1343 | 	FXMVECTOR C2
1344 | )
1345 | {
1346 | 	return XMVector4NotEqual(C1, C2);
1347 | }
1348 | 
1349 | //------------------------------------------------------------------------------
1350 | 
1351 | inline bool XM_CALLCONV XMColorGreater
1352 | (
1353 | 	FXMVECTOR C1,
1354 | 	FXMVECTOR C2
1355 | )
1356 | {
1357 | 	return XMVector4Greater(C1, C2);
1358 | }
1359 | 
1360 | //------------------------------------------------------------------------------
1361 | 
1362 | inline bool XM_CALLCONV XMColorGreaterOrEqual
1363 | (
1364 | 	FXMVECTOR C1,
1365 | 	FXMVECTOR C2
1366 | )
1367 | {
1368 | 	return XMVector4GreaterOrEqual(C1, C2);
1369 | }
1370 | 
1371 | //------------------------------------------------------------------------------
1372 | 
1373 | inline bool XM_CALLCONV XMColorLess
1374 | (
1375 | 	FXMVECTOR C1,
1376 | 	FXMVECTOR C2
1377 | )
1378 | {
1379 | 	return XMVector4Less(C1, C2);
1380 | }
1381 | 
1382 | //------------------------------------------------------------------------------
1383 | 
1384 | inline bool XM_CALLCONV XMColorLessOrEqual
1385 | (
1386 | 	FXMVECTOR C1,
1387 | 	FXMVECTOR C2
1388 | )
1389 | {
1390 | 	return XMVector4LessOrEqual(C1, C2);
1391 | }
1392 | 
1393 | //------------------------------------------------------------------------------
1394 | 
1395 | inline bool XM_CALLCONV XMColorIsNaN
1396 | (
1397 | 	FXMVECTOR C
1398 | )
1399 | {
1400 | 	return XMVector4IsNaN(C);
1401 | }
1402 | 
1403 | //------------------------------------------------------------------------------
1404 | 
1405 | inline bool XM_CALLCONV XMColorIsInfinite
1406 | (
1407 | 	FXMVECTOR C
1408 | )
1409 | {
1410 | 	return XMVector4IsInfinite(C);
1411 | }
1412 | 
1413 | //------------------------------------------------------------------------------
1414 | // Computation operations
1415 | //------------------------------------------------------------------------------
1416 | 
1417 | //------------------------------------------------------------------------------
1418 | 
1419 | inline XMVECTOR XM_CALLCONV XMColorNegative
1420 | (
1421 | 	FXMVECTOR vColor
1422 | )
1423 | {
1424 | #if defined(_XM_NO_INTRINSICS_)
1425 | 	XMVECTORF32 vResult = {
1426 | 		1.0f - vColor.vector4_f32[0],
1427 | 		1.0f - vColor.vector4_f32[1],
1428 | 		1.0f - vColor.vector4_f32[2],
1429 | 		vColor.vector4_f32[3]
1430 | 	};
1431 | 	return vResult.v;
1432 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1433 | 	XMVECTOR vTemp = veorq_u32(vColor, g_XMNegate3);
1434 | 	return vaddq_f32(vTemp, g_XMOne3);
1435 | #elif defined(_XM_SSE_INTRINSICS_)
1436 | 	// Negate only x,y and z.
1437 | 	XMVECTOR vTemp = _mm_xor_ps(vColor, g_XMNegate3);
1438 | 	// Add 1,1,1,0 to -x,-y,-z,w
1439 | 	return _mm_add_ps(vTemp, g_XMOne3);
1440 | #endif
1441 | }
1442 | 
1443 | //------------------------------------------------------------------------------
1444 | 
1445 | inline XMVECTOR XM_CALLCONV XMColorModulate
1446 | (
1447 | 	FXMVECTOR C1,
1448 | 	FXMVECTOR C2
1449 | )
1450 | {
1451 | 	return XMVectorMultiply(C1, C2);
1452 | }
1453 | 
1454 | //------------------------------------------------------------------------------
1455 | 
1456 | inline XMVECTOR XM_CALLCONV XMColorAdjustSaturation
1457 | (
1458 | 	FXMVECTOR vColor,
1459 | 	float    fSaturation
1460 | )
1461 | {
1462 | 	// Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
1463 | 	// Result = (C - Luminance) * Saturation + Luminance;
1464 | 
1465 | #if defined(_XM_NO_INTRINSICS_)
1466 | 	const XMVECTORF32 gvLuminance = { 0.2125f, 0.7154f, 0.0721f, 0.0f };
1467 | 
1468 | 	float fLuminance = (vColor.vector4_f32[0] * gvLuminance.f[0]) + (vColor.vector4_f32[1] * gvLuminance.f[1]) + (vColor.vector4_f32[2] * gvLuminance.f[2]);
1469 | 	XMVECTOR vResult;
1470 | 	vResult.vector4_f32[0] = ((vColor.vector4_f32[0] - fLuminance)*fSaturation) + fLuminance;
1471 | 	vResult.vector4_f32[1] = ((vColor.vector4_f32[1] - fLuminance)*fSaturation) + fLuminance;
1472 | 	vResult.vector4_f32[2] = ((vColor.vector4_f32[2] - fLuminance)*fSaturation) + fLuminance;
1473 | 	vResult.vector4_f32[3] = vColor.vector4_f32[3];
1474 | 	return vResult;
1475 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1476 | 	static const XMVECTORF32 gvLuminance = { 0.2125f, 0.7154f, 0.0721f, 0.0f };
1477 | 	XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance);
1478 | 	XMVECTOR vResult = vsubq_f32(vColor, vLuminance);
1479 | 	vResult = vmlaq_n_f32(vLuminance, vResult, fSaturation);
1480 | 	return vbslq_f32(g_XMSelect1110, vResult, vColor);
1481 | #elif defined(_XM_SSE_INTRINSICS_)
1482 | 	static const XMVECTORF32 gvLuminance = { 0.2125f, 0.7154f, 0.0721f, 0.0f };
1483 | 	XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance);
1484 | 	// Splat fSaturation
1485 | 	XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
1486 | 	// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
1487 | 	XMVECTOR vResult = _mm_sub_ps(vColor, vLuminance);
1488 | 	vResult = _mm_mul_ps(vResult, vSaturation);
1489 | 	vResult = _mm_add_ps(vResult, vLuminance);
1490 | 	// Retain w from the source color
1491 | 	vLuminance = _mm_shuffle_ps(vResult, vColor, _MM_SHUFFLE(3, 2, 2, 2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1492 | 	vResult = _mm_shuffle_ps(vResult, vLuminance, _MM_SHUFFLE(3, 0, 1, 0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1493 | 	return vResult;
1494 | #endif
1495 | }
1496 | 
1497 | //------------------------------------------------------------------------------
1498 | 
1499 | inline XMVECTOR XM_CALLCONV XMColorAdjustContrast
1500 | (
1501 | 	FXMVECTOR vColor,
1502 | 	float    fContrast
1503 | )
1504 | {
1505 | 	// Result = (vColor - 0.5f) * fContrast + 0.5f;
1506 | 
1507 | #if defined(_XM_NO_INTRINSICS_)
1508 | 	XMVECTORF32 vResult = {
1509 | 		((vColor.vector4_f32[0] - 0.5f) * fContrast) + 0.5f,
1510 | 		((vColor.vector4_f32[1] - 0.5f) * fContrast) + 0.5f,
1511 | 		((vColor.vector4_f32[2] - 0.5f) * fContrast) + 0.5f,
1512 | 		vColor.vector4_f32[3]        // Leave W untouched
1513 | 	};
1514 | 	return vResult.v;
1515 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1516 | 	XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v);
1517 | 	vResult = vmlaq_n_f32(g_XMOneHalf.v, vResult, fContrast);
1518 | 	return vbslq_f32(g_XMSelect1110, vResult, vColor);
1519 | #elif defined(_XM_SSE_INTRINSICS_)
1520 | 	XMVECTOR vScale = _mm_set_ps1(fContrast);           // Splat the scale
1521 | 	XMVECTOR vResult = _mm_sub_ps(vColor, g_XMOneHalf);  // Subtract 0.5f from the source (Saving source)
1522 | 	vResult = _mm_mul_ps(vResult, vScale);               // Mul by scale
1523 | 	vResult = _mm_add_ps(vResult, g_XMOneHalf);          // Add 0.5f
1524 | 														 // Retain w from the source color
1525 | 	vScale = _mm_shuffle_ps(vResult, vColor, _MM_SHUFFLE(3, 2, 2, 2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1526 | 	vResult = _mm_shuffle_ps(vResult, vScale, _MM_SHUFFLE(3, 0, 1, 0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1527 | 	return vResult;
1528 | #endif
1529 | }
1530 | 
1531 | //------------------------------------------------------------------------------
1532 | 
1533 | inline XMVECTOR XM_CALLCONV XMColorRGBToHSL(FXMVECTOR rgb)
1534 | {
1535 | 	XMVECTOR r = XMVectorSplatX(rgb);
1536 | 	XMVECTOR g = XMVectorSplatY(rgb);
1537 | 	XMVECTOR b = XMVectorSplatZ(rgb);
1538 | 
1539 | 	XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b));
1540 | 	XMVECTOR max = XMVectorMax(r, XMVectorMax(g, b));
1541 | 
1542 | 	XMVECTOR l = XMVectorMultiply(XMVectorAdd(min, max), g_XMOneHalf);
1543 | 
1544 | 	XMVECTOR d = XMVectorSubtract(max, min);
1545 | 
1546 | 	XMVECTOR la = XMVectorSelect(rgb, l, g_XMSelect1110);
1547 | 
1548 | 	if (XMVector3Less(d, g_XMEpsilon))
1549 | 	{
1550 | 		// Achromatic, assume H and S of 0
1551 | 		return XMVectorSelect(la, g_XMZero, g_XMSelect1100);
1552 | 	}
1553 | 	else
1554 | 	{
1555 | 		XMVECTOR s, h;
1556 | 
1557 | 		XMVECTOR d2 = XMVectorAdd(min, max);
1558 | 
1559 | 		if (XMVector3Greater(l, g_XMOneHalf))
1560 | 		{
1561 | 			// d / (2-max-min)
1562 | 			s = XMVectorDivide(d, XMVectorSubtract(g_XMTwo, d2));
1563 | 		}
1564 | 		else
1565 | 		{
1566 | 			// d / (max+min)
1567 | 			s = XMVectorDivide(d, d2);
1568 | 		}
1569 | 
1570 | 		if (XMVector3Equal(r, max))
1571 | 		{
1572 | 			// Red is max
1573 | 			h = XMVectorDivide(XMVectorSubtract(g, b), d);
1574 | 		}
1575 | 		else if (XMVector3Equal(g, max))
1576 | 		{
1577 | 			// Green is max
1578 | 			h = XMVectorDivide(XMVectorSubtract(b, r), d);
1579 | 			h = XMVectorAdd(h, g_XMTwo);
1580 | 		}
1581 | 		else
1582 | 		{
1583 | 			// Blue is max
1584 | 			h = XMVectorDivide(XMVectorSubtract(r, g), d);
1585 | 			h = XMVectorAdd(h, g_XMFour);
1586 | 		}
1587 | 
1588 | 		h = XMVectorDivide(h, g_XMSix);
1589 | 
1590 | 		if (XMVector3Less(h, g_XMZero))
1591 | 			h = XMVectorAdd(h, g_XMOne);
1592 | 
1593 | 		XMVECTOR lha = XMVectorSelect(la, h, g_XMSelect1100);
1594 | 		return XMVectorSelect(s, lha, g_XMSelect1011);
1595 | 	}
1596 | }
1597 | 
1598 | //------------------------------------------------------------------------------
1599 | 
1600 | namespace Internal
1601 | {
1602 | 
1603 | 	inline XMVECTOR XM_CALLCONV XMColorHue2Clr(FXMVECTOR p, FXMVECTOR q, FXMVECTOR h)
1604 | 	{
1605 | 		static const XMVECTORF32 oneSixth = { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f };
1606 | 		static const XMVECTORF32 twoThirds = { 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f };
1607 | 
1608 | 		XMVECTOR t = h;
1609 | 
1610 | 		if (XMVector3Less(t, g_XMZero))
1611 | 			t = XMVectorAdd(t, g_XMOne);
1612 | 
1613 | 		if (XMVector3Greater(t, g_XMOne))
1614 | 			t = XMVectorSubtract(t, g_XMOne);
1615 | 
1616 | 		if (XMVector3Less(t, oneSixth))
1617 | 		{
1618 | 			// p + (q - p) * 6 * t
1619 | 			XMVECTOR t1 = XMVectorSubtract(q, p);
1620 | 			XMVECTOR t2 = XMVectorMultiply(g_XMSix, t);
1621 | 			return XMVectorMultiplyAdd(t1, t2, p);
1622 | 		}
1623 | 
1624 | 		if (XMVector3Less(t, g_XMOneHalf))
1625 | 			return q;
1626 | 
1627 | 		if (XMVector3Less(t, twoThirds))
1628 | 		{
1629 | 			// p + (q - p) * 6 * (2/3 - t)
1630 | 			XMVECTOR t1 = XMVectorSubtract(q, p);
1631 | 			XMVECTOR t2 = XMVectorMultiply(g_XMSix, XMVectorSubtract(twoThirds, t));
1632 | 			return XMVectorMultiplyAdd(t1, t2, p);
1633 | 		}
1634 | 
1635 | 		return p;
1636 | 	}
1637 | 
1638 | }; // namespace Internal
1639 | 
1640 | inline XMVECTOR XM_CALLCONV XMColorHSLToRGB(FXMVECTOR hsl)
1641 | {
1642 | 	static const XMVECTORF32 oneThird = { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f };
1643 | 
1644 | 	XMVECTOR s = XMVectorSplatY(hsl);
1645 | 	XMVECTOR l = XMVectorSplatZ(hsl);
1646 | 
1647 | 	if (XMVector3NearEqual(s, g_XMZero, g_XMEpsilon))
1648 | 	{
1649 | 		// Achromatic
1650 | 		return XMVectorSelect(hsl, l, g_XMSelect1110);
1651 | 	}
1652 | 	else
1653 | 	{
1654 | 		XMVECTOR h = XMVectorSplatX(hsl);
1655 | 
1656 | 		XMVECTOR q;
1657 | 		if (XMVector3Less(l, g_XMOneHalf))
1658 | 		{
1659 | 			q = XMVectorMultiply(l, XMVectorAdd(g_XMOne, s));
1660 | 		}
1661 | 		else
1662 | 		{
1663 | 			q = XMVectorSubtract(XMVectorAdd(l, s), XMVectorMultiply(l, s));
1664 | 		}
1665 | 
1666 | 		XMVECTOR p = XMVectorSubtract(XMVectorMultiply(g_XMTwo, l), q);
1667 | 
1668 | 		XMVECTOR r = Internal::XMColorHue2Clr(p, q, XMVectorAdd(h, oneThird));
1669 | 		XMVECTOR g = Internal::XMColorHue2Clr(p, q, h);
1670 | 		XMVECTOR b = Internal::XMColorHue2Clr(p, q, XMVectorSubtract(h, oneThird));
1671 | 
1672 | 		XMVECTOR rg = XMVectorSelect(g, r, g_XMSelect1000);
1673 | 		XMVECTOR ba = XMVectorSelect(hsl, b, g_XMSelect1110);
1674 | 
1675 | 		return XMVectorSelect(ba, rg, g_XMSelect1100);
1676 | 	}
1677 | }
1678 | 
1679 | //------------------------------------------------------------------------------
1680 | 
1681 | inline XMVECTOR XM_CALLCONV XMColorRGBToHSV(FXMVECTOR rgb)
1682 | {
1683 | 	XMVECTOR r = XMVectorSplatX(rgb);
1684 | 	XMVECTOR g = XMVectorSplatY(rgb);
1685 | 	XMVECTOR b = XMVectorSplatZ(rgb);
1686 | 
1687 | 	XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b));
1688 | 	XMVECTOR v = XMVectorMax(r, XMVectorMax(g, b));
1689 | 
1690 | 	XMVECTOR d = XMVectorSubtract(v, min);
1691 | 
1692 | 	XMVECTOR s = (XMVector3NearEqual(v, g_XMZero, g_XMEpsilon)) ? g_XMZero : XMVectorDivide(d, v);
1693 | 
1694 | 	if (XMVector3Less(d, g_XMEpsilon))
1695 | 	{
1696 | 		// Achromatic, assume H of 0
1697 | 		XMVECTOR hv = XMVectorSelect(v, g_XMZero, g_XMSelect1000);
1698 | 		XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110);
1699 | 		return XMVectorSelect(s, hva, g_XMSelect1011);
1700 | 	}
1701 | 	else
1702 | 	{
1703 | 		XMVECTOR h;
1704 | 
1705 | 		if (XMVector3Equal(r, v))
1706 | 		{
1707 | 			// Red is max
1708 | 			h = XMVectorDivide(XMVectorSubtract(g, b), d);
1709 | 
1710 | 			if (XMVector3Less(g, b))
1711 | 				h = XMVectorAdd(h, g_XMSix);
1712 | 		}
1713 | 		else if (XMVector3Equal(g, v))
1714 | 		{
1715 | 			// Green is max
1716 | 			h = XMVectorDivide(XMVectorSubtract(b, r), d);
1717 | 			h = XMVectorAdd(h, g_XMTwo);
1718 | 		}
1719 | 		else
1720 | 		{
1721 | 			// Blue is max
1722 | 			h = XMVectorDivide(XMVectorSubtract(r, g), d);
1723 | 			h = XMVectorAdd(h, g_XMFour);
1724 | 		}
1725 | 
1726 | 		h = XMVectorDivide(h, g_XMSix);
1727 | 
1728 | 		XMVECTOR hv = XMVectorSelect(v, h, g_XMSelect1000);
1729 | 		XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110);
1730 | 		return XMVectorSelect(s, hva, g_XMSelect1011);
1731 | 	}
1732 | }
1733 | 
1734 | //------------------------------------------------------------------------------
1735 | 
1736 | inline XMVECTOR XM_CALLCONV XMColorHSVToRGB(FXMVECTOR hsv)
1737 | {
1738 | 	XMVECTOR h = XMVectorSplatX(hsv);
1739 | 	XMVECTOR s = XMVectorSplatY(hsv);
1740 | 	XMVECTOR v = XMVectorSplatZ(hsv);
1741 | 
1742 | 	XMVECTOR h6 = XMVectorMultiply(h, g_XMSix);
1743 | 
1744 | 	XMVECTOR i = XMVectorFloor(h6);
1745 | 	XMVECTOR f = XMVectorSubtract(h6, i);
1746 | 
1747 | 	// p = v* (1-s)
1748 | 	XMVECTOR p = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, s));
1749 | 
1750 | 	// q = v*(1-f*s)
1751 | 	XMVECTOR q = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(f, s)));
1752 | 
1753 | 	// t = v*(1 - (1-f)*s)
1754 | 	XMVECTOR t = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(XMVectorSubtract(g_XMOne, f), s)));
1755 | 
1756 | 	int ii = static_cast<int>(XMVectorGetX(XMVectorMod(i, g_XMSix)));
1757 | 
1758 | 	XMVECTOR _rgb;
1759 | 
1760 | 	switch (ii)
1761 | 	{
1762 | 	case 0: // rgb = vtp
1763 | 	{
1764 | 		XMVECTOR vt = XMVectorSelect(t, v, g_XMSelect1000);
1765 | 		_rgb = XMVectorSelect(p, vt, g_XMSelect1100);
1766 | 	}
1767 | 	break;
1768 | 	case 1: // rgb = qvp
1769 | 	{
1770 | 		XMVECTOR qv = XMVectorSelect(v, q, g_XMSelect1000);
1771 | 		_rgb = XMVectorSelect(p, qv, g_XMSelect1100);
1772 | 	}
1773 | 	break;
1774 | 	case 2: // rgb = pvt
1775 | 	{
1776 | 		XMVECTOR pv = XMVectorSelect(v, p, g_XMSelect1000);
1777 | 		_rgb = XMVectorSelect(t, pv, g_XMSelect1100);
1778 | 	}
1779 | 	break;
1780 | 	case 3: // rgb = pqv
1781 | 	{
1782 | 		XMVECTOR pq = XMVectorSelect(q, p, g_XMSelect1000);
1783 | 		_rgb = XMVectorSelect(v, pq, g_XMSelect1100);
1784 | 	}
1785 | 	break;
1786 | 	case 4: // rgb = tpv
1787 | 	{
1788 | 		XMVECTOR tp = XMVectorSelect(p, t, g_XMSelect1000);
1789 | 		_rgb = XMVectorSelect(v, tp, g_XMSelect1100);
1790 | 	}
1791 | 	break;
1792 | 	default: // rgb = vpq
1793 | 	{
1794 | 		XMVECTOR vp = XMVectorSelect(p, v, g_XMSelect1000);
1795 | 		_rgb = XMVectorSelect(q, vp, g_XMSelect1100);
1796 | 	}
1797 | 	break;
1798 | 	}
1799 | 
1800 | 	return XMVectorSelect(hsv, _rgb, g_XMSelect1110);
1801 | }
1802 | 
1803 | //------------------------------------------------------------------------------
1804 | 
1805 | inline XMVECTOR XM_CALLCONV XMColorRGBToYUV(FXMVECTOR rgb)
1806 | {
1807 | 	static const XMVECTORF32 Scale0 = { 0.299f, -0.147f,  0.615f, 0.0f };
1808 | 	static const XMVECTORF32 Scale1 = { 0.587f, -0.289f, -0.515f, 0.0f };
1809 | 	static const XMVECTORF32 Scale2 = { 0.114f,  0.436f, -0.100f, 0.0f };
1810 | 
1811 | 	XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
1812 | 	XMVECTOR clr = XMVector3Transform(rgb, M);
1813 | 
1814 | 	return XMVectorSelect(rgb, clr, g_XMSelect1110);
1815 | }
1816 | 
1817 | //------------------------------------------------------------------------------
1818 | 
1819 | inline XMVECTOR XM_CALLCONV XMColorYUVToRGB(FXMVECTOR yuv)
1820 | {
1821 | 	static const XMVECTORF32 Scale1 = { 0.0f, -0.395f, 2.032f, 0.0f };
1822 | 	static const XMVECTORF32 Scale2 = { 1.140f, -0.581f,   0.0f, 0.0f };
1823 | 
1824 | 	XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero);
1825 | 	XMVECTOR clr = XMVector3Transform(yuv, M);
1826 | 
1827 | 	return XMVectorSelect(yuv, clr, g_XMSelect1110);
1828 | }
1829 | 
1830 | //------------------------------------------------------------------------------
1831 | 
1832 | inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD(FXMVECTOR rgb)
1833 | {
1834 | 	static const XMVECTORF32 Scale0 = { 0.2126f, -0.0997f,  0.6150f, 0.0f };
1835 | 	static const XMVECTORF32 Scale1 = { 0.7152f, -0.3354f, -0.5586f, 0.0f };
1836 | 	static const XMVECTORF32 Scale2 = { 0.0722f,  0.4351f, -0.0564f, 0.0f };
1837 | 
1838 | 	XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
1839 | 	XMVECTOR clr = XMVector3Transform(rgb, M);
1840 | 
1841 | 	return XMVectorSelect(rgb, clr, g_XMSelect1110);
1842 | }
1843 | 
1844 | //------------------------------------------------------------------------------
1845 | 
1846 | inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD(FXMVECTOR yuv)
1847 | {
1848 | 	static const XMVECTORF32 Scale1 = { 0.0f, -0.2153f, 2.1324f, 0.0f };
1849 | 	static const XMVECTORF32 Scale2 = { 1.2803f, -0.3806f,    0.0f, 0.0f };
1850 | 
1851 | 	XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero);
1852 | 	XMVECTOR clr = XMVector3Transform(yuv, M);
1853 | 
1854 | 	return XMVectorSelect(yuv, clr, g_XMSelect1110);
1855 | }
1856 | 
1857 | //------------------------------------------------------------------------------
1858 | 
1859 | inline XMVECTOR XM_CALLCONV XMColorRGBToXYZ(FXMVECTOR rgb)
1860 | {
1861 | 	static const XMVECTORF32 Scale0 = { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f };
1862 | 	static const XMVECTORF32 Scale1 = { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f };
1863 | 	static const XMVECTORF32 Scale2 = { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f };
1864 | 	static const XMVECTORF32 Scale = { 1.f / 0.17697f, 1.f / 0.17697f, 1.f / 0.17697f, 0.0f };
1865 | 
1866 | 	XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
1867 | 	XMVECTOR clr = XMVectorMultiply(XMVector3Transform(rgb, M), Scale);
1868 | 
1869 | 	return XMVectorSelect(rgb, clr, g_XMSelect1110);
1870 | }
1871 | 
1872 | inline XMVECTOR XM_CALLCONV XMColorXYZToRGB(FXMVECTOR xyz)
1873 | {
1874 | 	static const XMVECTORF32 Scale0 = { 2.3706743f, -0.5138850f,  0.0052982f, 0.0f };
1875 | 	static const XMVECTORF32 Scale1 = { -0.9000405f,  1.4253036f, -0.0146949f, 0.0f };
1876 | 	static const XMVECTORF32 Scale2 = { -0.4706338f,  0.0885814f,  1.0093968f, 0.0f };
1877 | 	static const XMVECTORF32 Scale = { 0.17697f, 0.17697f, 0.17697f, 0.0f };
1878 | 
1879 | 	XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
1880 | 	XMVECTOR clr = XMVector3Transform(XMVectorMultiply(xyz, Scale), M);
1881 | 
1882 | 	return XMVectorSelect(xyz, clr, g_XMSelect1110);
1883 | }
1884 | 
1885 | //------------------------------------------------------------------------------
1886 | 
1887 | inline XMVECTOR XM_CALLCONV XMColorXYZToSRGB(FXMVECTOR xyz)
1888 | {
1889 | 	static const XMVECTORF32 Scale0 = { 3.2406f, -0.9689f,  0.0557f, 0.0f };
1890 | 	static const XMVECTORF32 Scale1 = { -1.5372f,  1.8758f, -0.2040f, 0.0f };
1891 | 	static const XMVECTORF32 Scale2 = { -0.4986f,  0.0415f,  1.0570f, 0.0f };
1892 | 	static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f };
1893 | 	static const XMVECTORF32 Exp = { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.0f };
1894 | 
1895 | 	XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
1896 | 	XMVECTOR lclr = XMVector3Transform(xyz, M);
1897 | 
1898 | 	XMVECTOR sel = XMVectorGreater(lclr, Cutoff);
1899 | 
1900 | 	// clr = 12.92 * lclr for lclr <= 0.0031308f
1901 | 	XMVECTOR smallC = XMVectorMultiply(lclr, g_XMsrgbScale);
1902 | 
1903 | 	// clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055)
1904 | 	XMVECTOR largeC = XMVectorSubtract(XMVectorMultiply(g_XMsrgbA1, XMVectorPow(lclr, Exp)), g_XMsrgbA);
1905 | 
1906 | 	XMVECTOR clr = XMVectorSelect(smallC, largeC, sel);
1907 | 
1908 | 	return XMVectorSelect(xyz, clr, g_XMSelect1110);
1909 | }
1910 | 
1911 | //------------------------------------------------------------------------------
1912 | 
1913 | inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ(FXMVECTOR srgb)
1914 | {
1915 | 	static const XMVECTORF32 Scale0 = { 0.4124f, 0.2126f, 0.0193f, 0.0f };
1916 | 	static const XMVECTORF32 Scale1 = { 0.3576f, 0.7152f, 0.1192f, 0.0f };
1917 | 	static const XMVECTORF32 Scale2 = { 0.1805f, 0.0722f, 0.9505f, 0.0f };
1918 | 	static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 0.0f };
1919 | 	static const XMVECTORF32 Exp = { 2.4f, 2.4f, 2.4f, 1.0f };
1920 | 
1921 | 	XMVECTOR sel = XMVectorGreater(srgb, Cutoff);
1922 | 
1923 | 	// lclr = clr / 12.92
1924 | 	XMVECTOR smallC = XMVectorDivide(srgb, g_XMsrgbScale);
1925 | 
1926 | 	// lclr = pow( (clr + a) / (1+a), 2.4 )
1927 | 	XMVECTOR largeC = XMVectorPow(XMVectorDivide(XMVectorAdd(srgb, g_XMsrgbA), g_XMsrgbA1), Exp);
1928 | 
1929 | 	XMVECTOR lclr = XMVectorSelect(smallC, largeC, sel);
1930 | 
1931 | 	XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
1932 | 	XMVECTOR clr = XMVector3Transform(lclr, M);
1933 | 
1934 | 	return XMVectorSelect(srgb, clr, g_XMSelect1110);
1935 | }
1936 | 
1937 | //------------------------------------------------------------------------------
1938 | 
1939 | inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB(FXMVECTOR rgb)
1940 | {
1941 | 	static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 1.f };
1942 | 	static const XMVECTORF32 Linear = { 12.92f, 12.92f, 12.92f, 1.f };
1943 | 	static const XMVECTORF32 Scale = { 1.055f, 1.055f, 1.055f, 1.f };
1944 | 	static const XMVECTORF32 Bias = { -0.055f, -0.055f, -0.055f, 0.f };
1945 | 	static const XMVECTORF32 InvGamma = { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.f };
1946 | 
1947 | 	XMVECTOR V = XMVectorSaturate(rgb);
1948 | 	XMVECTOR V0 = XMVectorMultiply(V, Linear);
1949 | 	XMVECTOR V1 = XMVectorMultiplyAdd(Scale, XMVectorPow(V, InvGamma), Bias);
1950 | 	XMVECTOR select = XMVectorLess(V, Cutoff);
1951 | 	V = XMVectorSelect(V1, V0, select);
1952 | 	return XMVectorSelect(rgb, V, g_XMSelect1110);
1953 | }
1954 | 
1955 | //------------------------------------------------------------------------------
1956 | 
1957 | inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB(FXMVECTOR srgb)
1958 | {
1959 | 	static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 1.f };
1960 | 	static const XMVECTORF32 ILinear = { 1.f / 12.92f, 1.f / 12.92f, 1.f / 12.92f, 1.f };
1961 | 	static const XMVECTORF32 Scale = { 1.f / 1.055f, 1.f / 1.055f, 1.f / 1.055f, 1.f };
1962 | 	static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f };
1963 | 	static const XMVECTORF32 Gamma = { 2.4f, 2.4f, 2.4f, 1.f };
1964 | 
1965 | 	XMVECTOR V = XMVectorSaturate(srgb);
1966 | 	XMVECTOR V0 = XMVectorMultiply(V, ILinear);
1967 | 	XMVECTOR V1 = XMVectorPow(XMVectorMultiply(XMVectorAdd(V, Bias), Scale), Gamma);
1968 | 	XMVECTOR select = XMVectorGreater(V, Cutoff);
1969 | 	V = XMVectorSelect(V0, V1, select);
1970 | 	return XMVectorSelect(srgb, V, g_XMSelect1110);
1971 | }
1972 | 
1973 | /****************************************************************************
1974 | *
1975 | * Miscellaneous
1976 | *
1977 | ****************************************************************************/
1978 | 
1979 | //------------------------------------------------------------------------------
1980 | #ifndef XM_CPU_ID
1981 | #if defined(__GNUC__) && defined(i386)
1982 | #define XM_CPU_ID(func, a, b, c, d) \
1983 |     __asm__ __volatile__ ( \
1984 | "        pushl %%ebx        \n" \
1985 | "        xorl %%ecx,%%ecx   \n" \
1986 | "        cpuid              \n" \
1987 | "        movl %%ebx, %%esi  \n" \
1988 | "        popl %%ebx         \n" : \
1989 |             "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
1990 | #elif defined(__GNUC__) && defined(__x86_64__)
1991 | #define XM_CPU_ID(func, a, b, c, d) \
1992 |     __asm__ __volatile__ ( \
1993 | "        pushq %%rbx        \n" \
1994 | "        xorq %%rcx,%%rcx   \n" \
1995 | "        cpuid              \n" \
1996 | "        movq %%rbx, %%rsi  \n" \
1997 | "        popq %%rbx         \n" : \
1998 |             "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
1999 | #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
2000 | #define XM_CPU_ID(func, a, b, c, d) \
2001 |     __asm { \
2002 |         __asm mov eax, func \
2003 |         __asm xor ecx, ecx \
2004 |         __asm cpuid \
2005 |         __asm mov a, eax \
2006 |         __asm mov b, ebx \
2007 |         __asm mov c, ecx \
2008 |         __asm mov d, edx \
2009 | }
2010 | #elif defined(_MSC_VER) && defined(_M_X64)
2011 | #define XM_CPU_ID(func, a, b, c, d) \
2012 | { \
2013 |     int CPUInfo[4]; \
2014 |     __cpuid(CPUInfo, func); \
2015 |     a = CPUInfo[0]; \
2016 |     b = CPUInfo[1]; \
2017 |     c = CPUInfo[2]; \
2018 |     d = CPUInfo[3]; \
2019 | }
2020 | #else
2021 | #define XM_CPU_ID(func, a, b, c, d) \
2022 |     a = b = c = d = 0
2023 | #endif
2024 | #endif
2025 | 
2026 | inline bool XMVerifyCPUSupport()
2027 | {
2028 | 	return true;
2029 | }
2030 | 
2031 | #ifdef XM_CPU_ID
2032 | #undef XM_CPU_ID
2033 | #endif
2034 | 
2035 | //------------------------------------------------------------------------------
2036 | 
2037 | inline XMVECTOR XM_CALLCONV XMFresnelTerm
2038 | (
2039 | 	FXMVECTOR CosIncidentAngle,
2040 | 	FXMVECTOR RefractionIndex
2041 | )
2042 | {
2043 | 	assert(!XMVector4IsInfinite(CosIncidentAngle));
2044 | 
2045 | 	// Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
2046 | 	// c = CosIncidentAngle
2047 | 	// g = sqrt(c^2 + RefractionIndex^2 - 1)
2048 | 
2049 | #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
2050 | 
2051 | 	XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
2052 | 	G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
2053 | 	G = XMVectorAbs(G);
2054 | 	G = XMVectorSqrt(G);
2055 | 
2056 | 	XMVECTOR S = XMVectorAdd(G, CosIncidentAngle);
2057 | 	XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle);
2058 | 
2059 | 	XMVECTOR V0 = XMVectorMultiply(D, D);
2060 | 	XMVECTOR V1 = XMVectorMultiply(S, S);
2061 | 	V1 = XMVectorReciprocal(V1);
2062 | 	V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
2063 | 	V0 = XMVectorMultiply(V0, V1);
2064 | 
2065 | 	XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
2066 | 	XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
2067 | 	V2 = XMVectorMultiply(V2, V2);
2068 | 	V3 = XMVectorMultiply(V3, V3);
2069 | 	V3 = XMVectorReciprocal(V3);
2070 | 	V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
2071 | 
2072 | 	XMVECTOR Result = XMVectorMultiply(V0, V2);
2073 | 
2074 | 	Result = XMVectorSaturate(Result);
2075 | 
2076 | 	return Result;
2077 | 
2078 | #elif defined(_XM_SSE_INTRINSICS_)
2079 | 	// G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
2080 | 	XMVECTOR G = _mm_mul_ps(RefractionIndex, RefractionIndex);
2081 | 	XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle, CosIncidentAngle);
2082 | 	G = _mm_sub_ps(G, g_XMOne);
2083 | 	vTemp = _mm_add_ps(vTemp, G);
2084 | 	// max((0-vTemp),vTemp) == abs(vTemp)
2085 | 	// The abs is needed to deal with refraction and cosine being zero
2086 | 	G = _mm_setzero_ps();
2087 | 	G = _mm_sub_ps(G, vTemp);
2088 | 	G = _mm_max_ps(G, vTemp);
2089 | 	// Last operation, the sqrt()
2090 | 	G = _mm_sqrt_ps(G);
2091 | 
2092 | 	// Calc G-C and G+C
2093 | 	XMVECTOR GAddC = _mm_add_ps(G, CosIncidentAngle);
2094 | 	XMVECTOR GSubC = _mm_sub_ps(G, CosIncidentAngle);
2095 | 	// Perform the term (0.5f *(g - c)^2) / (g + c)^2 
2096 | 	XMVECTOR vResult = _mm_mul_ps(GSubC, GSubC);
2097 | 	vTemp = _mm_mul_ps(GAddC, GAddC);
2098 | 	vResult = _mm_mul_ps(vResult, g_XMOneHalf);
2099 | 	vResult = _mm_div_ps(vResult, vTemp);
2100 | 	// Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
2101 | 	GAddC = _mm_mul_ps(GAddC, CosIncidentAngle);
2102 | 	GSubC = _mm_mul_ps(GSubC, CosIncidentAngle);
2103 | 	GAddC = _mm_sub_ps(GAddC, g_XMOne);
2104 | 	GSubC = _mm_add_ps(GSubC, g_XMOne);
2105 | 	GAddC = _mm_mul_ps(GAddC, GAddC);
2106 | 	GSubC = _mm_mul_ps(GSubC, GSubC);
2107 | 	GAddC = _mm_div_ps(GAddC, GSubC);
2108 | 	GAddC = _mm_add_ps(GAddC, g_XMOne);
2109 | 	// Multiply the two term parts
2110 | 	vResult = _mm_mul_ps(vResult, GAddC);
2111 | 	// Clamp to 0.0 - 1.0f
2112 | 	vResult = _mm_max_ps(vResult, g_XMZero);
2113 | 	vResult = _mm_min_ps(vResult, g_XMOne);
2114 | 	return vResult;
2115 | #endif
2116 | }
2117 | 
2118 | //------------------------------------------------------------------------------
2119 | 
2120 | inline bool XMScalarNearEqual
2121 | (
2122 | 	float S1,
2123 | 	float S2,
2124 | 	float Epsilon
2125 | )
2126 | {
2127 | 	float Delta = S1 - S2;
2128 | 	return (fabsf(Delta) <= Epsilon);
2129 | }
2130 | 
2131 | //------------------------------------------------------------------------------
2132 | // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
2133 | inline float XMScalarModAngle
2134 | (
2135 | 	float Angle
2136 | )
2137 | {
2138 | 	// Note: The modulo is performed with unsigned math only to work
2139 | 	// around a precision error on numbers that are close to PI
2140 | 
2141 | 	// Normalize the range from 0.0f to XM_2PI
2142 | 	Angle = Angle + XM_PI;
2143 | 	// Perform the modulo, unsigned
2144 | 	float fTemp = fabsf(Angle);
2145 | 	fTemp = fTemp - (XM_2PI * (float)((int32_t)(fTemp / XM_2PI)));
2146 | 	// Restore the number to the range of -XM_PI to XM_PI-epsilon
2147 | 	fTemp = fTemp - XM_PI;
2148 | 	// If the modulo'd value was negative, restore negation
2149 | 	if (Angle<0.0f) {
2150 | 		fTemp = -fTemp;
2151 | 	}
2152 | 	return fTemp;
2153 | }
2154 | 
2155 | //------------------------------------------------------------------------------
2156 | 
2157 | inline float XMScalarSin
2158 | (
2159 | 	float Value
2160 | )
2161 | {
2162 | 	// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2163 | 	float quotient = XM_1DIV2PI*Value;
2164 | 	if (Value >= 0.0f)
2165 | 	{
2166 | 		quotient = (float)((int)(quotient + 0.5f));
2167 | 	}
2168 | 	else
2169 | 	{
2170 | 		quotient = (float)((int)(quotient - 0.5f));
2171 | 	}
2172 | 	float y = Value - XM_2PI*quotient;
2173 | 
2174 | 	// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
2175 | 	if (y > XM_PIDIV2)
2176 | 	{
2177 | 		y = XM_PI - y;
2178 | 	}
2179 | 	else if (y < -XM_PIDIV2)
2180 | 	{
2181 | 		y = -XM_PI - y;
2182 | 	}
2183 | 
2184 | 	// 11-degree minimax approximation
2185 | 	float y2 = y * y;
2186 | 	return (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y;
2187 | }
2188 | 
2189 | //------------------------------------------------------------------------------
2190 | 
2191 | inline float XMScalarSinEst
2192 | (
2193 | 	float Value
2194 | )
2195 | {
2196 | 	// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2197 | 	float quotient = XM_1DIV2PI*Value;
2198 | 	if (Value >= 0.0f)
2199 | 	{
2200 | 		quotient = (float)((int)(quotient + 0.5f));
2201 | 	}
2202 | 	else
2203 | 	{
2204 | 		quotient = (float)((int)(quotient - 0.5f));
2205 | 	}
2206 | 	float y = Value - XM_2PI*quotient;
2207 | 
2208 | 	// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
2209 | 	if (y > XM_PIDIV2)
2210 | 	{
2211 | 		y = XM_PI - y;
2212 | 	}
2213 | 	else if (y < -XM_PIDIV2)
2214 | 	{
2215 | 		y = -XM_PI - y;
2216 | 	}
2217 | 
2218 | 	// 7-degree minimax approximation
2219 | 	float y2 = y * y;
2220 | 	return (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + 1.0f) * y;
2221 | }
2222 | 
2223 | //------------------------------------------------------------------------------
2224 | 
2225 | inline float XMScalarCos
2226 | (
2227 | 	float Value
2228 | )
2229 | {
2230 | 	// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2231 | 	float quotient = XM_1DIV2PI*Value;
2232 | 	if (Value >= 0.0f)
2233 | 	{
2234 | 		quotient = (float)((int)(quotient + 0.5f));
2235 | 	}
2236 | 	else
2237 | 	{
2238 | 		quotient = (float)((int)(quotient - 0.5f));
2239 | 	}
2240 | 	float y = Value - XM_2PI*quotient;
2241 | 
2242 | 	// Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
2243 | 	float sign;
2244 | 	if (y > XM_PIDIV2)
2245 | 	{
2246 | 		y = XM_PI - y;
2247 | 		sign = -1.0f;
2248 | 	}
2249 | 	else if (y < -XM_PIDIV2)
2250 | 	{
2251 | 		y = -XM_PI - y;
2252 | 		sign = -1.0f;
2253 | 	}
2254 | 	else
2255 | 	{
2256 | 		sign = +1.0f;
2257 | 	}
2258 | 
2259 | 	// 10-degree minimax approximation
2260 | 	float y2 = y*y;
2261 | 	float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f;
2262 | 	return sign*p;
2263 | }
2264 | 
2265 | //------------------------------------------------------------------------------
2266 | 
2267 | inline float XMScalarCosEst
2268 | (
2269 | 	float Value
2270 | )
2271 | {
2272 | 	// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2273 | 	float quotient = XM_1DIV2PI*Value;
2274 | 	if (Value >= 0.0f)
2275 | 	{
2276 | 		quotient = (float)((int)(quotient + 0.5f));
2277 | 	}
2278 | 	else
2279 | 	{
2280 | 		quotient = (float)((int)(quotient - 0.5f));
2281 | 	}
2282 | 	float y = Value - XM_2PI*quotient;
2283 | 
2284 | 	// Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
2285 | 	float sign;
2286 | 	if (y > XM_PIDIV2)
2287 | 	{
2288 | 		y = XM_PI - y;
2289 | 		sign = -1.0f;
2290 | 	}
2291 | 	else if (y < -XM_PIDIV2)
2292 | 	{
2293 | 		y = -XM_PI - y;
2294 | 		sign = -1.0f;
2295 | 	}
2296 | 	else
2297 | 	{
2298 | 		sign = +1.0f;
2299 | 	}
2300 | 
2301 | 	// 6-degree minimax approximation
2302 | 	float y2 = y * y;
2303 | 	float p = ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f;
2304 | 	return sign*p;
2305 | }
2306 | 
2307 | //------------------------------------------------------------------------------
2308 | 
2309 | _Use_decl_annotations_
2310 | inline void XMScalarSinCos
2311 | (
2312 | 	float* pSin,
2313 | 	float* pCos,
2314 | 	float  Value
2315 | )
2316 | {
2317 | 	assert(pSin);
2318 | 	assert(pCos);
2319 | 
2320 | 	// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2321 | 	float quotient = XM_1DIV2PI*Value;
2322 | 	if (Value >= 0.0f)
2323 | 	{
2324 | 		quotient = (float)((int)(quotient + 0.5f));
2325 | 	}
2326 | 	else
2327 | 	{
2328 | 		quotient = (float)((int)(quotient - 0.5f));
2329 | 	}
2330 | 	float y = Value - XM_2PI*quotient;
2331 | 
2332 | 	// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
2333 | 	float sign;
2334 | 	if (y > XM_PIDIV2)
2335 | 	{
2336 | 		y = XM_PI - y;
2337 | 		sign = -1.0f;
2338 | 	}
2339 | 	else if (y < -XM_PIDIV2)
2340 | 	{
2341 | 		y = -XM_PI - y;
2342 | 		sign = -1.0f;
2343 | 	}
2344 | 	else
2345 | 	{
2346 | 		sign = +1.0f;
2347 | 	}
2348 | 
2349 | 	float y2 = y * y;
2350 | 
2351 | 	// 11-degree minimax approximation
2352 | 	*pSin = (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y;
2353 | 
2354 | 	// 10-degree minimax approximation
2355 | 	float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f;
2356 | 	*pCos = sign*p;
2357 | }
2358 | 
2359 | //------------------------------------------------------------------------------
2360 | 
2361 | _Use_decl_annotations_
2362 | inline void XMScalarSinCosEst
2363 | (
2364 | 	float* pSin,
2365 | 	float* pCos,
2366 | 	float  Value
2367 | )
2368 | {
2369 | 	assert(pSin);
2370 | 	assert(pCos);
2371 | 
2372 | 	// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2373 | 	float quotient = XM_1DIV2PI*Value;
2374 | 	if (Value >= 0.0f)
2375 | 	{
2376 | 		quotient = (float)((int)(quotient + 0.5f));
2377 | 	}
2378 | 	else
2379 | 	{
2380 | 		quotient = (float)((int)(quotient - 0.5f));
2381 | 	}
2382 | 	float y = Value - XM_2PI*quotient;
2383 | 
2384 | 	// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
2385 | 	float sign;
2386 | 	if (y > XM_PIDIV2)
2387 | 	{
2388 | 		y = XM_PI - y;
2389 | 		sign = -1.0f;
2390 | 	}
2391 | 	else if (y < -XM_PIDIV2)
2392 | 	{
2393 | 		y = -XM_PI - y;
2394 | 		sign = -1.0f;
2395 | 	}
2396 | 	else
2397 | 	{
2398 | 		sign = +1.0f;
2399 | 	}
2400 | 
2401 | 	float y2 = y * y;
2402 | 
2403 | 	// 7-degree minimax approximation
2404 | 	*pSin = (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + 1.0f) * y;
2405 | 
2406 | 	// 6-degree minimax approximation
2407 | 	float p = ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f;
2408 | 	*pCos = sign*p;
2409 | }
2410 | 
2411 | //------------------------------------------------------------------------------
2412 | 
2413 | inline float XMScalarASin
2414 | (
2415 | 	float Value
2416 | )
2417 | {
2418 | 	// Clamp input to [-1,1].
2419 | 	bool nonnegative = (Value >= 0.0f);
2420 | 	float x = fabsf(Value);
2421 | 	float omx = 1.0f - x;
2422 | 	if (omx < 0.0f)
2423 | 	{
2424 | 		omx = 0.0f;
2425 | 	}
2426 | 	float root = sqrtf(omx);
2427 | 
2428 | 	// 7-degree minimax approximation
2429 | 	float result = ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + 0.0308918810f) * x - 0.0501743046f) * x + 0.0889789874f) * x - 0.2145988016f) * x + 1.5707963050f;
2430 | 	result *= root;  // acos(|x|)
2431 | 
2432 | 					 // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
2433 | 	return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
2434 | }
2435 | 
2436 | //------------------------------------------------------------------------------
2437 | 
2438 | inline float XMScalarASinEst
2439 | (
2440 | 	float Value
2441 | )
2442 | {
2443 | 	// Clamp input to [-1,1].
2444 | 	bool nonnegative = (Value >= 0.0f);
2445 | 	float x = fabsf(Value);
2446 | 	float omx = 1.0f - x;
2447 | 	if (omx < 0.0f)
2448 | 	{
2449 | 		omx = 0.0f;
2450 | 	}
2451 | 	float root = sqrtf(omx);
2452 | 
2453 | 	// 3-degree minimax approximation
2454 | 	float result = ((-0.0187293f*x + 0.0742610f)*x - 0.2121144f)*x + 1.5707288f;
2455 | 	result *= root;  // acos(|x|)
2456 | 
2457 | 					 // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
2458 | 	return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
2459 | }
2460 | 
2461 | //------------------------------------------------------------------------------
2462 | 
2463 | inline float XMScalarACos
2464 | (
2465 | 	float Value
2466 | )
2467 | {
2468 | 	// Clamp input to [-1,1].
2469 | 	bool nonnegative = (Value >= 0.0f);
2470 | 	float x = fabsf(Value);
2471 | 	float omx = 1.0f - x;
2472 | 	if (omx < 0.0f)
2473 | 	{
2474 | 		omx = 0.0f;
2475 | 	}
2476 | 	float root = sqrtf(omx);
2477 | 
2478 | 	// 7-degree minimax approximation
2479 | 	float result = ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + 0.0308918810f) * x - 0.0501743046f) * x + 0.0889789874f) * x - 0.2145988016f) * x + 1.5707963050f;
2480 | 	result *= root;
2481 | 
2482 | 	// acos(x) = pi - acos(-x) when x < 0
2483 | 	return (nonnegative ? result : XM_PI - result);
2484 | }
2485 | 
2486 | //------------------------------------------------------------------------------
2487 | 
2488 | inline float XMScalarACosEst
2489 | (
2490 | 	float Value
2491 | )
2492 | {
2493 | 	// Clamp input to [-1,1].
2494 | 	bool nonnegative = (Value >= 0.0f);
2495 | 	float x = fabsf(Value);
2496 | 	float omx = 1.0f - x;
2497 | 	if (omx < 0.0f)
2498 | 	{
2499 | 		omx = 0.0f;
2500 | 	}
2501 | 	float root = sqrtf(omx);
2502 | 
2503 | 	// 3-degree minimax approximation
2504 | 	float result = ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x + 1.5707288f;
2505 | 	result *= root;
2506 | 
2507 | 	// acos(x) = pi - acos(-x) when x < 0
2508 | 	return (nonnegative ? result : XM_PI - result);
2509 | }
2510 | 


--------------------------------------------------------------------------------