├── README.md ├── MIT.txt └── Inc ├── XColors.h ├── XCollision.h ├── XPackedVector.h ├── XMathConvert.inl └── XMathMisc.inl /README.md: -------------------------------------------------------------------------------- 1 | # XMath 2 | Modified DirectXMath for cross-platform compiling 3 | 4 | Tested on MSVC, LinuxGCC, OSXClang, IOSClang, AndroidGCC 5 | 6 | ## Todo 7 | Complete column-order matrix mathematics 8 | 9 | ## Sample predefines for including 10 | 11 | ```cpp 12 | #if defined(BUILD_ARCH_ARM) 13 | # if defined(__ARM_NEON) && BUILD_INTRINSICS_LEVEL > 0 14 | # define _XM_ARM_NEON_INTRINSICS_ 15 | # else 16 | # define _XM_NO_INTRINSICS_ 17 | # endif 18 | #else 19 | # if BUILD_INTRINSICS_LEVEL > 0 20 | # define _XM_SSE_INTRINSICS_ 21 | # endif 22 | # if BUILD_INTRINSICS_LEVEL > 1 23 | # define _XM_SSE3_INTRINSICS_ 24 | # define _XM_SSE4_INTRINSICS_ 25 | # define _XM_AVX_INTRINSICS_ 26 | # endif 27 | # if BUILD_INTRINSICS_LEVEL > 2 28 | # define _XM_F16C_INTRINSICS_ 29 | # endif 30 | #endif 31 | #if defined(VE_COMPILER_GCC) || defined(BUILD_PLATFORM_IOS) 32 | # define _XM_NO_CALL_CONVENTION_ 33 | #endif 34 | #if defined(BUILD_PLATFORM_IOS) || defined(BUILD_PLATFORM_ANDROID) 35 | # define _XM_ARM_NEON_NO_ALIGN_ 36 | #endif 37 | //#define _XM_NO_INTRINSICS_ 38 | #include "XMath/XMath.h" 39 | ``` 40 | -------------------------------------------------------------------------------- /MIT.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Microsoft Corp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | software and associated documentation files (the "Software"), to deal in the Software 7 | without restriction, including without limitation the rights to use, copy, modify, 8 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all copies 13 | or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 16 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 17 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 18 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 19 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 20 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | -------------------------------------------------------------------------------- /Inc/XColors.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------- 2 | // XColors.h -- C++ Color Math library 3 | // 4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF 5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO 6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A 7 | // PARTICULAR PURPOSE. 8 | // 9 | // Copyright (c) Microsoft Corporation. All rights reserved. 
10 | //------------------------------------------------------------------------------------- 11 | 12 | #pragma once 13 | 14 | #include "XMath.h" 15 | 16 | namespace XMath 17 | { 18 | 19 | namespace Colors 20 | { 21 | // Standard colors (Red/Green/Blue/Alpha) 22 | XMGLOBALCONST XMVECTORF32 AliceBlue = {0.941176534f, 0.972549081f, 1.000000000f, 1.000000000f}; 23 | XMGLOBALCONST XMVECTORF32 AntiqueWhite = {0.980392218f, 0.921568692f, 0.843137324f, 1.000000000f}; 24 | XMGLOBALCONST XMVECTORF32 Aqua = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f}; 25 | XMGLOBALCONST XMVECTORF32 Aquamarine = {0.498039246f, 1.000000000f, 0.831372619f, 1.000000000f}; 26 | XMGLOBALCONST XMVECTORF32 Azure = {0.941176534f, 1.000000000f, 1.000000000f, 1.000000000f}; 27 | XMGLOBALCONST XMVECTORF32 Beige = {0.960784376f, 0.960784376f, 0.862745166f, 1.000000000f}; 28 | XMGLOBALCONST XMVECTORF32 Bisque = {1.000000000f, 0.894117713f, 0.768627524f, 1.000000000f}; 29 | XMGLOBALCONST XMVECTORF32 Black = {0.000000000f, 0.000000000f, 0.000000000f, 1.000000000f}; 30 | XMGLOBALCONST XMVECTORF32 BlanchedAlmond = {1.000000000f, 0.921568692f, 0.803921640f, 1.000000000f}; 31 | XMGLOBALCONST XMVECTORF32 Blue = {0.000000000f, 0.000000000f, 1.000000000f, 1.000000000f}; 32 | XMGLOBALCONST XMVECTORF32 BlueViolet = {0.541176498f, 0.168627456f, 0.886274576f, 1.000000000f}; 33 | XMGLOBALCONST XMVECTORF32 Brown = {0.647058845f, 0.164705887f, 0.164705887f, 1.000000000f}; 34 | XMGLOBALCONST XMVECTORF32 BurlyWood = {0.870588303f, 0.721568644f, 0.529411793f, 1.000000000f}; 35 | XMGLOBALCONST XMVECTORF32 CadetBlue = {0.372549027f, 0.619607866f, 0.627451003f, 1.000000000f}; 36 | XMGLOBALCONST XMVECTORF32 Chartreuse = {0.498039246f, 1.000000000f, 0.000000000f, 1.000000000f}; 37 | XMGLOBALCONST XMVECTORF32 Chocolate = {0.823529482f, 0.411764741f, 0.117647067f, 1.000000000f}; 38 | XMGLOBALCONST XMVECTORF32 Coral = {1.000000000f, 0.498039246f, 0.313725501f, 1.000000000f}; 39 | XMGLOBALCONST XMVECTORF32 CornflowerBlue = {0.392156899f, 0.584313750f, 0.929411829f, 1.000000000f}; 40 | XMGLOBALCONST XMVECTORF32 Cornsilk = {1.000000000f, 0.972549081f, 0.862745166f, 1.000000000f}; 41 | XMGLOBALCONST XMVECTORF32 Crimson = {0.862745166f, 0.078431375f, 0.235294133f, 1.000000000f}; 42 | XMGLOBALCONST XMVECTORF32 Cyan = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f}; 43 | XMGLOBALCONST XMVECTORF32 DarkBlue = {0.000000000f, 0.000000000f, 0.545098066f, 1.000000000f}; 44 | XMGLOBALCONST XMVECTORF32 DarkCyan = {0.000000000f, 0.545098066f, 0.545098066f, 1.000000000f}; 45 | XMGLOBALCONST XMVECTORF32 DarkGoldenrod = {0.721568644f, 0.525490224f, 0.043137256f, 1.000000000f}; 46 | XMGLOBALCONST XMVECTORF32 DarkGray = {0.662745118f, 0.662745118f, 0.662745118f, 1.000000000f}; 47 | XMGLOBALCONST XMVECTORF32 DarkGreen = {0.000000000f, 0.392156899f, 0.000000000f, 1.000000000f}; 48 | XMGLOBALCONST XMVECTORF32 DarkKhaki = {0.741176486f, 0.717647076f, 0.419607878f, 1.000000000f}; 49 | XMGLOBALCONST XMVECTORF32 DarkMagenta = {0.545098066f, 0.000000000f, 0.545098066f, 1.000000000f}; 50 | XMGLOBALCONST XMVECTORF32 DarkOliveGreen = {0.333333343f, 0.419607878f, 0.184313729f, 1.000000000f}; 51 | XMGLOBALCONST XMVECTORF32 DarkOrange = {1.000000000f, 0.549019635f, 0.000000000f, 1.000000000f}; 52 | XMGLOBALCONST XMVECTORF32 DarkOrchid = {0.600000024f, 0.196078449f, 0.800000072f, 1.000000000f}; 53 | XMGLOBALCONST XMVECTORF32 DarkRed = {0.545098066f, 0.000000000f, 0.000000000f, 1.000000000f}; 54 | XMGLOBALCONST XMVECTORF32 DarkSalmon = {0.913725555f, 0.588235319f, 
0.478431404f, 1.000000000f}; 55 | XMGLOBALCONST XMVECTORF32 DarkSeaGreen = {0.560784340f, 0.737254918f, 0.545098066f, 1.000000000f}; 56 | XMGLOBALCONST XMVECTORF32 DarkSlateBlue = {0.282352954f, 0.239215702f, 0.545098066f, 1.000000000f}; 57 | XMGLOBALCONST XMVECTORF32 DarkSlateGray = {0.184313729f, 0.309803933f, 0.309803933f, 1.000000000f}; 58 | XMGLOBALCONST XMVECTORF32 DarkTurquoise = {0.000000000f, 0.807843208f, 0.819607913f, 1.000000000f}; 59 | XMGLOBALCONST XMVECTORF32 DarkViolet = {0.580392182f, 0.000000000f, 0.827451050f, 1.000000000f}; 60 | XMGLOBALCONST XMVECTORF32 DeepPink = {1.000000000f, 0.078431375f, 0.576470613f, 1.000000000f}; 61 | XMGLOBALCONST XMVECTORF32 DeepSkyBlue = {0.000000000f, 0.749019623f, 1.000000000f, 1.000000000f}; 62 | XMGLOBALCONST XMVECTORF32 DimGray = {0.411764741f, 0.411764741f, 0.411764741f, 1.000000000f}; 63 | XMGLOBALCONST XMVECTORF32 DodgerBlue = {0.117647067f, 0.564705908f, 1.000000000f, 1.000000000f}; 64 | XMGLOBALCONST XMVECTORF32 Firebrick = {0.698039234f, 0.133333340f, 0.133333340f, 1.000000000f}; 65 | XMGLOBALCONST XMVECTORF32 FloralWhite = {1.000000000f, 0.980392218f, 0.941176534f, 1.000000000f}; 66 | XMGLOBALCONST XMVECTORF32 ForestGreen = {0.133333340f, 0.545098066f, 0.133333340f, 1.000000000f}; 67 | XMGLOBALCONST XMVECTORF32 Fuchsia = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f}; 68 | XMGLOBALCONST XMVECTORF32 Gainsboro = {0.862745166f, 0.862745166f, 0.862745166f, 1.000000000f}; 69 | XMGLOBALCONST XMVECTORF32 GhostWhite = {0.972549081f, 0.972549081f, 1.000000000f, 1.000000000f}; 70 | XMGLOBALCONST XMVECTORF32 Gold = {1.000000000f, 0.843137324f, 0.000000000f, 1.000000000f}; 71 | XMGLOBALCONST XMVECTORF32 Goldenrod = {0.854902029f, 0.647058845f, 0.125490203f, 1.000000000f}; 72 | XMGLOBALCONST XMVECTORF32 Gray = {0.501960814f, 0.501960814f, 0.501960814f, 1.000000000f}; 73 | XMGLOBALCONST XMVECTORF32 Green = {0.000000000f, 0.501960814f, 0.000000000f, 1.000000000f}; 74 | XMGLOBALCONST XMVECTORF32 GreenYellow = {0.678431392f, 1.000000000f, 0.184313729f, 1.000000000f}; 75 | XMGLOBALCONST XMVECTORF32 Honeydew = {0.941176534f, 1.000000000f, 0.941176534f, 1.000000000f}; 76 | XMGLOBALCONST XMVECTORF32 HotPink = {1.000000000f, 0.411764741f, 0.705882370f, 1.000000000f}; 77 | XMGLOBALCONST XMVECTORF32 IndianRed = {0.803921640f, 0.360784322f, 0.360784322f, 1.000000000f}; 78 | XMGLOBALCONST XMVECTORF32 Indigo = {0.294117659f, 0.000000000f, 0.509803951f, 1.000000000f}; 79 | XMGLOBALCONST XMVECTORF32 Ivory = {1.000000000f, 1.000000000f, 0.941176534f, 1.000000000f}; 80 | XMGLOBALCONST XMVECTORF32 Khaki = {0.941176534f, 0.901960850f, 0.549019635f, 1.000000000f}; 81 | XMGLOBALCONST XMVECTORF32 Lavender = {0.901960850f, 0.901960850f, 0.980392218f, 1.000000000f}; 82 | XMGLOBALCONST XMVECTORF32 LavenderBlush = {1.000000000f, 0.941176534f, 0.960784376f, 1.000000000f}; 83 | XMGLOBALCONST XMVECTORF32 LawnGreen = {0.486274540f, 0.988235354f, 0.000000000f, 1.000000000f}; 84 | XMGLOBALCONST XMVECTORF32 LemonChiffon = {1.000000000f, 0.980392218f, 0.803921640f, 1.000000000f}; 85 | XMGLOBALCONST XMVECTORF32 LightBlue = {0.678431392f, 0.847058892f, 0.901960850f, 1.000000000f}; 86 | XMGLOBALCONST XMVECTORF32 LightCoral = {0.941176534f, 0.501960814f, 0.501960814f, 1.000000000f}; 87 | XMGLOBALCONST XMVECTORF32 LightCyan = {0.878431439f, 1.000000000f, 1.000000000f, 1.000000000f}; 88 | XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = {0.980392218f, 0.980392218f, 0.823529482f, 1.000000000f}; 89 | XMGLOBALCONST XMVECTORF32 LightGreen = {0.564705908f, 0.933333397f, 
0.564705908f, 1.000000000f}; 90 | XMGLOBALCONST XMVECTORF32 LightGray = {0.827451050f, 0.827451050f, 0.827451050f, 1.000000000f}; 91 | XMGLOBALCONST XMVECTORF32 LightPink = {1.000000000f, 0.713725507f, 0.756862819f, 1.000000000f}; 92 | XMGLOBALCONST XMVECTORF32 LightSalmon = {1.000000000f, 0.627451003f, 0.478431404f, 1.000000000f}; 93 | XMGLOBALCONST XMVECTORF32 LightSeaGreen = {0.125490203f, 0.698039234f, 0.666666687f, 1.000000000f}; 94 | XMGLOBALCONST XMVECTORF32 LightSkyBlue = {0.529411793f, 0.807843208f, 0.980392218f, 1.000000000f}; 95 | XMGLOBALCONST XMVECTORF32 LightSlateGray = {0.466666698f, 0.533333361f, 0.600000024f, 1.000000000f}; 96 | XMGLOBALCONST XMVECTORF32 LightSteelBlue = {0.690196097f, 0.768627524f, 0.870588303f, 1.000000000f}; 97 | XMGLOBALCONST XMVECTORF32 LightYellow = {1.000000000f, 1.000000000f, 0.878431439f, 1.000000000f}; 98 | XMGLOBALCONST XMVECTORF32 Lime = {0.000000000f, 1.000000000f, 0.000000000f, 1.000000000f}; 99 | XMGLOBALCONST XMVECTORF32 LimeGreen = {0.196078449f, 0.803921640f, 0.196078449f, 1.000000000f}; 100 | XMGLOBALCONST XMVECTORF32 Linen = {0.980392218f, 0.941176534f, 0.901960850f, 1.000000000f}; 101 | XMGLOBALCONST XMVECTORF32 Magenta = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f}; 102 | XMGLOBALCONST XMVECTORF32 Maroon = {0.501960814f, 0.000000000f, 0.000000000f, 1.000000000f}; 103 | XMGLOBALCONST XMVECTORF32 MediumAquamarine = {0.400000036f, 0.803921640f, 0.666666687f, 1.000000000f}; 104 | XMGLOBALCONST XMVECTORF32 MediumBlue = {0.000000000f, 0.000000000f, 0.803921640f, 1.000000000f}; 105 | XMGLOBALCONST XMVECTORF32 MediumOrchid = {0.729411781f, 0.333333343f, 0.827451050f, 1.000000000f}; 106 | XMGLOBALCONST XMVECTORF32 MediumPurple = {0.576470613f, 0.439215720f, 0.858823597f, 1.000000000f}; 107 | XMGLOBALCONST XMVECTORF32 MediumSeaGreen = {0.235294133f, 0.701960802f, 0.443137288f, 1.000000000f}; 108 | XMGLOBALCONST XMVECTORF32 MediumSlateBlue = {0.482352972f, 0.407843173f, 0.933333397f, 1.000000000f}; 109 | XMGLOBALCONST XMVECTORF32 MediumSpringGreen = {0.000000000f, 0.980392218f, 0.603921592f, 1.000000000f}; 110 | XMGLOBALCONST XMVECTORF32 MediumTurquoise = {0.282352954f, 0.819607913f, 0.800000072f, 1.000000000f}; 111 | XMGLOBALCONST XMVECTORF32 MediumVioletRed = {0.780392230f, 0.082352944f, 0.521568656f, 1.000000000f}; 112 | XMGLOBALCONST XMVECTORF32 MidnightBlue = {0.098039225f, 0.098039225f, 0.439215720f, 1.000000000f}; 113 | XMGLOBALCONST XMVECTORF32 MintCream = {0.960784376f, 1.000000000f, 0.980392218f, 1.000000000f}; 114 | XMGLOBALCONST XMVECTORF32 MistyRose = {1.000000000f, 0.894117713f, 0.882353008f, 1.000000000f}; 115 | XMGLOBALCONST XMVECTORF32 Moccasin = {1.000000000f, 0.894117713f, 0.709803939f, 1.000000000f}; 116 | XMGLOBALCONST XMVECTORF32 NavajoWhite = {1.000000000f, 0.870588303f, 0.678431392f, 1.000000000f}; 117 | XMGLOBALCONST XMVECTORF32 Navy = {0.000000000f, 0.000000000f, 0.501960814f, 1.000000000f}; 118 | XMGLOBALCONST XMVECTORF32 OldLace = {0.992156923f, 0.960784376f, 0.901960850f, 1.000000000f}; 119 | XMGLOBALCONST XMVECTORF32 Olive = {0.501960814f, 0.501960814f, 0.000000000f, 1.000000000f}; 120 | XMGLOBALCONST XMVECTORF32 OliveDrab = {0.419607878f, 0.556862772f, 0.137254909f, 1.000000000f}; 121 | XMGLOBALCONST XMVECTORF32 Orange = {1.000000000f, 0.647058845f, 0.000000000f, 1.000000000f}; 122 | XMGLOBALCONST XMVECTORF32 OrangeRed = {1.000000000f, 0.270588249f, 0.000000000f, 1.000000000f}; 123 | XMGLOBALCONST XMVECTORF32 Orchid = {0.854902029f, 0.439215720f, 0.839215755f, 1.000000000f}; 124 | XMGLOBALCONST 
XMVECTORF32 PaleGoldenrod = {0.933333397f, 0.909803987f, 0.666666687f, 1.000000000f}; 125 | XMGLOBALCONST XMVECTORF32 PaleGreen = {0.596078455f, 0.984313786f, 0.596078455f, 1.000000000f}; 126 | XMGLOBALCONST XMVECTORF32 PaleTurquoise = {0.686274529f, 0.933333397f, 0.933333397f, 1.000000000f}; 127 | XMGLOBALCONST XMVECTORF32 PaleVioletRed = {0.858823597f, 0.439215720f, 0.576470613f, 1.000000000f}; 128 | XMGLOBALCONST XMVECTORF32 PapayaWhip = {1.000000000f, 0.937254965f, 0.835294187f, 1.000000000f}; 129 | XMGLOBALCONST XMVECTORF32 PeachPuff = {1.000000000f, 0.854902029f, 0.725490212f, 1.000000000f}; 130 | XMGLOBALCONST XMVECTORF32 Peru = {0.803921640f, 0.521568656f, 0.247058839f, 1.000000000f}; 131 | XMGLOBALCONST XMVECTORF32 Pink = {1.000000000f, 0.752941251f, 0.796078503f, 1.000000000f}; 132 | XMGLOBALCONST XMVECTORF32 Plum = {0.866666734f, 0.627451003f, 0.866666734f, 1.000000000f}; 133 | XMGLOBALCONST XMVECTORF32 PowderBlue = {0.690196097f, 0.878431439f, 0.901960850f, 1.000000000f}; 134 | XMGLOBALCONST XMVECTORF32 Purple = {0.501960814f, 0.000000000f, 0.501960814f, 1.000000000f}; 135 | XMGLOBALCONST XMVECTORF32 Red = {1.000000000f, 0.000000000f, 0.000000000f, 1.000000000f}; 136 | XMGLOBALCONST XMVECTORF32 RosyBrown = {0.737254918f, 0.560784340f, 0.560784340f, 1.000000000f}; 137 | XMGLOBALCONST XMVECTORF32 RoyalBlue = {0.254901975f, 0.411764741f, 0.882353008f, 1.000000000f}; 138 | XMGLOBALCONST XMVECTORF32 SaddleBrown = {0.545098066f, 0.270588249f, 0.074509807f, 1.000000000f}; 139 | XMGLOBALCONST XMVECTORF32 Salmon = {0.980392218f, 0.501960814f, 0.447058856f, 1.000000000f}; 140 | XMGLOBALCONST XMVECTORF32 SandyBrown = {0.956862807f, 0.643137276f, 0.376470625f, 1.000000000f}; 141 | XMGLOBALCONST XMVECTORF32 SeaGreen = {0.180392161f, 0.545098066f, 0.341176480f, 1.000000000f}; 142 | XMGLOBALCONST XMVECTORF32 SeaShell = {1.000000000f, 0.960784376f, 0.933333397f, 1.000000000f}; 143 | XMGLOBALCONST XMVECTORF32 Sienna = {0.627451003f, 0.321568638f, 0.176470593f, 1.000000000f}; 144 | XMGLOBALCONST XMVECTORF32 Silver = {0.752941251f, 0.752941251f, 0.752941251f, 1.000000000f}; 145 | XMGLOBALCONST XMVECTORF32 SkyBlue = {0.529411793f, 0.807843208f, 0.921568692f, 1.000000000f}; 146 | XMGLOBALCONST XMVECTORF32 SlateBlue = {0.415686309f, 0.352941185f, 0.803921640f, 1.000000000f}; 147 | XMGLOBALCONST XMVECTORF32 SlateGray = {0.439215720f, 0.501960814f, 0.564705908f, 1.000000000f}; 148 | XMGLOBALCONST XMVECTORF32 Snow = {1.000000000f, 0.980392218f, 0.980392218f, 1.000000000f}; 149 | XMGLOBALCONST XMVECTORF32 SpringGreen = {0.000000000f, 1.000000000f, 0.498039246f, 1.000000000f}; 150 | XMGLOBALCONST XMVECTORF32 SteelBlue = {0.274509817f, 0.509803951f, 0.705882370f, 1.000000000f}; 151 | XMGLOBALCONST XMVECTORF32 Tan = {0.823529482f, 0.705882370f, 0.549019635f, 1.000000000f}; 152 | XMGLOBALCONST XMVECTORF32 Teal = {0.000000000f, 0.501960814f, 0.501960814f, 1.000000000f}; 153 | XMGLOBALCONST XMVECTORF32 Thistle = {0.847058892f, 0.749019623f, 0.847058892f, 1.000000000f}; 154 | XMGLOBALCONST XMVECTORF32 Tomato = {1.000000000f, 0.388235331f, 0.278431386f, 1.000000000f}; 155 | XMGLOBALCONST XMVECTORF32 Transparent = {0.000000000f, 0.000000000f, 0.000000000f, 0.000000000f}; 156 | XMGLOBALCONST XMVECTORF32 Turquoise = {0.250980407f, 0.878431439f, 0.815686345f, 1.000000000f}; 157 | XMGLOBALCONST XMVECTORF32 Violet = {0.933333397f, 0.509803951f, 0.933333397f, 1.000000000f}; 158 | XMGLOBALCONST XMVECTORF32 Wheat = {0.960784376f, 0.870588303f, 0.701960802f, 1.000000000f}; 159 | XMGLOBALCONST XMVECTORF32 White = 
{1.000000000f, 1.000000000f, 1.000000000f, 1.000000000f}; 160 | XMGLOBALCONST XMVECTORF32 WhiteSmoke = {0.960784376f, 0.960784376f, 0.960784376f, 1.000000000f}; 161 | XMGLOBALCONST XMVECTORF32 Yellow = {1.000000000f, 1.000000000f, 0.000000000f, 1.000000000f}; 162 | XMGLOBALCONST XMVECTORF32 YellowGreen = {0.603921592f, 0.803921640f, 0.196078449f, 1.000000000f}; 163 | 164 | }; // namespace Colors 165 | 166 | }; // namespace XMath 167 | -------------------------------------------------------------------------------- /Inc/XCollision.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------- 2 | // XCollision.h -- C++ Collision Math library 3 | // 4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF 5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO 6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A 7 | // PARTICULAR PURPOSE. 8 | // 9 | // Copyright (c) Microsoft Corporation. All rights reserved. 10 | //------------------------------------------------------------------------------------- 11 | 12 | #pragma once 13 | 14 | #include "XMath.h" 15 | 16 | namespace XMath 17 | { 18 | 19 | enum ContainmentType 20 | { 21 | DISJOINT = 0, 22 | INTERSECTS = 1, 23 | CONTAINS = 2, 24 | }; 25 | 26 | enum PlaneIntersectionType 27 | { 28 | FRONT = 0, 29 | INTERSECTING = 1, 30 | BACK = 2, 31 | }; 32 | 33 | struct BoundingBox; 34 | struct BoundingOrientedBox; 35 | struct BoundingFrustum; 36 | 37 | #ifdef _MSC_VER 38 | # pragma warning(push) 39 | # pragma warning(disable:4324 4820) 40 | // C4324: alignment padding warnings 41 | // C4820: Off by default noise 42 | #endif 43 | 44 | //------------------------------------------------------------------------------------- 45 | // Bounding sphere 46 | //------------------------------------------------------------------------------------- 47 | struct BoundingSphere 48 | { 49 | XMFLOAT3 Center; // Center of the sphere. 50 | float Radius; // Radius of the sphere. 
51 | 52 | // Creators 53 | BoundingSphere() : Center(0, 0, 0), Radius(1.f) {} 54 | XM_CONSTEXPR BoundingSphere(_In_ const XMFLOAT3& center, _In_ float radius) 55 | : Center(center), Radius(radius) {} 56 | BoundingSphere(_In_ const BoundingSphere& sp) 57 | : Center(sp.Center), Radius(sp.Radius) {} 58 | 59 | // Methods 60 | BoundingSphere& operator=(_In_ const BoundingSphere& sp) { Center = sp.Center; Radius = sp.Radius; return *this; } 61 | 62 | void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ FXMMATRIX M) const; 63 | void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const; 64 | // Transform the sphere 65 | 66 | ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const; 67 | ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const; 68 | ContainmentType Contains(_In_ const BoundingSphere& sh) const; 69 | ContainmentType Contains(_In_ const BoundingBox& box) const; 70 | ContainmentType Contains(_In_ const BoundingOrientedBox& box) const; 71 | ContainmentType Contains(_In_ const BoundingFrustum& fr) const; 72 | 73 | bool Intersects(_In_ const BoundingSphere& sh) const; 74 | bool Intersects(_In_ const BoundingBox& box) const; 75 | bool Intersects(_In_ const BoundingOrientedBox& box) const; 76 | bool Intersects(_In_ const BoundingFrustum& fr) const; 77 | 78 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const; 79 | // Triangle-sphere test 80 | 81 | PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const; 82 | // Plane-sphere test 83 | 84 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const; 85 | // Ray-sphere test 86 | 87 | ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2, 88 | _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const; 89 | // Test sphere against six planes (see BoundingFrustum::GetPlanes) 90 | 91 | // Static methods 92 | static void CreateMerged(_Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2); 93 | 94 | static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingBox& box); 95 | static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box); 96 | 97 | static void CreateFromPoints(_Out_ BoundingSphere& Out, _In_ size_t Count, 98 | _In_reads_bytes_(sizeof(XMFLOAT3) + Stride*(Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride); 99 | 100 | static void CreateFromFrustum(_Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr); 101 | }; 102 | 103 | //------------------------------------------------------------------------------------- 104 | // Axis-aligned bounding box 105 | //------------------------------------------------------------------------------------- 106 | struct BoundingBox 107 | { 108 | static const size_t CORNER_COUNT = 8; 109 | 110 | XMFLOAT3 Center; // Center of the box. 111 | XMFLOAT3 Extents; // Distance from the center to each side. 
112 | 113 | // Creators 114 | BoundingBox() : Center(0, 0, 0), Extents(1.f, 1.f, 1.f) {} 115 | XM_CONSTEXPR BoundingBox(_In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents) 116 | : Center(center), Extents(extents) {} 117 | BoundingBox(_In_ const BoundingBox& box) : Center(box.Center), Extents(box.Extents) {} 118 | 119 | // Methods 120 | BoundingBox& operator=(_In_ const BoundingBox& box) { Center = box.Center; Extents = box.Extents; return *this; } 121 | 122 | void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ FXMMATRIX M) const; 123 | void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const; 124 | 125 | void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const; 126 | // Gets the 8 corners of the box 127 | 128 | ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const; 129 | ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const; 130 | ContainmentType Contains(_In_ const BoundingSphere& sh) const; 131 | ContainmentType Contains(_In_ const BoundingBox& box) const; 132 | ContainmentType Contains(_In_ const BoundingOrientedBox& box) const; 133 | ContainmentType Contains(_In_ const BoundingFrustum& fr) const; 134 | 135 | bool Intersects(_In_ const BoundingSphere& sh) const; 136 | bool Intersects(_In_ const BoundingBox& box) const; 137 | bool Intersects(_In_ const BoundingOrientedBox& box) const; 138 | bool Intersects(_In_ const BoundingFrustum& fr) const; 139 | 140 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const; 141 | // Triangle-Box test 142 | 143 | PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const; 144 | // Plane-box test 145 | 146 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const; 147 | // Ray-Box test 148 | 149 | ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2, 150 | _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const; 151 | // Test box against six planes (see BoundingFrustum::GetPlanes) 152 | 153 | // Static methods 154 | static void CreateMerged(_Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2); 155 | 156 | static void CreateFromSphere(_Out_ BoundingBox& Out, _In_ const BoundingSphere& sh); 157 | 158 | static void XM_CALLCONV CreateFromPoints(_Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2); 159 | static void CreateFromPoints(_Out_ BoundingBox& Out, _In_ size_t Count, 160 | _In_reads_bytes_(sizeof(XMFLOAT3) + Stride*(Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride); 161 | }; 162 | 163 | //------------------------------------------------------------------------------------- 164 | // Oriented bounding box 165 | //------------------------------------------------------------------------------------- 166 | struct BoundingOrientedBox 167 | { 168 | static const size_t CORNER_COUNT = 8; 169 | 170 | XMFLOAT3 Center; // Center of the box. 171 | XMFLOAT3 Extents; // Distance from the center to each side. 172 | XMFLOAT4 Orientation; // Unit quaternion representing rotation (box -> world). 
173 | 174 | // Creators 175 | BoundingOrientedBox() : Center(0, 0, 0), Extents(1.f, 1.f, 1.f), Orientation(0, 0, 0, 1.f) {} 176 | XM_CONSTEXPR BoundingOrientedBox(_In_ const XMFLOAT3& _Center, _In_ const XMFLOAT3& _Extents, _In_ const XMFLOAT4& _Orientation) 177 | : Center(_Center), Extents(_Extents), Orientation(_Orientation) {} 178 | BoundingOrientedBox(_In_ const BoundingOrientedBox& box) 179 | : Center(box.Center), Extents(box.Extents), Orientation(box.Orientation) {} 180 | 181 | // Methods 182 | BoundingOrientedBox& operator=(_In_ const BoundingOrientedBox& box) { Center = box.Center; Extents = box.Extents; Orientation = box.Orientation; return *this; } 183 | 184 | void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ FXMMATRIX M) const; 185 | void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const; 186 | 187 | void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const; 188 | // Gets the 8 corners of the box 189 | 190 | ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const; 191 | ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const; 192 | ContainmentType Contains(_In_ const BoundingSphere& sh) const; 193 | ContainmentType Contains(_In_ const BoundingBox& box) const; 194 | ContainmentType Contains(_In_ const BoundingOrientedBox& box) const; 195 | ContainmentType Contains(_In_ const BoundingFrustum& fr) const; 196 | 197 | bool Intersects(_In_ const BoundingSphere& sh) const; 198 | bool Intersects(_In_ const BoundingBox& box) const; 199 | bool Intersects(_In_ const BoundingOrientedBox& box) const; 200 | bool Intersects(_In_ const BoundingFrustum& fr) const; 201 | 202 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const; 203 | // Triangle-OrientedBox test 204 | 205 | PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const; 206 | // Plane-OrientedBox test 207 | 208 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const; 209 | // Ray-OrientedBox test 210 | 211 | ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2, 212 | _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const; 213 | // Test OrientedBox against six planes (see BoundingFrustum::GetPlanes) 214 | 215 | // Static methods 216 | static void CreateFromBoundingBox(_Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box); 217 | 218 | static void CreateFromPoints(_Out_ BoundingOrientedBox& Out, _In_ size_t Count, 219 | _In_reads_bytes_(sizeof(XMFLOAT3) + Stride*(Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride); 220 | }; 221 | 222 | //------------------------------------------------------------------------------------- 223 | // Bounding frustum 224 | //------------------------------------------------------------------------------------- 225 | struct BoundingFrustum 226 | { 227 | static const size_t CORNER_COUNT = 8; 228 | 229 | XMFLOAT3 Origin; // Origin of the frustum (and projection). 230 | XMFLOAT4 Orientation; // Quaternion representing rotation. 231 | 232 | float RightSlope; // Positive X slope (X/Z). 233 | float LeftSlope; // Negative X slope. 234 | float TopSlope; // Positive Y slope (Y/Z). 235 | float BottomSlope; // Negative Y slope. 236 | float Near, Far; // Z of the near plane and far plane. 
237 | 238 | // Creators 239 | BoundingFrustum() : Origin(0, 0, 0), Orientation(0, 0, 0, 1.f), RightSlope(1.f), LeftSlope(-1.f), 240 | TopSlope(1.f), BottomSlope(-1.f), Near(0), Far(1.f) {} 241 | XM_CONSTEXPR BoundingFrustum(_In_ const XMFLOAT3& _Origin, _In_ const XMFLOAT4& _Orientation, 242 | _In_ float _RightSlope, _In_ float _LeftSlope, _In_ float _TopSlope, _In_ float _BottomSlope, 243 | _In_ float _Near, _In_ float _Far) 244 | : Origin(_Origin), Orientation(_Orientation), 245 | RightSlope(_RightSlope), LeftSlope(_LeftSlope), TopSlope(_TopSlope), BottomSlope(_BottomSlope), 246 | Near(_Near), Far(_Far) {} 247 | BoundingFrustum(_In_ const BoundingFrustum& fr) 248 | : Origin(fr.Origin), Orientation(fr.Orientation), RightSlope(fr.RightSlope), LeftSlope(fr.LeftSlope), 249 | TopSlope(fr.TopSlope), BottomSlope(fr.BottomSlope), Near(fr.Near), Far(fr.Far) {} 250 | BoundingFrustum(_In_ CXMMATRIX Projection) { CreateFromMatrix(*this, Projection); } 251 | 252 | // Methods 253 | BoundingFrustum& operator=(_In_ const BoundingFrustum& fr) { 254 | Origin = fr.Origin; Orientation = fr.Orientation; 255 | RightSlope = fr.RightSlope; LeftSlope = fr.LeftSlope; 256 | TopSlope = fr.TopSlope; BottomSlope = fr.BottomSlope; 257 | Near = fr.Near; Far = fr.Far; return *this; 258 | } 259 | 260 | void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX M) const; 261 | void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const; 262 | 263 | void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const; 264 | // Gets the 8 corners of the frustum 265 | 266 | ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const; 267 | ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const; 268 | ContainmentType Contains(_In_ const BoundingSphere& sp) const; 269 | ContainmentType Contains(_In_ const BoundingBox& box) const; 270 | ContainmentType Contains(_In_ const BoundingOrientedBox& box) const; 271 | ContainmentType Contains(_In_ const BoundingFrustum& fr) const; 272 | // Frustum-Frustum test 273 | 274 | bool Intersects(_In_ const BoundingSphere& sh) const; 275 | bool Intersects(_In_ const BoundingBox& box) const; 276 | bool Intersects(_In_ const BoundingOrientedBox& box) const; 277 | bool Intersects(_In_ const BoundingFrustum& fr) const; 278 | 279 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const; 280 | // Triangle-Frustum test 281 | 282 | PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const; 283 | // Plane-Frustum test 284 | 285 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const; 286 | // Ray-Frustum test 287 | 288 | ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2, 289 | _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const; 290 | // Test frustum against six planes (see BoundingFrustum::GetPlanes) 291 | 292 | void GetPlanes(_Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane, 293 | _Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane) const; 294 | // Create 6 Planes representation of Frustum 295 | 296 | // Static methods 297 | static void XM_CALLCONV CreateFromMatrix(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX Projection); 298 | }; 299 | 300 | //----------------------------------------------------------------------------- 
301 | // Triangle intersection testing routines. 302 | //----------------------------------------------------------------------------- 303 | namespace TriangleTests 304 | { 305 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ HXMVECTOR V2, _Out_ float& Dist); 306 | // Ray-Triangle 307 | 308 | bool XM_CALLCONV Intersects(_In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ HXMVECTOR B1, _In_ HXMVECTOR B2); 309 | // Triangle-Triangle 310 | 311 | PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane); 312 | // Plane-Triangle 313 | 314 | ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, 315 | _In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2, 316 | _In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5); 317 | // Test a triangle against six planes at once (see BoundingFrustum::GetPlanes) 318 | }; 319 | 320 | #ifdef _MSC_VER 321 | # pragma warning(pop) 322 | #endif 323 | 324 | /**************************************************************************** 325 | * 326 | * Implementation 327 | * 328 | ****************************************************************************/ 329 | 330 | #ifdef _MSC_VER 331 | # pragma warning(push) 332 | # pragma warning(disable : 4068 4365 4616 6001) 333 | // C4068/4616: ignore unknown pragmas 334 | // C4365: Off by default noise 335 | // C6001: False positives 336 | # pragma prefast(push) 337 | # pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") 338 | #endif 339 | 340 | #include "XCollision.inl" 341 | 342 | #ifdef _MSC_VER 343 | # pragma prefast(pop) 344 | # pragma warning(pop) 345 | #endif 346 | 347 | }; // namespace XMath 348 | -------------------------------------------------------------------------------- /Inc/XPackedVector.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------- 2 | // XPackedVector.h -- SIMD C++ Math library 3 | // 4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF 5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO 6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A 7 | // PARTICULAR PURPOSE. 8 | // 9 | // Copyright (c) Microsoft Corporation. All rights reserved. 10 | //------------------------------------------------------------------------------------- 11 | 12 | #pragma once 13 | 14 | #include "XMath.h" 15 | 16 | namespace XMath 17 | { 18 | 19 | #ifdef _MSC_VER 20 | # pragma warning(push) 21 | # pragma warning(disable:4201 4365 4324) 22 | // C4201: nonstandard extension used 23 | // C4365: Off by default noise 24 | // C4324: alignment padding warnings 25 | #endif 26 | 27 | //------------------------------------------------------------------------------ 28 | // ARGB Color; 8-8-8-8 bit unsigned normalized integer components packed into 29 | // a 32 bit integer. The normalized color is packed into 32 bits using 8 bit 30 | // unsigned, normalized integers for the alpha, red, green, and blue components. 
31 | // The alpha component is stored in the most significant bits and the blue 32 | // component in the least significant bits (A8R8G8B8): 33 | // [32] aaaaaaaa rrrrrrrr gggggggg bbbbbbbb [0] 34 | struct XMCOLOR 35 | { 36 | union 37 | { 38 | struct 39 | { 40 | uint8_t b; // Blue: 0/255 to 255/255 41 | uint8_t g; // Green: 0/255 to 255/255 42 | uint8_t r; // Red: 0/255 to 255/255 43 | uint8_t a; // Alpha: 0/255 to 255/255 44 | }; 45 | uint32_t c; 46 | }; 47 | 48 | XMCOLOR() XM_CTOR_DEFAULT 49 | XM_CONSTEXPR XMCOLOR(uint32_t Color) : c(Color) {} 50 | XMCOLOR(float _r, float _g, float _b, float _a); 51 | explicit XMCOLOR(_In_reads_(4) const float *pArray); 52 | 53 | operator uint32_t () const { return c; } 54 | 55 | XMCOLOR& operator= (const XMCOLOR& Color) { c = Color.c; return *this; } 56 | XMCOLOR& operator= (const uint32_t Color) { c = Color; return *this; } 57 | }; 58 | 59 | //------------------------------------------------------------------------------ 60 | // 16 bit floating point number consisting of a sign bit, a 5 bit biased 61 | // exponent, and a 10 bit mantissa 62 | typedef uint16_t HALF; 63 | 64 | //------------------------------------------------------------------------------ 65 | // 2D Vector; 16 bit floating point components 66 | struct XMHALF2 67 | { 68 | union 69 | { 70 | struct 71 | { 72 | HALF x; 73 | HALF y; 74 | }; 75 | uint32_t v; 76 | }; 77 | 78 | XMHALF2() XM_CTOR_DEFAULT 79 | explicit XM_CONSTEXPR XMHALF2(uint32_t Packed) : v(Packed) {} 80 | XM_CONSTEXPR XMHALF2(HALF _x, HALF _y) : x(_x), y(_y) {} 81 | explicit XMHALF2(_In_reads_(2) const HALF *pArray) : x(pArray[0]), y(pArray[1]) {} 82 | XMHALF2(float _x, float _y); 83 | explicit XMHALF2(_In_reads_(2) const float *pArray); 84 | 85 | XMHALF2& operator= (const XMHALF2& Half2) { x = Half2.x; y = Half2.y; return *this; } 86 | XMHALF2& operator= (uint32_t Packed) { v = Packed; return *this; } 87 | }; 88 | 89 | //------------------------------------------------------------------------------ 90 | // 2D Vector; 16 bit signed normalized integer components 91 | struct XMSHORTN2 92 | { 93 | union 94 | { 95 | struct 96 | { 97 | int16_t x; 98 | int16_t y; 99 | }; 100 | uint32_t v; 101 | }; 102 | 103 | XMSHORTN2() XM_CTOR_DEFAULT 104 | explicit XM_CONSTEXPR XMSHORTN2(uint32_t Packed) : v(Packed) {} 105 | XM_CONSTEXPR XMSHORTN2(int16_t _x, int16_t _y) : x(_x), y(_y) {} 106 | explicit XMSHORTN2(_In_reads_(2) const int16_t *pArray) : x(pArray[0]), y(pArray[1]) {} 107 | XMSHORTN2(float _x, float _y); 108 | explicit XMSHORTN2(_In_reads_(2) const float *pArray); 109 | 110 | XMSHORTN2& operator= (const XMSHORTN2& ShortN2) { x = ShortN2.x; y = ShortN2.y; return *this; } 111 | XMSHORTN2& operator= (uint32_t Packed) { v = Packed; return *this; } 112 | }; 113 | 114 | // 2D Vector; 16 bit signed integer components 115 | struct XMSHORT2 116 | { 117 | union 118 | { 119 | struct 120 | { 121 | int16_t x; 122 | int16_t y; 123 | }; 124 | uint32_t v; 125 | }; 126 | 127 | XMSHORT2() XM_CTOR_DEFAULT 128 | explicit XM_CONSTEXPR XMSHORT2(uint32_t Packed) : v(Packed) {} 129 | XM_CONSTEXPR XMSHORT2(int16_t _x, int16_t _y) : x(_x), y(_y) {} 130 | explicit XMSHORT2(_In_reads_(2) const int16_t *pArray) : x(pArray[0]), y(pArray[1]) {} 131 | XMSHORT2(float _x, float _y); 132 | explicit XMSHORT2(_In_reads_(2) const float *pArray); 133 | 134 | XMSHORT2& operator= (const XMSHORT2& Short2) { x = Short2.x; y = Short2.y; return *this; } 135 | XMSHORT2& operator= (uint32_t Packed) { v = Packed; return *this; } 136 | }; 137 | 138 | // 2D Vector; 16 bit unsigned 
normalized integer components 139 | struct XMUSHORTN2 140 | { 141 | union 142 | { 143 | struct 144 | { 145 | uint16_t x; 146 | uint16_t y; 147 | }; 148 | uint32_t v; 149 | }; 150 | 151 | XMUSHORTN2() XM_CTOR_DEFAULT 152 | explicit XM_CONSTEXPR XMUSHORTN2(uint32_t Packed) : v(Packed) {} 153 | XM_CONSTEXPR XMUSHORTN2(uint16_t _x, uint16_t _y) : x(_x), y(_y) {} 154 | explicit XMUSHORTN2(_In_reads_(2) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]) {} 155 | XMUSHORTN2(float _x, float _y); 156 | explicit XMUSHORTN2(_In_reads_(2) const float *pArray); 157 | 158 | XMUSHORTN2& operator= (const XMUSHORTN2& UShortN2) { x = UShortN2.x; y = UShortN2.y; return *this; } 159 | XMUSHORTN2& operator= (uint32_t Packed) { v = Packed; return *this; } 160 | }; 161 | 162 | // 2D Vector; 16 bit unsigned integer components 163 | struct XMUSHORT2 164 | { 165 | union 166 | { 167 | struct 168 | { 169 | uint16_t x; 170 | uint16_t y; 171 | }; 172 | uint32_t v; 173 | }; 174 | 175 | XMUSHORT2() XM_CTOR_DEFAULT 176 | explicit XM_CONSTEXPR XMUSHORT2(uint32_t Packed) : v(Packed) {} 177 | XM_CONSTEXPR XMUSHORT2(uint16_t _x, uint16_t _y) : x(_x), y(_y) {} 178 | explicit XMUSHORT2(_In_reads_(2) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]) {} 179 | XMUSHORT2(float _x, float _y); 180 | explicit XMUSHORT2(_In_reads_(2) const float *pArray); 181 | 182 | XMUSHORT2& operator= (const XMUSHORT2& UShort2) { x = UShort2.x; y = UShort2.y; return *this; } 183 | XMUSHORT2& operator= (uint32_t Packed) { v = Packed; return *this; } 184 | }; 185 | 186 | //------------------------------------------------------------------------------ 187 | // 2D Vector; 8 bit signed normalized integer components 188 | struct XMBYTEN2 189 | { 190 | union 191 | { 192 | struct 193 | { 194 | int8_t x; 195 | int8_t y; 196 | }; 197 | uint16_t v; 198 | }; 199 | 200 | XMBYTEN2() XM_CTOR_DEFAULT 201 | explicit XM_CONSTEXPR XMBYTEN2(uint16_t Packed) : v(Packed) {} 202 | XM_CONSTEXPR XMBYTEN2(int8_t _x, int8_t _y) : x(_x), y(_y) {} 203 | explicit XMBYTEN2(_In_reads_(2) const int8_t *pArray) : x(pArray[0]), y(pArray[1]) {} 204 | XMBYTEN2(float _x, float _y); 205 | explicit XMBYTEN2(_In_reads_(2) const float *pArray); 206 | 207 | XMBYTEN2& operator= (const XMBYTEN2& ByteN2) { x = ByteN2.x; y = ByteN2.y; return *this; } 208 | XMBYTEN2& operator= (uint16_t Packed) { v = Packed; return *this; } 209 | }; 210 | 211 | // 2D Vector; 8 bit signed integer components 212 | struct XMBYTE2 213 | { 214 | union 215 | { 216 | struct 217 | { 218 | int8_t x; 219 | int8_t y; 220 | }; 221 | uint16_t v; 222 | }; 223 | 224 | XMBYTE2() XM_CTOR_DEFAULT 225 | explicit XM_CONSTEXPR XMBYTE2(uint16_t Packed) : v(Packed) {} 226 | XM_CONSTEXPR XMBYTE2(int8_t _x, int8_t _y) : x(_x), y(_y) {} 227 | explicit XMBYTE2(_In_reads_(2) const int8_t *pArray) : x(pArray[0]), y(pArray[1]) {} 228 | XMBYTE2(float _x, float _y); 229 | explicit XMBYTE2(_In_reads_(2) const float *pArray); 230 | 231 | XMBYTE2& operator= (const XMBYTE2& Byte2) { x = Byte2.x; y = Byte2.y; return *this; } 232 | XMBYTE2& operator= (uint16_t Packed) { v = Packed; return *this; } 233 | }; 234 | 235 | // 2D Vector; 8 bit unsigned normalized integer components 236 | struct XMUBYTEN2 237 | { 238 | union 239 | { 240 | struct 241 | { 242 | uint8_t x; 243 | uint8_t y; 244 | }; 245 | uint16_t v; 246 | }; 247 | 248 | XMUBYTEN2() XM_CTOR_DEFAULT 249 | explicit XM_CONSTEXPR XMUBYTEN2(uint16_t Packed) : v(Packed) {} 250 | XM_CONSTEXPR XMUBYTEN2(uint8_t _x, uint8_t _y) : x(_x), y(_y) {} 251 | explicit XMUBYTEN2(_In_reads_(2) const uint8_t 
*pArray) : x(pArray[0]), y(pArray[1]) {} 252 | XMUBYTEN2(float _x, float _y); 253 | explicit XMUBYTEN2(_In_reads_(2) const float *pArray); 254 | 255 | XMUBYTEN2& operator= (const XMUBYTEN2& UByteN2) { x = UByteN2.x; y = UByteN2.y; return *this; } 256 | XMUBYTEN2& operator= (uint16_t Packed) { v = Packed; return *this; } 257 | }; 258 | 259 | // 2D Vector; 8 bit unsigned integer components 260 | struct XMUBYTE2 261 | { 262 | union 263 | { 264 | struct 265 | { 266 | uint8_t x; 267 | uint8_t y; 268 | }; 269 | uint16_t v; 270 | }; 271 | 272 | XMUBYTE2() XM_CTOR_DEFAULT 273 | explicit XM_CONSTEXPR XMUBYTE2(uint16_t Packed) : v(Packed) {} 274 | XM_CONSTEXPR XMUBYTE2(uint8_t _x, uint8_t _y) : x(_x), y(_y) {} 275 | explicit XMUBYTE2(_In_reads_(2) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]) {} 276 | XMUBYTE2(float _x, float _y); 277 | explicit XMUBYTE2(_In_reads_(2) const float *pArray); 278 | 279 | XMUBYTE2& operator= (const XMUBYTE2& UByte2) { x = UByte2.x; y = UByte2.y; return *this; } 280 | XMUBYTE2& operator= (uint16_t Packed) { v = Packed; return *this; } 281 | }; 282 | 283 | //------------------------------------------------------------------------------ 284 | // 3D vector: 5/6/5 unsigned integer components 285 | struct XMU565 286 | { 287 | union 288 | { 289 | struct 290 | { 291 | uint16_t x : 5; // 0 to 31 292 | uint16_t y : 6; // 0 to 63 293 | uint16_t z : 5; // 0 to 31 294 | }; 295 | uint16_t v; 296 | }; 297 | 298 | XMU565() XM_CTOR_DEFAULT 299 | explicit XM_CONSTEXPR XMU565(uint16_t Packed) : v(Packed) {} 300 | XM_CONSTEXPR XMU565(uint8_t _x, uint8_t _y, uint8_t _z) : x(_x), y(_y), z(_z) {} 301 | explicit XMU565(_In_reads_(3) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} 302 | XMU565(float _x, float _y, float _z); 303 | explicit XMU565(_In_reads_(3) const float *pArray); 304 | 305 | operator uint16_t () const { return v; } 306 | 307 | XMU565& operator= (const XMU565& U565) { v = U565.v; return *this; } 308 | XMU565& operator= (uint16_t Packed) { v = Packed; return *this; } 309 | }; 310 | 311 | //------------------------------------------------------------------------------ 312 | // 3D vector: 11/11/10 floating-point components 313 | // The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent 314 | // and 6-bit mantissa for x component, a 5-bit biased exponent and 315 | // 6-bit mantissa for y component, a 5-bit biased exponent and a 5-bit 316 | // mantissa for z. The z component is stored in the most significant bits 317 | // and the x component in the least significant bits. No sign bits so 318 | // all partial-precision numbers are positive. 
319 | // (Z10Y11X11): [32] ZZZZZzzz zzzYYYYY yyyyyyXX XXXxxxxx [0] 320 | struct XMFLOAT3PK 321 | { 322 | union 323 | { 324 | struct 325 | { 326 | uint32_t xm : 6; // x-mantissa 327 | uint32_t xe : 5; // x-exponent 328 | uint32_t ym : 6; // y-mantissa 329 | uint32_t ye : 5; // y-exponent 330 | uint32_t zm : 5; // z-mantissa 331 | uint32_t ze : 5; // z-exponent 332 | }; 333 | uint32_t v; 334 | }; 335 | 336 | XMFLOAT3PK() XM_CTOR_DEFAULT 337 | explicit XM_CONSTEXPR XMFLOAT3PK(uint32_t Packed) : v(Packed) {} 338 | XMFLOAT3PK(float _x, float _y, float _z); 339 | explicit XMFLOAT3PK(_In_reads_(3) const float *pArray); 340 | 341 | operator uint32_t () const { return v; } 342 | 343 | XMFLOAT3PK& operator= (const XMFLOAT3PK& float3pk) { v = float3pk.v; return *this; } 344 | XMFLOAT3PK& operator= (uint32_t Packed) { v = Packed; return *this; } 345 | }; 346 | 347 | //------------------------------------------------------------------------------ 348 | // 3D vector: 9/9/9 floating-point components with shared 5-bit exponent 349 | // The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent 350 | // with 9-bit mantissa for the x, y, and z component. The shared exponent 351 | // is stored in the most significant bits and the x component mantissa is in 352 | // the least significant bits. No sign bits so all partial-precision numbers 353 | // are positive. 354 | // (E5Z9Y9X9): [32] EEEEEzzz zzzzzzyy yyyyyyyx xxxxxxxx [0] 355 | struct XMFLOAT3SE 356 | { 357 | union 358 | { 359 | struct 360 | { 361 | uint32_t xm : 9; // x-mantissa 362 | uint32_t ym : 9; // y-mantissa 363 | uint32_t zm : 9; // z-mantissa 364 | uint32_t e : 5; // shared exponent 365 | }; 366 | uint32_t v; 367 | }; 368 | 369 | XMFLOAT3SE() XM_CTOR_DEFAULT 370 | explicit XM_CONSTEXPR XMFLOAT3SE(uint32_t Packed) : v(Packed) {} 371 | XMFLOAT3SE(float _x, float _y, float _z); 372 | explicit XMFLOAT3SE(_In_reads_(3) const float *pArray); 373 | 374 | operator uint32_t () const { return v; } 375 | 376 | XMFLOAT3SE& operator= (const XMFLOAT3SE& float3se) { v = float3se.v; return *this; } 377 | XMFLOAT3SE& operator= (uint32_t Packed) { v = Packed; return *this; } 378 | }; 379 | 380 | //------------------------------------------------------------------------------ 381 | // 4D Vector; 16 bit floating point components 382 | struct XMHALF4 383 | { 384 | union 385 | { 386 | struct 387 | { 388 | HALF x; 389 | HALF y; 390 | HALF z; 391 | HALF w; 392 | }; 393 | uint64_t v; 394 | }; 395 | 396 | XMHALF4() XM_CTOR_DEFAULT 397 | explicit XM_CONSTEXPR XMHALF4(uint64_t Packed) : v(Packed) {} 398 | XM_CONSTEXPR XMHALF4(HALF _x, HALF _y, HALF _z, HALF _w) : x(_x), y(_y), z(_z), w(_w) {} 399 | explicit XMHALF4(_In_reads_(4) const HALF *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 400 | XMHALF4(float _x, float _y, float _z, float _w); 401 | explicit XMHALF4(_In_reads_(4) const float *pArray); 402 | 403 | XMHALF4& operator= (const XMHALF4& Half4) { x = Half4.x; y = Half4.y; z = Half4.z; w = Half4.w; return *this; } 404 | XMHALF4& operator= (uint64_t Packed) { v = Packed; return *this; } 405 | }; 406 | 407 | //------------------------------------------------------------------------------ 408 | // 4D Vector; 16 bit signed normalized integer components 409 | struct XMSHORTN4 410 | { 411 | union 412 | { 413 | struct 414 | { 415 | int16_t x; 416 | int16_t y; 417 | int16_t z; 418 | int16_t w; 419 | }; 420 | uint64_t v; 421 | }; 422 | 423 | XMSHORTN4() XM_CTOR_DEFAULT 424 | explicit XM_CONSTEXPR XMSHORTN4(uint64_t Packed) : v(Packed) {} 425 
| XM_CONSTEXPR XMSHORTN4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) : x(_x), y(_y), z(_z), w(_w) {} 426 | explicit XMSHORTN4(_In_reads_(4) const int16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 427 | XMSHORTN4(float _x, float _y, float _z, float _w); 428 | explicit XMSHORTN4(_In_reads_(4) const float *pArray); 429 | 430 | XMSHORTN4& operator= (const XMSHORTN4& ShortN4) { x = ShortN4.x; y = ShortN4.y; z = ShortN4.z; w = ShortN4.w; return *this; } 431 | XMSHORTN4& operator= (uint64_t Packed) { v = Packed; return *this; } 432 | }; 433 | 434 | // 4D Vector; 16 bit signed integer components 435 | struct XMSHORT4 436 | { 437 | union 438 | { 439 | struct 440 | { 441 | int16_t x; 442 | int16_t y; 443 | int16_t z; 444 | int16_t w; 445 | }; 446 | uint64_t v; 447 | }; 448 | 449 | XMSHORT4() XM_CTOR_DEFAULT 450 | explicit XM_CONSTEXPR XMSHORT4(uint64_t Packed) : v(Packed) {} 451 | XM_CONSTEXPR XMSHORT4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) : x(_x), y(_y), z(_z), w(_w) {} 452 | explicit XMSHORT4(_In_reads_(4) const int16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 453 | XMSHORT4(float _x, float _y, float _z, float _w); 454 | explicit XMSHORT4(_In_reads_(4) const float *pArray); 455 | 456 | XMSHORT4& operator= (const XMSHORT4& Short4) { x = Short4.x; y = Short4.y; z = Short4.z; w = Short4.w; return *this; } 457 | XMSHORT4& operator= (uint64_t Packed) { v = Packed; return *this; } 458 | }; 459 | 460 | // 4D Vector; 16 bit unsigned normalized integer components 461 | struct XMUSHORTN4 462 | { 463 | union 464 | { 465 | struct 466 | { 467 | uint16_t x; 468 | uint16_t y; 469 | uint16_t z; 470 | uint16_t w; 471 | }; 472 | uint64_t v; 473 | }; 474 | 475 | XMUSHORTN4() XM_CTOR_DEFAULT 476 | explicit XM_CONSTEXPR XMUSHORTN4(uint64_t Packed) : v(Packed) {} 477 | XM_CONSTEXPR XMUSHORTN4(uint16_t _x, uint16_t _y, uint16_t _z, uint16_t _w) : x(_x), y(_y), z(_z), w(_w) {} 478 | explicit XMUSHORTN4(_In_reads_(4) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 479 | XMUSHORTN4(float _x, float _y, float _z, float _w); 480 | explicit XMUSHORTN4(_In_reads_(4) const float *pArray); 481 | 482 | XMUSHORTN4& operator= (const XMUSHORTN4& UShortN4) { x = UShortN4.x; y = UShortN4.y; z = UShortN4.z; w = UShortN4.w; return *this; } 483 | XMUSHORTN4& operator= (uint64_t Packed) { v = Packed; return *this; } 484 | }; 485 | 486 | // 4D Vector; 16 bit unsigned integer components 487 | struct XMUSHORT4 488 | { 489 | union 490 | { 491 | struct 492 | { 493 | uint16_t x; 494 | uint16_t y; 495 | uint16_t z; 496 | uint16_t w; 497 | }; 498 | uint64_t v; 499 | }; 500 | 501 | XMUSHORT4() XM_CTOR_DEFAULT 502 | explicit XM_CONSTEXPR XMUSHORT4(uint64_t Packed) : v(Packed) {} 503 | XM_CONSTEXPR XMUSHORT4(uint16_t _x, uint16_t _y, uint16_t _z, uint16_t _w) : x(_x), y(_y), z(_z), w(_w) {} 504 | explicit XMUSHORT4(_In_reads_(4) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 505 | XMUSHORT4(float _x, float _y, float _z, float _w); 506 | explicit XMUSHORT4(_In_reads_(4) const float *pArray); 507 | 508 | XMUSHORT4& operator= (const XMUSHORT4& UShort4) { x = UShort4.x; y = UShort4.y; z = UShort4.z; w = UShort4.w; return *this; } 509 | XMUSHORT4& operator= (uint64_t Packed) { v = Packed; return *this; } 510 | }; 511 | 512 | //------------------------------------------------------------------------------ 513 | // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer 514 | // The normalized 4D
Vector is packed into 32 bits as follows: a 2 bit unsigned, 515 | // normalized integer for the w component and 10 bit signed, normalized 516 | // integers for the z, y, and x components. The w component is stored in the 517 | // most significant bits and the x component in the least significant bits 518 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] 519 | struct XMXDECN4 520 | { 521 | union 522 | { 523 | struct 524 | { 525 | int32_t x : 10; // -511/511 to 511/511 526 | int32_t y : 10; // -511/511 to 511/511 527 | int32_t z : 10; // -511/511 to 511/511 528 | uint32_t w : 2; // 0/3 to 3/3 529 | }; 530 | uint32_t v; 531 | }; 532 | 533 | XMXDECN4() XM_CTOR_DEFAULT 534 | explicit XM_CONSTEXPR XMXDECN4(uint32_t Packed) : v(Packed) {} 535 | XMXDECN4(float _x, float _y, float _z, float _w); 536 | explicit XMXDECN4(_In_reads_(4) const float *pArray); 537 | 538 | operator uint32_t () const { return v; } 539 | 540 | XMXDECN4& operator= (const XMXDECN4& XDecN4) { v = XDecN4.v; return *this; } 541 | XMXDECN4& operator= (uint32_t Packed) { v = Packed; return *this; } 542 | }; 543 | 544 | // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer 545 | // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned 546 | // integer for the w component and 10 bit signed integers for the 547 | // z, y, and x components. The w component is stored in the 548 | // most significant bits and the x component in the least significant bits 549 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] 550 | struct XM_DEPRECATED XMXDEC4 551 | { 552 | union 553 | { 554 | struct 555 | { 556 | int32_t x : 10; // -511 to 511 557 | int32_t y : 10; // -511 to 511 558 | int32_t z : 10; // -511 to 511 559 | uint32_t w : 2; // 0 to 3 560 | }; 561 | uint32_t v; 562 | }; 563 | 564 | XMXDEC4() XM_CTOR_DEFAULT 565 | explicit XM_CONSTEXPR XMXDEC4(uint32_t Packed) : v(Packed) {} 566 | XMXDEC4(float _x, float _y, float _z, float _w); 567 | explicit XMXDEC4(_In_reads_(4) const float *pArray); 568 | 569 | operator uint32_t () const { return v; } 570 | 571 | XMXDEC4& operator= (const XMXDEC4& XDec4) { v = XDec4.v; return *this; } 572 | XMXDEC4& operator= (uint32_t Packed) { v = Packed; return *this; } 573 | }; 574 | 575 | // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer 576 | // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit signed, 577 | // normalized integer for the w component and 10 bit signed, normalized 578 | // integers for the z, y, and x components. 
The w component is stored in the 579 | // most significant bits and the x component in the least significant bits 580 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] 581 | struct XM_DEPRECATED XMDECN4 582 | { 583 | union 584 | { 585 | struct 586 | { 587 | int32_t x : 10; // -511/511 to 511/511 588 | int32_t y : 10; // -511/511 to 511/511 589 | int32_t z : 10; // -511/511 to 511/511 590 | int32_t w : 2; // -1/1 to 1/1 591 | }; 592 | uint32_t v; 593 | }; 594 | 595 | XMDECN4() XM_CTOR_DEFAULT 596 | explicit XM_CONSTEXPR XMDECN4(uint32_t Packed) : v(Packed) {} 597 | XMDECN4(float _x, float _y, float _z, float _w); 598 | explicit XMDECN4(_In_reads_(4) const float *pArray); 599 | 600 | operator uint32_t () const { return v; } 601 | 602 | XMDECN4& operator= (const XMDECN4& DecN4) { v = DecN4.v; return *this; } 603 | XMDECN4& operator= (uint32_t Packed) { v = Packed; return *this; } 604 | }; 605 | 606 | // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer 607 | // The 4D Vector is packed into 32 bits as follows: a 2 bit signed, 608 | // integer for the w component and 10 bit signed integers for the 609 | // z, y, and x components. The w component is stored in the 610 | // most significant bits and the x component in the least significant bits 611 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] 612 | struct XM_DEPRECATED XMDEC4 613 | { 614 | union 615 | { 616 | struct 617 | { 618 | int32_t x : 10; // -511 to 511 619 | int32_t y : 10; // -511 to 511 620 | int32_t z : 10; // -511 to 511 621 | int32_t w : 2; // -1 to 1 622 | }; 623 | uint32_t v; 624 | }; 625 | 626 | XMDEC4() XM_CTOR_DEFAULT 627 | explicit XM_CONSTEXPR XMDEC4(uint32_t Packed) : v(Packed) {} 628 | XMDEC4(float _x, float _y, float _z, float _w); 629 | explicit XMDEC4(_In_reads_(4) const float *pArray); 630 | 631 | operator uint32_t () const { return v; } 632 | 633 | XMDEC4& operator= (const XMDEC4& Dec4) { v = Dec4.v; return *this; } 634 | XMDEC4& operator= (uint32_t Packed) { v = Packed; return *this; } 635 | }; 636 | 637 | // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer 638 | // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, 639 | // normalized integer for the w component and 10 bit unsigned, normalized 640 | // integers for the z, y, and x components. The w component is stored in the 641 | // most significant bits and the x component in the least significant bits 642 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] 643 | struct XMUDECN4 644 | { 645 | union 646 | { 647 | struct 648 | { 649 | uint32_t x : 10; // 0/1023 to 1023/1023 650 | uint32_t y : 10; // 0/1023 to 1023/1023 651 | uint32_t z : 10; // 0/1023 to 1023/1023 652 | uint32_t w : 2; // 0/3 to 3/3 653 | }; 654 | uint32_t v; 655 | }; 656 | 657 | XMUDECN4() XM_CTOR_DEFAULT 658 | explicit XM_CONSTEXPR XMUDECN4(uint32_t Packed) : v(Packed) {} 659 | XMUDECN4(float _x, float _y, float _z, float _w); 660 | explicit XMUDECN4(_In_reads_(4) const float *pArray); 661 | 662 | operator uint32_t () const { return v; } 663 | 664 | XMUDECN4& operator= (const XMUDECN4& UDecN4) { v = UDecN4.v; return *this; } 665 | XMUDECN4& operator= (uint32_t Packed) { v = Packed; return *this; } 666 | }; 667 | 668 | // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer 669 | // The 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, 670 | // integer for the w component and 10 bit unsigned integers 671 | // for the z, y, and x components. 
The w component is stored in the 672 | // most significant bits and the x component in the least significant bits 673 | // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] 674 | struct XMUDEC4 675 | { 676 | union 677 | { 678 | struct 679 | { 680 | uint32_t x : 10; // 0 to 1023 681 | uint32_t y : 10; // 0 to 1023 682 | uint32_t z : 10; // 0 to 1023 683 | uint32_t w : 2; // 0 to 3 684 | }; 685 | uint32_t v; 686 | }; 687 | 688 | XMUDEC4() XM_CTOR_DEFAULT 689 | explicit XM_CONSTEXPR XMUDEC4(uint32_t Packed) : v(Packed) {} 690 | XMUDEC4(float _x, float _y, float _z, float _w); 691 | explicit XMUDEC4(_In_reads_(4) const float *pArray); 692 | 693 | operator uint32_t () const { return v; } 694 | 695 | XMUDEC4& operator= (const XMUDEC4& UDec4) { v = UDec4.v; return *this; } 696 | XMUDEC4& operator= (uint32_t Packed) { v = Packed; return *this; } 697 | }; 698 | 699 | //------------------------------------------------------------------------------ 700 | // 4D Vector; 8 bit signed normalized integer components 701 | struct XMBYTEN4 702 | { 703 | union 704 | { 705 | struct 706 | { 707 | int8_t x; 708 | int8_t y; 709 | int8_t z; 710 | int8_t w; 711 | }; 712 | uint32_t v; 713 | }; 714 | 715 | XMBYTEN4() XM_CTOR_DEFAULT 716 | XM_CONSTEXPR XMBYTEN4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) : x(_x), y(_y), z(_z), w(_w) {} 717 | explicit XM_CONSTEXPR XMBYTEN4(uint32_t Packed) : v(Packed) {} 718 | explicit XMBYTEN4(_In_reads_(4) const int8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 719 | XMBYTEN4(float _x, float _y, float _z, float _w); 720 | explicit XMBYTEN4(_In_reads_(4) const float *pArray); 721 | 722 | XMBYTEN4& operator= (const XMBYTEN4& ByteN4) { x = ByteN4.x; y = ByteN4.y; z = ByteN4.z; w = ByteN4.w; return *this; } 723 | XMBYTEN4& operator= (uint32_t Packed) { v = Packed; return *this; } 724 | }; 725 | 726 | // 4D Vector; 8 bit signed integer components 727 | struct XMBYTE4 728 | { 729 | union 730 | { 731 | struct 732 | { 733 | int8_t x; 734 | int8_t y; 735 | int8_t z; 736 | int8_t w; 737 | }; 738 | uint32_t v; 739 | }; 740 | 741 | XMBYTE4() XM_CTOR_DEFAULT 742 | XM_CONSTEXPR XMBYTE4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) : x(_x), y(_y), z(_z), w(_w) {} 743 | explicit XM_CONSTEXPR XMBYTE4(uint32_t Packed) : v(Packed) {} 744 | explicit XMBYTE4(_In_reads_(4) const int8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 745 | XMBYTE4(float _x, float _y, float _z, float _w); 746 | explicit XMBYTE4(_In_reads_(4) const float *pArray); 747 | 748 | XMBYTE4& operator= (const XMBYTE4& Byte4) { x = Byte4.x; y = Byte4.y; z = Byte4.z; w = Byte4.w; return *this; } 749 | XMBYTE4& operator= (uint32_t Packed) { v = Packed; return *this; } 750 | }; 751 | 752 | // 4D Vector; 8 bit unsigned normalized integer components 753 | struct XMUBYTEN4 754 | { 755 | union 756 | { 757 | struct 758 | { 759 | uint8_t x; 760 | uint8_t y; 761 | uint8_t z; 762 | uint8_t w; 763 | }; 764 | uint32_t v; 765 | }; 766 | 767 | XMUBYTEN4() XM_CTOR_DEFAULT 768 | XM_CONSTEXPR XMUBYTEN4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) : x(_x), y(_y), z(_z), w(_w) {} 769 | explicit XM_CONSTEXPR XMUBYTEN4(uint32_t Packed) : v(Packed) {} 770 | explicit XMUBYTEN4(_In_reads_(4) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 771 | XMUBYTEN4(float _x, float _y, float _z, float _w); 772 | explicit XMUBYTEN4(_In_reads_(4) const float *pArray); 773 | 774 | XMUBYTEN4& operator= (const XMUBYTEN4& UByteN4) { x = UByteN4.x; y = UByteN4.y; z = UByteN4.z; w = 
UByteN4.w; return *this; } 775 | XMUBYTEN4& operator= (uint32_t Packed) { v = Packed; return *this; } 776 | }; 777 | 778 | // 4D Vector; 8 bit unsigned integer components 779 | struct XMUBYTE4 780 | { 781 | union 782 | { 783 | struct 784 | { 785 | uint8_t x; 786 | uint8_t y; 787 | uint8_t z; 788 | uint8_t w; 789 | }; 790 | uint32_t v; 791 | }; 792 | 793 | XMUBYTE4() XM_CTOR_DEFAULT 794 | XM_CONSTEXPR XMUBYTE4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) : x(_x), y(_y), z(_z), w(_w) {} 795 | explicit XM_CONSTEXPR XMUBYTE4(uint32_t Packed) : v(Packed) {} 796 | explicit XMUBYTE4(_In_reads_(4) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 797 | XMUBYTE4(float _x, float _y, float _z, float _w); 798 | explicit XMUBYTE4(_In_reads_(4) const float *pArray); 799 | 800 | XMUBYTE4& operator= (const XMUBYTE4& UByte4) { x = UByte4.x; y = UByte4.y; z = UByte4.z; w = UByte4.w; return *this; } 801 | XMUBYTE4& operator= (uint32_t Packed) { v = Packed; return *this; } 802 | }; 803 | 804 | //------------------------------------------------------------------------------ 805 | // 4D vector; 4 bit unsigned integer components 806 | struct XMUNIBBLE4 807 | { 808 | union 809 | { 810 | struct 811 | { 812 | uint16_t x : 4; // 0 to 15 813 | uint16_t y : 4; // 0 to 15 814 | uint16_t z : 4; // 0 to 15 815 | uint16_t w : 4; // 0 to 15 816 | }; 817 | uint16_t v; 818 | }; 819 | 820 | XMUNIBBLE4() XM_CTOR_DEFAULT 821 | explicit XM_CONSTEXPR XMUNIBBLE4(uint16_t Packed) : v(Packed) {} 822 | XM_CONSTEXPR XMUNIBBLE4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) : x(_x), y(_y), z(_z), w(_w) {} 823 | explicit XMUNIBBLE4(_In_reads_(4) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} 824 | XMUNIBBLE4(float _x, float _y, float _z, float _w); 825 | explicit XMUNIBBLE4(_In_reads_(4) const float *pArray); 826 | 827 | operator uint16_t () const { return v; } 828 | 829 | XMUNIBBLE4& operator= (const XMUNIBBLE4& UNibble4) { v = UNibble4.v; return *this; } 830 | XMUNIBBLE4& operator= (uint16_t Packed) { v = Packed; return *this; } 831 | }; 832 | 833 | //------------------------------------------------------------------------------ 834 | // 4D vector: 5/5/5/1 unsigned integer components 835 | struct XMU555 836 | { 837 | union 838 | { 839 | struct 840 | { 841 | uint16_t x : 5; // 0 to 31 842 | uint16_t y : 5; // 0 to 31 843 | uint16_t z : 5; // 0 to 31 844 | uint16_t w : 1; // 0 or 1 845 | }; 846 | uint16_t v; 847 | }; 848 | 849 | XMU555() XM_CTOR_DEFAULT 850 | explicit XM_CONSTEXPR XMU555(uint16_t Packed) : v(Packed) {} 851 | XM_CONSTEXPR XMU555(uint8_t _x, uint8_t _y, uint8_t _z, bool _w) : x(_x), y(_y), z(_z), w(_w ? 0x1 : 0) {} 852 | XMU555(_In_reads_(3) const uint8_t *pArray, _In_ bool _w) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(_w ? 
0x1 : 0) {} 853 | XMU555(float _x, float _y, float _z, bool _w); 854 | XMU555(_In_reads_(3) const float *pArray, _In_ bool _w); 855 | 856 | operator uint16_t () const { return v; } 857 | 858 | XMU555& operator= (const XMU555& U555) { v = U555.v; return *this; } 859 | XMU555& operator= (uint16_t Packed) { v = Packed; return *this; } 860 | }; 861 | 862 | #ifdef _MSC_VER 863 | # pragma warning(pop) 864 | #endif 865 | 866 | /**************************************************************************** 867 | * 868 | * Data conversion operations 869 | * 870 | ****************************************************************************/ 871 | 872 | float XMConvertHalfToFloat(HALF Value); 873 | float* XMConvertHalfToFloatStream(_Out_writes_bytes_(sizeof(float) + OutputStride*(HalfCount - 1)) float* pOutputStream, 874 | _In_ size_t OutputStride, 875 | _In_reads_bytes_(sizeof(HALF) + InputStride*(HalfCount - 1)) const HALF* pInputStream, 876 | _In_ size_t InputStride, _In_ size_t HalfCount); 877 | HALF XMConvertFloatToHalf(float Value); 878 | HALF* XMConvertFloatToHalfStream(_Out_writes_bytes_(sizeof(HALF) + OutputStride*(FloatCount - 1)) HALF* pOutputStream, 879 | _In_ size_t OutputStride, 880 | _In_reads_bytes_(sizeof(float) + InputStride*(FloatCount - 1)) const float* pInputStream, 881 | _In_ size_t InputStride, _In_ size_t FloatCount); 882 | 883 | /**************************************************************************** 884 | * 885 | * Load operations 886 | * 887 | ****************************************************************************/ 888 | 889 | XMVECTOR XM_CALLCONV XMLoadColor(_In_ const XMCOLOR* pSource); 890 | 891 | XMVECTOR XM_CALLCONV XMLoadHalf2(_In_ const XMHALF2* pSource); 892 | XMVECTOR XM_CALLCONV XMLoadShortN2(_In_ const XMSHORTN2* pSource); 893 | XMVECTOR XM_CALLCONV XMLoadShort2(_In_ const XMSHORT2* pSource); 894 | XMVECTOR XM_CALLCONV XMLoadUShortN2(_In_ const XMUSHORTN2* pSource); 895 | XMVECTOR XM_CALLCONV XMLoadUShort2(_In_ const XMUSHORT2* pSource); 896 | XMVECTOR XM_CALLCONV XMLoadByteN2(_In_ const XMBYTEN2* pSource); 897 | XMVECTOR XM_CALLCONV XMLoadByte2(_In_ const XMBYTE2* pSource); 898 | XMVECTOR XM_CALLCONV XMLoadUByteN2(_In_ const XMUBYTEN2* pSource); 899 | XMVECTOR XM_CALLCONV XMLoadUByte2(_In_ const XMUBYTE2* pSource); 900 | 901 | XMVECTOR XM_CALLCONV XMLoadU565(_In_ const XMU565* pSource); 902 | XMVECTOR XM_CALLCONV XMLoadFloat3PK(_In_ const XMFLOAT3PK* pSource); 903 | XMVECTOR XM_CALLCONV XMLoadFloat3SE(_In_ const XMFLOAT3SE* pSource); 904 | 905 | XMVECTOR XM_CALLCONV XMLoadHalf4(_In_ const XMHALF4* pSource); 906 | XMVECTOR XM_CALLCONV XMLoadShortN4(_In_ const XMSHORTN4* pSource); 907 | XMVECTOR XM_CALLCONV XMLoadShort4(_In_ const XMSHORT4* pSource); 908 | XMVECTOR XM_CALLCONV XMLoadUShortN4(_In_ const XMUSHORTN4* pSource); 909 | XMVECTOR XM_CALLCONV XMLoadUShort4(_In_ const XMUSHORT4* pSource); 910 | XMVECTOR XM_CALLCONV XMLoadXDecN4(_In_ const XMXDECN4* pSource); 911 | XMVECTOR XM_CALLCONV XMLoadUDecN4(_In_ const XMUDECN4* pSource); 912 | XMVECTOR XM_CALLCONV XMLoadUDecN4_XR(_In_ const XMUDECN4* pSource); 913 | XMVECTOR XM_CALLCONV XMLoadUDec4(_In_ const XMUDEC4* pSource); 914 | XMVECTOR XM_CALLCONV XMLoadByteN4(_In_ const XMBYTEN4* pSource); 915 | XMVECTOR XM_CALLCONV XMLoadByte4(_In_ const XMBYTE4* pSource); 916 | XMVECTOR XM_CALLCONV XMLoadUByteN4(_In_ const XMUBYTEN4* pSource); 917 | XMVECTOR XM_CALLCONV XMLoadUByte4(_In_ const XMUBYTE4* pSource); 918 | XMVECTOR XM_CALLCONV XMLoadUNibble4(_In_ const XMUNIBBLE4* pSource); 919 | XMVECTOR XM_CALLCONV 
XMLoadU555(_In_ const XMU555* pSource); 920 | 921 | #ifdef _MSC_VER 922 | # pragma warning(push) 923 | # pragma warning(disable : 4996) 924 | // C4996: ignore deprecation warning 925 | #endif 926 | 927 | XMVECTOR XM_DEPRECATED XM_CALLCONV XMLoadDecN4(_In_ const XMDECN4* pSource); 928 | XMVECTOR XM_DEPRECATED XM_CALLCONV XMLoadDec4(_In_ const XMDEC4* pSource); 929 | XMVECTOR XM_DEPRECATED XM_CALLCONV XMLoadXDec4(_In_ const XMXDEC4* pSource); 930 | 931 | #ifdef _MSC_VER 932 | # pragma warning(pop) 933 | #endif 934 | 935 | /**************************************************************************** 936 | * 937 | * Store operations 938 | * 939 | ****************************************************************************/ 940 | 941 | void XM_CALLCONV XMStoreColor(_Out_ XMCOLOR* pDestination, _In_ FXMVECTOR V); 942 | 943 | void XM_CALLCONV XMStoreHalf2(_Out_ XMHALF2* pDestination, _In_ FXMVECTOR V); 944 | void XM_CALLCONV XMStoreShortN2(_Out_ XMSHORTN2* pDestination, _In_ FXMVECTOR V); 945 | void XM_CALLCONV XMStoreShort2(_Out_ XMSHORT2* pDestination, _In_ FXMVECTOR V); 946 | void XM_CALLCONV XMStoreUShortN2(_Out_ XMUSHORTN2* pDestination, _In_ FXMVECTOR V); 947 | void XM_CALLCONV XMStoreUShort2(_Out_ XMUSHORT2* pDestination, _In_ FXMVECTOR V); 948 | void XM_CALLCONV XMStoreByteN2(_Out_ XMBYTEN2* pDestination, _In_ FXMVECTOR V); 949 | void XM_CALLCONV XMStoreByte2(_Out_ XMBYTE2* pDestination, _In_ FXMVECTOR V); 950 | void XM_CALLCONV XMStoreUByteN2(_Out_ XMUBYTEN2* pDestination, _In_ FXMVECTOR V); 951 | void XM_CALLCONV XMStoreUByte2(_Out_ XMUBYTE2* pDestination, _In_ FXMVECTOR V); 952 | 953 | void XM_CALLCONV XMStoreU565(_Out_ XMU565* pDestination, _In_ FXMVECTOR V); 954 | void XM_CALLCONV XMStoreFloat3PK(_Out_ XMFLOAT3PK* pDestination, _In_ FXMVECTOR V); 955 | void XM_CALLCONV XMStoreFloat3SE(_Out_ XMFLOAT3SE* pDestination, _In_ FXMVECTOR V); 956 | 957 | void XM_CALLCONV XMStoreHalf4(_Out_ XMHALF4* pDestination, _In_ FXMVECTOR V); 958 | void XM_CALLCONV XMStoreShortN4(_Out_ XMSHORTN4* pDestination, _In_ FXMVECTOR V); 959 | void XM_CALLCONV XMStoreShort4(_Out_ XMSHORT4* pDestination, _In_ FXMVECTOR V); 960 | void XM_CALLCONV XMStoreUShortN4(_Out_ XMUSHORTN4* pDestination, _In_ FXMVECTOR V); 961 | void XM_CALLCONV XMStoreUShort4(_Out_ XMUSHORT4* pDestination, _In_ FXMVECTOR V); 962 | void XM_CALLCONV XMStoreXDecN4(_Out_ XMXDECN4* pDestination, _In_ FXMVECTOR V); 963 | void XM_CALLCONV XMStoreUDecN4(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V); 964 | void XM_CALLCONV XMStoreUDecN4_XR(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V); 965 | void XM_CALLCONV XMStoreUDec4(_Out_ XMUDEC4* pDestination, _In_ FXMVECTOR V); 966 | void XM_CALLCONV XMStoreByteN4(_Out_ XMBYTEN4* pDestination, _In_ FXMVECTOR V); 967 | void XM_CALLCONV XMStoreByte4(_Out_ XMBYTE4* pDestination, _In_ FXMVECTOR V); 968 | void XM_CALLCONV XMStoreUByteN4(_Out_ XMUBYTEN4* pDestination, _In_ FXMVECTOR V); 969 | void XM_CALLCONV XMStoreUByte4(_Out_ XMUBYTE4* pDestination, _In_ FXMVECTOR V); 970 | void XM_CALLCONV XMStoreUNibble4(_Out_ XMUNIBBLE4* pDestination, _In_ FXMVECTOR V); 971 | void XM_CALLCONV XMStoreU555(_Out_ XMU555* pDestination, _In_ FXMVECTOR V); 972 | 973 | #ifdef _MSC_VER 974 | # pragma warning(push) 975 | # pragma warning(disable : 4996) 976 | // C4996: ignore deprecation warning 977 | #endif 978 | 979 | void XM_DEPRECATED XM_CALLCONV XMStoreDecN4(_Out_ XMDECN4* pDestination, _In_ FXMVECTOR V); 980 | void XM_DEPRECATED XM_CALLCONV XMStoreDec4(_Out_ XMDEC4* pDestination, _In_ FXMVECTOR V); 981 | void XM_DEPRECATED 
XM_CALLCONV XMStoreXDec4(_Out_ XMXDEC4* pDestination, _In_ FXMVECTOR V);
982 | 
983 | #ifdef _MSC_VER
984 | # pragma warning(pop)
985 | #endif
986 | 
987 | /****************************************************************************
988 | *
989 | * Implementation
990 | *
991 | ****************************************************************************/
992 | 
993 | #ifdef _MSC_VER
994 | # pragma warning(push)
995 | # pragma warning(disable:4068 4214 4204 4365 4616 6001 6101)
996 | // C4068/4616: ignore unknown pragmas
997 | // C4214/4204: nonstandard extension used
998 | // C4365: Off by default noise
999 | // C6001/6101: False positives
1000 | # pragma prefast(push)
1001 | # pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
1002 | #endif
1003 | 
1004 | #include "XPackedVector.inl"
1005 | 
1006 | #ifdef _MSC_VER
1007 | # pragma prefast(pop)
1008 | # pragma warning(pop)
1009 | #endif
1010 | 
1011 | }; // namespace XMath
1012 | 
--------------------------------------------------------------------------------
/Inc/XMathConvert.inl:
--------------------------------------------------------------------------------
1 | //-------------------------------------------------------------------------------------
2 | // XMathConvert.inl -- SIMD C++ Math library
3 | //
4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
7 | // PARTICULAR PURPOSE.
8 | //
9 | // Copyright (c) Microsoft Corporation. All rights reserved.
10 | //-------------------------------------------------------------------------------------
11 | 
12 | #pragma once
13 | 
14 | /****************************************************************************
15 | *
16 | * Data conversion
17 | *
18 | ****************************************************************************/
19 | 
20 | //------------------------------------------------------------------------------
21 | 
22 | #ifdef _MSC_VER
23 | # pragma warning(push)
24 | # pragma warning(disable:4701)
25 | // C4701: false positives
26 | #endif
27 | 
28 | inline XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat
29 | (
30 |     FXMVECTOR VInt,
31 |     uint32_t DivExponent
32 | )
33 | {
34 |     assert(DivExponent < 32);
35 | #if defined(_XM_NO_INTRINSICS_)
36 |     float fScale = 1.0f / (float)(1U << DivExponent);
37 |     uint32_t ElementIndex = 0;
38 |     XMVECTOR Result;
39 |     do {
40 |         int32_t iTemp = (int32_t)VInt.vector4_u32[ElementIndex];
41 |         Result.vector4_f32[ElementIndex] = ((float)iTemp) * fScale;
42 |     } while (++ElementIndex < 4);
43 |     return Result;
44 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
45 |     float fScale = 1.0f / (float)(1U << DivExponent);
46 |     float32x4_t vResult = vcvtq_f32_s32(VInt);
47 |     return vmulq_n_f32(vResult, fScale);
48 | #else // _XM_SSE_INTRINSICS_
49 |     // Convert to floats
50 |     XMVECTOR vResult = _mm_cvtepi32_ps(_mm_castps_si128(VInt));
51 |     // Convert DivExponent into 1.0f/(1<<DivExponent)
52 |     uint32_t uScale = 0x3F800000U - (DivExponent << 23);
53 |     // Splat the scalar value
54 |     __m128i vScale = _mm_set1_epi32(static_cast<int>(uScale));
55 |     vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(vScale));
56 |     return vResult;
57 | #endif
58 | }
59 | 
60 | //------------------------------------------------------------------------------
61 | 
62 | inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt
63 | (
64 |     FXMVECTOR VFloat,
65 |     uint32_t MulExponent
66 | )
67 | {
68 |     assert(MulExponent < 32);
69 | #if defined(_XM_NO_INTRINSICS_)
70 |     // Get the scalar factor.
71 |     float fScale = (float)(1U << MulExponent);
72 |     uint32_t ElementIndex = 0;
73 |     XMVECTOR Result;
74 |     do {
75 |         int32_t iResult;
76 |         float fTemp = VFloat.vector4_f32[ElementIndex] * fScale;
77 |         if (fTemp < -(65536.0f*32768.0f)) {
78 |             iResult = (-0x7FFFFFFF) - 1;
79 |         }
80 |         else if (fTemp > (65536.0f*32768.0f) - 128.0f) {
81 |             iResult = 0x7FFFFFFF;
82 |         }
83 |         else {
84 |             iResult = (int32_t)fTemp;
85 |         }
86 |         Result.vector4_u32[ElementIndex] = (uint32_t)iResult;
87 |     } while (++ElementIndex < 4);
88 |     return Result;
89 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
90 |     float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));
91 |     // In case of positive overflow, detect it
92 |     uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxInt);
93 |     // Float to int conversion
94 |     int32x4_t vResulti = vcvtq_s32_f32(vResult);
95 |     // If there was positive overflow, set to 0x7FFFFFFF
96 |     vResult = vandq_u32(vOverflow, g_XMAbsMask);
97 |     vOverflow = vbicq_u32(vResulti, vOverflow);
98 |     vOverflow = vorrq_u32(vOverflow, vResult);
99 |     return vOverflow;
100 | #else // _XM_SSE_INTRINSICS_
101 |     XMVECTOR vResult = _mm_set_ps1((float)(1U << MulExponent));
102 |     vResult = _mm_mul_ps(vResult, VFloat);
103 |     // In case of positive overflow, detect it
104 |     XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxInt);
105 |     // Float to int conversion
106 |     __m128i vResulti = _mm_cvttps_epi32(vResult);
107 |     // If there was positive overflow, set to 0x7FFFFFFF
108 |     vResult = _mm_and_ps(vOverflow, g_XMAbsMask);
109 |     vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti));
110 |     vOverflow = _mm_or_ps(vOverflow, vResult);
111 |     return vOverflow;
112 | #endif
113 | }
114 | 
115 | //------------------------------------------------------------------------------
116 | 
117 | inline XMVECTOR XM_CALLCONV XMConvertVectorUIntToFloat
118 | (
119 |     FXMVECTOR VUInt,
120 |     uint32_t DivExponent
121 | )
122 | {
123 |     assert(DivExponent < 32);
124 | #if defined(_XM_NO_INTRINSICS_)
125 |     float fScale = 1.0f / (float)(1U << DivExponent);
126 |     uint32_t ElementIndex = 0;
127 |     XMVECTOR Result;
128 |     do {
129 |         Result.vector4_f32[ElementIndex] = (float)VUInt.vector4_u32[ElementIndex] * fScale;
130 |     } while (++ElementIndex < 4);
131 |     return Result;
132 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
133 |     float fScale = 1.0f / (float)(1U << DivExponent);
134 |     float32x4_t vResult = vcvtq_f32_u32(VUInt);
135 |     return vmulq_n_f32(vResult, fScale);
136 | #else // _XM_SSE_INTRINSICS_
137 |     // For the values that are higher than 0x7FFFFFFF, a fixup is needed
138 |     // Determine which ones need the fix.
139 |     XMVECTOR vMask = _mm_and_ps(VUInt, g_XMNegativeZero);
140 |     // Force all values positive
141 |     XMVECTOR vResult = _mm_xor_ps(VUInt, vMask);
142 |     // Convert to floats
143 |     vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
144 |     // Convert 0x80000000 -> 0xFFFFFFFF
145 |     __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31);
146 |     // For only the ones that are too big, add the fixup
147 |     vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned);
148 |     vResult = _mm_add_ps(vResult, vMask);
149 |     // Convert DivExponent into 1.0f/(1<<DivExponent)
150 |     uint32_t uScale = 0x3F800000U - (DivExponent << 23);
151 |     // Splat the scalar value
152 |     __m128i vScale = _mm_set1_epi32(static_cast<int>(uScale));
153 |     vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(vScale));
154 |     return vResult;
155 | #endif
156 | }
157 | 
158 | //------------------------------------------------------------------------------
159 | 
160 | inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt
161 | (
162 |     FXMVECTOR VFloat,
163 |     uint32_t MulExponent
164 | )
165 | {
166 |     assert(MulExponent < 32);
167 | #if defined(_XM_NO_INTRINSICS_)
168 |     // Get the scalar factor.
169 |     float fScale = (float)(1U << MulExponent);
170 |     uint32_t ElementIndex = 0;
171 |     XMVECTOR Result;
172 |     do {
173 |         uint32_t uResult;
174 |         float fTemp = VFloat.vector4_f32[ElementIndex] * fScale;
175 |         if (fTemp <= 0.0f) {
176 |             uResult = 0;
177 |         }
178 |         else if (fTemp >= (65536.0f*65536.0f)) {
179 |             uResult = 0xFFFFFFFFU;
180 |         }
181 |         else {
182 |             uResult = (uint32_t)fTemp;
183 |         }
184 |         Result.vector4_u32[ElementIndex] = uResult;
185 |     } while (++ElementIndex < 4);
186 |     return Result;
187 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
188 |     float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));
189 |     // In case of overflow, detect it
190 |     uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxUInt);
191 |     // Float to int conversion
192 |     uint32x4_t vResulti = vcvtq_u32_f32(vResult);
193 |     // If there was overflow, set to 0xFFFFFFFFU
194 |     vResult = vbicq_u32(vResulti, vOverflow);
195 |     vOverflow = vorrq_u32(vOverflow, vResult);
196 |     return vOverflow;
197 | #else // _XM_SSE_INTRINSICS_
198 |     XMVECTOR vResult = _mm_set_ps1(static_cast<float>(1U << MulExponent));
199 |     vResult = _mm_mul_ps(vResult, VFloat);
200 |     // Clamp to >=0
201 |     vResult = _mm_max_ps(vResult, g_XMZero);
202 |     // Any numbers that are too big, set to 0xFFFFFFFFU
203 |     XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt);
204 |     XMVECTOR vValue = g_XMUnsignedFix;
205 |     // Too large for a signed integer?
206 |     XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue);
207 |     // Zero for numbers lower than 0x80000000, 32768.0f*65536.0f otherwise
208 |     vValue = _mm_and_ps(vValue, vMask);
209 |     // Perform fixup only on numbers too large (Keeps low bit precision)
210 |     vResult = _mm_sub_ps(vResult, vValue);
211 |     __m128i vResulti = _mm_cvttps_epi32(vResult);
212 |     // Convert from signed to unsigned only if greater than 0x80000000
213 |     vMask = _mm_and_ps(vMask, g_XMNegativeZero);
214 |     vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask);
215 |     // On those that are too large, set to 0xFFFFFFFF
216 |     vResult = _mm_or_ps(vResult, vOverflow);
217 |     return vResult;
218 | #endif
219 | }
220 | 
221 | #ifdef _MSC_VER
222 | # pragma warning(pop)
223 | #endif
224 | 
225 | /****************************************************************************
226 | *
227 | * Vector and matrix load operations
228 | *
229 | ****************************************************************************/
230 | 
231 | //------------------------------------------------------------------------------
232 | _Use_decl_annotations_
233 | inline XMVECTOR XM_CALLCONV XMLoadInt(const uint32_t* pSource)
234 | {
235 |     assert(pSource);
236 | #if defined(_XM_NO_INTRINSICS_)
237 |     XMVECTOR V;
238 |     V.vector4_u32[0] = *pSource;
239 |     V.vector4_u32[1] = 0;
240 |     V.vector4_u32[2] = 0;
241 |     V.vector4_u32[3] = 0;
242 |     return V;
243 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
244 |     uint32x4_t zero = vdupq_n_u32(0);
245 |     return vld1q_lane_u32(pSource, zero, 0);
246 | #elif defined(_XM_SSE_INTRINSICS_)
247 |     return _mm_load_ss(reinterpret_cast<const float*>(pSource));
248 | #endif
249 | }
250 | 
251 | //------------------------------------------------------------------------------
252 | _Use_decl_annotations_
253 | inline XMVECTOR XM_CALLCONV XMLoadFloat(const float* pSource)
254 | {
255 |     assert(pSource);
256 | #if defined(_XM_NO_INTRINSICS_)
257 |     XMVECTOR V;
258 |     V.vector4_f32[0] = *pSource;
259 |     V.vector4_f32[1] = 0.f;
260 |     V.vector4_f32[2] = 0.f;
261 |     V.vector4_f32[3] = 0.f;
262 |     return V;
263 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
264 |     float32x4_t zero = vdupq_n_f32(0);
265 |     return vld1q_lane_f32(pSource, zero, 0);
266 | #elif defined(_XM_SSE_INTRINSICS_)
267 |     return _mm_load_ss(pSource);
268 | #endif
269 | }
270 | 
271 | //------------------------------------------------------------------------------
272 | _Use_decl_annotations_
273 | inline XMVECTOR XM_CALLCONV XMLoadInt2
274 | (
275 |     const uint32_t* pSource
276 | )
277 | {
278 |     assert(pSource);
279 | #if defined(_XM_NO_INTRINSICS_)
280 |     XMVECTOR V;
281 |     V.vector4_u32[0] = pSource[0];
282 |     V.vector4_u32[1] = pSource[1];
283 |     V.vector4_u32[2] = 0;
284 |     V.vector4_u32[3] = 0;
285 |     return V;
286 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
287 |     uint32x2_t x = vld1_u32(pSource);
288 |     uint32x2_t zero = vdup_n_u32(0);
289 |     return vcombine_u32(x, zero);
290 | #elif defined(_XM_SSE_INTRINSICS_)
291 |     __m128 x = _mm_load_ss(reinterpret_cast<const float*>(pSource));
292 |     __m128 y = _mm_load_ss(reinterpret_cast<const float*>(pSource + 1));
293 |     return _mm_unpacklo_ps(x, y);
294 | #endif
295 | }
296 | 
297 | //------------------------------------------------------------------------------
298 | _Use_decl_annotations_
299 | inline XMVECTOR XM_CALLCONV XMLoadInt2A
300 | (
301 |     const uint32_t* pSource
302 | )
303 | {
304 |     assert(pSource);
305 |     assert(((uintptr_t)pSource & 0xF) == 0);
306 | #if defined(_XM_NO_INTRINSICS_)
307 |     XMVECTOR V;
308 |     V.vector4_u32[0] = pSource[0];
309 |     V.vector4_u32[1] = pSource[1];
310 | 
V.vector4_u32[2] = 0; 311 | V.vector4_u32[3] = 0; 312 | return V; 313 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 314 | uint32x2_t x = vld1_u32_ex(pSource, 64); 315 | uint32x2_t zero = vdup_n_u32(0); 316 | return vcombine_u32(x, zero); 317 | #elif defined(_XM_SSE_INTRINSICS_) 318 | __m128i V = _mm_loadl_epi64(reinterpret_cast(pSource)); 319 | return _mm_castsi128_ps(V); 320 | #endif 321 | } 322 | 323 | //------------------------------------------------------------------------------ 324 | _Use_decl_annotations_ 325 | inline XMVECTOR XM_CALLCONV XMLoadFloat2 326 | ( 327 | const XMFLOAT2* pSource 328 | ) 329 | { 330 | assert(pSource); 331 | #if defined(_XM_NO_INTRINSICS_) 332 | XMVECTOR V; 333 | V.vector4_f32[0] = pSource->x; 334 | V.vector4_f32[1] = pSource->y; 335 | V.vector4_f32[2] = 0.f; 336 | V.vector4_f32[3] = 0.f; 337 | return V; 338 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 339 | float32x2_t x = vld1_f32(reinterpret_cast(pSource)); 340 | float32x2_t zero = vdup_n_f32(0); 341 | return vcombine_f32(x, zero); 342 | #elif defined(_XM_SSE_INTRINSICS_) 343 | __m128 x = _mm_load_ss(&pSource->x); 344 | __m128 y = _mm_load_ss(&pSource->y); 345 | return _mm_unpacklo_ps(x, y); 346 | #endif 347 | } 348 | 349 | //------------------------------------------------------------------------------ 350 | _Use_decl_annotations_ 351 | inline XMVECTOR XM_CALLCONV XMLoadFloat2A 352 | ( 353 | const XMFLOAT2A* pSource 354 | ) 355 | { 356 | assert(pSource); 357 | assert(((uintptr_t)pSource & 0xF) == 0); 358 | #if defined(_XM_NO_INTRINSICS_) 359 | XMVECTOR V; 360 | V.vector4_f32[0] = pSource->x; 361 | V.vector4_f32[1] = pSource->y; 362 | V.vector4_f32[2] = 0.f; 363 | V.vector4_f32[3] = 0.f; 364 | return V; 365 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 366 | float32x2_t x = vld1_f32_ex(reinterpret_cast(pSource), 64); 367 | float32x2_t zero = vdup_n_f32(0); 368 | return vcombine_f32(x, zero); 369 | #elif defined(_XM_SSE_INTRINSICS_) 370 | __m128i V = _mm_loadl_epi64(reinterpret_cast(pSource)); 371 | return _mm_castsi128_ps(V); 372 | #endif 373 | } 374 | 375 | //------------------------------------------------------------------------------ 376 | _Use_decl_annotations_ 377 | inline XMVECTOR XM_CALLCONV XMLoadSInt2 378 | ( 379 | const XMINT2* pSource 380 | ) 381 | { 382 | assert(pSource); 383 | #if defined(_XM_NO_INTRINSICS_) 384 | XMVECTOR V; 385 | V.vector4_f32[0] = (float)pSource->x; 386 | V.vector4_f32[1] = (float)pSource->y; 387 | V.vector4_f32[2] = 0.f; 388 | V.vector4_f32[3] = 0.f; 389 | return V; 390 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 391 | int32x2_t x = vld1_s32(reinterpret_cast(pSource)); 392 | float32x2_t v = vcvt_f32_s32(x); 393 | float32x2_t zero = vdup_n_f32(0); 394 | return vcombine_f32(v, zero); 395 | #elif defined(_XM_SSE_INTRINSICS_) 396 | __m128 x = _mm_load_ss(reinterpret_cast(&pSource->x)); 397 | __m128 y = _mm_load_ss(reinterpret_cast(&pSource->y)); 398 | __m128 V = _mm_unpacklo_ps(x, y); 399 | return _mm_cvtepi32_ps(_mm_castps_si128(V)); 400 | #endif 401 | } 402 | 403 | //------------------------------------------------------------------------------ 404 | _Use_decl_annotations_ 405 | inline XMVECTOR XM_CALLCONV XMLoadUInt2 406 | ( 407 | const XMUINT2* pSource 408 | ) 409 | { 410 | assert(pSource); 411 | #if defined(_XM_NO_INTRINSICS_) 412 | XMVECTOR V; 413 | V.vector4_f32[0] = (float)pSource->x; 414 | V.vector4_f32[1] = (float)pSource->y; 415 | V.vector4_f32[2] = 0.f; 416 | V.vector4_f32[3] = 0.f; 417 | return V; 418 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 419 | uint32x2_t x = 
vld1_u32(reinterpret_cast(pSource)); 420 | float32x2_t v = vcvt_f32_u32(x); 421 | float32x2_t zero = vdup_n_f32(0); 422 | return vcombine_f32(v, zero); 423 | #elif defined(_XM_SSE_INTRINSICS_) 424 | __m128 x = _mm_load_ss(reinterpret_cast(&pSource->x)); 425 | __m128 y = _mm_load_ss(reinterpret_cast(&pSource->y)); 426 | __m128 V = _mm_unpacklo_ps(x, y); 427 | // For the values that are higher than 0x7FFFFFFF, a fixup is needed 428 | // Determine which ones need the fix. 429 | XMVECTOR vMask = _mm_and_ps(V, g_XMNegativeZero); 430 | // Force all values positive 431 | XMVECTOR vResult = _mm_xor_ps(V, vMask); 432 | // Convert to floats 433 | vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); 434 | // Convert 0x80000000 -> 0xFFFFFFFF 435 | __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); 436 | // For only the ones that are too big, add the fixup 437 | vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); 438 | vResult = _mm_add_ps(vResult, vMask); 439 | return vResult; 440 | #endif 441 | } 442 | 443 | //------------------------------------------------------------------------------ 444 | _Use_decl_annotations_ 445 | inline XMVECTOR XM_CALLCONV XMLoadInt3 446 | ( 447 | const uint32_t* pSource 448 | ) 449 | { 450 | assert(pSource); 451 | #if defined(_XM_NO_INTRINSICS_) 452 | XMVECTOR V; 453 | V.vector4_u32[0] = pSource[0]; 454 | V.vector4_u32[1] = pSource[1]; 455 | V.vector4_u32[2] = pSource[2]; 456 | V.vector4_u32[3] = 0; 457 | return V; 458 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 459 | uint32x2_t x = vld1_u32(pSource); 460 | uint32x2_t zero = vdup_n_u32(0); 461 | uint32x2_t y = vld1_lane_u32(pSource + 2, zero, 0); 462 | return vcombine_u32(x, y); 463 | #elif defined(_XM_SSE_INTRINSICS_) 464 | __m128 x = _mm_load_ss(reinterpret_cast(pSource)); 465 | __m128 y = _mm_load_ss(reinterpret_cast(pSource + 1)); 466 | __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); 467 | __m128 xy = _mm_unpacklo_ps(x, y); 468 | return _mm_movelh_ps(xy, z); 469 | #endif 470 | } 471 | 472 | //------------------------------------------------------------------------------ 473 | _Use_decl_annotations_ 474 | inline XMVECTOR XM_CALLCONV XMLoadInt3A 475 | ( 476 | const uint32_t* pSource 477 | ) 478 | { 479 | assert(pSource); 480 | assert(((uintptr_t)pSource & 0xF) == 0); 481 | #if defined(_XM_NO_INTRINSICS_) 482 | XMVECTOR V; 483 | V.vector4_u32[0] = pSource[0]; 484 | V.vector4_u32[1] = pSource[1]; 485 | V.vector4_u32[2] = pSource[2]; 486 | V.vector4_u32[3] = 0; 487 | return V; 488 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 489 | // Reads an extra integer which is zero'd 490 | uint32x4_t V = vld1q_u32_ex(pSource, 128); 491 | return vsetq_lane_u32(0, V, 3); 492 | #elif defined(_XM_SSE_INTRINSICS_) 493 | // Reads an extra integer which is zero'd 494 | __m128i V = _mm_load_si128(reinterpret_cast(pSource)); 495 | V = _mm_and_si128(V, g_XMMask3); 496 | return _mm_castsi128_ps(V); 497 | #endif 498 | } 499 | 500 | //------------------------------------------------------------------------------ 501 | _Use_decl_annotations_ 502 | inline XMVECTOR XM_CALLCONV XMLoadFloat3 503 | ( 504 | const XMFLOAT3* pSource 505 | ) 506 | { 507 | assert(pSource); 508 | #if defined(_XM_NO_INTRINSICS_) 509 | XMVECTOR V; 510 | V.vector4_f32[0] = pSource->x; 511 | V.vector4_f32[1] = pSource->y; 512 | V.vector4_f32[2] = pSource->z; 513 | V.vector4_f32[3] = 0.f; 514 | return V; 515 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 516 | float32x2_t x = vld1_f32(reinterpret_cast(pSource)); 517 | float32x2_t zero = vdup_n_f32(0); 518 | 
float32x2_t y = vld1_lane_f32(reinterpret_cast(pSource) + 2, zero, 0); 519 | return vcombine_f32(x, y); 520 | #elif defined(_XM_SSE_INTRINSICS_) 521 | __m128 x = _mm_load_ss(&pSource->x); 522 | __m128 y = _mm_load_ss(&pSource->y); 523 | __m128 z = _mm_load_ss(&pSource->z); 524 | __m128 xy = _mm_unpacklo_ps(x, y); 525 | return _mm_movelh_ps(xy, z); 526 | #endif 527 | } 528 | 529 | //------------------------------------------------------------------------------ 530 | _Use_decl_annotations_ 531 | inline XMVECTOR XM_CALLCONV XMLoadFloat3A 532 | ( 533 | const XMFLOAT3A* pSource 534 | ) 535 | { 536 | assert(pSource); 537 | assert(((uintptr_t)pSource & 0xF) == 0); 538 | #if defined(_XM_NO_INTRINSICS_) 539 | XMVECTOR V; 540 | V.vector4_f32[0] = pSource->x; 541 | V.vector4_f32[1] = pSource->y; 542 | V.vector4_f32[2] = pSource->z; 543 | V.vector4_f32[3] = 0.f; 544 | return V; 545 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 546 | // Reads an extra float which is zero'd 547 | float32x4_t V = vld1q_f32_ex(reinterpret_cast(pSource), 128); 548 | return vsetq_lane_f32(0, V, 3); 549 | #elif defined(_XM_SSE_INTRINSICS_) 550 | // Reads an extra float which is zero'd 551 | __m128 V = _mm_load_ps(&pSource->x); 552 | return _mm_and_ps(V, g_XMMask3); 553 | #endif 554 | } 555 | 556 | //------------------------------------------------------------------------------ 557 | _Use_decl_annotations_ 558 | inline XMVECTOR XM_CALLCONV XMLoadSInt3 559 | ( 560 | const XMINT3* pSource 561 | ) 562 | { 563 | assert(pSource); 564 | #if defined(_XM_NO_INTRINSICS_) 565 | 566 | XMVECTOR V; 567 | V.vector4_f32[0] = (float)pSource->x; 568 | V.vector4_f32[1] = (float)pSource->y; 569 | V.vector4_f32[2] = (float)pSource->z; 570 | V.vector4_f32[3] = 0.f; 571 | return V; 572 | 573 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 574 | int32x2_t x = vld1_s32(reinterpret_cast(pSource)); 575 | int32x2_t zero = vdup_n_s32(0); 576 | int32x2_t y = vld1_lane_s32(reinterpret_cast(pSource) + 2, zero, 0); 577 | int32x4_t v = vcombine_s32(x, y); 578 | return vcvtq_f32_s32(v); 579 | #elif defined(_XM_SSE_INTRINSICS_) 580 | __m128 x = _mm_load_ss(reinterpret_cast(&pSource->x)); 581 | __m128 y = _mm_load_ss(reinterpret_cast(&pSource->y)); 582 | __m128 z = _mm_load_ss(reinterpret_cast(&pSource->z)); 583 | __m128 xy = _mm_unpacklo_ps(x, y); 584 | __m128 V = _mm_movelh_ps(xy, z); 585 | return _mm_cvtepi32_ps(_mm_castps_si128(V)); 586 | #endif 587 | } 588 | 589 | //------------------------------------------------------------------------------ 590 | _Use_decl_annotations_ 591 | inline XMVECTOR XM_CALLCONV XMLoadUInt3 592 | ( 593 | const XMUINT3* pSource 594 | ) 595 | { 596 | assert(pSource); 597 | #if defined(_XM_NO_INTRINSICS_) 598 | XMVECTOR V; 599 | V.vector4_f32[0] = (float)pSource->x; 600 | V.vector4_f32[1] = (float)pSource->y; 601 | V.vector4_f32[2] = (float)pSource->z; 602 | V.vector4_f32[3] = 0.f; 603 | return V; 604 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 605 | uint32x2_t x = vld1_u32(reinterpret_cast(pSource)); 606 | uint32x2_t zero = vdup_n_u32(0); 607 | uint32x2_t y = vld1_lane_u32(reinterpret_cast(pSource) + 2, zero, 0); 608 | uint32x4_t v = vcombine_u32(x, y); 609 | return vcvtq_f32_u32(v); 610 | #elif defined(_XM_SSE_INTRINSICS_) 611 | __m128 x = _mm_load_ss(reinterpret_cast(&pSource->x)); 612 | __m128 y = _mm_load_ss(reinterpret_cast(&pSource->y)); 613 | __m128 z = _mm_load_ss(reinterpret_cast(&pSource->z)); 614 | __m128 xy = _mm_unpacklo_ps(x, y); 615 | __m128 V = _mm_movelh_ps(xy, z); 616 | // For the values that are higher than 0x7FFFFFFF, a 
fixup is needed 617 | // Determine which ones need the fix. 618 | XMVECTOR vMask = _mm_and_ps(V, g_XMNegativeZero); 619 | // Force all values positive 620 | XMVECTOR vResult = _mm_xor_ps(V, vMask); 621 | // Convert to floats 622 | vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); 623 | // Convert 0x80000000 -> 0xFFFFFFFF 624 | __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); 625 | // For only the ones that are too big, add the fixup 626 | vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); 627 | vResult = _mm_add_ps(vResult, vMask); 628 | return vResult; 629 | #endif 630 | } 631 | 632 | //------------------------------------------------------------------------------ 633 | _Use_decl_annotations_ 634 | inline XMVECTOR XM_CALLCONV XMLoadInt4 635 | ( 636 | const uint32_t* pSource 637 | ) 638 | { 639 | assert(pSource); 640 | 641 | #if defined(_XM_NO_INTRINSICS_) 642 | XMVECTOR V; 643 | V.vector4_u32[0] = pSource[0]; 644 | V.vector4_u32[1] = pSource[1]; 645 | V.vector4_u32[2] = pSource[2]; 646 | V.vector4_u32[3] = pSource[3]; 647 | return V; 648 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 649 | return vld1q_u32(pSource); 650 | #elif defined(_XM_SSE_INTRINSICS_) 651 | __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); 652 | return _mm_castsi128_ps(V); 653 | #endif 654 | } 655 | 656 | //------------------------------------------------------------------------------ 657 | _Use_decl_annotations_ 658 | inline XMVECTOR XM_CALLCONV XMLoadInt4A 659 | ( 660 | const uint32_t* pSource 661 | ) 662 | { 663 | assert(pSource); 664 | assert(((uintptr_t)pSource & 0xF) == 0); 665 | #if defined(_XM_NO_INTRINSICS_) 666 | XMVECTOR V; 667 | V.vector4_u32[0] = pSource[0]; 668 | V.vector4_u32[1] = pSource[1]; 669 | V.vector4_u32[2] = pSource[2]; 670 | V.vector4_u32[3] = pSource[3]; 671 | return V; 672 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 673 | return vld1q_u32_ex(pSource, 128); 674 | #elif defined(_XM_SSE_INTRINSICS_) 675 | __m128i V = _mm_load_si128(reinterpret_cast(pSource)); 676 | return _mm_castsi128_ps(V); 677 | #endif 678 | } 679 | 680 | //------------------------------------------------------------------------------ 681 | _Use_decl_annotations_ 682 | inline XMVECTOR XM_CALLCONV XMLoadFloat4 683 | ( 684 | const XMFLOAT4* pSource 685 | ) 686 | { 687 | assert(pSource); 688 | #if defined(_XM_NO_INTRINSICS_) 689 | XMVECTOR V; 690 | V.vector4_f32[0] = pSource->x; 691 | V.vector4_f32[1] = pSource->y; 692 | V.vector4_f32[2] = pSource->z; 693 | V.vector4_f32[3] = pSource->w; 694 | return V; 695 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 696 | return vld1q_f32(reinterpret_cast(pSource)); 697 | #elif defined(_XM_SSE_INTRINSICS_) 698 | return _mm_loadu_ps(&pSource->x); 699 | #endif 700 | } 701 | 702 | //------------------------------------------------------------------------------ 703 | _Use_decl_annotations_ 704 | inline XMVECTOR XM_CALLCONV XMLoadFloat4A 705 | ( 706 | const XMFLOAT4A* pSource 707 | ) 708 | { 709 | assert(pSource); 710 | assert(((uintptr_t)pSource & 0xF) == 0); 711 | #if defined(_XM_NO_INTRINSICS_) 712 | XMVECTOR V; 713 | V.vector4_f32[0] = pSource->x; 714 | V.vector4_f32[1] = pSource->y; 715 | V.vector4_f32[2] = pSource->z; 716 | V.vector4_f32[3] = pSource->w; 717 | return V; 718 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 719 | return vld1q_f32_ex(reinterpret_cast(pSource), 128); 720 | #elif defined(_XM_SSE_INTRINSICS_) 721 | return _mm_load_ps(&pSource->x); 722 | #endif 723 | } 724 | 725 | //------------------------------------------------------------------------------ 726 
| _Use_decl_annotations_ 727 | inline XMVECTOR XM_CALLCONV XMLoadSInt4 728 | ( 729 | const XMINT4* pSource 730 | ) 731 | { 732 | assert(pSource); 733 | #if defined(_XM_NO_INTRINSICS_) 734 | 735 | XMVECTOR V; 736 | V.vector4_f32[0] = (float)pSource->x; 737 | V.vector4_f32[1] = (float)pSource->y; 738 | V.vector4_f32[2] = (float)pSource->z; 739 | V.vector4_f32[3] = (float)pSource->w; 740 | return V; 741 | 742 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 743 | int32x4_t v = vld1q_s32(reinterpret_cast(pSource)); 744 | return vcvtq_f32_s32(v); 745 | #elif defined(_XM_SSE_INTRINSICS_) 746 | __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); 747 | return _mm_cvtepi32_ps(V); 748 | #endif 749 | } 750 | 751 | //------------------------------------------------------------------------------ 752 | _Use_decl_annotations_ 753 | inline XMVECTOR XM_CALLCONV XMLoadUInt4 754 | ( 755 | const XMUINT4* pSource 756 | ) 757 | { 758 | assert(pSource); 759 | #if defined(_XM_NO_INTRINSICS_) 760 | XMVECTOR V; 761 | V.vector4_f32[0] = (float)pSource->x; 762 | V.vector4_f32[1] = (float)pSource->y; 763 | V.vector4_f32[2] = (float)pSource->z; 764 | V.vector4_f32[3] = (float)pSource->w; 765 | return V; 766 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 767 | uint32x4_t v = vld1q_u32(reinterpret_cast(pSource)); 768 | return vcvtq_f32_u32(v); 769 | #elif defined(_XM_SSE_INTRINSICS_) 770 | __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); 771 | // For the values that are higher than 0x7FFFFFFF, a fixup is needed 772 | // Determine which ones need the fix. 773 | XMVECTOR vMask = _mm_and_ps(_mm_castsi128_ps(V), g_XMNegativeZero); 774 | // Force all values positive 775 | XMVECTOR vResult = _mm_xor_ps(_mm_castsi128_ps(V), vMask); 776 | // Convert to floats 777 | vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); 778 | // Convert 0x80000000 -> 0xFFFFFFFF 779 | __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); 780 | // For only the ones that are too big, add the fixup 781 | vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); 782 | vResult = _mm_add_ps(vResult, vMask); 783 | return vResult; 784 | #endif 785 | } 786 | 787 | //------------------------------------------------------------------------------ 788 | _Use_decl_annotations_ 789 | inline XMMATRIX XM_CALLCONV XMLoadFloat3x3 790 | ( 791 | const XMFLOAT3X3* pSource 792 | ) 793 | { 794 | assert(pSource); 795 | #if defined(_XM_NO_INTRINSICS_) 796 | 797 | XMMATRIX M; 798 | M.c[0].vector4_f32[0] = pSource->m[0][0]; 799 | M.c[0].vector4_f32[1] = pSource->m[0][1]; 800 | M.c[0].vector4_f32[2] = pSource->m[0][2]; 801 | M.c[0].vector4_f32[3] = 0.0f; 802 | 803 | M.c[1].vector4_f32[0] = pSource->m[1][0]; 804 | M.c[1].vector4_f32[1] = pSource->m[1][1]; 805 | M.c[1].vector4_f32[2] = pSource->m[1][2]; 806 | M.c[1].vector4_f32[3] = 0.0f; 807 | 808 | M.c[2].vector4_f32[0] = pSource->m[2][0]; 809 | M.c[2].vector4_f32[1] = pSource->m[2][1]; 810 | M.c[2].vector4_f32[2] = pSource->m[2][2]; 811 | M.c[2].vector4_f32[3] = 0.0f; 812 | M.c[3].vector4_f32[0] = 0.0f; 813 | M.c[3].vector4_f32[1] = 0.0f; 814 | M.c[3].vector4_f32[2] = 0.0f; 815 | M.c[3].vector4_f32[3] = 1.0f; 816 | return M; 817 | 818 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 819 | float32x4_t v0 = vld1q_f32(&pSource->m[0][0]); 820 | float32x4_t v1 = vld1q_f32(&pSource->m[1][1]); 821 | float32x2_t v2 = vcreate_f32((uint64_t)*(const uint32_t*)&pSource->m[2][2]); 822 | float32x4_t T = vextq_f32(v0, v1, 3); 823 | 824 | XMMATRIX M; 825 | M.c[0] = vandq_u32(v0, g_XMMask3); 826 | M.c[1] = vandq_u32(T, g_XMMask3); 827 | 
M.c[2] = vcombine_f32(vget_high_f32(v1), v2); 828 | M.c[3] = g_XMIdentityR3; 829 | return M; 830 | #elif defined(_XM_SSE_INTRINSICS_) 831 | __m128 Z = _mm_setzero_ps(); 832 | 833 | __m128 V1 = _mm_loadu_ps(&pSource->m[0][0]); 834 | __m128 V2 = _mm_loadu_ps(&pSource->m[1][1]); 835 | __m128 V3 = _mm_load_ss(&pSource->m[2][2]); 836 | 837 | __m128 T1 = _mm_unpackhi_ps(V1, Z); 838 | __m128 T2 = _mm_unpacklo_ps(V2, Z); 839 | __m128 T3 = _mm_shuffle_ps(V3, T2, _MM_SHUFFLE(0, 1, 0, 0)); 840 | __m128 T4 = _mm_movehl_ps(T2, T3); 841 | __m128 T5 = _mm_movehl_ps(Z, T1); 842 | 843 | XMMATRIX M; 844 | M.c[0] = _mm_movelh_ps(V1, T1); 845 | M.c[1] = _mm_add_ps(T4, T5); 846 | M.c[2] = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 3, 2)); 847 | M.c[3] = g_XMIdentityR3; 848 | return M; 849 | #endif 850 | } 851 | 852 | //------------------------------------------------------------------------------ 853 | _Use_decl_annotations_ 854 | inline XMMATRIX XM_CALLCONV XMLoadFloat4x3 855 | ( 856 | const XMFLOAT4X3* pSource 857 | ) 858 | { 859 | assert(pSource); 860 | #if defined(_XM_NO_INTRINSICS_) 861 | 862 | XMMATRIX M; 863 | M.c[0].vector4_f32[0] = pSource->m[0][0]; 864 | M.c[0].vector4_f32[1] = pSource->m[0][1]; 865 | M.c[0].vector4_f32[2] = pSource->m[0][2]; 866 | M.c[0].vector4_f32[3] = pSource->m[0][3]; 867 | 868 | M.c[1].vector4_f32[0] = pSource->m[1][0]; 869 | M.c[1].vector4_f32[1] = pSource->m[1][1]; 870 | M.c[1].vector4_f32[2] = pSource->m[1][2]; 871 | M.c[1].vector4_f32[3] = pSource->m[1][3]; 872 | 873 | M.c[2].vector4_f32[0] = pSource->m[2][0]; 874 | M.c[2].vector4_f32[1] = pSource->m[2][1]; 875 | M.c[2].vector4_f32[2] = pSource->m[2][2]; 876 | M.c[2].vector4_f32[3] = pSource->m[2][3]; 877 | 878 | M.c[3] = g_XMIdentityR3; 879 | return M; 880 | 881 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 882 | XMMATRIX M; 883 | M.c[0] = vld1q_f32(reinterpret_cast(&pSource->_11)); 884 | M.c[1] = vld1q_f32(reinterpret_cast(&pSource->_12)); 885 | M.c[2] = vld1q_f32(reinterpret_cast(&pSource->_13)); 886 | M.c[3] = g_XMIdentityR3; 887 | return M; 888 | #elif defined(_XM_SSE_INTRINSICS_) 889 | XMMATRIX M; 890 | M.c[0] = _mm_loadu_ps(&pSource->_11); 891 | M.c[1] = _mm_loadu_ps(&pSource->_12); 892 | M.c[2] = _mm_loadu_ps(&pSource->_13); 893 | M.c[3] = g_XMIdentityR3; 894 | return M; 895 | #endif 896 | } 897 | 898 | //------------------------------------------------------------------------------ 899 | _Use_decl_annotations_ 900 | inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A 901 | ( 902 | const XMFLOAT4X3A* pSource 903 | ) 904 | { 905 | assert(pSource); 906 | assert(((uintptr_t)pSource & 0xF) == 0); 907 | #if defined(_XM_NO_INTRINSICS_) 908 | 909 | XMMATRIX M; 910 | M.c[0].vector4_f32[0] = pSource->m[0][0]; 911 | M.c[0].vector4_f32[1] = pSource->m[0][1]; 912 | M.c[0].vector4_f32[2] = pSource->m[0][2]; 913 | M.c[0].vector4_f32[3] = pSource->m[0][3]; 914 | 915 | M.c[1].vector4_f32[0] = pSource->m[1][0]; 916 | M.c[1].vector4_f32[1] = pSource->m[1][1]; 917 | M.c[1].vector4_f32[2] = pSource->m[1][2]; 918 | M.c[1].vector4_f32[3] = pSource->m[1][3]; 919 | 920 | M.c[2].vector4_f32[0] = pSource->m[2][0]; 921 | M.c[2].vector4_f32[1] = pSource->m[2][1]; 922 | M.c[2].vector4_f32[2] = pSource->m[2][2]; 923 | M.c[2].vector4_f32[3] = pSource->m[2][3]; 924 | 925 | M.c[3] = g_XMIdentityR3; 926 | return M; 927 | 928 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 929 | XMMATRIX M; 930 | M.c[0] = vld1q_f32_ex(reinterpret_cast(&pSource->_11), 128); 931 | M.c[1] = vld1q_f32_ex(reinterpret_cast(&pSource->_12), 128); 932 | M.c[2] = 
vld1q_f32_ex(reinterpret_cast(&pSource->_13), 128); 933 | M.c[3] = g_XMIdentityR3; 934 | return M; 935 | #elif defined(_XM_SSE_INTRINSICS_) 936 | XMMATRIX M; 937 | M.c[0] = _mm_load_ps(&pSource->_11); 938 | M.c[1] = _mm_load_ps(&pSource->_12); 939 | M.c[2] = _mm_load_ps(&pSource->_13); 940 | M.c[3] = g_XMIdentityR3; 941 | return M; 942 | #endif 943 | } 944 | 945 | //------------------------------------------------------------------------------ 946 | _Use_decl_annotations_ 947 | inline XMMATRIX XM_CALLCONV XMLoadFloat4x4 948 | ( 949 | const XMFLOAT4X4* pSource 950 | ) 951 | { 952 | assert(pSource); 953 | #if defined(_XM_NO_INTRINSICS_) 954 | 955 | XMMATRIX M; 956 | M.c[0].vector4_f32[0] = pSource->m[0][0]; 957 | M.c[0].vector4_f32[1] = pSource->m[0][1]; 958 | M.c[0].vector4_f32[2] = pSource->m[0][2]; 959 | M.c[0].vector4_f32[3] = pSource->m[0][3]; 960 | 961 | M.c[1].vector4_f32[0] = pSource->m[1][0]; 962 | M.c[1].vector4_f32[1] = pSource->m[1][1]; 963 | M.c[1].vector4_f32[2] = pSource->m[1][2]; 964 | M.c[1].vector4_f32[3] = pSource->m[1][3]; 965 | 966 | M.c[2].vector4_f32[0] = pSource->m[2][0]; 967 | M.c[2].vector4_f32[1] = pSource->m[2][1]; 968 | M.c[2].vector4_f32[2] = pSource->m[2][2]; 969 | M.c[2].vector4_f32[3] = pSource->m[2][3]; 970 | 971 | M.c[3].vector4_f32[0] = pSource->m[3][0]; 972 | M.c[3].vector4_f32[1] = pSource->m[3][1]; 973 | M.c[3].vector4_f32[2] = pSource->m[3][2]; 974 | M.c[3].vector4_f32[3] = pSource->m[3][3]; 975 | return M; 976 | 977 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 978 | XMMATRIX M; 979 | M.c[0] = vld1q_f32(reinterpret_cast(&pSource->_11)); 980 | M.c[1] = vld1q_f32(reinterpret_cast(&pSource->_12)); 981 | M.c[2] = vld1q_f32(reinterpret_cast(&pSource->_13)); 982 | M.c[3] = vld1q_f32(reinterpret_cast(&pSource->_14)); 983 | return M; 984 | #elif defined(_XM_SSE_INTRINSICS_) 985 | XMMATRIX M; 986 | M.c[0] = _mm_loadu_ps(&pSource->_11); 987 | M.c[1] = _mm_loadu_ps(&pSource->_12); 988 | M.c[2] = _mm_loadu_ps(&pSource->_13); 989 | M.c[3] = _mm_loadu_ps(&pSource->_14); 990 | return M; 991 | #endif 992 | } 993 | 994 | //------------------------------------------------------------------------------ 995 | _Use_decl_annotations_ 996 | inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A 997 | ( 998 | const XMFLOAT4X4A* pSource 999 | ) 1000 | { 1001 | assert(pSource); 1002 | assert(((uintptr_t)pSource & 0xF) == 0); 1003 | #if defined(_XM_NO_INTRINSICS_) 1004 | 1005 | XMMATRIX M; 1006 | M.c[0].vector4_f32[0] = pSource->m[0][0]; 1007 | M.c[0].vector4_f32[1] = pSource->m[0][1]; 1008 | M.c[0].vector4_f32[2] = pSource->m[0][2]; 1009 | M.c[0].vector4_f32[3] = pSource->m[0][3]; 1010 | 1011 | M.c[1].vector4_f32[0] = pSource->m[1][0]; 1012 | M.c[1].vector4_f32[1] = pSource->m[1][1]; 1013 | M.c[1].vector4_f32[2] = pSource->m[1][2]; 1014 | M.c[1].vector4_f32[3] = pSource->m[1][3]; 1015 | 1016 | M.c[2].vector4_f32[0] = pSource->m[2][0]; 1017 | M.c[2].vector4_f32[1] = pSource->m[2][1]; 1018 | M.c[2].vector4_f32[2] = pSource->m[2][2]; 1019 | M.c[2].vector4_f32[3] = pSource->m[2][3]; 1020 | 1021 | M.c[3].vector4_f32[0] = pSource->m[3][0]; 1022 | M.c[3].vector4_f32[1] = pSource->m[3][1]; 1023 | M.c[3].vector4_f32[2] = pSource->m[3][2]; 1024 | M.c[3].vector4_f32[3] = pSource->m[3][3]; 1025 | return M; 1026 | 1027 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1028 | XMMATRIX M; 1029 | M.c[0] = vld1q_f32_ex(reinterpret_cast(&pSource->_11), 128); 1030 | M.c[1] = vld1q_f32_ex(reinterpret_cast(&pSource->_12), 128); 1031 | M.c[2] = vld1q_f32_ex(reinterpret_cast(&pSource->_13), 128); 1032 | M.c[3] = 
vld1q_f32_ex(reinterpret_cast(&pSource->_14), 128); 1033 | return M; 1034 | #elif defined(_XM_SSE_INTRINSICS_) 1035 | XMMATRIX M; 1036 | M.c[0] = _mm_load_ps(&pSource->_11); 1037 | M.c[1] = _mm_load_ps(&pSource->_12); 1038 | M.c[2] = _mm_load_ps(&pSource->_13); 1039 | M.c[3] = _mm_load_ps(&pSource->_14); 1040 | return M; 1041 | #endif 1042 | } 1043 | 1044 | /**************************************************************************** 1045 | * 1046 | * Vector and matrix store operations 1047 | * 1048 | ****************************************************************************/ 1049 | _Use_decl_annotations_ 1050 | inline void XM_CALLCONV XMStoreInt 1051 | ( 1052 | uint32_t* pDestination, 1053 | FXMVECTOR V 1054 | ) 1055 | { 1056 | assert(pDestination); 1057 | #if defined(_XM_NO_INTRINSICS_) 1058 | *pDestination = XMVectorGetIntX(V); 1059 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1060 | vst1q_lane_u32(pDestination, *reinterpret_cast(&V), 0); 1061 | #elif defined(_XM_SSE_INTRINSICS_) 1062 | _mm_store_ss(reinterpret_cast(pDestination), V); 1063 | #endif 1064 | } 1065 | 1066 | //------------------------------------------------------------------------------ 1067 | _Use_decl_annotations_ 1068 | inline void XM_CALLCONV XMStoreFloat 1069 | ( 1070 | float* pDestination, 1071 | FXMVECTOR V 1072 | ) 1073 | { 1074 | assert(pDestination); 1075 | #if defined(_XM_NO_INTRINSICS_) 1076 | *pDestination = XMVectorGetX(V); 1077 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1078 | vst1q_lane_f32(pDestination, V, 0); 1079 | #elif defined(_XM_SSE_INTRINSICS_) 1080 | _mm_store_ss(pDestination, V); 1081 | #endif 1082 | } 1083 | 1084 | //------------------------------------------------------------------------------ 1085 | _Use_decl_annotations_ 1086 | inline void XM_CALLCONV XMStoreInt2 1087 | ( 1088 | uint32_t* pDestination, 1089 | FXMVECTOR V 1090 | ) 1091 | { 1092 | assert(pDestination); 1093 | #if defined(_XM_NO_INTRINSICS_) 1094 | pDestination[0] = V.vector4_u32[0]; 1095 | pDestination[1] = V.vector4_u32[1]; 1096 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1097 | uint32x2_t VL = vget_low_u32(V); 1098 | vst1_u32(pDestination, VL); 1099 | #elif defined(_XM_SSE_INTRINSICS_) 1100 | XMVECTOR T = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); 1101 | _mm_store_ss(reinterpret_cast(&pDestination[0]), V); 1102 | _mm_store_ss(reinterpret_cast(&pDestination[1]), T); 1103 | #endif 1104 | } 1105 | 1106 | //------------------------------------------------------------------------------ 1107 | _Use_decl_annotations_ 1108 | inline void XM_CALLCONV XMStoreInt2A 1109 | ( 1110 | uint32_t* pDestination, 1111 | FXMVECTOR V 1112 | ) 1113 | { 1114 | assert(pDestination); 1115 | assert(((uintptr_t)pDestination & 0xF) == 0); 1116 | #if defined(_XM_NO_INTRINSICS_) 1117 | pDestination[0] = V.vector4_u32[0]; 1118 | pDestination[1] = V.vector4_u32[1]; 1119 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1120 | uint32x2_t VL = vget_low_u32(V); 1121 | vst1_u32_ex(pDestination, VL, 64); 1122 | #elif defined(_XM_SSE_INTRINSICS_) 1123 | _mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V)); 1124 | #endif 1125 | } 1126 | 1127 | //------------------------------------------------------------------------------ 1128 | _Use_decl_annotations_ 1129 | inline void XM_CALLCONV XMStoreFloat2 1130 | ( 1131 | XMFLOAT2* pDestination, 1132 | FXMVECTOR V 1133 | ) 1134 | { 1135 | assert(pDestination); 1136 | #if defined(_XM_NO_INTRINSICS_) 1137 | pDestination->x = V.vector4_f32[0]; 1138 | pDestination->y = V.vector4_f32[1]; 1139 | #elif 
defined(_XM_ARM_NEON_INTRINSICS_) 1140 | float32x2_t VL = vget_low_f32(V); 1141 | vst1_f32(reinterpret_cast(pDestination), VL); 1142 | #elif defined(_XM_SSE_INTRINSICS_) 1143 | XMVECTOR T = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); 1144 | _mm_store_ss(&pDestination->x, V); 1145 | _mm_store_ss(&pDestination->y, T); 1146 | #endif 1147 | } 1148 | 1149 | //------------------------------------------------------------------------------ 1150 | _Use_decl_annotations_ 1151 | inline void XM_CALLCONV XMStoreFloat2A 1152 | ( 1153 | XMFLOAT2A* pDestination, 1154 | FXMVECTOR V 1155 | ) 1156 | { 1157 | assert(pDestination); 1158 | assert(((uintptr_t)pDestination & 0xF) == 0); 1159 | #if defined(_XM_NO_INTRINSICS_) 1160 | pDestination->x = V.vector4_f32[0]; 1161 | pDestination->y = V.vector4_f32[1]; 1162 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1163 | float32x2_t VL = vget_low_f32(V); 1164 | vst1_f32_ex(reinterpret_cast(pDestination), VL, 64); 1165 | #elif defined(_XM_SSE_INTRINSICS_) 1166 | _mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V)); 1167 | #endif 1168 | } 1169 | 1170 | //------------------------------------------------------------------------------ 1171 | _Use_decl_annotations_ 1172 | inline void XM_CALLCONV XMStoreSInt2 1173 | ( 1174 | XMINT2* pDestination, 1175 | FXMVECTOR V 1176 | ) 1177 | { 1178 | assert(pDestination); 1179 | #if defined(_XM_NO_INTRINSICS_) 1180 | pDestination->x = (int32_t)V.vector4_f32[0]; 1181 | pDestination->y = (int32_t)V.vector4_f32[1]; 1182 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1183 | int32x2_t v = vget_low_s32(V); 1184 | v = vcvt_s32_f32(v); 1185 | vst1_s32(reinterpret_cast(pDestination), v); 1186 | #elif defined(_XM_SSE_INTRINSICS_) 1187 | // In case of positive overflow, detect it 1188 | XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); 1189 | // Float to int conversion 1190 | __m128i vResulti = _mm_cvttps_epi32(V); 1191 | // If there was positive overflow, set to 0x7FFFFFFF 1192 | XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); 1193 | vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); 1194 | vOverflow = _mm_or_ps(vOverflow, vResult); 1195 | // Write two ints 1196 | XMVECTOR T = XM_PERMUTE_PS(vOverflow, _MM_SHUFFLE(1, 1, 1, 1)); 1197 | _mm_store_ss(reinterpret_cast(&pDestination->x), vOverflow); 1198 | _mm_store_ss(reinterpret_cast(&pDestination->y), T); 1199 | #endif 1200 | } 1201 | 1202 | //------------------------------------------------------------------------------ 1203 | _Use_decl_annotations_ 1204 | inline void XM_CALLCONV XMStoreUInt2 1205 | ( 1206 | XMUINT2* pDestination, 1207 | FXMVECTOR V 1208 | ) 1209 | { 1210 | assert(pDestination); 1211 | #if defined(_XM_NO_INTRINSICS_) 1212 | pDestination->x = (uint32_t)V.vector4_f32[0]; 1213 | pDestination->y = (uint32_t)V.vector4_f32[1]; 1214 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1215 | float32x2_t v = vget_low_f32(V); 1216 | uint32x2_t iv = vcvt_u32_f32(v); 1217 | vst1_u32(reinterpret_cast(pDestination), iv); 1218 | #elif defined(_XM_SSE_INTRINSICS_) 1219 | // Clamp to >=0 1220 | XMVECTOR vResult = _mm_max_ps(V, g_XMZero); 1221 | // Any numbers that are too big, set to 0xFFFFFFFFU 1222 | XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); 1223 | XMVECTOR vValue = g_XMUnsignedFix; 1224 | // Too large for a signed integer? 
1225 | XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue);
1226 | // Zero for numbers lower than 0x80000000, 32768.0f*65536.0f otherwise
1227 | vValue = _mm_and_ps(vValue, vMask);
1228 | // Perform fixup only on numbers too large (Keeps low bit precision)
1229 | vResult = _mm_sub_ps(vResult, vValue);
1230 | __m128i vResulti = _mm_cvttps_epi32(vResult);
1231 | // Convert from signed to unsigned only if greater than 0x80000000
1232 | vMask = _mm_and_ps(vMask, g_XMNegativeZero);
1233 | vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask);
1234 | // On those that are too large, set to 0xFFFFFFFF
1235 | vResult = _mm_or_ps(vResult, vOverflow);
1236 | // Write two uints
1237 | XMVECTOR T = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(1, 1, 1, 1));
1238 | _mm_store_ss(reinterpret_cast<float*>(&pDestination->x), vResult);
1239 | _mm_store_ss(reinterpret_cast<float*>(&pDestination->y), T);
1240 | #endif
1241 | }
1242 | 
1243 | //------------------------------------------------------------------------------
1244 | _Use_decl_annotations_
1245 | inline void XM_CALLCONV XMStoreInt3
1246 | (
1247 |     uint32_t* pDestination,
1248 |     FXMVECTOR V
1249 | )
1250 | {
1251 |     assert(pDestination);
1252 | #if defined(_XM_NO_INTRINSICS_)
1253 |     pDestination[0] = V.vector4_u32[0];
1254 |     pDestination[1] = V.vector4_u32[1];
1255 |     pDestination[2] = V.vector4_u32[2];
1256 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1257 |     uint32x2_t VL = vget_low_u32(V);
1258 |     vst1_u32(pDestination, VL);
1259 |     vst1q_lane_u32(pDestination + 2, *reinterpret_cast<const uint32x4_t*>(&V), 2);
1260 | #elif defined(_XM_SSE_INTRINSICS_)
1261 |     XMVECTOR T1 = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1));
1262 |     XMVECTOR T2 = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2));
1263 |     _mm_store_ss(reinterpret_cast<float*>(pDestination), V);
1264 |     _mm_store_ss(reinterpret_cast<float*>(&pDestination[1]), T1);
1265 |     _mm_store_ss(reinterpret_cast<float*>(&pDestination[2]), T2);
1266 | #endif
1267 | }
1268 | 
1269 | //------------------------------------------------------------------------------
1270 | _Use_decl_annotations_
1271 | inline void XM_CALLCONV XMStoreInt3A
1272 | (
1273 |     uint32_t* pDestination,
1274 |     FXMVECTOR V
1275 | )
1276 | {
1277 |     assert(pDestination);
1278 |     assert(((uintptr_t)pDestination & 0xF) == 0);
1279 | #if defined(_XM_NO_INTRINSICS_)
1280 |     pDestination[0] = V.vector4_u32[0];
1281 |     pDestination[1] = V.vector4_u32[1];
1282 |     pDestination[2] = V.vector4_u32[2];
1283 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1284 |     uint32x2_t VL = vget_low_u32(V);
1285 |     vst1_u32_ex(pDestination, VL, 64);
1286 |     vst1q_lane_u32(pDestination + 2, *reinterpret_cast<const uint32x4_t*>(&V), 2);
1287 | #elif defined(_XM_SSE_INTRINSICS_)
1288 |     XMVECTOR T = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2));
1289 |     _mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V));
1290 |     _mm_store_ss(reinterpret_cast<float*>(&pDestination[2]), T);
1291 | #endif
1292 | }
1293 | 
1294 | //------------------------------------------------------------------------------
1295 | _Use_decl_annotations_
1296 | inline void XM_CALLCONV XMStoreFloat3
1297 | (
1298 |     XMFLOAT3* pDestination,
1299 |     FXMVECTOR V
1300 | )
1301 | {
1302 |     assert(pDestination);
1303 | #if defined(_XM_NO_INTRINSICS_)
1304 |     pDestination->x = V.vector4_f32[0];
1305 |     pDestination->y = V.vector4_f32[1];
1306 |     pDestination->z = V.vector4_f32[2];
1307 | #elif defined(_XM_ARM_NEON_INTRINSICS_)
1308 |     float32x2_t VL = vget_low_f32(V);
1309 |     vst1_f32(reinterpret_cast<float*>(pDestination), VL);
1310 |     vst1q_lane_f32(reinterpret_cast<float*>(pDestination) + 2, V, 2);
1311 | #elif defined(_XM_SSE_INTRINSICS_)
1312 | XMVECTOR T1 = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); 1313 | XMVECTOR T2 = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); 1314 | _mm_store_ss(&pDestination->x, V); 1315 | _mm_store_ss(&pDestination->y, T1); 1316 | _mm_store_ss(&pDestination->z, T2); 1317 | #endif 1318 | } 1319 | 1320 | //------------------------------------------------------------------------------ 1321 | _Use_decl_annotations_ 1322 | inline void XM_CALLCONV XMStoreFloat3A 1323 | ( 1324 | XMFLOAT3A* pDestination, 1325 | FXMVECTOR V 1326 | ) 1327 | { 1328 | assert(pDestination); 1329 | assert(((uintptr_t)pDestination & 0xF) == 0); 1330 | #if defined(_XM_NO_INTRINSICS_) 1331 | pDestination->x = V.vector4_f32[0]; 1332 | pDestination->y = V.vector4_f32[1]; 1333 | pDestination->z = V.vector4_f32[2]; 1334 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1335 | float32x2_t VL = vget_low_f32(V); 1336 | vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64); 1337 | vst1q_lane_f32(reinterpret_cast<float*>(pDestination) + 2, V, 2); 1338 | #elif defined(_XM_SSE_INTRINSICS_) 1339 | XMVECTOR T = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); 1340 | _mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V)); 1341 | _mm_store_ss(&pDestination->z, T); 1342 | #endif 1343 | } 1344 | 1345 | //------------------------------------------------------------------------------ 1346 | _Use_decl_annotations_ 1347 | inline void XM_CALLCONV XMStoreSInt3 1348 | ( 1349 | XMINT3* pDestination, 1350 | FXMVECTOR V 1351 | ) 1352 | { 1353 | assert(pDestination); 1354 | #if defined(_XM_NO_INTRINSICS_) 1355 | pDestination->x = (int32_t)V.vector4_f32[0]; 1356 | pDestination->y = (int32_t)V.vector4_f32[1]; 1357 | pDestination->z = (int32_t)V.vector4_f32[2]; 1358 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1359 | int32x4_t v = vcvtq_s32_f32(V); 1360 | int32x2_t vL = vget_low_s32(v); 1361 | vst1_s32(reinterpret_cast<int32_t*>(pDestination), vL); 1362 | vst1q_lane_s32(reinterpret_cast<int32_t*>(pDestination) + 2, v, 2); 1363 | #elif defined(_XM_SSE_INTRINSICS_) 1364 | // In case of positive overflow, detect it 1365 | XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); 1366 | // Float to int conversion 1367 | __m128i vResulti = _mm_cvttps_epi32(V); 1368 | // If there was positive overflow, set to 0x7FFFFFFF 1369 | XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); 1370 | vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); 1371 | vOverflow = _mm_or_ps(vOverflow, vResult); 1372 | // Write 3 ints 1373 | XMVECTOR T1 = XM_PERMUTE_PS(vOverflow, _MM_SHUFFLE(1, 1, 1, 1)); 1374 | XMVECTOR T2 = XM_PERMUTE_PS(vOverflow, _MM_SHUFFLE(2, 2, 2, 2)); 1375 | _mm_store_ss(reinterpret_cast<float*>(&pDestination->x), vOverflow); 1376 | _mm_store_ss(reinterpret_cast<float*>(&pDestination->y), T1); 1377 | _mm_store_ss(reinterpret_cast<float*>(&pDestination->z), T2); 1378 | #endif 1379 | } 1380 | 1381 | //------------------------------------------------------------------------------ 1382 | _Use_decl_annotations_ 1383 | inline void XM_CALLCONV XMStoreUInt3 1384 | ( 1385 | XMUINT3* pDestination, 1386 | FXMVECTOR V 1387 | ) 1388 | { 1389 | assert(pDestination); 1390 | #if defined(_XM_NO_INTRINSICS_) 1391 | pDestination->x = (uint32_t)V.vector4_f32[0]; 1392 | pDestination->y = (uint32_t)V.vector4_f32[1]; 1393 | pDestination->z = (uint32_t)V.vector4_f32[2]; 1394 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1395 | uint32x4_t v = vcvtq_u32_f32(V); 1396 | uint32x2_t vL = vget_low_u32(v); 1397 | vst1_u32(reinterpret_cast<uint32_t*>(pDestination), vL); 1398 | vst1q_lane_u32(reinterpret_cast<uint32_t*>(pDestination) + 2, v, 2); 1399 | #elif defined(_XM_SSE_INTRINSICS_)
1400 | // Clamp to >=0 1401 | XMVECTOR vResult = _mm_max_ps(V, g_XMZero); 1402 | // Any numbers that are too big, set to 0xFFFFFFFFU 1403 | XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); 1404 | XMVECTOR vValue = g_XMUnsignedFix; 1405 | // Too large for a signed integer? 1406 | XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); 1407 | // Zero for numbers lower than 0x80000000, 32768.0f*65536.0f otherwise 1408 | vValue = _mm_and_ps(vValue, vMask); 1409 | // Perform fixup only on numbers too large (Keeps low bit precision) 1410 | vResult = _mm_sub_ps(vResult, vValue); 1411 | __m128i vResulti = _mm_cvttps_epi32(vResult); 1412 | // Convert from signed to unsigned only if greater than 0x80000000 1413 | vMask = _mm_and_ps(vMask, g_XMNegativeZero); 1414 | vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); 1415 | // On those that are too large, set to 0xFFFFFFFF 1416 | vResult = _mm_or_ps(vResult, vOverflow); 1417 | // Write 3 uints 1418 | XMVECTOR T1 = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(1, 1, 1, 1)); 1419 | XMVECTOR T2 = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(2, 2, 2, 2)); 1420 | _mm_store_ss(reinterpret_cast<float*>(&pDestination->x), vResult); 1421 | _mm_store_ss(reinterpret_cast<float*>(&pDestination->y), T1); 1422 | _mm_store_ss(reinterpret_cast<float*>(&pDestination->z), T2); 1423 | #endif 1424 | } 1425 | 1426 | //------------------------------------------------------------------------------ 1427 | _Use_decl_annotations_ 1428 | inline void XM_CALLCONV XMStoreInt4 1429 | ( 1430 | uint32_t* pDestination, 1431 | FXMVECTOR V 1432 | ) 1433 | { 1434 | assert(pDestination); 1435 | #if defined(_XM_NO_INTRINSICS_) 1436 | pDestination[0] = V.vector4_u32[0]; 1437 | pDestination[1] = V.vector4_u32[1]; 1438 | pDestination[2] = V.vector4_u32[2]; 1439 | pDestination[3] = V.vector4_u32[3]; 1440 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1441 | vst1q_u32(pDestination, V); 1442 | #elif defined(_XM_SSE_INTRINSICS_) 1443 | _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V)); 1444 | #endif 1445 | } 1446 | 1447 | //------------------------------------------------------------------------------ 1448 | _Use_decl_annotations_ 1449 | inline void XM_CALLCONV XMStoreInt4A 1450 | ( 1451 | uint32_t* pDestination, 1452 | FXMVECTOR V 1453 | ) 1454 | { 1455 | assert(pDestination); 1456 | assert(((uintptr_t)pDestination & 0xF) == 0); 1457 | #if defined(_XM_NO_INTRINSICS_) 1458 | pDestination[0] = V.vector4_u32[0]; 1459 | pDestination[1] = V.vector4_u32[1]; 1460 | pDestination[2] = V.vector4_u32[2]; 1461 | pDestination[3] = V.vector4_u32[3]; 1462 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1463 | vst1q_u32_ex(pDestination, V, 128); 1464 | #elif defined(_XM_SSE_INTRINSICS_) 1465 | _mm_store_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V)); 1466 | #endif 1467 | } 1468 | 1469 | //------------------------------------------------------------------------------ 1470 | _Use_decl_annotations_ 1471 | inline void XM_CALLCONV XMStoreFloat4 1472 | ( 1473 | XMFLOAT4* pDestination, 1474 | FXMVECTOR V 1475 | ) 1476 | { 1477 | assert(pDestination); 1478 | #if defined(_XM_NO_INTRINSICS_) 1479 | pDestination->x = V.vector4_f32[0]; 1480 | pDestination->y = V.vector4_f32[1]; 1481 | pDestination->z = V.vector4_f32[2]; 1482 | pDestination->w = V.vector4_f32[3]; 1483 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1484 | vst1q_f32(reinterpret_cast<float*>(pDestination), V); 1485 | #elif defined(_XM_SSE_INTRINSICS_) 1486 | _mm_storeu_ps(&pDestination->x, V); 1487 | #endif 1488 | } 1489 |
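The unsigned stores above (XMStoreUInt2 and XMStoreUInt3, plus XMStoreUInt4 below) all rely on the same SSE trick, because _mm_cvttps_epi32 only performs a signed truncation. A scalar sketch of that fixup, not part of the library and written here only to illustrate the g_XMUnsignedFix path:

```cpp
#include <cstdint>

// Per-lane model of the SSE unsigned-store fixup.
uint32_t StoreUIntScalar(float f)
{
    if (f <= 0.0f) return 0;                        // _mm_max_ps(V, g_XMZero)
    if (f >= 4294967296.0f) return 0xFFFFFFFFu;     // overflow -> all bits set
    if (f >= 2147483648.0f)                         // too large for a signed integer?
    {
        // Rebase by 32768.0f*65536.0f, truncate as signed, then XOR the
        // sign bit back in (the g_XMNegativeZero mask in the SSE code).
        return (uint32_t)(int32_t)(f - 2147483648.0f) ^ 0x80000000u;
    }
    return (uint32_t)(int32_t)f;                    // plain signed truncation
}
```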
1490 | //------------------------------------------------------------------------------ 1491 | _Use_decl_annotations_ 1492 | inline void XM_CALLCONV XMStoreFloat4A 1493 | ( 1494 | XMFLOAT4A* pDestination, 1495 | FXMVECTOR V 1496 | ) 1497 | { 1498 | assert(pDestination); 1499 | assert(((uintptr_t)pDestination & 0xF) == 0); 1500 | #if defined(_XM_NO_INTRINSICS_) 1501 | pDestination->x = V.vector4_f32[0]; 1502 | pDestination->y = V.vector4_f32[1]; 1503 | pDestination->z = V.vector4_f32[2]; 1504 | pDestination->w = V.vector4_f32[3]; 1505 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1506 | vst1q_f32_ex(reinterpret_cast<float*>(pDestination), V, 128); 1507 | #elif defined(_XM_SSE_INTRINSICS_) 1508 | _mm_store_ps(&pDestination->x, V); 1509 | #endif 1510 | } 1511 | 1512 | //------------------------------------------------------------------------------ 1513 | _Use_decl_annotations_ 1514 | inline void XM_CALLCONV XMStoreSInt4 1515 | ( 1516 | XMINT4* pDestination, 1517 | FXMVECTOR V 1518 | ) 1519 | { 1520 | assert(pDestination); 1521 | #if defined(_XM_NO_INTRINSICS_) 1522 | pDestination->x = (int32_t)V.vector4_f32[0]; 1523 | pDestination->y = (int32_t)V.vector4_f32[1]; 1524 | pDestination->z = (int32_t)V.vector4_f32[2]; 1525 | pDestination->w = (int32_t)V.vector4_f32[3]; 1526 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1527 | int32x4_t v = vcvtq_s32_f32(V); 1528 | vst1q_s32(reinterpret_cast<int32_t*>(pDestination), v); 1529 | #elif defined(_XM_SSE_INTRINSICS_) 1530 | // In case of positive overflow, detect it 1531 | XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); 1532 | // Float to int conversion 1533 | __m128i vResulti = _mm_cvttps_epi32(V); 1534 | // If there was positive overflow, set to 0x7FFFFFFF 1535 | XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); 1536 | vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); 1537 | vOverflow = _mm_or_ps(vOverflow, vResult); 1538 | _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vOverflow)); 1539 | #endif 1540 | } 1541 | 1542 | //------------------------------------------------------------------------------ 1543 | _Use_decl_annotations_ 1544 | inline void XM_CALLCONV XMStoreUInt4 1545 | ( 1546 | XMUINT4* pDestination, 1547 | FXMVECTOR V 1548 | ) 1549 | { 1550 | assert(pDestination); 1551 | #if defined(_XM_NO_INTRINSICS_) 1552 | pDestination->x = (uint32_t)V.vector4_f32[0]; 1553 | pDestination->y = (uint32_t)V.vector4_f32[1]; 1554 | pDestination->z = (uint32_t)V.vector4_f32[2]; 1555 | pDestination->w = (uint32_t)V.vector4_f32[3]; 1556 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1557 | uint32x4_t v = vcvtq_u32_f32(V); 1558 | vst1q_u32(reinterpret_cast<uint32_t*>(pDestination), v); 1559 | #elif defined(_XM_SSE_INTRINSICS_) 1560 | // Clamp to >=0 1561 | XMVECTOR vResult = _mm_max_ps(V, g_XMZero); 1562 | // Any numbers that are too big, set to 0xFFFFFFFFU 1563 | XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); 1564 | XMVECTOR vValue = g_XMUnsignedFix; 1565 | // Too large for a signed integer?
1566 | XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); 1567 | // Zero for numbers lower than 0x80000000, 32768.0f*65536.0f otherwise 1568 | vValue = _mm_and_ps(vValue, vMask); 1569 | // Perform fixup only on numbers too large (Keeps low bit precision) 1570 | vResult = _mm_sub_ps(vResult, vValue); 1571 | __m128i vResulti = _mm_cvttps_epi32(vResult); 1572 | // Convert from signed to unsigned only if greater than 0x80000000 1573 | vMask = _mm_and_ps(vMask, g_XMNegativeZero); 1574 | vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); 1575 | // On those that are too large, set to 0xFFFFFFFF 1576 | vResult = _mm_or_ps(vResult, vOverflow); 1577 | _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vResult)); 1578 | #endif 1579 | } 1580 | 1581 | //------------------------------------------------------------------------------ 1582 | _Use_decl_annotations_ 1583 | inline void XM_CALLCONV XMStoreFloat3x3 1584 | ( 1585 | XMFLOAT3X3* pDestination, 1586 | FXMMATRIX M 1587 | ) 1588 | { 1589 | assert(pDestination); 1590 | #if defined(_XM_NO_INTRINSICS_) 1591 | 1592 | pDestination->m[0][0] = M.c[0].vector4_f32[0]; 1593 | pDestination->m[0][1] = M.c[0].vector4_f32[1]; 1594 | pDestination->m[0][2] = M.c[0].vector4_f32[2]; 1595 | 1596 | pDestination->m[1][0] = M.c[1].vector4_f32[0]; 1597 | pDestination->m[1][1] = M.c[1].vector4_f32[1]; 1598 | pDestination->m[1][2] = M.c[1].vector4_f32[2]; 1599 | 1600 | pDestination->m[2][0] = M.c[2].vector4_f32[0]; 1601 | pDestination->m[2][1] = M.c[2].vector4_f32[1]; 1602 | pDestination->m[2][2] = M.c[2].vector4_f32[2]; 1603 | 1604 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1605 | float32x4_t T1 = vextq_f32(M.c[0], M.c[1], 1); 1606 | float32x4_t T2 = vbslq_f32(g_XMMask3, M.c[0], T1); 1607 | vst1q_f32(&pDestination->m[0][0], T2); 1608 | 1609 | T1 = vextq_f32(M.c[1], M.c[1], 1); 1610 | T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.c[2])); 1611 | vst1q_f32(&pDestination->m[1][1], T2); 1612 | 1613 | vst1q_lane_f32(&pDestination->m[2][2], M.c[2], 2); 1614 | #elif defined(_XM_SSE_INTRINSICS_) 1615 | XMVECTOR vTemp1 = M.c[0]; 1616 | XMVECTOR vTemp2 = M.c[1]; 1617 | XMVECTOR vTemp3 = M.c[2]; 1618 | XMVECTOR vWork = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(0, 0, 2, 2)); 1619 | vTemp1 = _mm_shuffle_ps(vTemp1, vWork, _MM_SHUFFLE(2, 0, 1, 0)); 1620 | _mm_storeu_ps(&pDestination->m[0][0], vTemp1); 1621 | vTemp2 = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1)); 1622 | _mm_storeu_ps(&pDestination->m[1][1], vTemp2); 1623 | vTemp3 = XM_PERMUTE_PS(vTemp3, _MM_SHUFFLE(2, 2, 2, 2)); 1624 | _mm_store_ss(&pDestination->m[2][2], vTemp3); 1625 | #endif 1626 | } 1627 | 1628 | //------------------------------------------------------------------------------ 1629 | _Use_decl_annotations_ 1630 | inline void XM_CALLCONV XMStoreFloat4x3 1631 | ( 1632 | XMFLOAT4X3* pDestination, 1633 | FXMMATRIX M 1634 | ) 1635 | { 1636 | assert(pDestination); 1637 | #if defined(_XM_NO_INTRINSICS_) 1638 | 1639 | pDestination->m[0][0] = M.c[0].vector4_f32[0]; 1640 | pDestination->m[0][1] = M.c[0].vector4_f32[1]; 1641 | pDestination->m[0][2] = M.c[0].vector4_f32[2]; 1642 | pDestination->m[0][3] = M.c[0].vector4_f32[3]; 1643 | 1644 | pDestination->m[1][0] = M.c[1].vector4_f32[0]; 1645 | pDestination->m[1][1] = M.c[1].vector4_f32[1]; 1646 | pDestination->m[1][2] = M.c[1].vector4_f32[2]; 1647 | pDestination->m[1][3] = M.c[1].vector4_f32[3]; 1648 | 1649 | pDestination->m[2][0] = M.c[2].vector4_f32[0]; 1650 | pDestination->m[2][1] = M.c[2].vector4_f32[1]; 1651 | pDestination->m[2][2] = M.c[2].vector4_f32[2];
1652 | pDestination->m[2][3] = M.c[2].vector4_f32[3]; 1653 | 1654 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1655 | vst1q_f32(reinterpret_cast<float*>(&pDestination->_11), M.c[0]); 1656 | vst1q_f32(reinterpret_cast<float*>(&pDestination->_12), M.c[1]); 1657 | vst1q_f32(reinterpret_cast<float*>(&pDestination->_13), M.c[2]); 1658 | #elif defined(_XM_SSE_INTRINSICS_) 1659 | _mm_storeu_ps(&pDestination->_11, M.c[0]); 1660 | _mm_storeu_ps(&pDestination->_12, M.c[1]); 1661 | _mm_storeu_ps(&pDestination->_13, M.c[2]); 1662 | #endif 1663 | } 1664 | 1665 | //------------------------------------------------------------------------------ 1666 | _Use_decl_annotations_ 1667 | inline void XM_CALLCONV XMStoreFloat4x3A 1668 | ( 1669 | XMFLOAT4X3A* pDestination, 1670 | FXMMATRIX M 1671 | ) 1672 | { 1673 | assert(pDestination); 1674 | assert(((uintptr_t)pDestination & 0xF) == 0); 1675 | #if defined(_XM_NO_INTRINSICS_) 1676 | 1677 | pDestination->m[0][0] = M.c[0].vector4_f32[0]; 1678 | pDestination->m[0][1] = M.c[0].vector4_f32[1]; 1679 | pDestination->m[0][2] = M.c[0].vector4_f32[2]; 1680 | pDestination->m[0][3] = M.c[0].vector4_f32[3]; 1681 | 1682 | pDestination->m[1][0] = M.c[1].vector4_f32[0]; 1683 | pDestination->m[1][1] = M.c[1].vector4_f32[1]; 1684 | pDestination->m[1][2] = M.c[1].vector4_f32[2]; 1685 | pDestination->m[1][3] = M.c[1].vector4_f32[3]; 1686 | 1687 | pDestination->m[2][0] = M.c[2].vector4_f32[0]; 1688 | pDestination->m[2][1] = M.c[2].vector4_f32[1]; 1689 | pDestination->m[2][2] = M.c[2].vector4_f32[2]; 1690 | pDestination->m[2][3] = M.c[2].vector4_f32[3]; 1691 | 1692 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1693 | vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.c[0], 128); 1694 | vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_12), M.c[1], 128); 1695 | vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_13), M.c[2], 128); 1696 | #elif defined(_XM_SSE_INTRINSICS_) 1697 | _mm_store_ps(&pDestination->_11, M.c[0]); 1698 | _mm_store_ps(&pDestination->_12, M.c[1]); 1699 | _mm_store_ps(&pDestination->_13, M.c[2]); 1700 | #endif 1701 | } 1702 | 1703 | //------------------------------------------------------------------------------ 1704 | _Use_decl_annotations_ 1705 | inline void XM_CALLCONV XMStoreFloat4x4 1706 | ( 1707 | XMFLOAT4X4* pDestination, 1708 | FXMMATRIX M 1709 | ) 1710 | { 1711 | assert(pDestination); 1712 | #if defined(_XM_NO_INTRINSICS_) 1713 | 1714 | pDestination->m[0][0] = M.c[0].vector4_f32[0]; 1715 | pDestination->m[0][1] = M.c[0].vector4_f32[1]; 1716 | pDestination->m[0][2] = M.c[0].vector4_f32[2]; 1717 | pDestination->m[0][3] = M.c[0].vector4_f32[3]; 1718 | 1719 | pDestination->m[1][0] = M.c[1].vector4_f32[0]; 1720 | pDestination->m[1][1] = M.c[1].vector4_f32[1]; 1721 | pDestination->m[1][2] = M.c[1].vector4_f32[2]; 1722 | pDestination->m[1][3] = M.c[1].vector4_f32[3]; 1723 | 1724 | pDestination->m[2][0] = M.c[2].vector4_f32[0]; 1725 | pDestination->m[2][1] = M.c[2].vector4_f32[1]; 1726 | pDestination->m[2][2] = M.c[2].vector4_f32[2]; 1727 | pDestination->m[2][3] = M.c[2].vector4_f32[3]; 1728 | 1729 | pDestination->m[3][0] = M.c[3].vector4_f32[0]; 1730 | pDestination->m[3][1] = M.c[3].vector4_f32[1]; 1731 | pDestination->m[3][2] = M.c[3].vector4_f32[2]; 1732 | pDestination->m[3][3] = M.c[3].vector4_f32[3]; 1733 | 1734 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1735 | vst1q_f32(reinterpret_cast<float*>(&pDestination->_11), M.c[0]); 1736 | vst1q_f32(reinterpret_cast<float*>(&pDestination->_12), M.c[1]); 1737 | vst1q_f32(reinterpret_cast<float*>(&pDestination->_13), M.c[2]); 1738 | vst1q_f32(reinterpret_cast<float*>(&pDestination->_14), M.c[3]);
1739 | #elif defined(_XM_SSE_INTRINSICS_) 1740 | _mm_storeu_ps(&pDestination->_11, M.c[0]); 1741 | _mm_storeu_ps(&pDestination->_12, M.c[1]); 1742 | _mm_storeu_ps(&pDestination->_13, M.c[2]); 1743 | _mm_storeu_ps(&pDestination->_14, M.c[3]); 1744 | #endif 1745 | } 1746 | 1747 | //------------------------------------------------------------------------------ 1748 | _Use_decl_annotations_ 1749 | inline void XM_CALLCONV XMStoreFloat4x4A 1750 | ( 1751 | XMFLOAT4X4A* pDestination, 1752 | FXMMATRIX M 1753 | ) 1754 | { 1755 | assert(pDestination); 1756 | assert(((uintptr_t)pDestination & 0xF) == 0); 1757 | #if defined(_XM_NO_INTRINSICS_) 1758 | 1759 | pDestination->m[0][0] = M.c[0].vector4_f32[0]; 1760 | pDestination->m[0][1] = M.c[0].vector4_f32[1]; 1761 | pDestination->m[0][2] = M.c[0].vector4_f32[2]; 1762 | pDestination->m[0][3] = M.c[0].vector4_f32[3]; 1763 | 1764 | pDestination->m[1][0] = M.c[1].vector4_f32[0]; 1765 | pDestination->m[1][1] = M.c[1].vector4_f32[1]; 1766 | pDestination->m[1][2] = M.c[1].vector4_f32[2]; 1767 | pDestination->m[1][3] = M.c[1].vector4_f32[3]; 1768 | 1769 | pDestination->m[2][0] = M.c[2].vector4_f32[0]; 1770 | pDestination->m[2][1] = M.c[2].vector4_f32[1]; 1771 | pDestination->m[2][2] = M.c[2].vector4_f32[2]; 1772 | pDestination->m[2][3] = M.c[2].vector4_f32[3]; 1773 | 1774 | pDestination->m[3][0] = M.c[3].vector4_f32[0]; 1775 | pDestination->m[3][1] = M.c[3].vector4_f32[1]; 1776 | pDestination->m[3][2] = M.c[3].vector4_f32[2]; 1777 | pDestination->m[3][3] = M.c[3].vector4_f32[3]; 1778 | 1779 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1780 | vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.c[0], 128); 1781 | vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_12), M.c[1], 128); 1782 | vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_13), M.c[2], 128); 1783 | vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_14), M.c[3], 128); 1784 | #elif defined(_XM_SSE_INTRINSICS_) 1785 | _mm_store_ps(&pDestination->_11, M.c[0]); 1786 | _mm_store_ps(&pDestination->_12, M.c[1]); 1787 | _mm_store_ps(&pDestination->_13, M.c[2]); 1788 | _mm_store_ps(&pDestination->_14, M.c[3]); 1789 | #endif 1790 | } 1791 | -------------------------------------------------------------------------------- /Inc/XMathMisc.inl: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------- 2 | // DirectXMathMisc.inl -- SIMD C++ Math library 3 | // 4 | // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF 5 | // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO 6 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A 7 | // PARTICULAR PURPOSE. 8 | // 9 | // Copyright (c) Microsoft Corporation. All rights reserved.
10 | //------------------------------------------------------------------------------------- 11 | 12 | #pragma once 13 | 14 | /**************************************************************************** 15 | * 16 | * Quaternion 17 | * 18 | ****************************************************************************/ 19 | 20 | //------------------------------------------------------------------------------ 21 | // Comparison operations 22 | //------------------------------------------------------------------------------ 23 | 24 | //------------------------------------------------------------------------------ 25 | 26 | inline bool XM_CALLCONV XMQuaternionEqual 27 | ( 28 | FXMVECTOR Q1, 29 | FXMVECTOR Q2 30 | ) 31 | { 32 | return XMVector4Equal(Q1, Q2); 33 | } 34 | 35 | //------------------------------------------------------------------------------ 36 | 37 | inline bool XM_CALLCONV XMQuaternionNotEqual 38 | ( 39 | FXMVECTOR Q1, 40 | FXMVECTOR Q2 41 | ) 42 | { 43 | return XMVector4NotEqual(Q1, Q2); 44 | } 45 | 46 | //------------------------------------------------------------------------------ 47 | 48 | inline bool XM_CALLCONV XMQuaternionIsNaN 49 | ( 50 | FXMVECTOR Q 51 | ) 52 | { 53 | return XMVector4IsNaN(Q); 54 | } 55 | 56 | //------------------------------------------------------------------------------ 57 | 58 | inline bool XM_CALLCONV XMQuaternionIsInfinite 59 | ( 60 | FXMVECTOR Q 61 | ) 62 | { 63 | return XMVector4IsInfinite(Q); 64 | } 65 | 66 | //------------------------------------------------------------------------------ 67 | 68 | inline bool XM_CALLCONV XMQuaternionIsIdentity 69 | ( 70 | FXMVECTOR Q 71 | ) 72 | { 73 | return XMVector4Equal(Q, g_XMIdentityR3.v); 74 | } 75 | 76 | //------------------------------------------------------------------------------ 77 | // Computation operations 78 | //------------------------------------------------------------------------------ 79 | 80 | //------------------------------------------------------------------------------ 81 | 82 | inline XMVECTOR XM_CALLCONV XMQuaternionDot 83 | ( 84 | FXMVECTOR Q1, 85 | FXMVECTOR Q2 86 | ) 87 | { 88 | return XMVector4Dot(Q1, Q2); 89 | } 90 | 91 | //------------------------------------------------------------------------------ 92 | 93 | inline XMVECTOR XM_CALLCONV XMQuaternionMultiply 94 | ( 95 | FXMVECTOR Q1, 96 | FXMVECTOR Q2 97 | ) 98 | { 99 | // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2) 100 | 101 | // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y), 102 | // (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x), 103 | // (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w), 104 | // (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ] 105 | 106 | #if defined(_XM_NO_INTRINSICS_) 107 | XMVECTOR Result = { 108 | (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]), 109 | (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]), 110 | (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]), 111 | (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) }; 112 | return 
Result; 113 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 114 | static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f }; 115 | static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f }; 116 | static const XMVECTORF32 ControlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f }; 117 | 118 | float32x2_t Q2L = vget_low_f32(Q2); 119 | float32x2_t Q2H = vget_high_f32(Q2); 120 | 121 | float32x4_t Q2X = vdupq_lane_f32(Q2L, 0); 122 | float32x4_t Q2Y = vdupq_lane_f32(Q2L, 1); 123 | float32x4_t Q2Z = vdupq_lane_f32(Q2H, 0); 124 | XMVECTOR vResult = vmulq_lane_f32(Q1, Q2H, 1); 125 | 126 | // Mul by Q1WZYX 127 | float32x4_t vTemp = vrev64q_f32(Q1); 128 | vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); 129 | Q2X = vmulq_f32(Q2X, vTemp); 130 | vResult = vmlaq_f32(vResult, Q2X, ControlWZYX); 131 | 132 | // Mul by Q1ZWXY 133 | vTemp = vrev64q_u32(vTemp); 134 | Q2Y = vmulq_f32(Q2Y, vTemp); 135 | vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY); 136 | 137 | // Mul by Q1YXWZ 138 | vTemp = vrev64q_u32(vTemp); 139 | vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); 140 | Q2Z = vmulq_f32(Q2Z, vTemp); 141 | vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ); 142 | return vResult; 143 | #elif defined(_XM_SSE_INTRINSICS_) 144 | static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f }; 145 | static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f }; 146 | static const XMVECTORF32 ControlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f }; 147 | // Copy to SSE registers and use as few as possible for x86 148 | XMVECTOR Q2X = Q2; 149 | XMVECTOR Q2Y = Q2; 150 | XMVECTOR Q2Z = Q2; 151 | XMVECTOR vResult = Q2; 152 | // Splat with one instruction 153 | vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 3, 3, 3)); 154 | Q2X = XM_PERMUTE_PS(Q2X, _MM_SHUFFLE(0, 0, 0, 0)); 155 | Q2Y = XM_PERMUTE_PS(Q2Y, _MM_SHUFFLE(1, 1, 1, 1)); 156 | Q2Z = XM_PERMUTE_PS(Q2Z, _MM_SHUFFLE(2, 2, 2, 2)); 157 | // Retire Q1 and perform Q1*Q2W 158 | vResult = _mm_mul_ps(vResult, Q1); 159 | XMVECTOR Q1Shuffle = Q1; 160 | // Shuffle the copies of Q1 161 | Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); 162 | // Mul by Q1WZYX 163 | Q2X = _mm_mul_ps(Q2X, Q1Shuffle); 164 | Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1)); 165 | // Flip the signs on y and z 166 | Q2X = _mm_mul_ps(Q2X, ControlWZYX); 167 | // Mul by Q1ZWXY 168 | Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle); 169 | Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); 170 | // Flip the signs on z and w 171 | Q2Y = _mm_mul_ps(Q2Y, ControlZWXY); 172 | // Mul by Q1YXWZ 173 | Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle); 174 | vResult = _mm_add_ps(vResult, Q2X); 175 | // Flip the signs on x and w 176 | Q2Z = _mm_mul_ps(Q2Z, ControlYXWZ); 177 | Q2Y = _mm_add_ps(Q2Y, Q2Z); 178 | vResult = _mm_add_ps(vResult, Q2Y); 179 | return vResult; 180 | #endif 181 | } 182 | 183 | //------------------------------------------------------------------------------ 184 | 185 | inline XMVECTOR XM_CALLCONV XMQuaternionLengthSq 186 | ( 187 | FXMVECTOR Q 188 | ) 189 | { 190 | return XMVector4LengthSq(Q); 191 | } 192 | 193 | //------------------------------------------------------------------------------ 194 | 195 | inline XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength 196 | ( 197 | FXMVECTOR Q 198 | ) 199 | { 200 | return XMVector4ReciprocalLength(Q); 201 | } 202 | 203 | //------------------------------------------------------------------------------ 204 | 205 | inline XMVECTOR XM_CALLCONV XMQuaternionLength 206 | ( 207 | FXMVECTOR Q 208 | ) 209 | { 210 | return XMVector4Length(Q); 
211 | } 212 | 213 | //------------------------------------------------------------------------------ 214 | 215 | inline XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst 216 | ( 217 | FXMVECTOR Q 218 | ) 219 | { 220 | return XMVector4NormalizeEst(Q); 221 | } 222 | 223 | //------------------------------------------------------------------------------ 224 | 225 | inline XMVECTOR XM_CALLCONV XMQuaternionNormalize 226 | ( 227 | FXMVECTOR Q 228 | ) 229 | { 230 | return XMVector4Normalize(Q); 231 | } 232 | 233 | //------------------------------------------------------------------------------ 234 | 235 | inline XMVECTOR XM_CALLCONV XMQuaternionConjugate 236 | ( 237 | FXMVECTOR Q 238 | ) 239 | { 240 | #if defined(_XM_NO_INTRINSICS_) 241 | XMVECTOR Result = { 242 | -Q.vector4_f32[0], 243 | -Q.vector4_f32[1], 244 | -Q.vector4_f32[2], 245 | Q.vector4_f32[3] 246 | }; 247 | return Result; 248 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 249 | static const XMVECTORF32 NegativeOne3 = { -1.0f,-1.0f,-1.0f,1.0f }; 250 | return vmulq_f32(Q, NegativeOne3.v); 251 | #elif defined(_XM_SSE_INTRINSICS_) 252 | static const XMVECTORF32 NegativeOne3 = { -1.0f,-1.0f,-1.0f,1.0f }; 253 | return _mm_mul_ps(Q, NegativeOne3); 254 | #endif 255 | } 256 | 257 | //------------------------------------------------------------------------------ 258 | 259 | inline XMVECTOR XM_CALLCONV XMQuaternionInverse 260 | ( 261 | FXMVECTOR Q 262 | ) 263 | { 264 | const XMVECTOR Zero = XMVectorZero(); 265 | 266 | XMVECTOR L = XMVector4LengthSq(Q); 267 | XMVECTOR Conjugate = XMQuaternionConjugate(Q); 268 | 269 | XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v); 270 | 271 | XMVECTOR Result = XMVectorDivide(Conjugate, L); 272 | 273 | Result = XMVectorSelect(Result, Zero, Control); 274 | 275 | return Result; 276 | } 277 | 278 | //------------------------------------------------------------------------------ 279 | 280 | inline XMVECTOR XM_CALLCONV XMQuaternionLn 281 | ( 282 | FXMVECTOR Q 283 | ) 284 | { 285 | static const XMVECTORF32 OneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f }; 286 | 287 | XMVECTOR QW = XMVectorSplatW(Q); 288 | XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v); 289 | 290 | XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v); 291 | 292 | XMVECTOR Theta = XMVectorACos(QW); 293 | XMVECTOR SinTheta = XMVectorSin(Theta); 294 | 295 | XMVECTOR S = XMVectorDivide(Theta, SinTheta); 296 | 297 | XMVECTOR Result = XMVectorMultiply(Q0, S); 298 | Result = XMVectorSelect(Q0, Result, ControlW); 299 | 300 | return Result; 301 | } 302 | 303 | //------------------------------------------------------------------------------ 304 | 305 | inline XMVECTOR XM_CALLCONV XMQuaternionExp 306 | ( 307 | FXMVECTOR Q 308 | ) 309 | { 310 | XMVECTOR Theta = XMVector3Length(Q); 311 | 312 | XMVECTOR SinTheta, CosTheta; 313 | XMVectorSinCos(&SinTheta, &CosTheta, Theta); 314 | 315 | XMVECTOR S = XMVectorDivide(SinTheta, Theta); 316 | 317 | XMVECTOR Result = XMVectorMultiply(Q, S); 318 | 319 | const XMVECTOR Zero = XMVectorZero(); 320 | XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v); 321 | Result = XMVectorSelect(Result, Q, Control); 322 | 323 | Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v); 324 | 325 | return Result; 326 | } 327 | 328 | //------------------------------------------------------------------------------ 329 | 330 | inline XMVECTOR XM_CALLCONV XMQuaternionSlerp 331 | ( 332 | FXMVECTOR Q0, 333 | FXMVECTOR Q1, 334 | float t 335 | ) 336 | { 337 | XMVECTOR T = 
XMVectorReplicate(t); 338 | return XMQuaternionSlerpV(Q0, Q1, T); 339 | } 340 | 341 | //------------------------------------------------------------------------------ 342 | 343 | inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV 344 | ( 345 | FXMVECTOR Q0, 346 | FXMVECTOR Q1, 347 | FXMVECTOR T 348 | ) 349 | { 350 | assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T))); 351 | 352 | // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega) 353 | 354 | #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 355 | 356 | const XMVECTORF32 OneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f }; 357 | 358 | XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); 359 | 360 | const XMVECTOR Zero = XMVectorZero(); 361 | XMVECTOR Control = XMVectorLess(CosOmega, Zero); 362 | XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control); 363 | 364 | CosOmega = XMVectorMultiply(CosOmega, Sign); 365 | 366 | Control = XMVectorLess(CosOmega, OneMinusEpsilon); 367 | 368 | XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v); 369 | SinOmega = XMVectorSqrt(SinOmega); 370 | 371 | XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); 372 | 373 | XMVECTOR SignMask = XMVectorSplatSignMask(); 374 | XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2); 375 | SignMask = XMVectorShiftLeft(SignMask, Zero, 3); 376 | V01 = XMVectorXorInt(V01, SignMask); 377 | V01 = XMVectorAdd(g_XMIdentityR0.v, V01); 378 | 379 | XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega); 380 | 381 | XMVECTOR S0 = XMVectorMultiply(V01, Omega); 382 | S0 = XMVectorSin(S0); 383 | S0 = XMVectorMultiply(S0, InvSinOmega); 384 | 385 | S0 = XMVectorSelect(V01, S0, Control); 386 | 387 | XMVECTOR S1 = XMVectorSplatY(S0); 388 | S0 = XMVectorSplatX(S0); 389 | 390 | S1 = XMVectorMultiply(S1, Sign); 391 | 392 | XMVECTOR Result = XMVectorMultiply(Q0, S0); 393 | Result = XMVectorMultiplyAdd(Q1, S1, Result); 394 | 395 | return Result; 396 | 397 | #elif defined(_XM_SSE_INTRINSICS_) 398 | static const XMVECTORF32 OneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f }; 399 | static const XMVECTORU32 SignMask2 = { 0x80000000,0x00000000,0x00000000,0x00000000 }; 400 | 401 | XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); 402 | 403 | const XMVECTOR Zero = XMVectorZero(); 404 | XMVECTOR Control = XMVectorLess(CosOmega, Zero); 405 | XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control); 406 | 407 | CosOmega = _mm_mul_ps(CosOmega, Sign); 408 | 409 | Control = XMVectorLess(CosOmega, OneMinusEpsilon); 410 | 411 | XMVECTOR SinOmega = _mm_mul_ps(CosOmega, CosOmega); 412 | SinOmega = _mm_sub_ps(g_XMOne, SinOmega); 413 | SinOmega = _mm_sqrt_ps(SinOmega); 414 | 415 | XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); 416 | 417 | XMVECTOR V01 = XM_PERMUTE_PS(T, _MM_SHUFFLE(2, 3, 0, 1)); 418 | V01 = _mm_and_ps(V01, g_XMMaskXY); 419 | V01 = _mm_xor_ps(V01, SignMask2); 420 | V01 = _mm_add_ps(g_XMIdentityR0, V01); 421 | 422 | XMVECTOR S0 = _mm_mul_ps(V01, Omega); 423 | S0 = XMVectorSin(S0); 424 | S0 = _mm_div_ps(S0, SinOmega); 425 | 426 | S0 = XMVectorSelect(V01, S0, Control); 427 | 428 | XMVECTOR S1 = XMVectorSplatY(S0); 429 | S0 = XMVectorSplatX(S0); 430 | 431 | S1 = _mm_mul_ps(S1, Sign); 432 | XMVECTOR Result = _mm_mul_ps(Q0, S0); 433 | S1 = _mm_mul_ps(S1, Q1); 434 | Result = _mm_add_ps(Result, S1); 435 | return Result; 436 | #endif 437 | } 438 | 439 | 
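A minimal usage sketch for the scalar wrapper (XMQuaternionIdentity and XMQuaternionRotationAxis are defined later in this file; XM_PIDIV2 comes from the main header). Inputs are expected to be unit quaternions, and the CosOmega sign flip above already selects the shortest arc:

```cpp
XMVECTOR q0 = XMQuaternionIdentity();
XMVECTOR q1 = XMQuaternionRotationAxis(XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f), XM_PIDIV2);
XMVECTOR qh = XMQuaternionSlerp(q0, q1, 0.5f);   // 45-degree rotation about +Y
```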
//------------------------------------------------------------------------------ 440 | 441 | inline XMVECTOR XM_CALLCONV XMQuaternionSquad 442 | ( 443 | FXMVECTOR Q0, 444 | FXMVECTOR Q1, 445 | FXMVECTOR Q2, 446 | GXMVECTOR Q3, 447 | float t 448 | ) 449 | { 450 | XMVECTOR T = XMVectorReplicate(t); 451 | return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T); 452 | } 453 | 454 | //------------------------------------------------------------------------------ 455 | 456 | inline XMVECTOR XM_CALLCONV XMQuaternionSquadV 457 | ( 458 | FXMVECTOR Q0, 459 | FXMVECTOR Q1, 460 | FXMVECTOR Q2, 461 | GXMVECTOR Q3, 462 | HXMVECTOR T 463 | ) 464 | { 465 | assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T))); 466 | 467 | XMVECTOR TP = T; 468 | const XMVECTOR Two = XMVectorSplatConstant(2, 0); 469 | 470 | XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T); 471 | XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T); 472 | 473 | TP = XMVectorNegativeMultiplySubtract(TP, TP, TP); 474 | TP = XMVectorMultiply(TP, Two); 475 | 476 | XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP); 477 | 478 | return Result; 479 | } 480 | 481 | //------------------------------------------------------------------------------ 482 | _Use_decl_annotations_ 483 | inline void XM_CALLCONV XMQuaternionSquadSetup 484 | ( 485 | XMVECTOR* pA, 486 | XMVECTOR* pB, 487 | XMVECTOR* pC, 488 | FXMVECTOR Q0, 489 | FXMVECTOR Q1, 490 | FXMVECTOR Q2, 491 | GXMVECTOR Q3 492 | ) 493 | { 494 | assert(pA); 495 | assert(pB); 496 | assert(pC); 497 | 498 | XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2)); 499 | XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2)); 500 | XMVECTOR SQ2 = XMVectorNegate(Q2); 501 | 502 | XMVECTOR Control1 = XMVectorLess(LS12, LD12); 503 | SQ2 = XMVectorSelect(Q2, SQ2, Control1); 504 | 505 | XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1)); 506 | XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1)); 507 | XMVECTOR SQ0 = XMVectorNegate(Q0); 508 | 509 | XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3)); 510 | XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3)); 511 | XMVECTOR SQ3 = XMVectorNegate(Q3); 512 | 513 | XMVECTOR Control0 = XMVectorLess(LS01, LD01); 514 | XMVECTOR Control2 = XMVectorLess(LS23, LD23); 515 | 516 | SQ0 = XMVectorSelect(Q0, SQ0, Control0); 517 | SQ3 = XMVectorSelect(Q3, SQ3, Control2); 518 | 519 | XMVECTOR InvQ1 = XMQuaternionInverse(Q1); 520 | XMVECTOR InvQ2 = XMQuaternionInverse(SQ2); 521 | 522 | XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0)); 523 | XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2)); 524 | XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1)); 525 | XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3)); 526 | 527 | const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2); 528 | 529 | XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter); 530 | XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter); 531 | ExpQ02 = XMQuaternionExp(ExpQ02); 532 | ExpQ13 = XMQuaternionExp(ExpQ13); 533 | 534 | *pA = XMQuaternionMultiply(Q1, ExpQ02); 535 | *pB = XMQuaternionMultiply(SQ2, ExpQ13); 536 | *pC = SQ2; 537 | } 538 | 539 | //------------------------------------------------------------------------------ 540 | 541 | inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentric 542 | ( 543 | FXMVECTOR Q0, 544 | FXMVECTOR Q1, 545 | FXMVECTOR Q2, 546 | float f, 547 | float g 548 | ) 549 | { 550 | float 
s = f + g; 551 | 552 | XMVECTOR Result; 553 | if ((s < 0.00001f) && (s > -0.00001f)) 554 | { 555 | Result = Q0; 556 | } 557 | else 558 | { 559 | XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s); 560 | XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s); 561 | 562 | Result = XMQuaternionSlerp(Q01, Q02, g / s); 563 | } 564 | 565 | return Result; 566 | } 567 | 568 | //------------------------------------------------------------------------------ 569 | 570 | inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV 571 | ( 572 | FXMVECTOR Q0, 573 | FXMVECTOR Q1, 574 | FXMVECTOR Q2, 575 | GXMVECTOR F, 576 | HXMVECTOR G 577 | ) 578 | { 579 | assert((XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F))); 580 | assert((XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G))); 581 | 582 | const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16); 583 | 584 | XMVECTOR S = XMVectorAdd(F, G); 585 | 586 | XMVECTOR Result; 587 | if (XMVector4InBounds(S, Epsilon)) 588 | { 589 | Result = Q0; 590 | } 591 | else 592 | { 593 | XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S); 594 | XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S); 595 | XMVECTOR GS = XMVectorReciprocal(S); 596 | GS = XMVectorMultiply(G, GS); 597 | 598 | Result = XMQuaternionSlerpV(Q01, Q02, GS); 599 | } 600 | 601 | return Result; 602 | } 603 | 604 | //------------------------------------------------------------------------------ 605 | // Transformation operations 606 | //------------------------------------------------------------------------------ 607 | 608 | //------------------------------------------------------------------------------ 609 | 610 | inline XMVECTOR XM_CALLCONV XMQuaternionIdentity() 611 | { 612 | return g_XMIdentityR3.v; 613 | } 614 | 615 | //------------------------------------------------------------------------------ 616 | 617 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYaw 618 | ( 619 | float Pitch, 620 | float Yaw, 621 | float Roll 622 | ) 623 | { 624 | XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); 625 | XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles); 626 | return Q; 627 | } 628 | 629 | //------------------------------------------------------------------------------ 630 | 631 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector 632 | ( 633 | FXMVECTOR Angles // <Pitch, Yaw, Roll, 0> 634 | ) 635 | { 636 | static const XMVECTORF32 Sign = { 1.0f, -1.0f, -1.0f, 1.0f }; 637 | 638 | XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v); 639 | 640 | XMVECTOR SinAngles, CosAngles; 641 | XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); 642 | 643 | XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles); 644 | XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles); 645 | XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles); 646 | XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles); 647 | XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles); 648 | XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles); 649 | 650 | XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v); 651 | XMVECTOR Q0 = XMVectorMultiply(P0, Y0); 652 | Q1 = XMVectorMultiply(Q1, Y1); 653 | Q0 = XMVectorMultiply(Q0, R0); 654 | XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0); 655 | 656 | return Q; 657 | } 658 | 659 | //------------------------------------------------------------------------------ 660 | 661 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationNormal 662 | ( 663 | FXMVECTOR NormalAxis, 664 | float Angle 665 | ) 666 | { 667 | #if
defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 668 | 669 | XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v); 670 | 671 | float SinV, CosV; 672 | XMScalarSinCos(&SinV, &CosV, 0.5f * Angle); 673 | 674 | XMVECTOR Scale = XMVectorSet(SinV, SinV, SinV, CosV); 675 | return XMVectorMultiply(N, Scale); 676 | #elif defined(_XM_SSE_INTRINSICS_) 677 | XMVECTOR N = _mm_and_ps(NormalAxis, g_XMMask3); 678 | N = _mm_or_ps(N, g_XMIdentityR3); 679 | XMVECTOR Scale = _mm_set_ps1(0.5f * Angle); 680 | XMVECTOR vSine; 681 | XMVECTOR vCosine; 682 | XMVectorSinCos(&vSine, &vCosine, Scale); 683 | Scale = _mm_and_ps(vSine, g_XMMask3); 684 | vCosine = _mm_and_ps(vCosine, g_XMMaskW); 685 | Scale = _mm_or_ps(Scale, vCosine); 686 | N = _mm_mul_ps(N, Scale); 687 | return N; 688 | #endif 689 | } 690 | 691 | //------------------------------------------------------------------------------ 692 | 693 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationAxis 694 | ( 695 | FXMVECTOR Axis, 696 | float Angle 697 | ) 698 | { 699 | assert(!XMVector3Equal(Axis, XMVectorZero())); 700 | assert(!XMVector3IsInfinite(Axis)); 701 | 702 | XMVECTOR Normal = XMVector3Normalize(Axis); 703 | XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle); 704 | return Q; 705 | } 706 | 707 | //------------------------------------------------------------------------------ 708 | 709 | inline XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix 710 | ( 711 | FXMMATRIX M 712 | ) 713 | { 714 | #if defined(_XM_NO_INTRINSICS_) 715 | 716 | XMVECTORF32 q; 717 | float r22 = M.m[2][2]; 718 | if (r22 <= 0.f) // x^2 + y^2 >= z^2 + w^2 719 | { 720 | float dif10 = M.m[1][1] - M.m[0][0]; 721 | float omr22 = 1.f - r22; 722 | if (dif10 <= 0.f) // x^2 >= y^2 723 | { 724 | float fourXSqr = omr22 - dif10; 725 | float inv4x = 0.5f / sqrtf(fourXSqr); 726 | q.f[0] = fourXSqr*inv4x; 727 | q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x; 728 | q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x; 729 | q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x; 730 | } 731 | else // y^2 >= x^2 732 | { 733 | float fourYSqr = omr22 + dif10; 734 | float inv4y = 0.5f / sqrtf(fourYSqr); 735 | q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y; 736 | q.f[1] = fourYSqr*inv4y; 737 | q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y; 738 | q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y; 739 | } 740 | } 741 | else // z^2 + w^2 >= x^2 + y^2 742 | { 743 | float sum10 = M.m[1][1] + M.m[0][0]; 744 | float opr22 = 1.f + r22; 745 | if (sum10 <= 0.f) // z^2 >= w^2 746 | { 747 | float fourZSqr = opr22 - sum10; 748 | float inv4z = 0.5f / sqrtf(fourZSqr); 749 | q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z; 750 | q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z; 751 | q.f[2] = fourZSqr*inv4z; 752 | q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z; 753 | } 754 | else // w^2 >= z^2 755 | { 756 | float fourWSqr = opr22 + sum10; 757 | float inv4w = 0.5f / sqrtf(fourWSqr); 758 | q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w; 759 | q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w; 760 | q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w; 761 | q.f[3] = fourWSqr*inv4w; 762 | } 763 | } 764 | return q.v; 765 | 766 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 767 | static const XMVECTORF32 XMPMMP = { +1.0f, -1.0f, -1.0f, +1.0f }; 768 | static const XMVECTORF32 XMMPMP = { -1.0f, +1.0f, -1.0f, +1.0f }; 769 | static const XMVECTORF32 XMMMPP = { -1.0f, -1.0f, +1.0f, +1.0f }; 770 | static const XMVECTORU32 Select0110 = { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 }; 771 | static const XMVECTORU32 Select0010 = { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 }; 772 | 773 | XMVECTOR r0 = M.c[0]; 774 | 
XMVECTOR r1 = M.c[1]; 775 | XMVECTOR r2 = M.c[2]; 776 | 777 | XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0); 778 | XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1); 779 | XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0); 780 | 781 | // x^2 >= y^2 equivalent to r11 - r00 <= 0 782 | XMVECTOR r11mr00 = vsubq_f32(r11, r00); 783 | XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero); 784 | 785 | // z^2 >= w^2 equivalent to r11 + r00 <= 0 786 | XMVECTOR r11pr00 = vaddq_f32(r11, r00); 787 | XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero); 788 | 789 | // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 790 | XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero); 791 | 792 | // (4*x^2, 4*y^2, 4*z^2, 4*w^2) 793 | XMVECTOR t0 = vmulq_f32(XMPMMP, r00); 794 | XMVECTOR x2y2z2w2 = vmlaq_f32(t0, XMMPMP, r11); 795 | x2y2z2w2 = vmlaq_f32(x2y2z2w2, XMMMPP, r22); 796 | x2y2z2w2 = vaddq_f32(x2y2z2w2, g_XMOne); 797 | 798 | // (r01, r02, r12, r11) 799 | t0 = vextq_f32(r0, r0, 1); 800 | XMVECTOR t1 = vextq_f32(r1, r1, 1); 801 | t0 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_low_f32(t1))); 802 | 803 | // (r10, r20, r21, r10) 804 | t1 = vextq_f32(r2, r2, 3); 805 | XMVECTOR r10 = vdupq_lane_f32(vget_low_f32(r1), 0); 806 | t1 = vbslq_f32(Select0110, t1, r10); 807 | 808 | // (4*x*y, 4*x*z, 4*y*z, unused) 809 | XMVECTOR xyxzyz = vaddq_f32(t0, t1); 810 | 811 | // (r21, r20, r10, r10) 812 | t0 = vcombine_f32(vrev64_f32(vget_low_f32(r2)), vget_low_f32(r10)); 813 | 814 | // (r12, r02, r01, r12) 815 | XMVECTOR t2 = vcombine_f32(vrev64_f32(vget_high_f32(r0)), vrev64_f32(vget_low_f32(r0))); 816 | XMVECTOR t3 = vdupq_lane_f32(vget_high_f32(r1), 0); 817 | t1 = vbslq_f32(Select0110, t2, t3); 818 | 819 | // (4*x*w, 4*y*w, 4*z*w, unused) 820 | XMVECTOR xwywzw = vsubq_f32(t0, t1); 821 | xwywzw = vmulq_f32(XMMPMP, xwywzw); 822 | 823 | // (4*x*x, 4*x*y, 4*x*z, 4*x*w) 824 | t0 = vextq_f32(xyxzyz, xyxzyz, 3); 825 | t1 = vbslq_f32(Select0110, t0, x2y2z2w2); 826 | t2 = vdupq_lane_f32(vget_low_f32(xwywzw), 0); 827 | XMVECTOR tensor0 = vbslq_f32(g_XMSelect1110, t1, t2); 828 | 829 | // (4*y*x, 4*y*y, 4*y*z, 4*y*w) 830 | t0 = vbslq_f32(g_XMSelect1011, xyxzyz, x2y2z2w2); 831 | t1 = vdupq_lane_f32(vget_low_f32(xwywzw), 1); 832 | XMVECTOR tensor1 = vbslq_f32(g_XMSelect1110, t0, t1); 833 | 834 | // (4*z*x, 4*z*y, 4*z*z, 4*z*w) 835 | t0 = vextq_f32(xyxzyz, xyxzyz, 1); 836 | t1 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_high_f32(xwywzw))); 837 | XMVECTOR tensor2 = vbslq_f32(Select0010, x2y2z2w2, t1); 838 | 839 | // (4*w*x, 4*w*y, 4*w*z, 4*w*w) 840 | XMVECTOR tensor3 = vbslq_f32(g_XMSelect1110, xwywzw, x2y2z2w2); 841 | 842 | // Select the row of the tensor-product matrix that has the largest 843 | // magnitude. 844 | t0 = vbslq_f32(x2gey2, tensor0, tensor1); 845 | t1 = vbslq_f32(z2gew2, tensor2, tensor3); 846 | t2 = vbslq_f32(x2py2gez2pw2, t0, t1); 847 | 848 | // Normalize the row. No division by zero is possible because the 849 | // quaternion is unit-length (and the row is a nonzero multiple of 850 | // the quaternion). 
851 | t0 = XMVector4Length(t2); 852 | return XMVectorDivide(t2, t0); 853 | #elif defined(_XM_SSE_INTRINSICS_) 854 | static const XMVECTORF32 XMPMMP = { +1.0f, -1.0f, -1.0f, +1.0f }; 855 | static const XMVECTORF32 XMMPMP = { -1.0f, +1.0f, -1.0f, +1.0f }; 856 | static const XMVECTORF32 XMMMPP = { -1.0f, -1.0f, +1.0f, +1.0f }; 857 | 858 | XMVECTOR r0 = M.c[0]; // (r00, r01, r02, 0) 859 | XMVECTOR r1 = M.c[1]; // (r10, r11, r12, 0) 860 | XMVECTOR r2 = M.c[2]; // (r20, r21, r22, 0) 861 | 862 | // (r00, r00, r00, r00) 863 | XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0, 0, 0, 0)); 864 | // (r11, r11, r11, r11) 865 | XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1, 1, 1, 1)); 866 | // (r22, r22, r22, r22) 867 | XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2, 2, 2, 2)); 868 | 869 | // x^2 >= y^2 equivalent to r11 - r00 <= 0 870 | // (r11 - r00, r11 - r00, r11 - r00, r11 - r00) 871 | XMVECTOR r11mr00 = _mm_sub_ps(r11, r00); 872 | XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero); 873 | 874 | // z^2 >= w^2 equivalent to r11 + r00 <= 0 875 | // (r11 + r00, r11 + r00, r11 + r00, r11 + r00) 876 | XMVECTOR r11pr00 = _mm_add_ps(r11, r00); 877 | XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero); 878 | 879 | // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 880 | XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero); 881 | 882 | // (+r00, -r00, -r00, +r00) 883 | XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00); 884 | 885 | // (-r11, +r11, -r11, +r11) 886 | XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11); 887 | 888 | // (-r22, -r22, +r22, +r22) 889 | XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22); 890 | 891 | // (4*x^2, 4*y^2, 4*z^2, 4*w^2) 892 | XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1); 893 | x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2); 894 | x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne); 895 | 896 | // (r01, r02, r12, r11) 897 | t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1, 2, 2, 1)); 898 | // (r10, r10, r20, r21) 899 | t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1, 0, 0, 0)); 900 | // (r10, r20, r21, r10) 901 | t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0)); 902 | // (4*x*y, 4*x*z, 4*y*z, unused) 903 | XMVECTOR xyxzyz = _mm_add_ps(t0, t1); 904 | 905 | // (r21, r20, r10, r10) 906 | t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 1)); 907 | // (r12, r12, r02, r01) 908 | t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1, 2, 2, 2)); 909 | // (r12, r02, r01, r12) 910 | t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0)); 911 | // (4*x*w, 4*y*w, 4*z*w, unused) 912 | XMVECTOR xwywzw = _mm_sub_ps(t0, t1); 913 | xwywzw = _mm_mul_ps(XMMPMP, xwywzw); 914 | 915 | // (4*x^2, 4*y^2, 4*x*y, unused) 916 | t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0, 0, 1, 0)); 917 | // (4*z^2, 4*w^2, 4*z*w, unused) 918 | t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0, 2, 3, 2)); 919 | // (4*x*z, 4*y*z, 4*x*w, 4*y*w) 920 | t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1, 0, 2, 1)); 921 | 922 | // (4*x*x, 4*x*y, 4*x*z, 4*x*w) 923 | XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2, 0, 2, 0)); 924 | // (4*y*x, 4*y*y, 4*y*z, 4*y*w) 925 | XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 1, 1, 2)); 926 | // (4*z*x, 4*z*y, 4*z*z, 4*z*w) 927 | XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2, 0, 1, 0)); 928 | // (4*w*x, 4*w*y, 4*w*z, 4*w*w) 929 | XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1, 2, 3, 2)); 930 | 931 | // Select the row of the tensor-product matrix that has the largest 932 | // magnitude. 
933 | t0 = _mm_and_ps(x2gey2, tensor0); 934 | t1 = _mm_andnot_ps(x2gey2, tensor1); 935 | t0 = _mm_or_ps(t0, t1); 936 | t1 = _mm_and_ps(z2gew2, tensor2); 937 | t2 = _mm_andnot_ps(z2gew2, tensor3); 938 | t1 = _mm_or_ps(t1, t2); 939 | t0 = _mm_and_ps(x2py2gez2pw2, t0); 940 | t1 = _mm_andnot_ps(x2py2gez2pw2, t1); 941 | t2 = _mm_or_ps(t0, t1); 942 | 943 | // Normalize the row. No division by zero is possible because the 944 | // quaternion is unit-length (and the row is a nonzero multiple of 945 | // the quaternion). 946 | t0 = XMVector4Length(t2); 947 | return _mm_div_ps(t2, t0); 948 | #endif 949 | } 950 | 951 | //------------------------------------------------------------------------------ 952 | // Conversion operations 953 | //------------------------------------------------------------------------------ 954 | 955 | //------------------------------------------------------------------------------ 956 | _Use_decl_annotations_ 957 | inline void XM_CALLCONV XMQuaternionToAxisAngle 958 | ( 959 | XMVECTOR* pAxis, 960 | float* pAngle, 961 | FXMVECTOR Q 962 | ) 963 | { 964 | assert(pAxis); 965 | assert(pAngle); 966 | 967 | *pAxis = Q; 968 | 969 | *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q)); 970 | } 971 | 972 | /**************************************************************************** 973 | * 974 | * Plane 975 | * 976 | ****************************************************************************/ 977 | 978 | //------------------------------------------------------------------------------ 979 | // Comparison operations 980 | //------------------------------------------------------------------------------ 981 | 982 | //------------------------------------------------------------------------------ 983 | 984 | inline bool XM_CALLCONV XMPlaneEqual 985 | ( 986 | FXMVECTOR P1, 987 | FXMVECTOR P2 988 | ) 989 | { 990 | return XMVector4Equal(P1, P2); 991 | } 992 | 993 | //------------------------------------------------------------------------------ 994 | 995 | inline bool XM_CALLCONV XMPlaneNearEqual 996 | ( 997 | FXMVECTOR P1, 998 | FXMVECTOR P2, 999 | FXMVECTOR Epsilon 1000 | ) 1001 | { 1002 | XMVECTOR NP1 = XMPlaneNormalize(P1); 1003 | XMVECTOR NP2 = XMPlaneNormalize(P2); 1004 | return XMVector4NearEqual(NP1, NP2, Epsilon); 1005 | } 1006 | 1007 | //------------------------------------------------------------------------------ 1008 | 1009 | inline bool XM_CALLCONV XMPlaneNotEqual 1010 | ( 1011 | FXMVECTOR P1, 1012 | FXMVECTOR P2 1013 | ) 1014 | { 1015 | return XMVector4NotEqual(P1, P2); 1016 | } 1017 | 1018 | //------------------------------------------------------------------------------ 1019 | 1020 | inline bool XM_CALLCONV XMPlaneIsNaN 1021 | ( 1022 | FXMVECTOR P 1023 | ) 1024 | { 1025 | return XMVector4IsNaN(P); 1026 | } 1027 | 1028 | //------------------------------------------------------------------------------ 1029 | 1030 | inline bool XM_CALLCONV XMPlaneIsInfinite 1031 | ( 1032 | FXMVECTOR P 1033 | ) 1034 | { 1035 | return XMVector4IsInfinite(P); 1036 | } 1037 | 1038 | //------------------------------------------------------------------------------ 1039 | // Computation operations 1040 | //------------------------------------------------------------------------------ 1041 | 1042 | //------------------------------------------------------------------------------ 1043 | 1044 | inline XMVECTOR XM_CALLCONV XMPlaneDot 1045 | ( 1046 | FXMVECTOR P, 1047 | FXMVECTOR V 1048 | ) 1049 | { 1050 | return XMVector4Dot(P, V); 1051 | } 1052 | 1053 | 
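A plane here is the 4-vector (a, b, c, d) of the equation ax + by + cz + d = 0, so XMPlaneDot against a homogeneous point is that equation evaluated at the point; a small sketch with arbitrary values:

```cpp
XMVECTOR plane = XMVectorSet(0.0f, 1.0f, 0.0f, -2.0f);   // the plane y = 2 (unit normal)
XMVECTOR point = XMVectorSet(5.0f, 7.0f, 1.0f, 1.0f);    // w = 1 for a position
float dist = XMVectorGetX(XMPlaneDot(plane, point));     // 7 - 2 = 5
```

XMPlaneDotCoord below substitutes w = 1 itself, and XMPlaneDotNormal drops the d term for direction vectors.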
//------------------------------------------------------------------------------ 1054 | 1055 | inline XMVECTOR XM_CALLCONV XMPlaneDotCoord 1056 | ( 1057 | FXMVECTOR P, 1058 | FXMVECTOR V 1059 | ) 1060 | { 1061 | // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3] 1062 | 1063 | XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v); 1064 | XMVECTOR Result = XMVector4Dot(P, V3); 1065 | return Result; 1066 | } 1067 | 1068 | //------------------------------------------------------------------------------ 1069 | 1070 | inline XMVECTOR XM_CALLCONV XMPlaneDotNormal 1071 | ( 1072 | FXMVECTOR P, 1073 | FXMVECTOR V 1074 | ) 1075 | { 1076 | return XMVector3Dot(P, V); 1077 | } 1078 | 1079 | //------------------------------------------------------------------------------ 1080 | // XMPlaneNormalizeEst uses a reciprocal estimate and 1081 | // returns QNaN on zero and infinite vectors. 1082 | 1083 | inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst 1084 | ( 1085 | FXMVECTOR P 1086 | ) 1087 | { 1088 | #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1089 | 1090 | XMVECTOR Result = XMVector3ReciprocalLengthEst(P); 1091 | return XMVectorMultiply(P, Result); 1092 | 1093 | #elif defined(_XM_SSE4_INTRINSICS_) 1094 | XMVECTOR vTemp = _mm_dp_ps(P, P, 0x7f); 1095 | XMVECTOR vResult = _mm_rsqrt_ps(vTemp); 1096 | return _mm_mul_ps(vResult, P); 1097 | #elif defined(_XM_SSE_INTRINSICS_) 1098 | // Perform the dot product 1099 | XMVECTOR vDot = _mm_mul_ps(P, P); 1100 | // x=Dot.y, y=Dot.z 1101 | XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); 1102 | // Result.x = x+y 1103 | vDot = _mm_add_ss(vDot, vTemp); 1104 | // x=Dot.z 1105 | vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); 1106 | // Result.x = (x+y)+z 1107 | vDot = _mm_add_ss(vDot, vTemp); 1108 | // Splat x 1109 | vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); 1110 | // Get the reciprocal square root estimate 1111 | vDot = _mm_rsqrt_ps(vDot); 1112 | // Reciprocal mul to perform the normalization 1113 | vDot = _mm_mul_ps(vDot, P); 1114 | return vDot; 1115 | #endif 1116 | } 1117 | 1118 | //------------------------------------------------------------------------------ 1119 | 1120 | inline XMVECTOR XM_CALLCONV XMPlaneNormalize 1121 | ( 1122 | FXMVECTOR P 1123 | ) 1124 | { 1125 | #if defined(_XM_NO_INTRINSICS_) 1126 | float fLengthSq = sqrtf((P.vector4_f32[0] * P.vector4_f32[0]) + (P.vector4_f32[1] * P.vector4_f32[1]) + (P.vector4_f32[2] * P.vector4_f32[2])); 1127 | // Prevent divide by zero 1128 | if (fLengthSq) { 1129 | fLengthSq = 1.0f / fLengthSq; 1130 | } 1131 | { 1132 | XMVECTOR vResult = { 1133 | P.vector4_f32[0] * fLengthSq, 1134 | P.vector4_f32[1] * fLengthSq, 1135 | P.vector4_f32[2] * fLengthSq, 1136 | P.vector4_f32[3] * fLengthSq 1137 | }; 1138 | return vResult; 1139 | } 1140 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1141 | XMVECTOR vLength = XMVector3ReciprocalLength(P); 1142 | return XMVectorMultiply(P, vLength); 1143 | #elif defined(_XM_SSE4_INTRINSICS_) 1144 | XMVECTOR vLengthSq = _mm_dp_ps(P, P, 0x7f); 1145 | // Prepare for the division 1146 | XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); 1147 | // Failsafe on zero (Or epsilon) length planes 1148 | // If the length is infinity, set the elements to zero 1149 | vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); 1150 | // Reciprocal mul to perform the normalization 1151 | vResult = _mm_div_ps(P, vResult); 1152 | // Any that are infinity, set to zero 1153 | vResult = _mm_and_ps(vResult, vLengthSq); 1154 | return vResult; 1155 | #elif defined(_XM_SSE_INTRINSICS_) 1156 | // Perform the dot product on
x,y and z only 1157 | XMVECTOR vLengthSq = _mm_mul_ps(P, P); 1158 | XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1)); 1159 | vLengthSq = _mm_add_ss(vLengthSq, vTemp); 1160 | vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); 1161 | vLengthSq = _mm_add_ss(vLengthSq, vTemp); 1162 | vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); 1163 | // Prepare for the division 1164 | XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); 1165 | // Failsafe on zero (Or epsilon) length planes 1166 | // If the length is infinity, set the elements to zero 1167 | vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); 1168 | // Reciprocal mul to perform the normalization 1169 | vResult = _mm_div_ps(P, vResult); 1170 | // Any that are infinity, set to zero 1171 | vResult = _mm_and_ps(vResult, vLengthSq); 1172 | return vResult; 1173 | #endif 1174 | } 1175 | 1176 | //------------------------------------------------------------------------------ 1177 | 1178 | inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine 1179 | ( 1180 | FXMVECTOR P, 1181 | FXMVECTOR LinePoint1, 1182 | FXMVECTOR LinePoint2 1183 | ) 1184 | { 1185 | XMVECTOR V1 = XMVector3Dot(P, LinePoint1); 1186 | XMVECTOR V2 = XMVector3Dot(P, LinePoint2); 1187 | XMVECTOR D = XMVectorSubtract(V1, V2); 1188 | 1189 | XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1); 1190 | VT = XMVectorDivide(VT, D); 1191 | 1192 | XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1); 1193 | Point = XMVectorMultiplyAdd(Point, VT, LinePoint1); 1194 | 1195 | const XMVECTOR Zero = XMVectorZero(); 1196 | XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v); 1197 | 1198 | return XMVectorSelect(Point, g_XMQNaN.v, Control); 1199 | } 1200 | 1201 | //------------------------------------------------------------------------------ 1202 | _Use_decl_annotations_ 1203 | inline void XM_CALLCONV XMPlaneIntersectPlane 1204 | ( 1205 | XMVECTOR* pLinePoint1, 1206 | XMVECTOR* pLinePoint2, 1207 | FXMVECTOR P1, 1208 | FXMVECTOR P2 1209 | ) 1210 | { 1211 | assert(pLinePoint1); 1212 | assert(pLinePoint2); 1213 | 1214 | XMVECTOR V1 = XMVector3Cross(P2, P1); 1215 | 1216 | XMVECTOR LengthSq = XMVector3LengthSq(V1); 1217 | 1218 | XMVECTOR V2 = XMVector3Cross(P2, V1); 1219 | 1220 | XMVECTOR P1W = XMVectorSplatW(P1); 1221 | XMVECTOR Point = XMVectorMultiply(V2, P1W); 1222 | 1223 | XMVECTOR V3 = XMVector3Cross(V1, P1); 1224 | 1225 | XMVECTOR P2W = XMVectorSplatW(P2); 1226 | Point = XMVectorMultiplyAdd(V3, P2W, Point); 1227 | 1228 | XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq); 1229 | 1230 | XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1); 1231 | 1232 | XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v); 1233 | *pLinePoint1 = XMVectorSelect(LinePoint1, g_XMQNaN.v, Control); 1234 | *pLinePoint2 = XMVectorSelect(LinePoint2, g_XMQNaN.v, Control); 1235 | } 1236 | 1237 | //------------------------------------------------------------------------------ 1238 | 1239 | inline XMVECTOR XM_CALLCONV XMPlaneTransform 1240 | ( 1241 | FXMVECTOR P, 1242 | FXMMATRIX M 1243 | ) 1244 | { 1245 | XMVECTOR W = XMVectorSplatW(P); 1246 | XMVECTOR Z = XMVectorSplatZ(P); 1247 | XMVECTOR Y = XMVectorSplatY(P); 1248 | XMVECTOR X = XMVectorSplatX(P); 1249 | 1250 | XMVECTOR Result = XMVectorMultiply(W, M.c[3]); 1251 | Result = XMVectorMultiplyAdd(Z, M.c[2], Result); 1252 | Result = XMVectorMultiplyAdd(Y, M.c[1], Result); 1253 | Result = XMVectorMultiplyAdd(X, M.c[0], Result); 1254 | return Result; 1255 | } 1256 | 1257 | 
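// Usage note (added): XMPlaneTransform does not renormalize its result, and to
// keep a plane consistent with points transformed by a matrix M it should be
// handed the inverse transpose of M. A minimal sketch, assuming `plane` is a
// normalized plane and `m` is the point transform:
//
//     XMMATRIX invTrans = XMMatrixTranspose(XMMatrixInverse(nullptr, m));
//     XMVECTOR moved = XMPlaneNormalize(XMPlaneTransform(plane, invTrans));
//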
//------------------------------------------------------------------------------ 1258 | _Use_decl_annotations_ 1259 | inline XMFLOAT4* XM_CALLCONV XMPlaneTransformStream 1260 | ( 1261 | XMFLOAT4* pOutputStream, 1262 | size_t OutputStride, 1263 | const XMFLOAT4* pInputStream, 1264 | size_t InputStride, 1265 | size_t PlaneCount, 1266 | FXMMATRIX M 1267 | ) 1268 | { 1269 | return XMVector4TransformStream(pOutputStream, 1270 | OutputStride, 1271 | pInputStream, 1272 | InputStride, 1273 | PlaneCount, 1274 | M); 1275 | } 1276 | 1277 | //------------------------------------------------------------------------------ 1278 | // Conversion operations 1279 | //------------------------------------------------------------------------------ 1280 | 1281 | //------------------------------------------------------------------------------ 1282 | 1283 | inline XMVECTOR XM_CALLCONV XMPlaneFromPointNormal 1284 | ( 1285 | FXMVECTOR Point, 1286 | FXMVECTOR Normal 1287 | ) 1288 | { 1289 | XMVECTOR W = XMVector3Dot(Point, Normal); 1290 | W = XMVectorNegate(W); 1291 | return XMVectorSelect(W, Normal, g_XMSelect1110.v); 1292 | } 1293 | 1294 | //------------------------------------------------------------------------------ 1295 | 1296 | inline XMVECTOR XM_CALLCONV XMPlaneFromPoints 1297 | ( 1298 | FXMVECTOR Point1, 1299 | FXMVECTOR Point2, 1300 | FXMVECTOR Point3 1301 | ) 1302 | { 1303 | XMVECTOR V21 = XMVectorSubtract(Point1, Point2); 1304 | XMVECTOR V31 = XMVectorSubtract(Point1, Point3); 1305 | 1306 | XMVECTOR N = XMVector3Cross(V21, V31); 1307 | N = XMVector3Normalize(N); 1308 | 1309 | XMVECTOR D = XMPlaneDotNormal(N, Point1); 1310 | D = XMVectorNegate(D); 1311 | 1312 | XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v); 1313 | 1314 | return Result; 1315 | } 1316 | 1317 | /**************************************************************************** 1318 | * 1319 | * Color 1320 | * 1321 | ****************************************************************************/ 1322 | 1323 | //------------------------------------------------------------------------------ 1324 | // Comparison operations 1325 | //------------------------------------------------------------------------------ 1326 | 1327 | //------------------------------------------------------------------------------ 1328 | 1329 | inline bool XM_CALLCONV XMColorEqual 1330 | ( 1331 | FXMVECTOR C1, 1332 | FXMVECTOR C2 1333 | ) 1334 | { 1335 | return XMVector4Equal(C1, C2); 1336 | } 1337 | 1338 | //------------------------------------------------------------------------------ 1339 | 1340 | inline bool XM_CALLCONV XMColorNotEqual 1341 | ( 1342 | FXMVECTOR C1, 1343 | FXMVECTOR C2 1344 | ) 1345 | { 1346 | return XMVector4NotEqual(C1, C2); 1347 | } 1348 | 1349 | //------------------------------------------------------------------------------ 1350 | 1351 | inline bool XM_CALLCONV XMColorGreater 1352 | ( 1353 | FXMVECTOR C1, 1354 | FXMVECTOR C2 1355 | ) 1356 | { 1357 | return XMVector4Greater(C1, C2); 1358 | } 1359 | 1360 | //------------------------------------------------------------------------------ 1361 | 1362 | inline bool XM_CALLCONV XMColorGreaterOrEqual 1363 | ( 1364 | FXMVECTOR C1, 1365 | FXMVECTOR C2 1366 | ) 1367 | { 1368 | return XMVector4GreaterOrEqual(C1, C2); 1369 | } 1370 | 1371 | //------------------------------------------------------------------------------ 1372 | 1373 | inline bool XM_CALLCONV XMColorLess 1374 | ( 1375 | FXMVECTOR C1, 1376 | FXMVECTOR C2 1377 | ) 1378 | { 1379 | return XMVector4Less(C1, C2); 1380 | } 1381 | 1382 | 
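// Note (added): the XMColor comparisons in this section are thin wrappers over
// the XMVector4 comparisons, so all four channels take part, including alpha.
// A small sketch:
//
//     XMVECTOR a = XMVectorSet(1.f, 0.f, 0.f, 1.0f);   // opaque red
//     XMVECTOR b = XMVectorSet(1.f, 0.f, 0.f, 0.5f);   // translucent red
//     bool same = XMColorEqual(a, b);                   // false: alpha differs
//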
//------------------------------------------------------------------------------ 1383 | 1384 | inline bool XM_CALLCONV XMColorLessOrEqual 1385 | ( 1386 | FXMVECTOR C1, 1387 | FXMVECTOR C2 1388 | ) 1389 | { 1390 | return XMVector4LessOrEqual(C1, C2); 1391 | } 1392 | 1393 | //------------------------------------------------------------------------------ 1394 | 1395 | inline bool XM_CALLCONV XMColorIsNaN 1396 | ( 1397 | FXMVECTOR C 1398 | ) 1399 | { 1400 | return XMVector4IsNaN(C); 1401 | } 1402 | 1403 | //------------------------------------------------------------------------------ 1404 | 1405 | inline bool XM_CALLCONV XMColorIsInfinite 1406 | ( 1407 | FXMVECTOR C 1408 | ) 1409 | { 1410 | return XMVector4IsInfinite(C); 1411 | } 1412 | 1413 | //------------------------------------------------------------------------------ 1414 | // Computation operations 1415 | //------------------------------------------------------------------------------ 1416 | 1417 | //------------------------------------------------------------------------------ 1418 | 1419 | inline XMVECTOR XM_CALLCONV XMColorNegative 1420 | ( 1421 | FXMVECTOR vColor 1422 | ) 1423 | { 1424 | #if defined(_XM_NO_INTRINSICS_) 1425 | XMVECTORF32 vResult = { 1426 | 1.0f - vColor.vector4_f32[0], 1427 | 1.0f - vColor.vector4_f32[1], 1428 | 1.0f - vColor.vector4_f32[2], 1429 | vColor.vector4_f32[3] 1430 | }; 1431 | return vResult.v; 1432 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1433 | XMVECTOR vTemp = veorq_u32(vColor, g_XMNegate3); 1434 | return vaddq_f32(vTemp, g_XMOne3); 1435 | #elif defined(_XM_SSE_INTRINSICS_) 1436 | // Negate only x,y and z. 1437 | XMVECTOR vTemp = _mm_xor_ps(vColor, g_XMNegate3); 1438 | // Add 1,1,1,0 to -x,-y,-z,w 1439 | return _mm_add_ps(vTemp, g_XMOne3); 1440 | #endif 1441 | } 1442 | 1443 | //------------------------------------------------------------------------------ 1444 | 1445 | inline XMVECTOR XM_CALLCONV XMColorModulate 1446 | ( 1447 | FXMVECTOR C1, 1448 | FXMVECTOR C2 1449 | ) 1450 | { 1451 | return XMVectorMultiply(C1, C2); 1452 | } 1453 | 1454 | //------------------------------------------------------------------------------ 1455 | 1456 | inline XMVECTOR XM_CALLCONV XMColorAdjustSaturation 1457 | ( 1458 | FXMVECTOR vColor, 1459 | float fSaturation 1460 | ) 1461 | { 1462 | // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]; 1463 | // Result = (C - Luminance) * Saturation + Luminance; 1464 | 1465 | #if defined(_XM_NO_INTRINSICS_) 1466 | const XMVECTORF32 gvLuminance = { 0.2125f, 0.7154f, 0.0721f, 0.0f }; 1467 | 1468 | float fLuminance = (vColor.vector4_f32[0] * gvLuminance.f[0]) + (vColor.vector4_f32[1] * gvLuminance.f[1]) + (vColor.vector4_f32[2] * gvLuminance.f[2]); 1469 | XMVECTOR vResult; 1470 | vResult.vector4_f32[0] = ((vColor.vector4_f32[0] - fLuminance)*fSaturation) + fLuminance; 1471 | vResult.vector4_f32[1] = ((vColor.vector4_f32[1] - fLuminance)*fSaturation) + fLuminance; 1472 | vResult.vector4_f32[2] = ((vColor.vector4_f32[2] - fLuminance)*fSaturation) + fLuminance; 1473 | vResult.vector4_f32[3] = vColor.vector4_f32[3]; 1474 | return vResult; 1475 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1476 | static const XMVECTORF32 gvLuminance = { 0.2125f, 0.7154f, 0.0721f, 0.0f }; 1477 | XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance); 1478 | XMVECTOR vResult = vsubq_f32(vColor, vLuminance); 1479 | vResult = vmlaq_n_f32(vLuminance, vResult, fSaturation); 1480 | return vbslq_f32(g_XMSelect1110, vResult, vColor); 1481 | #elif defined(_XM_SSE_INTRINSICS_) 1482 | static const XMVECTORF32 
gvLuminance = { 0.2125f, 0.7154f, 0.0721f, 0.0f }; 1483 | XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance); 1484 | // Splat fSaturation 1485 | XMVECTOR vSaturation = _mm_set_ps1(fSaturation); 1486 | // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance; 1487 | XMVECTOR vResult = _mm_sub_ps(vColor, vLuminance); 1488 | vResult = _mm_mul_ps(vResult, vSaturation); 1489 | vResult = _mm_add_ps(vResult, vLuminance); 1490 | // Retain w from the source color 1491 | vLuminance = _mm_shuffle_ps(vResult, vColor, _MM_SHUFFLE(3, 2, 2, 2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w 1492 | vResult = _mm_shuffle_ps(vResult, vLuminance, _MM_SHUFFLE(3, 0, 1, 0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w 1493 | return vResult; 1494 | #endif 1495 | } 1496 | 1497 | //------------------------------------------------------------------------------ 1498 | 1499 | inline XMVECTOR XM_CALLCONV XMColorAdjustContrast 1500 | ( 1501 | FXMVECTOR vColor, 1502 | float fContrast 1503 | ) 1504 | { 1505 | // Result = (vColor - 0.5f) * fContrast + 0.5f; 1506 | 1507 | #if defined(_XM_NO_INTRINSICS_) 1508 | XMVECTORF32 vResult = { 1509 | ((vColor.vector4_f32[0] - 0.5f) * fContrast) + 0.5f, 1510 | ((vColor.vector4_f32[1] - 0.5f) * fContrast) + 0.5f, 1511 | ((vColor.vector4_f32[2] - 0.5f) * fContrast) + 0.5f, 1512 | vColor.vector4_f32[3] // Leave W untouched 1513 | }; 1514 | return vResult.v; 1515 | #elif defined(_XM_ARM_NEON_INTRINSICS_) 1516 | XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v); 1517 | vResult = vmlaq_n_f32(g_XMOneHalf.v, vResult, fContrast); 1518 | return vbslq_f32(g_XMSelect1110, vResult, vColor); 1519 | #elif defined(_XM_SSE_INTRINSICS_) 1520 | XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale 1521 | XMVECTOR vResult = _mm_sub_ps(vColor, g_XMOneHalf); // Subtract 0.5f from the source (Saving source) 1522 | vResult = _mm_mul_ps(vResult, vScale); // Mul by scale 1523 | vResult = _mm_add_ps(vResult, g_XMOneHalf); // Add 0.5f 1524 | // Retain w from the source color 1525 | vScale = _mm_shuffle_ps(vResult, vColor, _MM_SHUFFLE(3, 2, 2, 2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w 1526 | vResult = _mm_shuffle_ps(vResult, vScale, _MM_SHUFFLE(3, 0, 1, 0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w 1527 | return vResult; 1528 | #endif 1529 | } 1530 | 1531 | //------------------------------------------------------------------------------ 1532 | 1533 | inline XMVECTOR XM_CALLCONV XMColorRGBToHSL(FXMVECTOR rgb) 1534 | { 1535 | XMVECTOR r = XMVectorSplatX(rgb); 1536 | XMVECTOR g = XMVectorSplatY(rgb); 1537 | XMVECTOR b = XMVectorSplatZ(rgb); 1538 | 1539 | XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b)); 1540 | XMVECTOR max = XMVectorMax(r, XMVectorMax(g, b)); 1541 | 1542 | XMVECTOR l = XMVectorMultiply(XMVectorAdd(min, max), g_XMOneHalf); 1543 | 1544 | XMVECTOR d = XMVectorSubtract(max, min); 1545 | 1546 | XMVECTOR la = XMVectorSelect(rgb, l, g_XMSelect1110); 1547 | 1548 | if (XMVector3Less(d, g_XMEpsilon)) 1549 | { 1550 | // Achromatic, assume H and S of 0 1551 | return XMVectorSelect(la, g_XMZero, g_XMSelect1100); 1552 | } 1553 | else 1554 | { 1555 | XMVECTOR s, h; 1556 | 1557 | XMVECTOR d2 = XMVectorAdd(min, max); 1558 | 1559 | if (XMVector3Greater(l, g_XMOneHalf)) 1560 | { 1561 | // d / (2-max-min) 1562 | s = XMVectorDivide(d, XMVectorSubtract(g_XMTwo, d2)); 1563 | } 1564 | else 1565 | { 1566 | // d / (max+min) 1567 | s = XMVectorDivide(d, d2); 1568 | } 1569 | 1570 | if (XMVector3Equal(r, max)) 1571 | { 1572 | // Red is max 1573 | h = 
XMVectorDivide(XMVectorSubtract(g, b), d); 1574 | } 1575 | else if (XMVector3Equal(g, max)) 1576 | { 1577 | // Green is max 1578 | h = XMVectorDivide(XMVectorSubtract(b, r), d); 1579 | h = XMVectorAdd(h, g_XMTwo); 1580 | } 1581 | else 1582 | { 1583 | // Blue is max 1584 | h = XMVectorDivide(XMVectorSubtract(r, g), d); 1585 | h = XMVectorAdd(h, g_XMFour); 1586 | } 1587 | 1588 | h = XMVectorDivide(h, g_XMSix); 1589 | 1590 | if (XMVector3Less(h, g_XMZero)) 1591 | h = XMVectorAdd(h, g_XMOne); 1592 | 1593 | XMVECTOR lha = XMVectorSelect(la, h, g_XMSelect1100); 1594 | return XMVectorSelect(s, lha, g_XMSelect1011); 1595 | } 1596 | } 1597 | 1598 | //------------------------------------------------------------------------------ 1599 | 1600 | namespace Internal 1601 | { 1602 | 1603 | inline XMVECTOR XM_CALLCONV XMColorHue2Clr(FXMVECTOR p, FXMVECTOR q, FXMVECTOR h) 1604 | { 1605 | static const XMVECTORF32 oneSixth = { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f }; 1606 | static const XMVECTORF32 twoThirds = { 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f }; 1607 | 1608 | XMVECTOR t = h; 1609 | 1610 | if (XMVector3Less(t, g_XMZero)) 1611 | t = XMVectorAdd(t, g_XMOne); 1612 | 1613 | if (XMVector3Greater(t, g_XMOne)) 1614 | t = XMVectorSubtract(t, g_XMOne); 1615 | 1616 | if (XMVector3Less(t, oneSixth)) 1617 | { 1618 | // p + (q - p) * 6 * t 1619 | XMVECTOR t1 = XMVectorSubtract(q, p); 1620 | XMVECTOR t2 = XMVectorMultiply(g_XMSix, t); 1621 | return XMVectorMultiplyAdd(t1, t2, p); 1622 | } 1623 | 1624 | if (XMVector3Less(t, g_XMOneHalf)) 1625 | return q; 1626 | 1627 | if (XMVector3Less(t, twoThirds)) 1628 | { 1629 | // p + (q - p) * 6 * (2/3 - t) 1630 | XMVECTOR t1 = XMVectorSubtract(q, p); 1631 | XMVECTOR t2 = XMVectorMultiply(g_XMSix, XMVectorSubtract(twoThirds, t)); 1632 | return XMVectorMultiplyAdd(t1, t2, p); 1633 | } 1634 | 1635 | return p; 1636 | } 1637 | 1638 | }; // namespace Internal 1639 | 1640 | inline XMVECTOR XM_CALLCONV XMColorHSLToRGB(FXMVECTOR hsl) 1641 | { 1642 | static const XMVECTORF32 oneThird = { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f }; 1643 | 1644 | XMVECTOR s = XMVectorSplatY(hsl); 1645 | XMVECTOR l = XMVectorSplatZ(hsl); 1646 | 1647 | if (XMVector3NearEqual(s, g_XMZero, g_XMEpsilon)) 1648 | { 1649 | // Achromatic 1650 | return XMVectorSelect(hsl, l, g_XMSelect1110); 1651 | } 1652 | else 1653 | { 1654 | XMVECTOR h = XMVectorSplatX(hsl); 1655 | 1656 | XMVECTOR q; 1657 | if (XMVector3Less(l, g_XMOneHalf)) 1658 | { 1659 | q = XMVectorMultiply(l, XMVectorAdd(g_XMOne, s)); 1660 | } 1661 | else 1662 | { 1663 | q = XMVectorSubtract(XMVectorAdd(l, s), XMVectorMultiply(l, s)); 1664 | } 1665 | 1666 | XMVECTOR p = XMVectorSubtract(XMVectorMultiply(g_XMTwo, l), q); 1667 | 1668 | XMVECTOR r = Internal::XMColorHue2Clr(p, q, XMVectorAdd(h, oneThird)); 1669 | XMVECTOR g = Internal::XMColorHue2Clr(p, q, h); 1670 | XMVECTOR b = Internal::XMColorHue2Clr(p, q, XMVectorSubtract(h, oneThird)); 1671 | 1672 | XMVECTOR rg = XMVectorSelect(g, r, g_XMSelect1000); 1673 | XMVECTOR ba = XMVectorSelect(hsl, b, g_XMSelect1110); 1674 | 1675 | return XMVectorSelect(ba, rg, g_XMSelect1100); 1676 | } 1677 | } 1678 | 1679 | //------------------------------------------------------------------------------ 1680 | 1681 | inline XMVECTOR XM_CALLCONV XMColorRGBToHSV(FXMVECTOR rgb) 1682 | { 1683 | XMVECTOR r = XMVectorSplatX(rgb); 1684 | XMVECTOR g = XMVectorSplatY(rgb); 1685 | XMVECTOR b = XMVectorSplatZ(rgb); 1686 | 1687 | XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b)); 1688 | XMVECTOR v = 
XMVectorMax(r, XMVectorMax(g, b)); 1689 | 1690 | XMVECTOR d = XMVectorSubtract(v, min); 1691 | 1692 | XMVECTOR s = (XMVector3NearEqual(v, g_XMZero, g_XMEpsilon)) ? g_XMZero : XMVectorDivide(d, v); 1693 | 1694 | if (XMVector3Less(d, g_XMEpsilon)) 1695 | { 1696 | // Achromatic, assume H of 0 1697 | XMVECTOR hv = XMVectorSelect(v, g_XMZero, g_XMSelect1000); 1698 | XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110); 1699 | return XMVectorSelect(s, hva, g_XMSelect1011); 1700 | } 1701 | else 1702 | { 1703 | XMVECTOR h; 1704 | 1705 | if (XMVector3Equal(r, v)) 1706 | { 1707 | // Red is max 1708 | h = XMVectorDivide(XMVectorSubtract(g, b), d); 1709 | 1710 | if (XMVector3Less(g, b)) 1711 | h = XMVectorAdd(h, g_XMSix); 1712 | } 1713 | else if (XMVector3Equal(g, v)) 1714 | { 1715 | // Green is max 1716 | h = XMVectorDivide(XMVectorSubtract(b, r), d); 1717 | h = XMVectorAdd(h, g_XMTwo); 1718 | } 1719 | else 1720 | { 1721 | // Blue is max 1722 | h = XMVectorDivide(XMVectorSubtract(r, g), d); 1723 | h = XMVectorAdd(h, g_XMFour); 1724 | } 1725 | 1726 | h = XMVectorDivide(h, g_XMSix); 1727 | 1728 | XMVECTOR hv = XMVectorSelect(v, h, g_XMSelect1000); 1729 | XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110); 1730 | return XMVectorSelect(s, hva, g_XMSelect1011); 1731 | } 1732 | } 1733 | 1734 | //------------------------------------------------------------------------------ 1735 | 1736 | inline XMVECTOR XM_CALLCONV XMColorHSVToRGB(FXMVECTOR hsv) 1737 | { 1738 | XMVECTOR h = XMVectorSplatX(hsv); 1739 | XMVECTOR s = XMVectorSplatY(hsv); 1740 | XMVECTOR v = XMVectorSplatZ(hsv); 1741 | 1742 | XMVECTOR h6 = XMVectorMultiply(h, g_XMSix); 1743 | 1744 | XMVECTOR i = XMVectorFloor(h6); 1745 | XMVECTOR f = XMVectorSubtract(h6, i); 1746 | 1747 | // p = v* (1-s) 1748 | XMVECTOR p = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, s)); 1749 | 1750 | // q = v*(1-f*s) 1751 | XMVECTOR q = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(f, s))); 1752 | 1753 | // t = v*(1 - (1-f)*s) 1754 | XMVECTOR t = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(XMVectorSubtract(g_XMOne, f), s))); 1755 | 1756 | int ii = static_cast<int>(XMVectorGetX(XMVectorMod(i, g_XMSix))); 1757 | 1758 | XMVECTOR _rgb; 1759 | 1760 | switch (ii) 1761 | { 1762 | case 0: // rgb = vtp 1763 | { 1764 | XMVECTOR vt = XMVectorSelect(t, v, g_XMSelect1000); 1765 | _rgb = XMVectorSelect(p, vt, g_XMSelect1100); 1766 | } 1767 | break; 1768 | case 1: // rgb = qvp 1769 | { 1770 | XMVECTOR qv = XMVectorSelect(v, q, g_XMSelect1000); 1771 | _rgb = XMVectorSelect(p, qv, g_XMSelect1100); 1772 | } 1773 | break; 1774 | case 2: // rgb = pvt 1775 | { 1776 | XMVECTOR pv = XMVectorSelect(v, p, g_XMSelect1000); 1777 | _rgb = XMVectorSelect(t, pv, g_XMSelect1100); 1778 | } 1779 | break; 1780 | case 3: // rgb = pqv 1781 | { 1782 | XMVECTOR pq = XMVectorSelect(q, p, g_XMSelect1000); 1783 | _rgb = XMVectorSelect(v, pq, g_XMSelect1100); 1784 | } 1785 | break; 1786 | case 4: // rgb = tpv 1787 | { 1788 | XMVECTOR tp = XMVectorSelect(p, t, g_XMSelect1000); 1789 | _rgb = XMVectorSelect(v, tp, g_XMSelect1100); 1790 | } 1791 | break; 1792 | default: // rgb = vpq 1793 | { 1794 | XMVECTOR vp = XMVectorSelect(p, v, g_XMSelect1000); 1795 | _rgb = XMVectorSelect(q, vp, g_XMSelect1100); 1796 | } 1797 | break; 1798 | } 1799 | 1800 | return XMVectorSelect(hsv, _rgb, g_XMSelect1110); 1801 | } 1802 | 1803 | //------------------------------------------------------------------------------ 1804 | 1805 | inline XMVECTOR XM_CALLCONV XMColorRGBToYUV(FXMVECTOR rgb) 1806 | { 1807 
| static const XMVECTORF32 Scale0 = { 0.299f, -0.147f, 0.615f, 0.0f }; 1808 | static const XMVECTORF32 Scale1 = { 0.587f, -0.289f, -0.515f, 0.0f }; 1809 | static const XMVECTORF32 Scale2 = { 0.114f, 0.436f, -0.100f, 0.0f }; 1810 | 1811 | XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); 1812 | XMVECTOR clr = XMVector3Transform(rgb, M); 1813 | 1814 | return XMVectorSelect(rgb, clr, g_XMSelect1110); 1815 | } 1816 | 1817 | //------------------------------------------------------------------------------ 1818 | 1819 | inline XMVECTOR XM_CALLCONV XMColorYUVToRGB(FXMVECTOR yuv) 1820 | { 1821 | static const XMVECTORF32 Scale1 = { 0.0f, -0.395f, 2.032f, 0.0f }; 1822 | static const XMVECTORF32 Scale2 = { 1.140f, -0.581f, 0.0f, 0.0f }; 1823 | 1824 | XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero); 1825 | XMVECTOR clr = XMVector3Transform(yuv, M); 1826 | 1827 | return XMVectorSelect(yuv, clr, g_XMSelect1110); 1828 | } 1829 | 1830 | //------------------------------------------------------------------------------ 1831 | 1832 | inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD(FXMVECTOR rgb) 1833 | { 1834 | static const XMVECTORF32 Scale0 = { 0.2126f, -0.0997f, 0.6150f, 0.0f }; 1835 | static const XMVECTORF32 Scale1 = { 0.7152f, -0.3354f, -0.5586f, 0.0f }; 1836 | static const XMVECTORF32 Scale2 = { 0.0722f, 0.4351f, -0.0564f, 0.0f }; 1837 | 1838 | XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); 1839 | XMVECTOR clr = XMVector3Transform(rgb, M); 1840 | 1841 | return XMVectorSelect(rgb, clr, g_XMSelect1110); 1842 | } 1843 | 1844 | //------------------------------------------------------------------------------ 1845 | 1846 | inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD(FXMVECTOR yuv) 1847 | { 1848 | static const XMVECTORF32 Scale1 = { 0.0f, -0.2153f, 2.1324f, 0.0f }; 1849 | static const XMVECTORF32 Scale2 = { 1.2803f, -0.3806f, 0.0f, 0.0f }; 1850 | 1851 | XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero); 1852 | XMVECTOR clr = XMVector3Transform(yuv, M); 1853 | 1854 | return XMVectorSelect(yuv, clr, g_XMSelect1110); 1855 | } 1856 | 1857 | //------------------------------------------------------------------------------ 1858 | 1859 | inline XMVECTOR XM_CALLCONV XMColorRGBToXYZ(FXMVECTOR rgb) 1860 | { 1861 | static const XMVECTORF32 Scale0 = { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f }; 1862 | static const XMVECTORF32 Scale1 = { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f }; 1863 | static const XMVECTORF32 Scale2 = { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f }; 1864 | static const XMVECTORF32 Scale = { 1.f / 0.17697f, 1.f / 0.17697f, 1.f / 0.17697f, 0.0f }; 1865 | 1866 | XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); 1867 | XMVECTOR clr = XMVectorMultiply(XMVector3Transform(rgb, M), Scale); 1868 | 1869 | return XMVectorSelect(rgb, clr, g_XMSelect1110); 1870 | } 1871 | 1872 | inline XMVECTOR XM_CALLCONV XMColorXYZToRGB(FXMVECTOR xyz) 1873 | { 1874 | static const XMVECTORF32 Scale0 = { 2.3706743f, -0.5138850f, 0.0052982f, 0.0f }; 1875 | static const XMVECTORF32 Scale1 = { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f }; 1876 | static const XMVECTORF32 Scale2 = { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f }; 1877 | static const XMVECTORF32 Scale = { 0.17697f, 0.17697f, 0.17697f, 0.0f }; 1878 | 1879 | XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); 1880 | XMVECTOR clr = XMVector3Transform(XMVectorMultiply(xyz, Scale), M); 1881 | 1882 | return XMVectorSelect(xyz, clr, g_XMSelect1110); 1883 | } 1884 | 1885 | //------------------------------------------------------------------------------ 1886 | 1887 | inline XMVECTOR XM_CALLCONV 
XMColorXYZToSRGB(FXMVECTOR xyz) 1888 | { 1889 | static const XMVECTORF32 Scale0 = { 3.2406f, -0.9689f, 0.0557f, 0.0f }; 1890 | static const XMVECTORF32 Scale1 = { -1.5372f, 1.8758f, -0.2040f, 0.0f }; 1891 | static const XMVECTORF32 Scale2 = { -0.4986f, 0.0415f, 1.0570f, 0.0f }; 1892 | static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f }; 1893 | static const XMVECTORF32 Exp = { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.0f }; 1894 | 1895 | XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); 1896 | XMVECTOR lclr = XMVector3Transform(xyz, M); 1897 | 1898 | XMVECTOR sel = XMVectorGreater(lclr, Cutoff); 1899 | 1900 | // clr = 12.92 * lclr for lclr <= 0.0031308f 1901 | XMVECTOR smallC = XMVectorMultiply(lclr, g_XMsrgbScale); 1902 | 1903 | // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055) 1904 | XMVECTOR largeC = XMVectorSubtract(XMVectorMultiply(g_XMsrgbA1, XMVectorPow(lclr, Exp)), g_XMsrgbA); 1905 | 1906 | XMVECTOR clr = XMVectorSelect(smallC, largeC, sel); 1907 | 1908 | return XMVectorSelect(xyz, clr, g_XMSelect1110); 1909 | } 1910 | 1911 | //------------------------------------------------------------------------------ 1912 | 1913 | inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ(FXMVECTOR srgb) 1914 | { 1915 | static const XMVECTORF32 Scale0 = { 0.4124f, 0.2126f, 0.0193f, 0.0f }; 1916 | static const XMVECTORF32 Scale1 = { 0.3576f, 0.7152f, 0.1192f, 0.0f }; 1917 | static const XMVECTORF32 Scale2 = { 0.1805f, 0.0722f, 0.9505f, 0.0f }; 1918 | static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 0.0f }; 1919 | static const XMVECTORF32 Exp = { 2.4f, 2.4f, 2.4f, 1.0f }; 1920 | 1921 | XMVECTOR sel = XMVectorGreater(srgb, Cutoff); 1922 | 1923 | // lclr = clr / 12.92 1924 | XMVECTOR smallC = XMVectorDivide(srgb, g_XMsrgbScale); 1925 | 1926 | // lclr = pow( (clr + a) / (1+a), 2.4 ) 1927 | XMVECTOR largeC = XMVectorPow(XMVectorDivide(XMVectorAdd(srgb, g_XMsrgbA), g_XMsrgbA1), Exp); 1928 | 1929 | XMVECTOR lclr = XMVectorSelect(smallC, largeC, sel); 1930 | 1931 | XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); 1932 | XMVECTOR clr = XMVector3Transform(lclr, M); 1933 | 1934 | return XMVectorSelect(srgb, clr, g_XMSelect1110); 1935 | } 1936 | 1937 | //------------------------------------------------------------------------------ 1938 | 1939 | inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB(FXMVECTOR rgb) 1940 | { 1941 | static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 1.f }; 1942 | static const XMVECTORF32 Linear = { 12.92f, 12.92f, 12.92f, 1.f }; 1943 | static const XMVECTORF32 Scale = { 1.055f, 1.055f, 1.055f, 1.f }; 1944 | static const XMVECTORF32 Bias = { -0.055f, -0.055f, -0.055f, 0.f }; 1945 | static const XMVECTORF32 InvGamma = { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.f }; 1946 | 1947 | XMVECTOR V = XMVectorSaturate(rgb); 1948 | XMVECTOR V0 = XMVectorMultiply(V, Linear); 1949 | XMVECTOR V1 = XMVectorMultiplyAdd(Scale, XMVectorPow(V, InvGamma), Bias); 1950 | XMVECTOR select = XMVectorLess(V, Cutoff); 1951 | V = XMVectorSelect(V1, V0, select); 1952 | return XMVectorSelect(rgb, V, g_XMSelect1110); 1953 | } 1954 | 1955 | //------------------------------------------------------------------------------ 1956 | 1957 | inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB(FXMVECTOR srgb) 1958 | { 1959 | static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 1.f }; 1960 | static const XMVECTORF32 ILinear = { 1.f / 12.92f, 1.f / 12.92f, 1.f / 12.92f, 1.f }; 1961 | static const XMVECTORF32 Scale = { 1.f / 1.055f, 1.f / 1.055f, 1.f / 
1.055f, 1.f }; 1962 | static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f }; 1963 | static const XMVECTORF32 Gamma = { 2.4f, 2.4f, 2.4f, 1.f }; 1964 | 1965 | XMVECTOR V = XMVectorSaturate(srgb); 1966 | XMVECTOR V0 = XMVectorMultiply(V, ILinear); 1967 | XMVECTOR V1 = XMVectorPow(XMVectorMultiply(XMVectorAdd(V, Bias), Scale), Gamma); 1968 | XMVECTOR select = XMVectorGreater(V, Cutoff); 1969 | V = XMVectorSelect(V0, V1, select); 1970 | return XMVectorSelect(srgb, V, g_XMSelect1110); 1971 | } 1972 | 1973 | /**************************************************************************** 1974 | * 1975 | * Miscellaneous 1976 | * 1977 | ****************************************************************************/ 1978 | 1979 | //------------------------------------------------------------------------------ 1980 | #ifndef XM_CPU_ID 1981 | #if defined(__GNUC__) && defined(i386) 1982 | #define XM_CPU_ID(func, a, b, c, d) \ 1983 | __asm__ __volatile__ ( \ 1984 | " pushl %%ebx \n" \ 1985 | " xorl %%ecx,%%ecx \n" \ 1986 | " cpuid \n" \ 1987 | " movl %%ebx, %%esi \n" \ 1988 | " popl %%ebx \n" : \ 1989 | "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func)) 1990 | #elif defined(__GNUC__) && defined(__x86_64__) 1991 | #define XM_CPU_ID(func, a, b, c, d) \ 1992 | __asm__ __volatile__ ( \ 1993 | " pushq %%rbx \n" \ 1994 | " xorq %%rcx,%%rcx \n" \ 1995 | " cpuid \n" \ 1996 | " movq %%rbx, %%rsi \n" \ 1997 | " popq %%rbx \n" : \ 1998 | "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func)) 1999 | #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) 2000 | #define XM_CPU_ID(func, a, b, c, d) \ 2001 | __asm { \ 2002 | __asm mov eax, func \ 2003 | __asm xor ecx, ecx \ 2004 | __asm cpuid \ 2005 | __asm mov a, eax \ 2006 | __asm mov b, ebx \ 2007 | __asm mov c, ecx \ 2008 | __asm mov d, edx \ 2009 | } 2010 | #elif defined(_MSC_VER) && defined(_M_X64) 2011 | #define XM_CPU_ID(func, a, b, c, d) \ 2012 | { \ 2013 | int CPUInfo[4]; \ 2014 | __cpuid(CPUInfo, func); \ 2015 | a = CPUInfo[0]; \ 2016 | b = CPUInfo[1]; \ 2017 | c = CPUInfo[2]; \ 2018 | d = CPUInfo[3]; \ 2019 | } 2020 | #else 2021 | #define XM_CPU_ID(func, a, b, c, d) \ 2022 | a = b = c = d = 0 2023 | #endif 2024 | #endif 2025 | 2026 | inline bool XMVerifyCPUSupport() 2027 | { 2028 | return true; 2029 | } 2030 | 2031 | #ifdef XM_CPU_ID 2032 | #undef XM_CPU_ID 2033 | #endif 2034 | 2035 | //------------------------------------------------------------------------------ 2036 | 2037 | inline XMVECTOR XM_CALLCONV XMFresnelTerm 2038 | ( 2039 | FXMVECTOR CosIncidentAngle, 2040 | FXMVECTOR RefractionIndex 2041 | ) 2042 | { 2043 | assert(!XMVector4IsInfinite(CosIncidentAngle)); 2044 | 2045 | // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where 2046 | // c = CosIncidentAngle 2047 | // g = sqrt(c^2 + RefractionIndex^2 - 1) 2048 | 2049 | #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2050 | 2051 | XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v); 2052 | G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G); 2053 | G = XMVectorAbs(G); 2054 | G = XMVectorSqrt(G); 2055 | 2056 | XMVECTOR S = XMVectorAdd(G, CosIncidentAngle); 2057 | XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle); 2058 | 2059 | XMVECTOR V0 = XMVectorMultiply(D, D); 2060 | XMVECTOR V1 = XMVectorMultiply(S, S); 2061 | V1 = XMVectorReciprocal(V1); 2062 | V0 = XMVectorMultiply(g_XMOneHalf.v, V0); 2063 | V0 = XMVectorMultiply(V0, V1); 2064 | 2065 | XMVECTOR V2 = 
XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v); 2066 | XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v); 2067 | V2 = XMVectorMultiply(V2, V2); 2068 | V3 = XMVectorMultiply(V3, V3); 2069 | V3 = XMVectorReciprocal(V3); 2070 | V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v); 2071 | 2072 | XMVECTOR Result = XMVectorMultiply(V0, V2); 2073 | 2074 | Result = XMVectorSaturate(Result); 2075 | 2076 | return Result; 2077 | 2078 | #elif defined(_XM_SSE_INTRINSICS_) 2079 | // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2)) 2080 | XMVECTOR G = _mm_mul_ps(RefractionIndex, RefractionIndex); 2081 | XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle, CosIncidentAngle); 2082 | G = _mm_sub_ps(G, g_XMOne); 2083 | vTemp = _mm_add_ps(vTemp, G); 2084 | // max((0-vTemp),vTemp) == abs(vTemp) 2085 | // The abs is needed to deal with refraction and cosine being zero 2086 | G = _mm_setzero_ps(); 2087 | G = _mm_sub_ps(G, vTemp); 2088 | G = _mm_max_ps(G, vTemp); 2089 | // Last operation, the sqrt() 2090 | G = _mm_sqrt_ps(G); 2091 | 2092 | // Calc G-C and G+C 2093 | XMVECTOR GAddC = _mm_add_ps(G, CosIncidentAngle); 2094 | XMVECTOR GSubC = _mm_sub_ps(G, CosIncidentAngle); 2095 | // Perform the term (0.5f *(g - c)^2) / (g + c)^2 2096 | XMVECTOR vResult = _mm_mul_ps(GSubC, GSubC); 2097 | vTemp = _mm_mul_ps(GAddC, GAddC); 2098 | vResult = _mm_mul_ps(vResult, g_XMOneHalf); 2099 | vResult = _mm_div_ps(vResult, vTemp); 2100 | // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) 2101 | GAddC = _mm_mul_ps(GAddC, CosIncidentAngle); 2102 | GSubC = _mm_mul_ps(GSubC, CosIncidentAngle); 2103 | GAddC = _mm_sub_ps(GAddC, g_XMOne); 2104 | GSubC = _mm_add_ps(GSubC, g_XMOne); 2105 | GAddC = _mm_mul_ps(GAddC, GAddC); 2106 | GSubC = _mm_mul_ps(GSubC, GSubC); 2107 | GAddC = _mm_div_ps(GAddC, GSubC); 2108 | GAddC = _mm_add_ps(GAddC, g_XMOne); 2109 | // Multiply the two term parts 2110 | vResult = _mm_mul_ps(vResult, GAddC); 2111 | // Clamp to 0.0 - 1.0f 2112 | vResult = _mm_max_ps(vResult, g_XMZero); 2113 | vResult = _mm_min_ps(vResult, g_XMOne); 2114 | return vResult; 2115 | #endif 2116 | } 2117 | 2118 | //------------------------------------------------------------------------------ 2119 | 2120 | inline bool XMScalarNearEqual 2121 | ( 2122 | float S1, 2123 | float S2, 2124 | float Epsilon 2125 | ) 2126 | { 2127 | float Delta = S1 - S2; 2128 | return (fabsf(Delta) <= Epsilon); 2129 | } 2130 | 2131 | //------------------------------------------------------------------------------ 2132 | // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI 2133 | inline float XMScalarModAngle 2134 | ( 2135 | float Angle 2136 | ) 2137 | { 2138 | // Note: The modulo is performed with unsigned math only to work 2139 | // around a precision error on numbers that are close to PI 2140 | 2141 | // Normalize the range from 0.0f to XM_2PI 2142 | Angle = Angle + XM_PI; 2143 | // Perform the modulo, unsigned 2144 | float fTemp = fabsf(Angle); 2145 | fTemp = fTemp - (XM_2PI * (float)((int32_t)(fTemp / XM_2PI))); 2146 | // Restore the number to the range of -XM_PI to XM_PI-epsilon 2147 | fTemp = fTemp - XM_PI; 2148 | // If the modulo'd value was negative, restore negation 2149 | if (Angle<0.0f) { 2150 | fTemp = -fTemp; 2151 | } 2152 | return fTemp; 2153 | } 2154 | 2155 | //------------------------------------------------------------------------------ 2156 | 2157 | inline float XMScalarSin 2158 | ( 2159 | float Value 2160 | ) 2161 | { 2162 | // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 
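// (Added) Truncating (quotient + 0.5f) rounds to the nearest integer for
// non-negative Value, and (quotient - 0.5f) does the same for negative Value.
// Worked example: Value = 7.0f gives quotient = round(7 / 2pi) = 1, so
// y = 7 - 2*pi ~= 0.7168f, which lies in [-pi, pi] as required.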
2163 | float quotient = XM_1DIV2PI*Value; 2164 | if (Value >= 0.0f) 2165 | { 2166 | quotient = (float)((int)(quotient + 0.5f)); 2167 | } 2168 | else 2169 | { 2170 | quotient = (float)((int)(quotient - 0.5f)); 2171 | } 2172 | float y = Value - XM_2PI*quotient; 2173 | 2174 | // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 2175 | if (y > XM_PIDIV2) 2176 | { 2177 | y = XM_PI - y; 2178 | } 2179 | else if (y < -XM_PIDIV2) 2180 | { 2181 | y = -XM_PI - y; 2182 | } 2183 | 2184 | // 11-degree minimax approximation 2185 | float y2 = y * y; 2186 | return (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y; 2187 | } 2188 | 2189 | //------------------------------------------------------------------------------ 2190 | 2191 | inline float XMScalarSinEst 2192 | ( 2193 | float Value 2194 | ) 2195 | { 2196 | // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2197 | float quotient = XM_1DIV2PI*Value; 2198 | if (Value >= 0.0f) 2199 | { 2200 | quotient = (float)((int)(quotient + 0.5f)); 2201 | } 2202 | else 2203 | { 2204 | quotient = (float)((int)(quotient - 0.5f)); 2205 | } 2206 | float y = Value - XM_2PI*quotient; 2207 | 2208 | // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 2209 | if (y > XM_PIDIV2) 2210 | { 2211 | y = XM_PI - y; 2212 | } 2213 | else if (y < -XM_PIDIV2) 2214 | { 2215 | y = -XM_PI - y; 2216 | } 2217 | 2218 | // 7-degree minimax approximation 2219 | float y2 = y * y; 2220 | return (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + 1.0f) * y; 2221 | } 2222 | 2223 | //------------------------------------------------------------------------------ 2224 | 2225 | inline float XMScalarCos 2226 | ( 2227 | float Value 2228 | ) 2229 | { 2230 | // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2231 | float quotient = XM_1DIV2PI*Value; 2232 | if (Value >= 0.0f) 2233 | { 2234 | quotient = (float)((int)(quotient + 0.5f)); 2235 | } 2236 | else 2237 | { 2238 | quotient = (float)((int)(quotient - 0.5f)); 2239 | } 2240 | float y = Value - XM_2PI*quotient; 2241 | 2242 | // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). 2243 | float sign; 2244 | if (y > XM_PIDIV2) 2245 | { 2246 | y = XM_PI - y; 2247 | sign = -1.0f; 2248 | } 2249 | else if (y < -XM_PIDIV2) 2250 | { 2251 | y = -XM_PI - y; 2252 | sign = -1.0f; 2253 | } 2254 | else 2255 | { 2256 | sign = +1.0f; 2257 | } 2258 | 2259 | // 10-degree minimax approximation 2260 | float y2 = y*y; 2261 | float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f; 2262 | return sign*p; 2263 | } 2264 | 2265 | //------------------------------------------------------------------------------ 2266 | 2267 | inline float XMScalarCosEst 2268 | ( 2269 | float Value 2270 | ) 2271 | { 2272 | // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2273 | float quotient = XM_1DIV2PI*Value; 2274 | if (Value >= 0.0f) 2275 | { 2276 | quotient = (float)((int)(quotient + 0.5f)); 2277 | } 2278 | else 2279 | { 2280 | quotient = (float)((int)(quotient - 0.5f)); 2281 | } 2282 | float y = Value - XM_2PI*quotient; 2283 | 2284 | // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). 
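// (Added) The fold relies on cos(pi - y) = -cos(y) and cos(-pi - y) = -cos(y),
// so reducing y to [-pi/2, pi/2] only costs a sign, which is applied to the
// polynomial result below.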
2285 | float sign; 2286 | if (y > XM_PIDIV2) 2287 | { 2288 | y = XM_PI - y; 2289 | sign = -1.0f; 2290 | } 2291 | else if (y < -XM_PIDIV2) 2292 | { 2293 | y = -XM_PI - y; 2294 | sign = -1.0f; 2295 | } 2296 | else 2297 | { 2298 | sign = +1.0f; 2299 | } 2300 | 2301 | // 6-degree minimax approximation 2302 | float y2 = y * y; 2303 | float p = ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f; 2304 | return sign*p; 2305 | } 2306 | 2307 | //------------------------------------------------------------------------------ 2308 | 2309 | _Use_decl_annotations_ 2310 | inline void XMScalarSinCos 2311 | ( 2312 | float* pSin, 2313 | float* pCos, 2314 | float Value 2315 | ) 2316 | { 2317 | assert(pSin); 2318 | assert(pCos); 2319 | 2320 | // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2321 | float quotient = XM_1DIV2PI*Value; 2322 | if (Value >= 0.0f) 2323 | { 2324 | quotient = (float)((int)(quotient + 0.5f)); 2325 | } 2326 | else 2327 | { 2328 | quotient = (float)((int)(quotient - 0.5f)); 2329 | } 2330 | float y = Value - XM_2PI*quotient; 2331 | 2332 | // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 2333 | float sign; 2334 | if (y > XM_PIDIV2) 2335 | { 2336 | y = XM_PI - y; 2337 | sign = -1.0f; 2338 | } 2339 | else if (y < -XM_PIDIV2) 2340 | { 2341 | y = -XM_PI - y; 2342 | sign = -1.0f; 2343 | } 2344 | else 2345 | { 2346 | sign = +1.0f; 2347 | } 2348 | 2349 | float y2 = y * y; 2350 | 2351 | // 11-degree minimax approximation 2352 | *pSin = (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y; 2353 | 2354 | // 10-degree minimax approximation 2355 | float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f; 2356 | *pCos = sign*p; 2357 | } 2358 | 2359 | //------------------------------------------------------------------------------ 2360 | 2361 | _Use_decl_annotations_ 2362 | inline void XMScalarSinCosEst 2363 | ( 2364 | float* pSin, 2365 | float* pCos, 2366 | float Value 2367 | ) 2368 | { 2369 | assert(pSin); 2370 | assert(pCos); 2371 | 2372 | // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2373 | float quotient = XM_1DIV2PI*Value; 2374 | if (Value >= 0.0f) 2375 | { 2376 | quotient = (float)((int)(quotient + 0.5f)); 2377 | } 2378 | else 2379 | { 2380 | quotient = (float)((int)(quotient - 0.5f)); 2381 | } 2382 | float y = Value - XM_2PI*quotient; 2383 | 2384 | // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 2385 | float sign; 2386 | if (y > XM_PIDIV2) 2387 | { 2388 | y = XM_PI - y; 2389 | sign = -1.0f; 2390 | } 2391 | else if (y < -XM_PIDIV2) 2392 | { 2393 | y = -XM_PI - y; 2394 | sign = -1.0f; 2395 | } 2396 | else 2397 | { 2398 | sign = +1.0f; 2399 | } 2400 | 2401 | float y2 = y * y; 2402 | 2403 | // 7-degree minimax approximation 2404 | *pSin = (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + 1.0f) * y; 2405 | 2406 | // 6-degree minimax approximation 2407 | float p = ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f; 2408 | *pCos = sign*p; 2409 | } 2410 | 2411 | //------------------------------------------------------------------------------ 2412 | 2413 | inline float XMScalarASin 2414 | ( 2415 | float Value 2416 | ) 2417 | { 2418 | // Clamp input to [-1,1]. 
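// (Added) The polynomial below approximates acos(|x|) / sqrt(1 - |x|) on
// [0, 1]; multiplying by sqrt(1 - |x|) restores acos(|x|) while absorbing its
// unbounded derivative at |x| = 1. asin is then recovered from
// asin(x) = pi/2 - acos(x), using acos(-x) = pi - acos(x) for negative input.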
2419 | bool nonnegative = (Value >= 0.0f); 2420 | float x = fabsf(Value); 2421 | float omx = 1.0f - x; 2422 | if (omx < 0.0f) 2423 | { 2424 | omx = 0.0f; 2425 | } 2426 | float root = sqrtf(omx); 2427 | 2428 | // 7-degree minimax approximation 2429 | float result = ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + 0.0308918810f) * x - 0.0501743046f) * x + 0.0889789874f) * x - 0.2145988016f) * x + 1.5707963050f; 2430 | result *= root; // acos(|x|) 2431 | 2432 | // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) 2433 | return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); 2434 | } 2435 | 2436 | //------------------------------------------------------------------------------ 2437 | 2438 | inline float XMScalarASinEst 2439 | ( 2440 | float Value 2441 | ) 2442 | { 2443 | // Clamp input to [-1,1]. 2444 | bool nonnegative = (Value >= 0.0f); 2445 | float x = fabsf(Value); 2446 | float omx = 1.0f - x; 2447 | if (omx < 0.0f) 2448 | { 2449 | omx = 0.0f; 2450 | } 2451 | float root = sqrtf(omx); 2452 | 2453 | // 3-degree minimax approximation 2454 | float result = ((-0.0187293f*x + 0.0742610f)*x - 0.2121144f)*x + 1.5707288f; 2455 | result *= root; // acos(|x|) 2456 | 2457 | // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) 2458 | return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); 2459 | } 2460 | 2461 | //------------------------------------------------------------------------------ 2462 | 2463 | inline float XMScalarACos 2464 | ( 2465 | float Value 2466 | ) 2467 | { 2468 | // Clamp input to [-1,1]. 2469 | bool nonnegative = (Value >= 0.0f); 2470 | float x = fabsf(Value); 2471 | float omx = 1.0f - x; 2472 | if (omx < 0.0f) 2473 | { 2474 | omx = 0.0f; 2475 | } 2476 | float root = sqrtf(omx); 2477 | 2478 | // 7-degree minimax approximation 2479 | float result = ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + 0.0308918810f) * x - 0.0501743046f) * x + 0.0889789874f) * x - 0.2145988016f) * x + 1.5707963050f; 2480 | result *= root; 2481 | 2482 | // acos(x) = pi - acos(-x) when x < 0 2483 | return (nonnegative ? result : XM_PI - result); 2484 | } 2485 | 2486 | //------------------------------------------------------------------------------ 2487 | 2488 | inline float XMScalarACosEst 2489 | ( 2490 | float Value 2491 | ) 2492 | { 2493 | // Clamp input to [-1,1]. 2494 | bool nonnegative = (Value >= 0.0f); 2495 | float x = fabsf(Value); 2496 | float omx = 1.0f - x; 2497 | if (omx < 0.0f) 2498 | { 2499 | omx = 0.0f; 2500 | } 2501 | float root = sqrtf(omx); 2502 | 2503 | // 3-degree minimax approximation 2504 | float result = ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x + 1.5707288f; 2505 | result *= root; 2506 | 2507 | // acos(x) = pi - acos(-x) when x < 0 2508 | return (nonnegative ? result : XM_PI - result); 2509 | } 2510 | --------------------------------------------------------------------------------