├── RawIntrinsics ├── Utils.ManuallyAdded.cs ├── RawIntrinsics.csproj ├── MMX.ManuallyAdded.cs ├── SSE.ManuallyAdded.cs ├── SSE42.cs ├── SSE2.ManuallyAdded.cs ├── AVX.ManuallyAdded.cs ├── MMX.cs ├── SSE3.cs ├── Other.cs ├── SSSE3.cs ├── Types.cs ├── FMA.cs ├── SSE41.cs └── SSE.cs ├── RawIntrinsicsGenerator ├── RawIntrinsicsGenerator.csproj ├── Program.cs └── Generator.cs ├── README.md ├── Wibic.sln └── .gitignore /RawIntrinsics/Utils.ManuallyAdded.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static class Utils 4 | { 5 | public static int _MM_SHUFFLE(int z, int y, int x, int w) => (z << 6) | (y << 4) | (x << 2) | w; 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /RawIntrinsics/RawIntrinsics.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net5.0 5 | true 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /RawIntrinsics/MMX.ManuallyAdded.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static partial class MMX 4 | { 5 | ///

6 | /// Return vector of type __m64 with all elements set to zero. 7 | ///

8 | /// __m64 dst {FP32} 9 | public static __m64 _mm_setzero_si64() => System.Runtime.Intrinsics.Vector64.Zero; 10 | } 11 | } -------------------------------------------------------------------------------- /RawIntrinsics/SSE.ManuallyAdded.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static partial class SSE 4 | { 5 | ///

6 | /// Return vector of type __m128 with all elements set to zero. 7 | ///

8 | /// __m128 dst {FP32} 9 | public static __m128 _mm_setzero_ps() => System.Runtime.Intrinsics.Vector128.Zero; 10 | } 11 | } -------------------------------------------------------------------------------- /RawIntrinsicsGenerator/RawIntrinsicsGenerator.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net5.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /RawIntrinsicsGenerator/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | using System.Reflection; 4 | using System.Threading.Tasks; 5 | 6 | namespace RawIntrinsicsGenerator 7 | { 8 | public static class Program 9 | { 10 | private async static Task Main(string[] _) 11 | { 12 | var savePath = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "RawIntrinsics"); 13 | 14 | await Generator.Generate("RawIntrinsics", savePath); 15 | 16 | Console.WriteLine($"Done! Generated files were saved to {savePath}"); 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /RawIntrinsics/SSE42.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static unsafe partial class SSE42 4 | { 5 | ///

6 | /// Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst". 7 | ///

8 | /// PCMPGTQ xmm, xmm 9 | /// __m128i {SI64} 10 | /// __m128i {SI64} 11 | /// __m128i dst {UI64} 12 | public static __m128i _mm_cmpgt_epi64(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Sse42.CompareGreaterThan(a.SI64, b.SI64); 13 | 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /RawIntrinsics/SSE2.ManuallyAdded.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static partial class SSE2 4 | { 5 | ///

6 | /// Return vector of type __m128d with all elements set to zero. 7 | ///

8 | /// __m128d dst {M128} 9 | public static __m128d _mm_setzero_pd() => System.Runtime.Intrinsics.Vector128.Zero; 10 | 11 | ///

12 | /// Return vector of type __m128i with all elements set to zero. 13 | ///

14 | /// __m128i dst {M128} 15 | public static __m128i _mm_setzero_si128() => System.Runtime.Intrinsics.Vector128.Zero; 16 | } 17 | } -------------------------------------------------------------------------------- /RawIntrinsics/AVX.ManuallyAdded.cs: -------------------------------------------------------------------------------- 1 | using System.Runtime.Intrinsics; 2 | 3 | namespace RawIntrinsics 4 | { 5 | public static partial class AVX 6 | { 7 | ///

8 | /// Return vector of type __m256d with all elements set to zero. 9 | ///

10 | /// __m256d dst {FP64} 11 | public static __m256d _mm256_setzero_pd() => System.Runtime.Intrinsics.Vector256.Zero; 12 | 13 | ///

14 | /// Return vector of type __m256 with all elements set to zero. 15 | ///

16 | /// __m256 dst {FP32} 17 | public static __m256 _mm256_setzero_ps() => System.Runtime.Intrinsics.Vector256.Zero; 18 | 19 | ///

20 | /// Return vector of type __m256i with all elements set to zero. 21 | ///

22 | /// __m256i dst {M256} 23 | public static __m256i _mm256_setzero_si256() => System.Runtime.Intrinsics.Vector256.Zero; 24 | } 25 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Wibic.RawIntrinsics 2 | 3 | .NET intrinsics represented by methods named after native intrinsic functions. 4 | Those methods can make it a lot easier to port existing SIMD-related C++ code into C# (I hope). 5 | 6 | Something like this: 7 | 8 | ```csharp 9 | var v = _mm256_set1_epi8(1); 10 | 11 | var end = data + size; 12 | var ptr = data; 13 | 14 | __m256i tmp; 15 | __m256i global_sum = _mm256_setzero_si256(); 16 | __m256i local_sum; 17 | 18 | while (ptr + 255 * 32 < end) 19 | { 20 | local_sum = _mm256_setzero_si256(); 21 | 22 | for (var i = 0; i < 255; i++, ptr += 32) 23 | { 24 | __m256i src = _mm256_loadu_si256((__m256i*)ptr); 25 | __m256i eq = _mm256_cmpeq_epi8(src, v); 26 | 27 | local_sum = _mm256_sub_epi8(local_sum, eq); 28 | } 29 | 30 | tmp = _mm256_sad_epu8(local_sum, _mm256_setzero_si256()); 31 | global_sum = _mm256_add_epi64(global_sum, tmp); 32 | } 33 | ``` 34 | 35 | All methods are generated by parsing and using data from these two sources: 36 | 37 | https://github.com/dotnet/runtime/tree/master/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics 38 | 39 | and: 40 | 41 | https://software.intel.com/sites/landingpage/IntrinsicsGuide/files/data-latest.xml 42 | 43 | PS: The generator project is also included in the repo. 
44 | -------------------------------------------------------------------------------- /Wibic.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RawIntrinsics", "RawIntrinsics\RawIntrinsics.csproj", "{2C8F57F8-6560-42F3-A24C-C649FA350F72}" 4 | EndProject 5 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RawIntrinsicsGenerator", "RawIntrinsicsGenerator\RawIntrinsicsGenerator.csproj", "{A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}" 6 | EndProject 7 | Global 8 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 9 | Debug|Any CPU = Debug|Any CPU 10 | Release|Any CPU = Release|Any CPU 11 | EndGlobalSection 12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 13 | {2C8F57F8-6560-42F3-A24C-C649FA350F72}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 14 | {2C8F57F8-6560-42F3-A24C-C649FA350F72}.Debug|Any CPU.Build.0 = Debug|Any CPU 15 | {2C8F57F8-6560-42F3-A24C-C649FA350F72}.Release|Any CPU.ActiveCfg = Release|Any CPU 16 | {2C8F57F8-6560-42F3-A24C-C649FA350F72}.Release|Any CPU.Build.0 = Release|Any CPU 17 | {A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 18 | {A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}.Debug|Any CPU.Build.0 = Debug|Any CPU 19 | {A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}.Release|Any CPU.ActiveCfg = Release|Any CPU 20 | {A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}.Release|Any CPU.Build.0 = Release|Any CPU 21 | EndGlobalSection 22 | EndGlobal 23 | -------------------------------------------------------------------------------- /RawIntrinsics/MMX.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static unsafe partial class MMX 4 | { 5 | ///

6 | /// Broadcast 16-bit integer "a" to all elements of "dst". 7 | ///

8 | /// 9 | /// short {UI16} 10 | /// __m64 dst {FP32} 11 | public static __m64 _mm_set1_pi16(short a) => System.Runtime.Intrinsics.Vector64.Create((ushort)a); 12 | 13 | ///

14 | /// Broadcast 32-bit integer "a" to all elements of "dst". 15 | ///

16 | /// 17 | /// int {UI32} 18 | /// __m64 dst {FP32} 19 | public static __m64 _mm_set1_pi32(int a) => System.Runtime.Intrinsics.Vector64.Create((uint)a); 20 | 21 | ///

22 | /// Broadcast 8-bit integer "a" to all elements of "dst". 23 | ///

24 | /// 25 | /// byte {UI8} 26 | /// __m64 dst {FP32} 27 | public static __m64 _mm_set1_pi8(byte a) => System.Runtime.Intrinsics.Vector64.Create(a); 28 | 29 | ///

30 | /// Set packed 16-bit integers in "dst" with the supplied values in reverse order. 31 | ///

32 | /// 33 | /// short {UI16} 34 | /// short {UI16} 35 | /// short {UI16} 36 | /// short {UI16} 37 | /// __m64 dst {FP32} 38 | public static __m64 _mm_setr_pi16(short e3, short e2, short e1, short e0) => System.Runtime.Intrinsics.Vector64.Create((ushort)e3, (ushort)e2, (ushort)e1, (ushort)e0); 39 | 40 | ///

41 | /// Set packed 32-bit integers in "dst" with the supplied values in reverse order. 42 | ///

43 | /// 44 | /// int {UI32} 45 | /// int {UI32} 46 | /// __m64 dst {FP32} 47 | public static __m64 _mm_setr_pi32(int e1, int e0) => System.Runtime.Intrinsics.Vector64.Create((uint)e1, (uint)e0); 48 | 49 | ///

50 | /// Set packed 8-bit integers in "dst" with the supplied values in reverse order. 51 | ///

52 | /// 53 | /// byte {UI8} 54 | /// byte {UI8} 55 | /// byte {UI8} 56 | /// byte {UI8} 57 | /// byte {UI8} 58 | /// byte {UI8} 59 | /// byte {UI8} 60 | /// byte {UI8} 61 | /// __m64 dst {FP32} 62 | public static __m64 _mm_setr_pi8(byte e7, byte e6, byte e5, byte e4, byte e3, byte e2, byte e1, byte e0) => System.Runtime.Intrinsics.Vector64.Create(e7, e6, e5, e4, e3, e2, e1, e0); 63 | 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /RawIntrinsics/SSE3.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static unsafe partial class SSE3 4 | { 5 | ///

6 | /// Alternately add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". 7 | ///

8 | /// ADDSUBPD xmm, xmm 9 | /// __m128d {FP64} 10 | /// __m128d {FP64} 11 | /// __m128d dst {FP64} 12 | public static __m128d _mm_addsub_pd(__m128d a, __m128d b) => System.Runtime.Intrinsics.X86.Sse3.AddSubtract(a.FP64, b.FP64); 13 | 14 | ///

15 | /// Alternately add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". 16 | ///

17 | /// ADDSUBPS xmm, xmm 18 | /// __m128 {FP32} 19 | /// __m128 {FP32} 20 | /// __m128 dst {FP32} 21 | public static __m128 _mm_addsub_ps(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse3.AddSubtract(a.FP32, b.FP32); 22 | 23 | ///

24 | /// Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". 25 | ///

26 | /// HADDPD xmm, xmm 27 | /// __m128d {FP64} 28 | /// __m128d {FP64} 29 | /// __m128d dst {FP64} 30 | public static __m128d _mm_hadd_pd(__m128d a, __m128d b) => System.Runtime.Intrinsics.X86.Sse3.HorizontalAdd(a.FP64, b.FP64); 31 | 32 | ///

33 | /// Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". 34 | ///

35 | /// HADDPS xmm, xmm 36 | /// __m128 {FP32} 37 | /// __m128 {FP32} 38 | /// __m128 dst {FP32} 39 | public static __m128 _mm_hadd_ps(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse3.HorizontalAdd(a.FP32, b.FP32); 40 | 41 | ///

42 | /// Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". 43 | ///

44 | /// HSUBPD xmm, xmm 45 | /// __m128d {FP64} 46 | /// __m128d {FP64} 47 | /// __m128d dst {FP64} 48 | public static __m128d _mm_hsub_pd(__m128d a, __m128d b) => System.Runtime.Intrinsics.X86.Sse3.HorizontalSubtract(a.FP64, b.FP64); 49 | 50 | ///

51 | /// Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". 52 | ///

53 | /// HSUBPS xmm, xmm 54 | /// __m128 {FP32} 55 | /// __m128 {FP32} 56 | /// __m128 dst {FP32} 57 | public static __m128 _mm_hsub_ps(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse3.HorizontalSubtract(a.FP32, b.FP32); 58 | 59 | ///

60 | /// Load 128-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm_loadu_si128" when the data crosses a cache line boundary. 61 | ///

62 | /// LDDQU xmm, m128 63 | /// __m128i {M128} 64 | /// __m128i dst {M128} 65 | public static __m128i _mm_lddqu_si128(__m128i* mem_addr) => System.Runtime.Intrinsics.X86.Sse3.LoadDquVector128((sbyte*)mem_addr); 66 | 67 | ///

68 | /// Load a double-precision (64-bit) floating-point element from memory into both elements of "dst". 69 | ///

70 | /// MOVDDUP xmm, m64 71 | /// double {FP64} 72 | /// __m128d dst {FP64} 73 | public static __m128d _mm_loaddup_pd(double* mem_addr) => System.Runtime.Intrinsics.X86.Sse3.LoadAndDuplicateToVector128(mem_addr); 74 | 75 | ///

76 | /// Duplicate the low double-precision (64-bit) floating-point element from "a", and store the results in "dst". 77 | ///

78 | /// MOVDDUP xmm, xmm 79 | /// __m128d {FP64} 80 | /// __m128d dst {FP64} 81 | public static __m128d _mm_movedup_pd(__m128d a) => System.Runtime.Intrinsics.X86.Sse3.MoveAndDuplicate(a.FP64); 82 | 83 | ///

84 | /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". 85 | ///

86 | /// MOVSHDUP xmm, xmm 87 | /// __m128 {FP32} 88 | /// __m128 dst {FP32} 89 | public static __m128 _mm_movehdup_ps(__m128 a) => System.Runtime.Intrinsics.X86.Sse3.MoveHighAndDuplicate(a.FP32); 90 | 91 | ///

92 | /// Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". 93 | ///

94 | /// MOVSLDUP xmm, xmm 95 | /// __m128 {FP32} 96 | /// __m128 dst {FP32} 97 | public static __m128 _mm_moveldup_ps(__m128 a) => System.Runtime.Intrinsics.X86.Sse3.MoveLowAndDuplicate(a.FP32); 98 | 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /RawIntrinsics/Other.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static unsafe partial class Other 4 | { 5 | ///

6 | /// Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". 7 | ///

8 | /// AESDEC xmm, xmm 9 | /// __m128i {M128} 10 | /// __m128i {M128} 11 | /// __m128i dst {M128} 12 | public static __m128i _mm_aesdec_si128(__m128i a, __m128i RoundKey) => System.Runtime.Intrinsics.X86.Aes.Decrypt(a.UI8, RoundKey.UI8); 13 | 14 | ///

15 | /// Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". 16 | ///

17 | /// AESDECLAST xmm, xmm 18 | /// __m128i {M128} 19 | /// __m128i {M128} 20 | /// __m128i dst {M128} 21 | public static __m128i _mm_aesdeclast_si128(__m128i a, __m128i RoundKey) => System.Runtime.Intrinsics.X86.Aes.DecryptLast(a.UI8, RoundKey.UI8); 22 | 23 | ///

24 | /// Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". 25 | ///

26 | /// AESENC xmm, xmm 27 | /// __m128i {M128} 28 | /// __m128i {M128} 29 | /// __m128i dst {M128} 30 | public static __m128i _mm_aesenc_si128(__m128i a, __m128i RoundKey) => System.Runtime.Intrinsics.X86.Aes.Encrypt(a.UI8, RoundKey.UI8); 31 | 32 | ///

33 | /// Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". 34 | ///

35 | /// AESENCLAST xmm, xmm 36 | /// __m128i {M128} 37 | /// __m128i {M128} 38 | /// __m128i dst {M128} 39 | public static __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) => System.Runtime.Intrinsics.X86.Aes.EncryptLast(a.UI8, RoundKey.UI8); 40 | 41 | ///

42 | /// Perform the InvMixColumns transformation on "a" and store the result in "dst". 43 | ///

44 | /// AESIMC xmm, xmm 45 | /// __m128i {M128} 46 | /// __m128i dst {M128} 47 | public static __m128i _mm_aesimc_si128(__m128i a) => System.Runtime.Intrinsics.X86.Aes.InverseMixColumns(a.UI8); 48 | 49 | ///

50 | /// Assist in expanding the AES cipher key by computing steps towards generating a round key for the encryption cipher using data from "a" and an 8-bit round constant specified in "imm8", and store the result in "dst". 51 | ///

52 | /// AESKEYGENASSIST xmm, xmm, imm8 53 | /// __m128i {M128} 54 | /// int {IMM} 55 | /// __m128i dst {M128} 56 | public static __m128i _mm_aeskeygenassist_si128(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Aes.KeygenAssist(a.UI8, (byte)imm8); 57 | 58 | ///

59 | /// Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst". 60 | ///

61 | /// PCLMULQDQ xmm, xmm, imm8 62 | /// __m128i {M128} 63 | /// __m128i {M128} 64 | /// int {IMM} 65 | /// __m128i dst {M128} 66 | public static __m128i _mm_clmulepi64_si128(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(a.SI64, b.SI64, (byte)imm8); 67 | 68 | ///

69 | /// Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst". 70 | ///

71 | /// POPCNT r32, r32 72 | /// int {UI32} 73 | /// int dst {UI32} 74 | public static int _mm_popcnt_u32(int a) => (int)System.Runtime.Intrinsics.X86.Popcnt.PopCount((uint)a); 75 | 76 | ///

77 | /// Count the number of bits set to 1 in unsigned 64-bit integer "a", and return that count in "dst". 78 | ///

79 | /// POPCNT r64, r64 80 | /// long {UI64} 81 | /// long dst {UI64} 82 | public static long _mm_popcnt_u64(long a) => (long)System.Runtime.Intrinsics.X86.Popcnt.X64.PopCount((ulong)a); 83 | 84 | ///

85 | /// Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst". 86 | ///

87 | /// TZCNT r32, r32 88 | /// int {UI32} 89 | /// int dst {UI32} 90 | public static int _mm_tzcnt_32(int a) => (int)System.Runtime.Intrinsics.X86.Bmi1.TrailingZeroCount((uint)a); 91 | 92 | ///

93 | /// Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst". 94 | ///

95 | /// TZCNT r64, r64 96 | /// long {UI64} 97 | /// long dst {UI64} 98 | public static long _mm_tzcnt_64(long a) => (long)System.Runtime.Intrinsics.X86.Bmi1.X64.TrailingZeroCount((ulong)a); 99 | 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | *.DotSettings.user 13 | 14 | # User-specific files (MonoDevelop/Xamarin Studio) 15 | *.userprefs 16 | 17 | # Mono auto generated files 18 | mono_crash.* 19 | 20 | # Build results 21 | [Dd]ebug/ 22 | [Dd]ebugPublic/ 23 | [Rr]elease/ 24 | [Rr]eleases/ 25 | x64/ 26 | x86/ 27 | [Ww][Ii][Nn]32/ 28 | [Aa][Rr][Mm]/ 29 | [Aa][Rr][Mm]64/ 30 | bld/ 31 | [Bb]in/ 32 | [Oo]bj/ 33 | [Ll]og/ 34 | [Ll]ogs/ 35 | .idea/ 36 | 37 | # Visual Studio 2015/2017 cache/options directory 38 | .vs/ 39 | # Uncomment if you have tasks that create the project's static files in wwwroot 40 | #wwwroot/ 41 | 42 | # Visual Studio 2017 auto generated files 43 | Generated\ Files/ 44 | 45 | # MSTest test Results 46 | [Tt]est[Rr]esult*/ 47 | [Bb]uild[Ll]og.* 48 | 49 | # NUnit 50 | *.VisualState.xml 51 | TestResult.xml 52 | nunit-*.xml 53 | 54 | # Build Results of an ATL Project 55 | [Dd]ebugPS/ 56 | [Rr]eleasePS/ 57 | dlldata.c 58 | 59 | # Benchmark Results 60 | BenchmarkDotNet.Artifacts/ 61 | 62 | # .NET Core 63 | project.lock.json 64 | project.fragment.lock.json 65 | artifacts/ 66 | 67 | # ASP.NET Scaffolding 68 | ScaffoldingReadMe.txt 69 | 70 | # StyleCop 71 | StyleCopReport.xml 72 | 73 | # Files built by Visual Studio 74 | *_i.c 75 | *_p.c 76 | *_h.h 77 | *.ilk 78 | *.meta 79 | *.obj 80 | 
*.iobj 81 | *.pch 82 | *.pdb 83 | *.ipdb 84 | *.pgc 85 | *.pgd 86 | *.rsp 87 | *.sbr 88 | *.tlb 89 | *.tli 90 | *.tlh 91 | *.tmp 92 | *.tmp_proj 93 | *_wpftmp.csproj 94 | *.log 95 | *.vspscc 96 | *.vssscc 97 | .builds 98 | *.pidb 99 | *.svclog 100 | *.scc 101 | 102 | # Chutzpah Test files 103 | _Chutzpah* 104 | 105 | # Visual C++ cache files 106 | ipch/ 107 | *.aps 108 | *.ncb 109 | *.opendb 110 | *.opensdf 111 | *.sdf 112 | *.cachefile 113 | *.VC.db 114 | *.VC.VC.opendb 115 | 116 | # Visual Studio profiler 117 | *.psess 118 | *.vsp 119 | *.vspx 120 | *.sap 121 | 122 | # Visual Studio Trace Files 123 | *.e2e 124 | 125 | # TFS 2012 Local Workspace 126 | $tf/ 127 | 128 | # Guidance Automation Toolkit 129 | *.gpState 130 | 131 | # ReSharper is a .NET coding add-in 132 | _ReSharper*/ 133 | *.[Rr]e[Ss]harper 134 | *.DotSettings.user 135 | 136 | # TeamCity is a build add-in 137 | _TeamCity* 138 | 139 | # DotCover is a Code Coverage Tool 140 | *.dotCover 141 | 142 | # AxoCover is a Code Coverage Tool 143 | .axoCover/* 144 | !.axoCover/settings.json 145 | 146 | # Coverlet is a free, cross platform Code Coverage Tool 147 | coverage*.json 148 | coverage*.xml 149 | coverage*.info 150 | 151 | # Visual Studio code coverage results 152 | *.coverage 153 | *.coveragexml 154 | 155 | # NCrunch 156 | _NCrunch_* 157 | .*crunch*.local.xml 158 | nCrunchTemp_* 159 | 160 | # MightyMoose 161 | *.mm.* 162 | AutoTest.Net/ 163 | 164 | # Web workbench (sass) 165 | .sass-cache/ 166 | 167 | # Installshield output folder 168 | [Ee]xpress/ 169 | 170 | # DocProject is a documentation generator add-in 171 | DocProject/buildhelp/ 172 | DocProject/Help/*.HxT 173 | DocProject/Help/*.HxC 174 | DocProject/Help/*.hhc 175 | DocProject/Help/*.hhk 176 | DocProject/Help/*.hhp 177 | DocProject/Help/Html2 178 | DocProject/Help/html 179 | 180 | # Click-Once directory 181 | publish/ 182 | 183 | # Publish Web Output 184 | *.[Pp]ublish.xml 185 | *.azurePubxml 186 | # Note: Comment the next line if you want to 
checkin your web deploy settings, 187 | # but database connection strings (with potential passwords) will be unencrypted 188 | *.pubxml 189 | *.publishproj 190 | 191 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 192 | # checkin your Azure Web App publish settings, but sensitive information contained 193 | # in these scripts will be unencrypted 194 | PublishScripts/ 195 | 196 | # NuGet Packages 197 | *.nupkg 198 | # NuGet Symbol Packages 199 | *.snupkg 200 | # The packages folder can be ignored because of Package Restore 201 | **/[Pp]ackages/* 202 | # except build/, which is used as an MSBuild target. 203 | !**/[Pp]ackages/build/ 204 | # Uncomment if necessary however generally it will be regenerated when needed 205 | #!**/[Pp]ackages/repositories.config 206 | # NuGet v3's project.json files produces more ignorable files 207 | *.nuget.props 208 | *.nuget.targets 209 | 210 | # Microsoft Azure Build Output 211 | csx/ 212 | *.build.csdef 213 | 214 | # Microsoft Azure Emulator 215 | ecf/ 216 | rcf/ 217 | 218 | # Windows Store app package directories and files 219 | AppPackages/ 220 | BundleArtifacts/ 221 | Package.StoreAssociation.xml 222 | _pkginfo.txt 223 | *.appx 224 | *.appxbundle 225 | *.appxupload 226 | 227 | # Visual Studio cache files 228 | # files ending in .cache can be ignored 229 | *.[Cc]ache 230 | # but keep track of directories ending in .cache 231 | !?*.[Cc]ache/ 232 | 233 | # Others 234 | ClientBin/ 235 | ~$* 236 | *~ 237 | *.dbmdl 238 | *.dbproj.schemaview 239 | *.jfm 240 | *.pfx 241 | *.publishsettings 242 | orleans.codegen.cs 243 | 244 | # Including strong name files can present a security risk 245 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 246 | #*.snk 247 | 248 | # Since there are multiple workflows, uncomment next line to ignore bower_components 249 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 250 | #bower_components/ 251 | 252 | # RIA/Silverlight projects 
253 | Generated_Code/ 254 | 255 | # Backup & report files from converting an old project file 256 | # to a newer Visual Studio version. Backup files are not needed, 257 | # because we have git ;-) 258 | _UpgradeReport_Files/ 259 | Backup*/ 260 | UpgradeLog*.XML 261 | UpgradeLog*.htm 262 | ServiceFabricBackup/ 263 | *.rptproj.bak 264 | 265 | # SQL Server files 266 | *.mdf 267 | *.ldf 268 | *.ndf 269 | 270 | # Business Intelligence projects 271 | *.rdl.data 272 | *.bim.layout 273 | *.bim_*.settings 274 | *.rptproj.rsuser 275 | *- [Bb]ackup.rdl 276 | *- [Bb]ackup ([0-9]).rdl 277 | *- [Bb]ackup ([0-9][0-9]).rdl 278 | 279 | # Microsoft Fakes 280 | FakesAssemblies/ 281 | 282 | # GhostDoc plugin setting file 283 | *.GhostDoc.xml 284 | 285 | # Node.js Tools for Visual Studio 286 | .ntvs_analysis.dat 287 | node_modules/ 288 | 289 | # Visual Studio 6 build log 290 | *.plg 291 | 292 | # Visual Studio 6 workspace options file 293 | *.opt 294 | 295 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
296 | *.vbw 297 | 298 | # Visual Studio LightSwitch build output 299 | **/*.HTMLClient/GeneratedArtifacts 300 | **/*.DesktopClient/GeneratedArtifacts 301 | **/*.DesktopClient/ModelManifest.xml 302 | **/*.Server/GeneratedArtifacts 303 | **/*.Server/ModelManifest.xml 304 | _Pvt_Extensions 305 | 306 | # Paket dependency manager 307 | .paket/paket.exe 308 | paket-files/ 309 | 310 | # FAKE - F# Make 311 | .fake/ 312 | 313 | # CodeRush personal settings 314 | .cr/personal 315 | 316 | # Python Tools for Visual Studio (PTVS) 317 | __pycache__/ 318 | *.pyc 319 | 320 | # Cake - Uncomment if you are using it 321 | # tools/** 322 | # !tools/packages.config 323 | 324 | # Tabs Studio 325 | *.tss 326 | 327 | # Telerik's JustMock configuration file 328 | *.jmconfig 329 | 330 | # BizTalk build output 331 | *.btp.cs 332 | *.btm.cs 333 | *.odx.cs 334 | *.xsd.cs 335 | 336 | # OpenCover UI analysis results 337 | OpenCover/ 338 | 339 | # Azure Stream Analytics local run output 340 | ASALocalRun/ 341 | 342 | # MSBuild Binary and Structured Log 343 | *.binlog 344 | 345 | # NVidia Nsight GPU debugger configuration file 346 | *.nvuser 347 | 348 | # MFractors (Xamarin productivity tool) working folder 349 | .mfractor/ 350 | 351 | # Local History for Visual Studio 352 | .localhistory/ 353 | 354 | # BeatPulse healthcheck temp database 355 | healthchecksdb 356 | 357 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 358 | MigrationBackup/ 359 | 360 | # Ionide (cross platform F# VS Code tools) working folder 361 | .ionide/ 362 | 363 | # Fody - auto-generated XML schema 364 | FodyWeavers.xsd -------------------------------------------------------------------------------- /RawIntrinsics/SSSE3.cs: -------------------------------------------------------------------------------- 1 | namespace RawIntrinsics 2 | { 3 | public static unsafe partial class SSSE3 4 | { 5 | ///

6 | /// Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". 7 | ///

8 | /// PABSW xmm, xmm 9 | /// __m128i {SI16} 10 | /// __m128i dst {UI16} 11 | public static __m128i _mm_abs_epi16(__m128i a) => System.Runtime.Intrinsics.X86.Ssse3.Abs(a.SI16); 12 | 13 | ///

14 | /// Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". 15 | ///

16 | /// PABSD xmm, xmm 17 | /// __m128i {SI32} 18 | /// __m128i dst {UI32} 19 | public static __m128i _mm_abs_epi32(__m128i a) => System.Runtime.Intrinsics.X86.Ssse3.Abs(a.SI32); 20 | 21 | ///

22 | /// Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". 23 | ///

24 | /// PABSB xmm, xmm 25 | /// __m128i {SI8} 26 | /// __m128i dst {UI8} 27 | public static __m128i _mm_abs_epi8(__m128i a) => System.Runtime.Intrinsics.X86.Ssse3.Abs(a.SI8); 28 | 29 | ///

30 | /// Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". 31 | ///

32 | /// PALIGNR xmm, xmm, imm8 33 | /// __m128i {UI8} 34 | /// __m128i {UI8} 35 | /// int {IMM} 36 | /// __m128i dst {UI8} 37 | public static __m128i _mm_alignr_epi8(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Ssse3.AlignRight(a.UI8, b.UI8, (byte)imm8); 38 | 39 | ///

40 | /// Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". 41 | ///

42 | /// PHADDW xmm, xmm 43 | /// __m128i {SI16} 44 | /// __m128i {SI16} 45 | /// __m128i dst {SI16} 46 | public static __m128i _mm_hadd_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalAdd(a.SI16, b.SI16); 47 | 48 | ///

49 | /// Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". 50 | ///

51 | /// PHADDD xmm, xmm 52 | /// __m128i {SI32} 53 | /// __m128i {SI32} 54 | /// __m128i dst {SI32} 55 | public static __m128i _mm_hadd_epi32(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalAdd(a.SI32, b.SI32); 56 | 57 | ///

58 | /// Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". 59 | ///

60 | /// PHADDSW xmm, xmm 61 | /// __m128i {SI16} 62 | /// __m128i {SI16} 63 | /// __m128i dst {SI16} 64 | public static __m128i _mm_hadds_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalAddSaturate(a.SI16, b.SI16); 65 | 66 | ///

67 | /// Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". 68 | ///

69 | /// PHSUBW xmm, xmm 70 | /// __m128i {SI16} 71 | /// __m128i {SI16} 72 | /// __m128i dst {SI16} 73 | public static __m128i _mm_hsub_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalSubtract(a.SI16, b.SI16); 74 | 75 | ///

76 | /// Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". 77 | ///

78 | /// PHSUBD xmm, xmm 79 | /// __m128i {SI32} 80 | /// __m128i {SI32} 81 | /// __m128i dst {SI32} 82 | public static __m128i _mm_hsub_epi32(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalSubtract(a.SI32, b.SI32); 83 | 84 | ///

85 | /// Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". 86 | ///

87 | /// PHSUBSW xmm, xmm 88 | /// __m128i {SI16} 89 | /// __m128i {SI16} 90 | /// __m128i dst {SI16} 91 | public static __m128i _mm_hsubs_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalSubtractSaturate(a.SI16, b.SI16); 92 | 93 | ///

/// <summary>
/// Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst".
/// </summary>
/// <remarks>Instruction: PMADDUBSW xmm, xmm</remarks>
/// <param name="a">__m128i {UI8}</param>
/// <param name="b">__m128i {SI8}</param>
/// <returns>__m128i dst {SI16}</returns>
public static __m128i _mm_maddubs_epi16(__m128i a, __m128i b)
{
    return System.Runtime.Intrinsics.X86.Ssse3.MultiplyAddAdjacent(a.UI8, b.SI8);
}


/// <summary>
/// Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".
/// </summary>
/// <remarks>Instruction: PMULHRSW xmm, xmm</remarks>
/// <param name="a">__m128i {SI16}</param>
/// <param name="b">__m128i {SI16}</param>
/// <returns>__m128i dst {UI16}</returns>
public static __m128i _mm_mulhrs_epi16(__m128i a, __m128i b)
{
    return System.Runtime.Intrinsics.X86.Ssse3.MultiplyHighRoundScale(a.SI16, b.SI16);
}


/// <summary>
/// Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".
/// </summary>
/// <remarks>Instruction: PSHUFB xmm, xmm</remarks>
/// <param name="a">__m128i {UI8}</param>
/// <param name="b">__m128i {UI8}</param>
/// <returns>__m128i dst {UI8}</returns>
public static __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
{
    return System.Runtime.Intrinsics.X86.Ssse3.Shuffle(a.UI8, b.UI8);
}


/// <summary>
/// Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.
/// </summary>
/// <remarks>Instruction: PSIGNW xmm, xmm</remarks>
/// <param name="a">__m128i {SI16}</param>
/// <param name="b">__m128i {SI16}</param>
/// <returns>__m128i dst {UI16}</returns>
public static __m128i _mm_sign_epi16(__m128i a, __m128i b)
{
    return System.Runtime.Intrinsics.X86.Ssse3.Sign(a.SI16, b.SI16);
}


/// <summary>
/// Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.
/// </summary>
/// <remarks>Instruction: PSIGND xmm, xmm</remarks>
/// <param name="a">__m128i {SI32}</param>
/// <param name="b">__m128i {SI32}</param>
/// <returns>__m128i dst {UI32}</returns>
public static __m128i _mm_sign_epi32(__m128i a, __m128i b)
{
    return System.Runtime.Intrinsics.X86.Ssse3.Sign(a.SI32, b.SI32);
}


/// <summary>
/// Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.
/// </summary>
/// <remarks>Instruction: PSIGNB xmm, xmm</remarks>
/// <param name="a">__m128i {SI8}</param>
/// <param name="b">__m128i {SI8}</param>
/// <returns>__m128i dst {UI8}</returns>
public static __m128i _mm_sign_epi8(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.Sign(a.SI8, b.SI8);

} // class SSSE3
} // namespace RawIntrinsics

--------------------------------------------------------------------------------
/RawIntrinsics/Types.cs:
--------------------------------------------------------------------------------
namespace RawIntrinsics
{
    /// <summary>
    /// 64-bit vector wrapper. The value is stored as a byte vector and can be
    /// reinterpreted as any supported element type through the typed properties.
    /// </summary>
    public struct __m64
    {
        // Backing storage; every implicit conversion normalizes to a byte vector.
        private System.Runtime.Intrinsics.Vector64<byte> _;

        // Typed views: each property reinterprets the stored bits without converting values.
        public System.Runtime.Intrinsics.Vector64<byte> UI8 => System.Runtime.Intrinsics.Vector64.AsByte(_);
        public System.Runtime.Intrinsics.Vector64<sbyte> SI8 => System.Runtime.Intrinsics.Vector64.AsSByte(_);
        public System.Runtime.Intrinsics.Vector64<ushort> UI16 => System.Runtime.Intrinsics.Vector64.AsUInt16(_);
        public System.Runtime.Intrinsics.Vector64<short> SI16 => System.Runtime.Intrinsics.Vector64.AsInt16(_);
        public System.Runtime.Intrinsics.Vector64<uint> UI32 => System.Runtime.Intrinsics.Vector64.AsUInt32(_);
        public System.Runtime.Intrinsics.Vector64<int> SI32 => System.Runtime.Intrinsics.Vector64.AsInt32(_);
        public System.Runtime.Intrinsics.Vector64<ulong> UI64 => System.Runtime.Intrinsics.Vector64.AsUInt64(_);
        public System.Runtime.Intrinsics.Vector64<long> SI64 => System.Runtime.Intrinsics.Vector64.AsInt64(_);
        public System.Runtime.Intrinsics.Vector64<float> FP32 => System.Runtime.Intrinsics.Vector64.AsSingle(_);
        public System.Runtime.Intrinsics.Vector64<double> FP64 => System.Runtime.Intrinsics.Vector64.AsDouble(_);

        // Implicit conversions from every supported element type (one overload per type).
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<byte> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<sbyte> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<ushort> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<short> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<uint> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<int> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<ulong> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<long> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<float> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
        public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<double> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
    }

    /// <summary>
    /// 128-bit vector wrapper (__m128). The value is stored as a byte vector and can be
    /// reinterpreted as any supported element type through the typed properties.
    /// </summary>
    public struct __m128
    {
        // Backing storage; every implicit conversion normalizes to a byte vector.
        private System.Runtime.Intrinsics.Vector128<byte> _;

        // Typed views: each property reinterprets the stored bits without converting values.
        public System.Runtime.Intrinsics.Vector128<byte> UI8 => System.Runtime.Intrinsics.Vector128.AsByte(_);
        public System.Runtime.Intrinsics.Vector128<sbyte> SI8 => System.Runtime.Intrinsics.Vector128.AsSByte(_);
        public System.Runtime.Intrinsics.Vector128<ushort> UI16 => System.Runtime.Intrinsics.Vector128.AsUInt16(_);
        public System.Runtime.Intrinsics.Vector128<short> SI16 => System.Runtime.Intrinsics.Vector128.AsInt16(_);
        public System.Runtime.Intrinsics.Vector128<uint> UI32 => System.Runtime.Intrinsics.Vector128.AsUInt32(_);
        public System.Runtime.Intrinsics.Vector128<int> SI32 => System.Runtime.Intrinsics.Vector128.AsInt32(_);
        public System.Runtime.Intrinsics.Vector128<ulong> UI64 => System.Runtime.Intrinsics.Vector128.AsUInt64(_);
        public System.Runtime.Intrinsics.Vector128<long> SI64 => System.Runtime.Intrinsics.Vector128.AsInt64(_);
        public System.Runtime.Intrinsics.Vector128<float> FP32 => System.Runtime.Intrinsics.Vector128.AsSingle(_);
        public System.Runtime.Intrinsics.Vector128<double> FP64 => System.Runtime.Intrinsics.Vector128.AsDouble(_);

        // Implicit conversions from every supported element type (one overload per type).
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<byte> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<sbyte> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<ushort> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<short> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<uint> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<int> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<ulong> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<long> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<float> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<double> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
    }

    /// <summary>
    /// 128-bit vector wrapper (__m128i). The value is stored as a byte vector and can be
    /// reinterpreted as any supported element type through the typed properties.
    /// </summary>
    public struct __m128i
    {
        // Backing storage; every implicit conversion normalizes to a byte vector.
        private System.Runtime.Intrinsics.Vector128<byte> _;

        // Typed views: each property reinterprets the stored bits without converting values.
        public System.Runtime.Intrinsics.Vector128<byte> UI8 => System.Runtime.Intrinsics.Vector128.AsByte(_);
        public System.Runtime.Intrinsics.Vector128<sbyte> SI8 => System.Runtime.Intrinsics.Vector128.AsSByte(_);
        public System.Runtime.Intrinsics.Vector128<ushort> UI16 => System.Runtime.Intrinsics.Vector128.AsUInt16(_);
        public System.Runtime.Intrinsics.Vector128<short> SI16 => System.Runtime.Intrinsics.Vector128.AsInt16(_);
        public System.Runtime.Intrinsics.Vector128<uint> UI32 => System.Runtime.Intrinsics.Vector128.AsUInt32(_);
        public System.Runtime.Intrinsics.Vector128<int> SI32 => System.Runtime.Intrinsics.Vector128.AsInt32(_);
        public System.Runtime.Intrinsics.Vector128<ulong> UI64 => System.Runtime.Intrinsics.Vector128.AsUInt64(_);
        public System.Runtime.Intrinsics.Vector128<long> SI64 => System.Runtime.Intrinsics.Vector128.AsInt64(_);
        public System.Runtime.Intrinsics.Vector128<float> FP32 => System.Runtime.Intrinsics.Vector128.AsSingle(_);
        public System.Runtime.Intrinsics.Vector128<double> FP64 => System.Runtime.Intrinsics.Vector128.AsDouble(_);

        // Implicit conversions from every supported element type (one overload per type).
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<byte> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<sbyte> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<ushort> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<short> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<uint> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<int> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<ulong> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<long> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<float> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<double> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
    }

    /// <summary>
    /// 128-bit vector wrapper (__m128d). The value is stored as a byte vector and can be
    /// reinterpreted as any supported element type through the typed properties.
    /// </summary>
    public struct __m128d
    {
        // Backing storage; every implicit conversion normalizes to a byte vector.
        private System.Runtime.Intrinsics.Vector128<byte> _;

        // Typed views: each property reinterprets the stored bits without converting values.
        public System.Runtime.Intrinsics.Vector128<byte> UI8 => System.Runtime.Intrinsics.Vector128.AsByte(_);
        public System.Runtime.Intrinsics.Vector128<sbyte> SI8 => System.Runtime.Intrinsics.Vector128.AsSByte(_);
        public System.Runtime.Intrinsics.Vector128<ushort> UI16 => System.Runtime.Intrinsics.Vector128.AsUInt16(_);
        public System.Runtime.Intrinsics.Vector128<short> SI16 => System.Runtime.Intrinsics.Vector128.AsInt16(_);
        public System.Runtime.Intrinsics.Vector128<uint> UI32 => System.Runtime.Intrinsics.Vector128.AsUInt32(_);
        public System.Runtime.Intrinsics.Vector128<int> SI32 => System.Runtime.Intrinsics.Vector128.AsInt32(_);
        public System.Runtime.Intrinsics.Vector128<ulong> UI64 => System.Runtime.Intrinsics.Vector128.AsUInt64(_);
        public System.Runtime.Intrinsics.Vector128<long> SI64 => System.Runtime.Intrinsics.Vector128.AsInt64(_);
        public System.Runtime.Intrinsics.Vector128<float> FP32 => System.Runtime.Intrinsics.Vector128.AsSingle(_);
        public System.Runtime.Intrinsics.Vector128<double> FP64 => System.Runtime.Intrinsics.Vector128.AsDouble(_);

        // Implicit conversions from every supported element type (one overload per type).
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<byte> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<sbyte> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<ushort> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<short> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<uint> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<int> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<ulong> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<long> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<float> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
        public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<double> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
    }

    /// <summary>
    /// 256-bit vector wrapper (__m256). The value is stored as a byte vector and can be
    /// reinterpreted as any supported element type through the typed properties.
    /// </summary>
    public struct __m256
    {
        // Backing storage; every implicit conversion normalizes to a byte vector.
        private System.Runtime.Intrinsics.Vector256<byte> _;

        // Typed views: each property reinterprets the stored bits without converting values.
        public System.Runtime.Intrinsics.Vector256<byte> UI8 => System.Runtime.Intrinsics.Vector256.AsByte(_);
        public System.Runtime.Intrinsics.Vector256<sbyte> SI8 => System.Runtime.Intrinsics.Vector256.AsSByte(_);
        public System.Runtime.Intrinsics.Vector256<ushort> UI16 => System.Runtime.Intrinsics.Vector256.AsUInt16(_);
        public System.Runtime.Intrinsics.Vector256<short> SI16 => System.Runtime.Intrinsics.Vector256.AsInt16(_);
        public System.Runtime.Intrinsics.Vector256<uint> UI32 => System.Runtime.Intrinsics.Vector256.AsUInt32(_);
        public System.Runtime.Intrinsics.Vector256<int> SI32 => System.Runtime.Intrinsics.Vector256.AsInt32(_);
        public System.Runtime.Intrinsics.Vector256<ulong> UI64 => System.Runtime.Intrinsics.Vector256.AsUInt64(_);
        public System.Runtime.Intrinsics.Vector256<long> SI64 => System.Runtime.Intrinsics.Vector256.AsInt64(_);
        public System.Runtime.Intrinsics.Vector256<float> FP32 => System.Runtime.Intrinsics.Vector256.AsSingle(_);
        public System.Runtime.Intrinsics.Vector256<double> FP64 => System.Runtime.Intrinsics.Vector256.AsDouble(_);

        // Implicit conversions from every supported element type (one overload per type).
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<byte> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<sbyte> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<ushort> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<short> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<uint> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<int> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<ulong> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<long> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<float> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<double> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
    }

    /// <summary>
    /// 256-bit vector wrapper (__m256i). The value is stored as a byte vector and can be
    /// reinterpreted as any supported element type through the typed properties.
    /// </summary>
    public struct __m256i
    {
        // Backing storage; every implicit conversion normalizes to a byte vector.
        private System.Runtime.Intrinsics.Vector256<byte> _;

        // Typed views: each property reinterprets the stored bits without converting values.
        public System.Runtime.Intrinsics.Vector256<byte> UI8 => System.Runtime.Intrinsics.Vector256.AsByte(_);
        public System.Runtime.Intrinsics.Vector256<sbyte> SI8 => System.Runtime.Intrinsics.Vector256.AsSByte(_);
        public System.Runtime.Intrinsics.Vector256<ushort> UI16 => System.Runtime.Intrinsics.Vector256.AsUInt16(_);
        public System.Runtime.Intrinsics.Vector256<short> SI16 => System.Runtime.Intrinsics.Vector256.AsInt16(_);
        public System.Runtime.Intrinsics.Vector256<uint> UI32 => System.Runtime.Intrinsics.Vector256.AsUInt32(_);
        public System.Runtime.Intrinsics.Vector256<int> SI32 => System.Runtime.Intrinsics.Vector256.AsInt32(_);
        public System.Runtime.Intrinsics.Vector256<ulong> UI64 => System.Runtime.Intrinsics.Vector256.AsUInt64(_);
        public System.Runtime.Intrinsics.Vector256<long> SI64 => System.Runtime.Intrinsics.Vector256.AsInt64(_);
        public System.Runtime.Intrinsics.Vector256<float> FP32 => System.Runtime.Intrinsics.Vector256.AsSingle(_);
        public System.Runtime.Intrinsics.Vector256<double> FP64 => System.Runtime.Intrinsics.Vector256.AsDouble(_);

        // Implicit conversions from every supported element type (one overload per type).
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<byte> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<sbyte> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<ushort> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<short> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<uint> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<int> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<ulong> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<long> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<float> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<double> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
    }

    /// <summary>
    /// 256-bit vector wrapper (__m256d). The value is stored as a byte vector and can be
    /// reinterpreted as any supported element type through the typed properties.
    /// </summary>
    public struct __m256d
    {
        // Backing storage; every implicit conversion normalizes to a byte vector.
        private System.Runtime.Intrinsics.Vector256<byte> _;

        // Typed views: each property reinterprets the stored bits without converting values.
        public System.Runtime.Intrinsics.Vector256<byte> UI8 => System.Runtime.Intrinsics.Vector256.AsByte(_);
        public System.Runtime.Intrinsics.Vector256<sbyte> SI8 => System.Runtime.Intrinsics.Vector256.AsSByte(_);
        public System.Runtime.Intrinsics.Vector256<ushort> UI16 => System.Runtime.Intrinsics.Vector256.AsUInt16(_);
        public System.Runtime.Intrinsics.Vector256<short> SI16 => System.Runtime.Intrinsics.Vector256.AsInt16(_);
        public System.Runtime.Intrinsics.Vector256<uint> UI32 => System.Runtime.Intrinsics.Vector256.AsUInt32(_);
        public System.Runtime.Intrinsics.Vector256<int> SI32 => System.Runtime.Intrinsics.Vector256.AsInt32(_);
        public System.Runtime.Intrinsics.Vector256<ulong> UI64 => System.Runtime.Intrinsics.Vector256.AsUInt64(_);
        public System.Runtime.Intrinsics.Vector256<long> SI64 => System.Runtime.Intrinsics.Vector256.AsInt64(_);
        public System.Runtime.Intrinsics.Vector256<float> FP32 => System.Runtime.Intrinsics.Vector256.AsSingle(_);
        public System.Runtime.Intrinsics.Vector256<double> FP64 => System.Runtime.Intrinsics.Vector256.AsDouble(_);

        // Implicit conversions from every supported element type (one overload per type).
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<byte> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<sbyte> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<ushort> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<short> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<uint> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<int> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<ulong> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<long> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<float> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
        public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<double> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
    }

}

--------------------------------------------------------------------------------
/RawIntrinsics/FMA.cs:
--------------------------------------------------------------------------------
namespace RawIntrinsics
{
    public static unsafe partial class FMA
    {

/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFMADD132PD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fmadd_pd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFMADD132PS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
/// </summary>
/// <remarks>Instruction: VFMADD132SD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fmadd_sd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddScalar(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
/// </summary>
/// <remarks>Instruction: VFMADD132SS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fmadd_ss(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddScalar(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFMADDSUB132PD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddSubtract(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFMADDSUB132PS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddSubtract(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFMSUB132PD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fmsub_pd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtract(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFMSUB132PS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtract(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
/// </summary>
/// <remarks>Instruction: VFMSUB132SD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fmsub_sd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractScalar(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
/// </summary>
/// <remarks>Instruction: VFMSUB132SS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fmsub_ss(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractScalar(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFMSUBADD132PD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractAdd(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFMSUBADD132PS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractAdd(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFNMADD132PD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fnmadd_pd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegated(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFNMADD132PS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegated(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
/// </summary>
/// <remarks>Instruction: VFNMADD132SD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fnmadd_sd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegatedScalar(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
/// </summary>
/// <remarks>Instruction: VFNMADD132SS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fnmadd_ss(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegatedScalar(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFNMSUB132PD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fnmsub_pd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegated(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".
/// </summary>
/// <remarks>Instruction: VFNMSUB132PS xmm, xmm, xmm</remarks>
/// <param name="a">__m128 {FP32}</param>
/// <param name="b">__m128 {FP32}</param>
/// <param name="c">__m128 {FP32}</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegated(a.FP32, b.FP32, c.FP32);
}


/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
/// </summary>
/// <remarks>Instruction: VFNMSUB132SD xmm, xmm, xmm</remarks>
/// <param name="a">__m128d {FP64}</param>
/// <param name="b">__m128d {FP64}</param>
/// <param name="c">__m128d {FP64}</param>
/// <returns>__m128d dst {FP64}</returns>
public static __m128d _mm_fnmsub_sd(__m128d a, __m128d b, __m128d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegatedScalar(a.FP64, b.FP64, c.FP64);
}


/// <summary>
/// Scalar single-precision negated multiply-subtract. The lower 32-bit elements of <paramref name="a"/>
/// and <paramref name="b"/> are multiplied, and the lower element of <paramref name="c"/> is subtracted
/// from the negated product. The difference is stored in the lower element of the result; the upper
/// 3 packed elements of the result are copied from <paramref name="a"/>.
/// </summary>
/// <remarks>
/// Instruction: <c>VFNMSUB132SS xmm, xmm, xmm</c>. Forwards to <c>Fma.MultiplySubtractNegatedScalar</c>.
/// </remarks>
/// <param name="a">__m128 {FP32} - first multiplicand; also the source of the upper 3 result elements.</param>
/// <param name="b">__m128 {FP32} - second multiplicand.</param>
/// <param name="c">__m128 {FP32} - subtrahend.</param>
/// <returns>__m128 dst {FP32}</returns>
public static __m128 _mm_fnmsub_ss(__m128 a, __m128 b, __m128 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegatedScalar(a.FP32, b.FP32, c.FP32);
}

///

/// <summary>
/// Packed double-precision multiply-add on 256-bit vectors. Each 64-bit element of
/// <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>, and the product
/// is added to the matching element of <paramref name="c"/>. The results are stored in the returned
/// vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFMADD132PD ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplyAdd</c>.
/// </remarks>
/// <param name="a">__m256d {FP64} - first multiplicand.</param>
/// <param name="b">__m256d {FP64} - second multiplicand.</param>
/// <param name="c">__m256d {FP64} - addend.</param>
/// <returns>__m256d dst {FP64}</returns>
public static __m256d _mm256_fmadd_pd(__m256d a, __m256d b, __m256d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(a.FP64, b.FP64, c.FP64);
}

///

/// <summary>
/// Packed single-precision multiply-add on 256-bit vectors. Each 32-bit element of
/// <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>, and the product
/// is added to the matching element of <paramref name="c"/>. The results are stored in the returned
/// vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFMADD132PS ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplyAdd</c>.
/// </remarks>
/// <param name="a">__m256 {FP32} - first multiplicand.</param>
/// <param name="b">__m256 {FP32} - second multiplicand.</param>
/// <param name="c">__m256 {FP32} - addend.</param>
/// <returns>__m256 dst {FP32}</returns>
public static __m256 _mm256_fmadd_ps(__m256 a, __m256 b, __m256 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(a.FP32, b.FP32, c.FP32);
}

///

/// <summary>
/// Packed double-precision multiply with alternating add/subtract on 256-bit vectors. Each 64-bit
/// element of <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>;
/// the elements of <paramref name="c"/> are then alternatively added to and subtracted from the
/// products. The results are stored in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFMADDSUB132PD ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplyAddSubtract</c>.
/// </remarks>
/// <param name="a">__m256d {FP64} - first multiplicand.</param>
/// <param name="b">__m256d {FP64} - second multiplicand.</param>
/// <param name="c">__m256d {FP64} - values alternatively added and subtracted.</param>
/// <returns>__m256d dst {FP64}</returns>
public static __m256d _mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddSubtract(a.FP64, b.FP64, c.FP64);
}

///

/// <summary>
/// Packed single-precision multiply with alternating add/subtract on 256-bit vectors. Each 32-bit
/// element of <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>;
/// the elements of <paramref name="c"/> are then alternatively added to and subtracted from the
/// products. The results are stored in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFMADDSUB132PS ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplyAddSubtract</c>.
/// </remarks>
/// <param name="a">__m256 {FP32} - first multiplicand.</param>
/// <param name="b">__m256 {FP32} - second multiplicand.</param>
/// <param name="c">__m256 {FP32} - values alternatively added and subtracted.</param>
/// <returns>__m256 dst {FP32}</returns>
public static __m256 _mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddSubtract(a.FP32, b.FP32, c.FP32);
}

///

/// <summary>
/// Packed double-precision multiply-subtract on 256-bit vectors. Each 64-bit element of
/// <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>, and the
/// matching element of <paramref name="c"/> is subtracted from the product. The results are stored
/// in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFMSUB132PD ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplySubtract</c>.
/// </remarks>
/// <param name="a">__m256d {FP64} - first multiplicand.</param>
/// <param name="b">__m256d {FP64} - second multiplicand.</param>
/// <param name="c">__m256d {FP64} - subtrahend.</param>
/// <returns>__m256d dst {FP64}</returns>
public static __m256d _mm256_fmsub_pd(__m256d a, __m256d b, __m256d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtract(a.FP64, b.FP64, c.FP64);
}

///

/// <summary>
/// Packed single-precision multiply-subtract on 256-bit vectors. Each 32-bit element of
/// <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>, and the
/// matching element of <paramref name="c"/> is subtracted from the product. The results are stored
/// in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFMSUB132PS ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplySubtract</c>.
/// </remarks>
/// <param name="a">__m256 {FP32} - first multiplicand.</param>
/// <param name="b">__m256 {FP32} - second multiplicand.</param>
/// <param name="c">__m256 {FP32} - subtrahend.</param>
/// <returns>__m256 dst {FP32}</returns>
public static __m256 _mm256_fmsub_ps(__m256 a, __m256 b, __m256 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtract(a.FP32, b.FP32, c.FP32);
}

///

/// <summary>
/// Packed double-precision multiply with alternating subtract/add on 256-bit vectors. Each 64-bit
/// element of <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>;
/// the elements of <paramref name="c"/> are then alternatively subtracted from and added to the
/// products. The results are stored in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFMSUBADD132PD ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplySubtractAdd</c>.
/// </remarks>
/// <param name="a">__m256d {FP64} - first multiplicand.</param>
/// <param name="b">__m256d {FP64} - second multiplicand.</param>
/// <param name="c">__m256d {FP64} - values alternatively subtracted and added.</param>
/// <returns>__m256d dst {FP64}</returns>
public static __m256d _mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractAdd(a.FP64, b.FP64, c.FP64);
}

///

/// <summary>
/// Packed single-precision multiply with alternating subtract/add on 256-bit vectors. Each 32-bit
/// element of <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>;
/// the elements of <paramref name="c"/> are then alternatively subtracted from and added to the
/// products. The results are stored in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFMSUBADD132PS ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplySubtractAdd</c>.
/// </remarks>
/// <param name="a">__m256 {FP32} - first multiplicand.</param>
/// <param name="b">__m256 {FP32} - second multiplicand.</param>
/// <param name="c">__m256 {FP32} - values alternatively subtracted and added.</param>
/// <returns>__m256 dst {FP32}</returns>
public static __m256 _mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractAdd(a.FP32, b.FP32, c.FP32);
}

///

/// <summary>
/// Packed double-precision negated multiply-add on 256-bit vectors. Each 64-bit element of
/// <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>, and the
/// negated product is added to the matching element of <paramref name="c"/>. The results are stored
/// in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFNMADD132PD ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplyAddNegated</c>.
/// </remarks>
/// <param name="a">__m256d {FP64} - first multiplicand.</param>
/// <param name="b">__m256d {FP64} - second multiplicand.</param>
/// <param name="c">__m256d {FP64} - addend.</param>
/// <returns>__m256d dst {FP64}</returns>
public static __m256d _mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegated(a.FP64, b.FP64, c.FP64);
}

///

/// <summary>
/// Packed single-precision negated multiply-add on 256-bit vectors. Each 32-bit element of
/// <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>, and the
/// negated product is added to the matching element of <paramref name="c"/>. The results are stored
/// in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFNMADD132PS ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplyAddNegated</c>.
/// </remarks>
/// <param name="a">__m256 {FP32} - first multiplicand.</param>
/// <param name="b">__m256 {FP32} - second multiplicand.</param>
/// <param name="c">__m256 {FP32} - addend.</param>
/// <returns>__m256 dst {FP32}</returns>
public static __m256 _mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegated(a.FP32, b.FP32, c.FP32);
}

///

/// <summary>
/// Packed double-precision negated multiply-subtract on 256-bit vectors. Each 64-bit element of
/// <paramref name="a"/> is multiplied by the matching element of <paramref name="b"/>, and the
/// matching element of <paramref name="c"/> is subtracted from the negated product. The results are
/// stored in the returned vector.
/// </summary>
/// <remarks>
/// Instruction: <c>VFNMSUB132PD ymm, ymm, ymm</c>. Forwards to <c>Fma.MultiplySubtractNegated</c>.
/// </remarks>
/// <param name="a">__m256d {FP64} - first multiplicand.</param>
/// <param name="b">__m256d {FP64} - second multiplicand.</param>
/// <param name="c">__m256d {FP64} - subtrahend.</param>
/// <returns>__m256d dst {FP64}</returns>
public static __m256d _mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c)
{
    return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegated(a.FP64, b.FP64, c.FP64);
}

///

316 | /// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". 317 | ///