├── RawIntrinsics
├── Utils.ManuallyAdded.cs
├── RawIntrinsics.csproj
├── MMX.ManuallyAdded.cs
├── SSE.ManuallyAdded.cs
├── SSE42.cs
├── SSE2.ManuallyAdded.cs
├── AVX.ManuallyAdded.cs
├── MMX.cs
├── SSE3.cs
├── Other.cs
├── SSSE3.cs
├── Types.cs
├── FMA.cs
├── SSE41.cs
└── SSE.cs
├── RawIntrinsicsGenerator
├── RawIntrinsicsGenerator.csproj
├── Program.cs
└── Generator.cs
├── README.md
├── Wibic.sln
└── .gitignore
/RawIntrinsics/Utils.ManuallyAdded.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 |     /// <summary>
4 |     /// Helper routines that accompany the intrinsic wrappers.
5 |     /// </summary>
6 |     public static class Utils
7 |     {
8 |         /// <summary>
9 |         /// Packs four selector values into a single shuffle-control integer:
10 |         /// "z" is shifted to bit 6, "y" to bit 4, "x" to bit 2 and "w" stays at bit 0.
11 |         /// </summary>
12 |         /// <param name="z">Value placed at bits 6 and above.</param>
13 |         /// <param name="y">Value placed at bits 4 and above.</param>
14 |         /// <param name="x">Value placed at bits 2 and above.</param>
15 |         /// <param name="w">Value placed at bits 0 and above.</param>
16 |         /// <returns>The bitwise OR of the four shifted values; inputs are not masked.</returns>
17 |         public static int _MM_SHUFFLE(int z, int y, int x, int w)
18 |         {
19 |             int upperPair = (z << 6) | (y << 4);
20 |             int lowerPair = (x << 2) | w;
21 |             return upperPair | lowerPair;
22 |         }
23 |     }
24 | }
25 |
--------------------------------------------------------------------------------
/RawIntrinsics/RawIntrinsics.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net5.0
5 | true
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/RawIntrinsics/MMX.ManuallyAdded.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 |     public static partial class MMX
4 |     {
5 |         /// <summary>
6 |         /// Return vector of type __m64 with all elements set to zero.
7 |         /// </summary>
8 |         /// <returns>__m64 dst {FP32}</returns>
9 |         // Zero lives on the generic Vector64<T>, not on the static Vector64 helper class.
10 |         // Vector64<byte> is used because _mm_set1_pi8 already relies on the implicit
11 |         // Vector64<byte> -> __m64 conversion; an all-zero vector has the same bits for any T.
12 |         public static __m64 _mm_setzero_si64() => System.Runtime.Intrinsics.Vector64<byte>.Zero;
13 |     }
14 | }
--------------------------------------------------------------------------------
/RawIntrinsics/SSE.ManuallyAdded.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 |     public static partial class SSE
4 |     {
5 |         /// <summary>
6 |         /// Return vector of type __m128 with all elements set to zero.
7 |         /// </summary>
8 |         /// <returns>__m128 dst {FP32}</returns>
9 |         // Zero lives on the generic Vector128<T>, not on the static Vector128 helper class.
10 |         // Vector128<float> converts implicitly to __m128, as the generated SSE3 wrappers
11 |         // (e.g. _mm_addsub_ps) already rely on.
12 |         public static __m128 _mm_setzero_ps() => System.Runtime.Intrinsics.Vector128<float>.Zero;
13 |     }
14 | }
--------------------------------------------------------------------------------
/RawIntrinsicsGenerator/RawIntrinsicsGenerator.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net5.0
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/RawIntrinsicsGenerator/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Reflection;
4 | using System.Threading.Tasks;
5 |
6 | namespace RawIntrinsicsGenerator
7 | {
8 |     /// <summary>
9 |     /// Console entry point of the generator project.
10 |     /// </summary>
11 |     public static class Program
12 |     {
13 |         /// <summary>
14 |         /// Calls <c>Generator.Generate</c> for "RawIntrinsics" with an output folder named
15 |         /// "RawIntrinsics" under the application's base directory, then prints that folder.
16 |         /// </summary>
17 |         /// <param name="_">Command-line arguments; not used.</param>
18 |         private static async Task Main(string[] _)
19 |         {
20 |             // AppContext.BaseDirectory replaces Path.GetDirectoryName(Assembly.Location):
21 |             // Location is an empty string for single-file deployments, which makes the
22 |             // directory null and Path.Combine throw. BaseDirectory is never null.
23 |             var savePath = Path.Combine(AppContext.BaseDirectory, "RawIntrinsics");
24 |
25 |             await Generator.Generate("RawIntrinsics", savePath);
26 |
27 |             Console.WriteLine($"Done! Generated files were saved to {savePath}");
28 |         }
29 |     }
30 | }
--------------------------------------------------------------------------------
/RawIntrinsics/SSE42.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 | public static unsafe partial class SSE42 // Wrapper named after the native intrinsic; forwards to System.Runtime.Intrinsics.X86.Sse42.
4 | {
5 | /// <summary>
6 | /// Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst".
7 | /// </summary>
8 | /// <remarks>PCMPGTQ xmm, xmm</remarks>
9 | /// <param name="a">__m128i {SI64}</param>
10 | /// <param name="b">__m128i {SI64}</param>
11 | /// <returns>__m128i dst {UI64}</returns>
12 | public static __m128i _mm_cmpgt_epi64(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Sse42.CompareGreaterThan(a.SI64, b.SI64);
13 |
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/RawIntrinsics/SSE2.ManuallyAdded.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 |     public static partial class SSE2
4 |     {
5 |         /// <summary>
6 |         /// Return vector of type __m128d with all elements set to zero.
7 |         /// </summary>
8 |         /// <returns>__m128d dst {M128}</returns>
9 |         // Zero lives on the generic Vector128<T>. Vector128<double> converts implicitly to
10 |         // __m128d, as the generated SSE3 wrappers (e.g. _mm_addsub_pd) already rely on.
11 |         public static __m128d _mm_setzero_pd() => System.Runtime.Intrinsics.Vector128<double>.Zero;
12 |
13 |         /// <summary>
14 |         /// Return vector of type __m128i with all elements set to zero.
15 |         /// </summary>
16 |         /// <returns>__m128i dst {M128}</returns>
17 |         // Vector128<byte> converts implicitly to __m128i, as the generated AES wrappers
18 |         // (e.g. _mm_aesdec_si128) already rely on; an all-zero vector has the same bits for any T.
19 |         public static __m128i _mm_setzero_si128() => System.Runtime.Intrinsics.Vector128<byte>.Zero;
20 |     }
21 | }
--------------------------------------------------------------------------------
/RawIntrinsics/AVX.ManuallyAdded.cs:
--------------------------------------------------------------------------------
1 | using System.Runtime.Intrinsics;
2 |
3 | namespace RawIntrinsics
4 | {
5 |     // Zero is a member of the generic Vector256<T>, not of the static Vector256 helper class,
6 |     // so each wrapper below names an element type explicitly.
7 |     // NOTE(review): the element types assume Types.cs declares implicit conversions
8 |     // Vector256<double> -> __m256d, Vector256<float> -> __m256 and Vector256<byte> -> __m256i,
9 |     // mirroring the 128-bit types — confirm against Types.cs.
10 |     public static partial class AVX
11 |     {
12 |         /// <summary>
13 |         /// Return vector of type __m256d with all elements set to zero.
14 |         /// </summary>
15 |         /// <returns>__m256d dst {FP64}</returns>
16 |         public static __m256d _mm256_setzero_pd() => Vector256<double>.Zero;
17 |
18 |         /// <summary>
19 |         /// Return vector of type __m256 with all elements set to zero.
20 |         /// </summary>
21 |         /// <returns>__m256 dst {FP32}</returns>
22 |         public static __m256 _mm256_setzero_ps() => Vector256<float>.Zero;
23 |
24 |         /// <summary>
25 |         /// Return vector of type __m256i with all elements set to zero.
26 |         /// </summary>
27 |         /// <returns>__m256i dst {M256}</returns>
28 |         public static __m256i _mm256_setzero_si256() => Vector256<byte>.Zero;
29 |     }
30 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Wibic.RawIntrinsics
2 |
3 | .NET intrinsics exposed as methods named after the native intrinsic functions.
4 | These methods can make it a lot easier to port existing SIMD-related C++ code to C# (I hope).
5 |
6 | Something like this:
7 |
8 | ```csharp
9 | var v = _mm256_set1_epi8(1);
10 |
11 | var end = data + size;
12 | var ptr = data;
13 |
14 | __m256i tmp;
15 | __m256i global_sum = _mm256_setzero_si256();
16 | __m256i local_sum;
17 |
18 | while (ptr + 255 * 32 < end)
19 | {
20 | local_sum = _mm256_setzero_si256();
21 |
22 | for (var i = 0; i < 255; i++, ptr += 32)
23 | {
24 | __m256i src = _mm256_loadu_si256((__m256i*)ptr);
25 | __m256i eq = _mm256_cmpeq_epi8(src, v);
26 |
27 | local_sum = _mm256_sub_epi8(local_sum, eq);
28 | }
29 |
30 | tmp = _mm256_sad_epu8(local_sum, _mm256_setzero_si256());
31 | global_sum = _mm256_add_epi64(global_sum, tmp);
32 | }
33 | ```
34 |
35 | All methods are generated by parsing data from these two sources:
36 |
37 | https://github.com/dotnet/runtime/tree/master/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics
38 |
39 | and:
40 |
41 | https://software.intel.com/sites/landingpage/IntrinsicsGuide/files/data-latest.xml
42 |
43 | PS: The generator project is also included in the repo.
44 |
--------------------------------------------------------------------------------
/Wibic.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RawIntrinsics", "RawIntrinsics\RawIntrinsics.csproj", "{2C8F57F8-6560-42F3-A24C-C649FA350F72}"
4 | EndProject
5 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RawIntrinsicsGenerator", "RawIntrinsicsGenerator\RawIntrinsicsGenerator.csproj", "{A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}"
6 | EndProject
7 | Global
8 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
9 | Debug|Any CPU = Debug|Any CPU
10 | Release|Any CPU = Release|Any CPU
11 | EndGlobalSection
12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
13 | {2C8F57F8-6560-42F3-A24C-C649FA350F72}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
14 | {2C8F57F8-6560-42F3-A24C-C649FA350F72}.Debug|Any CPU.Build.0 = Debug|Any CPU
15 | {2C8F57F8-6560-42F3-A24C-C649FA350F72}.Release|Any CPU.ActiveCfg = Release|Any CPU
16 | {2C8F57F8-6560-42F3-A24C-C649FA350F72}.Release|Any CPU.Build.0 = Release|Any CPU
17 | {A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
18 | {A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}.Debug|Any CPU.Build.0 = Debug|Any CPU
19 | {A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}.Release|Any CPU.ActiveCfg = Release|Any CPU
20 | {A701F8FF-AA1F-4F24-ADC7-7DFB7D0E7EDB}.Release|Any CPU.Build.0 = Release|Any CPU
21 | EndGlobalSection
22 | EndGlobal
23 |
--------------------------------------------------------------------------------
/RawIntrinsics/MMX.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 | public static unsafe partial class MMX // Wrappers named after the native MMX set intrinsics; each forwards to System.Runtime.Intrinsics.Vector64.Create.
4 | {
5 | /// <summary>
6 | /// Broadcast 16-bit integer "a" to all elements of "dst".
7 | /// </summary>
8 | ///
9 | /// <param name="a">short {UI16}</param>
10 | /// <returns>__m64 dst {FP32}</returns>
11 | public static __m64 _mm_set1_pi16(short a) => System.Runtime.Intrinsics.Vector64.Create((ushort)a);
12 |
13 | /// <summary>
14 | /// Broadcast 32-bit integer "a" to all elements of "dst".
15 | /// </summary>
16 | ///
17 | /// <param name="a">int {UI32}</param>
18 | /// <returns>__m64 dst {FP32}</returns>
19 | public static __m64 _mm_set1_pi32(int a) => System.Runtime.Intrinsics.Vector64.Create((uint)a);
20 |
21 | /// <summary>
22 | /// Broadcast 8-bit integer "a" to all elements of "dst".
23 | /// </summary>
24 | ///
25 | /// <param name="a">byte {UI8}</param>
26 | /// <returns>__m64 dst {FP32}</returns>
27 | public static __m64 _mm_set1_pi8(byte a) => System.Runtime.Intrinsics.Vector64.Create(a);
28 |
29 | /// <summary>
30 | /// Set packed 16-bit integers in "dst" with the supplied values in reverse order.
31 | /// </summary>
32 | ///
33 | /// <param name="e3">short {UI16}</param>
34 | /// <param name="e2">short {UI16}</param>
35 | /// <param name="e1">short {UI16}</param>
36 | /// <param name="e0">short {UI16}</param>
37 | /// <returns>__m64 dst {FP32}</returns>
38 | public static __m64 _mm_setr_pi16(short e3, short e2, short e1, short e0) => System.Runtime.Intrinsics.Vector64.Create((ushort)e3, (ushort)e2, (ushort)e1, (ushort)e0);
39 |
40 | /// <summary>
41 | /// Set packed 32-bit integers in "dst" with the supplied values in reverse order.
42 | /// </summary>
43 | ///
44 | /// <param name="e1">int {UI32}</param>
45 | /// <param name="e0">int {UI32}</param>
46 | /// <returns>__m64 dst {FP32}</returns>
47 | public static __m64 _mm_setr_pi32(int e1, int e0) => System.Runtime.Intrinsics.Vector64.Create((uint)e1, (uint)e0);
48 |
49 | /// <summary>
50 | /// Set packed 8-bit integers in "dst" with the supplied values in reverse order.
51 | /// </summary>
52 | ///
53 | /// <param name="e7">byte {UI8}</param>
54 | /// <param name="e6">byte {UI8}</param>
55 | /// <param name="e5">byte {UI8}</param>
56 | /// <param name="e4">byte {UI8}</param>
57 | /// <param name="e3">byte {UI8}</param>
58 | /// <param name="e2">byte {UI8}</param>
59 | /// <param name="e1">byte {UI8}</param>
60 | /// <param name="e0">byte {UI8}</param>
61 | /// <returns>__m64 dst {FP32}</returns>
62 | public static __m64 _mm_setr_pi8(byte e7, byte e6, byte e5, byte e4, byte e3, byte e2, byte e1, byte e0) => System.Runtime.Intrinsics.Vector64.Create(e7, e6, e5, e4, e3, e2, e1, e0);
63 |
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/RawIntrinsics/SSE3.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 | public static unsafe partial class SSE3 // Wrappers named after the native SSE3 intrinsics; each forwards to System.Runtime.Intrinsics.X86.Sse3.
4 | {
5 | /// <summary>
6 | /// Alternatively add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst".
7 | /// </summary>
8 | /// <remarks>ADDSUBPD xmm, xmm</remarks>
9 | /// <param name="a">__m128d {FP64}</param>
10 | /// <param name="b">__m128d {FP64}</param>
11 | /// <returns>__m128d dst {FP64}</returns>
12 | public static __m128d _mm_addsub_pd(__m128d a, __m128d b) => System.Runtime.Intrinsics.X86.Sse3.AddSubtract(a.FP64, b.FP64);
13 |
14 | /// <summary>
15 | /// Alternatively add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst".
16 | /// </summary>
17 | /// <remarks>ADDSUBPS xmm, xmm</remarks>
18 | /// <param name="a">__m128 {FP32}</param>
19 | /// <param name="b">__m128 {FP32}</param>
20 | /// <returns>__m128 dst {FP32}</returns>
21 | public static __m128 _mm_addsub_ps(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse3.AddSubtract(a.FP32, b.FP32);
22 |
23 | /// <summary>
24 | /// Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst".
25 | /// </summary>
26 | /// <remarks>HADDPD xmm, xmm</remarks>
27 | /// <param name="a">__m128d {FP64}</param>
28 | /// <param name="b">__m128d {FP64}</param>
29 | /// <returns>__m128d dst {FP64}</returns>
30 | public static __m128d _mm_hadd_pd(__m128d a, __m128d b) => System.Runtime.Intrinsics.X86.Sse3.HorizontalAdd(a.FP64, b.FP64);
31 |
32 | /// <summary>
33 | /// Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst".
34 | /// </summary>
35 | /// <remarks>HADDPS xmm, xmm</remarks>
36 | /// <param name="a">__m128 {FP32}</param>
37 | /// <param name="b">__m128 {FP32}</param>
38 | /// <returns>__m128 dst {FP32}</returns>
39 | public static __m128 _mm_hadd_ps(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse3.HorizontalAdd(a.FP32, b.FP32);
40 |
41 | /// <summary>
42 | /// Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst".
43 | /// </summary>
44 | /// <remarks>HSUBPD xmm, xmm</remarks>
45 | /// <param name="a">__m128d {FP64}</param>
46 | /// <param name="b">__m128d {FP64}</param>
47 | /// <returns>__m128d dst {FP64}</returns>
48 | public static __m128d _mm_hsub_pd(__m128d a, __m128d b) => System.Runtime.Intrinsics.X86.Sse3.HorizontalSubtract(a.FP64, b.FP64);
49 |
50 | /// <summary>
51 | /// Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst".
52 | /// </summary>
53 | /// <remarks>HSUBPS xmm, xmm</remarks>
54 | /// <param name="a">__m128 {FP32}</param>
55 | /// <param name="b">__m128 {FP32}</param>
56 | /// <returns>__m128 dst {FP32}</returns>
57 | public static __m128 _mm_hsub_ps(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse3.HorizontalSubtract(a.FP32, b.FP32);
58 |
59 | /// <summary>
60 | /// Load 128-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm_loadu_si128" when the data crosses a cache line boundary.
61 | /// </summary>
62 | /// <remarks>LDDQU xmm, m128</remarks>
63 | /// <param name="mem_addr">__m128i {M128}</param>
64 | /// <returns>__m128i dst {M128}</returns>
65 | public static __m128i _mm_lddqu_si128(__m128i* mem_addr) => System.Runtime.Intrinsics.X86.Sse3.LoadDquVector128((sbyte*)mem_addr);
66 |
67 | /// <summary>
68 | /// Load a double-precision (64-bit) floating-point element from memory into both elements of "dst".
69 | /// </summary>
70 | /// <remarks>MOVDDUP xmm, m64</remarks>
71 | /// <param name="mem_addr">double {FP64}</param>
72 | /// <returns>__m128d dst {FP64}</returns>
73 | public static __m128d _mm_loaddup_pd(double* mem_addr) => System.Runtime.Intrinsics.X86.Sse3.LoadAndDuplicateToVector128(mem_addr);
74 |
75 | /// <summary>
76 | /// Duplicate the low double-precision (64-bit) floating-point element from "a", and store the results in "dst".
77 | /// </summary>
78 | /// <remarks>MOVDDUP xmm, xmm</remarks>
79 | /// <param name="a">__m128d {FP64}</param>
80 | /// <returns>__m128d dst {FP64}</returns>
81 | public static __m128d _mm_movedup_pd(__m128d a) => System.Runtime.Intrinsics.X86.Sse3.MoveAndDuplicate(a.FP64);
82 |
83 | /// <summary>
84 | /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst".
85 | /// </summary>
86 | /// <remarks>MOVSHDUP xmm, xmm</remarks>
87 | /// <param name="a">__m128 {FP32}</param>
88 | /// <returns>__m128 dst {FP32}</returns>
89 | public static __m128 _mm_movehdup_ps(__m128 a) => System.Runtime.Intrinsics.X86.Sse3.MoveHighAndDuplicate(a.FP32);
90 |
91 | /// <summary>
92 | /// Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst".
93 | /// </summary>
94 | /// <remarks>MOVSLDUP xmm, xmm</remarks>
95 | /// <param name="a">__m128 {FP32}</param>
96 | /// <returns>__m128 dst {FP32}</returns>
97 | public static __m128 _mm_moveldup_ps(__m128 a) => System.Runtime.Intrinsics.X86.Sse3.MoveLowAndDuplicate(a.FP32);
98 |
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/RawIntrinsics/Other.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 | public static unsafe partial class Other // Wrappers that forward to the Aes, Pclmulqdq, Popcnt and Bmi1 classes in System.Runtime.Intrinsics.X86.
4 | {
5 | /// <summary>
6 | /// Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst".
7 | /// </summary>
8 | /// <remarks>AESDEC xmm, xmm</remarks>
9 | /// <param name="a">__m128i {M128}</param>
10 | /// <param name="RoundKey">__m128i {M128}</param>
11 | /// <returns>__m128i dst {M128}</returns>
12 | public static __m128i _mm_aesdec_si128(__m128i a, __m128i RoundKey) => System.Runtime.Intrinsics.X86.Aes.Decrypt(a.UI8, RoundKey.UI8);
13 |
14 | /// <summary>
15 | /// Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst".
16 | /// </summary>
17 | /// <remarks>AESDECLAST xmm, xmm</remarks>
18 | /// <param name="a">__m128i {M128}</param>
19 | /// <param name="RoundKey">__m128i {M128}</param>
20 | /// <returns>__m128i dst {M128}</returns>
21 | public static __m128i _mm_aesdeclast_si128(__m128i a, __m128i RoundKey) => System.Runtime.Intrinsics.X86.Aes.DecryptLast(a.UI8, RoundKey.UI8);
22 |
23 | /// <summary>
24 | /// Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst".
25 | /// </summary>
26 | /// <remarks>AESENC xmm, xmm</remarks>
27 | /// <param name="a">__m128i {M128}</param>
28 | /// <param name="RoundKey">__m128i {M128}</param>
29 | /// <returns>__m128i dst {M128}</returns>
30 | public static __m128i _mm_aesenc_si128(__m128i a, __m128i RoundKey) => System.Runtime.Intrinsics.X86.Aes.Encrypt(a.UI8, RoundKey.UI8);
31 |
32 | /// <summary>
33 | /// Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst".
34 | /// </summary>
35 | /// <remarks>AESENCLAST xmm, xmm</remarks>
36 | /// <param name="a">__m128i {M128}</param>
37 | /// <param name="RoundKey">__m128i {M128}</param>
38 | /// <returns>__m128i dst {M128}</returns>
39 | public static __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) => System.Runtime.Intrinsics.X86.Aes.EncryptLast(a.UI8, RoundKey.UI8);
40 |
41 | /// <summary>
42 | /// Perform the InvMixColumns transformation on "a" and store the result in "dst".
43 | /// </summary>
44 | /// <remarks>AESIMC xmm, xmm</remarks>
45 | /// <param name="a">__m128i {M128}</param>
46 | /// <returns>__m128i dst {M128}</returns>
47 | public static __m128i _mm_aesimc_si128(__m128i a) => System.Runtime.Intrinsics.X86.Aes.InverseMixColumns(a.UI8);
48 |
49 | /// <summary>
50 | /// Assist in expanding the AES cipher key by computing steps towards generating a round key for encryption cipher using data from "a" and an 8-bit round constant specified in "imm8", and store the result in "dst".
51 | /// </summary>
52 | /// <remarks>AESKEYGENASSIST xmm, xmm, imm8</remarks>
53 | /// <param name="a">__m128i {M128}</param>
54 | /// <param name="imm8">int {IMM}</param>
55 | /// <returns>__m128i dst {M128}</returns>
56 | public static __m128i _mm_aeskeygenassist_si128(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Aes.KeygenAssist(a.UI8, (byte)imm8);
57 |
58 | /// <summary>
59 | /// Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst".
60 | /// </summary>
61 | /// <remarks>PCLMULQDQ xmm, xmm, imm8</remarks>
62 | /// <param name="a">__m128i {M128}</param>
63 | /// <param name="b">__m128i {M128}</param>
64 | /// <param name="imm8">int {IMM}</param>
65 | /// <returns>__m128i dst {M128}</returns>
66 | public static __m128i _mm_clmulepi64_si128(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(a.SI64, b.SI64, (byte)imm8);
67 |
68 | /// <summary>
69 | /// Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst".
70 | /// </summary>
71 | /// <remarks>POPCNT r32, r32</remarks>
72 | /// <param name="a">int {UI32}</param>
73 | /// <returns>int dst {UI32}</returns>
74 | public static int _mm_popcnt_u32(int a) => (int)System.Runtime.Intrinsics.X86.Popcnt.PopCount((uint)a);
75 |
76 | /// <summary>
77 | /// Count the number of bits set to 1 in unsigned 64-bit integer "a", and return that count in "dst".
78 | /// </summary>
79 | /// <remarks>POPCNT r64, r64</remarks>
80 | /// <param name="a">long {UI64}</param>
81 | /// <returns>long dst {UI64}</returns>
82 | public static long _mm_popcnt_u64(long a) => (long)System.Runtime.Intrinsics.X86.Popcnt.X64.PopCount((ulong)a);
83 |
84 | /// <summary>
85 | /// Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst".
86 | /// </summary>
87 | /// <remarks>TZCNT r32, r32</remarks>
88 | /// <param name="a">int {UI32}</param>
89 | /// <returns>int dst {UI32}</returns>
90 | public static int _mm_tzcnt_32(int a) => (int)System.Runtime.Intrinsics.X86.Bmi1.TrailingZeroCount((uint)a);
91 |
92 | /// <summary>
93 | /// Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst".
94 | /// </summary>
95 | /// <remarks>TZCNT r64, r64</remarks>
96 | /// <param name="a">long {UI64}</param>
97 | /// <returns>long dst {UI64}</returns>
98 | public static long _mm_tzcnt_64(long a) => (long)System.Runtime.Intrinsics.X86.Bmi1.X64.TrailingZeroCount((ulong)a);
99 |
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 | *.DotSettings.user
13 |
14 | # User-specific files (MonoDevelop/Xamarin Studio)
15 | *.userprefs
16 |
17 | # Mono auto generated files
18 | mono_crash.*
19 |
20 | # Build results
21 | [Dd]ebug/
22 | [Dd]ebugPublic/
23 | [Rr]elease/
24 | [Rr]eleases/
25 | x64/
26 | x86/
27 | [Ww][Ii][Nn]32/
28 | [Aa][Rr][Mm]/
29 | [Aa][Rr][Mm]64/
30 | bld/
31 | [Bb]in/
32 | [Oo]bj/
33 | [Ll]og/
34 | [Ll]ogs/
35 | .idea/
36 |
37 | # Visual Studio 2015/2017 cache/options directory
38 | .vs/
39 | # Uncomment if you have tasks that create the project's static files in wwwroot
40 | #wwwroot/
41 |
42 | # Visual Studio 2017 auto generated files
43 | Generated\ Files/
44 |
45 | # MSTest test Results
46 | [Tt]est[Rr]esult*/
47 | [Bb]uild[Ll]og.*
48 |
49 | # NUnit
50 | *.VisualState.xml
51 | TestResult.xml
52 | nunit-*.xml
53 |
54 | # Build Results of an ATL Project
55 | [Dd]ebugPS/
56 | [Rr]eleasePS/
57 | dlldata.c
58 |
59 | # Benchmark Results
60 | BenchmarkDotNet.Artifacts/
61 |
62 | # .NET Core
63 | project.lock.json
64 | project.fragment.lock.json
65 | artifacts/
66 |
67 | # ASP.NET Scaffolding
68 | ScaffoldingReadMe.txt
69 |
70 | # StyleCop
71 | StyleCopReport.xml
72 |
73 | # Files built by Visual Studio
74 | *_i.c
75 | *_p.c
76 | *_h.h
77 | *.ilk
78 | *.meta
79 | *.obj
80 | *.iobj
81 | *.pch
82 | *.pdb
83 | *.ipdb
84 | *.pgc
85 | *.pgd
86 | *.rsp
87 | *.sbr
88 | *.tlb
89 | *.tli
90 | *.tlh
91 | *.tmp
92 | *.tmp_proj
93 | *_wpftmp.csproj
94 | *.log
95 | *.vspscc
96 | *.vssscc
97 | .builds
98 | *.pidb
99 | *.svclog
100 | *.scc
101 |
102 | # Chutzpah Test files
103 | _Chutzpah*
104 |
105 | # Visual C++ cache files
106 | ipch/
107 | *.aps
108 | *.ncb
109 | *.opendb
110 | *.opensdf
111 | *.sdf
112 | *.cachefile
113 | *.VC.db
114 | *.VC.VC.opendb
115 |
116 | # Visual Studio profiler
117 | *.psess
118 | *.vsp
119 | *.vspx
120 | *.sap
121 |
122 | # Visual Studio Trace Files
123 | *.e2e
124 |
125 | # TFS 2012 Local Workspace
126 | $tf/
127 |
128 | # Guidance Automation Toolkit
129 | *.gpState
130 |
131 | # ReSharper is a .NET coding add-in
132 | _ReSharper*/
133 | *.[Rr]e[Ss]harper
134 | *.DotSettings.user
135 |
136 | # TeamCity is a build add-in
137 | _TeamCity*
138 |
139 | # DotCover is a Code Coverage Tool
140 | *.dotCover
141 |
142 | # AxoCover is a Code Coverage Tool
143 | .axoCover/*
144 | !.axoCover/settings.json
145 |
146 | # Coverlet is a free, cross platform Code Coverage Tool
147 | coverage*.json
148 | coverage*.xml
149 | coverage*.info
150 |
151 | # Visual Studio code coverage results
152 | *.coverage
153 | *.coveragexml
154 |
155 | # NCrunch
156 | _NCrunch_*
157 | .*crunch*.local.xml
158 | nCrunchTemp_*
159 |
160 | # MightyMoose
161 | *.mm.*
162 | AutoTest.Net/
163 |
164 | # Web workbench (sass)
165 | .sass-cache/
166 |
167 | # Installshield output folder
168 | [Ee]xpress/
169 |
170 | # DocProject is a documentation generator add-in
171 | DocProject/buildhelp/
172 | DocProject/Help/*.HxT
173 | DocProject/Help/*.HxC
174 | DocProject/Help/*.hhc
175 | DocProject/Help/*.hhk
176 | DocProject/Help/*.hhp
177 | DocProject/Help/Html2
178 | DocProject/Help/html
179 |
180 | # Click-Once directory
181 | publish/
182 |
183 | # Publish Web Output
184 | *.[Pp]ublish.xml
185 | *.azurePubxml
186 | # Note: Comment the next line if you want to checkin your web deploy settings,
187 | # but database connection strings (with potential passwords) will be unencrypted
188 | *.pubxml
189 | *.publishproj
190 |
191 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
192 | # checkin your Azure Web App publish settings, but sensitive information contained
193 | # in these scripts will be unencrypted
194 | PublishScripts/
195 |
196 | # NuGet Packages
197 | *.nupkg
198 | # NuGet Symbol Packages
199 | *.snupkg
200 | # The packages folder can be ignored because of Package Restore
201 | **/[Pp]ackages/*
202 | # except build/, which is used as an MSBuild target.
203 | !**/[Pp]ackages/build/
204 | # Uncomment if necessary however generally it will be regenerated when needed
205 | #!**/[Pp]ackages/repositories.config
206 | # NuGet v3's project.json files produces more ignorable files
207 | *.nuget.props
208 | *.nuget.targets
209 |
210 | # Microsoft Azure Build Output
211 | csx/
212 | *.build.csdef
213 |
214 | # Microsoft Azure Emulator
215 | ecf/
216 | rcf/
217 |
218 | # Windows Store app package directories and files
219 | AppPackages/
220 | BundleArtifacts/
221 | Package.StoreAssociation.xml
222 | _pkginfo.txt
223 | *.appx
224 | *.appxbundle
225 | *.appxupload
226 |
227 | # Visual Studio cache files
228 | # files ending in .cache can be ignored
229 | *.[Cc]ache
230 | # but keep track of directories ending in .cache
231 | !?*.[Cc]ache/
232 |
233 | # Others
234 | ClientBin/
235 | ~$*
236 | *~
237 | *.dbmdl
238 | *.dbproj.schemaview
239 | *.jfm
240 | *.pfx
241 | *.publishsettings
242 | orleans.codegen.cs
243 |
244 | # Including strong name files can present a security risk
245 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
246 | #*.snk
247 |
248 | # Since there are multiple workflows, uncomment next line to ignore bower_components
249 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
250 | #bower_components/
251 |
252 | # RIA/Silverlight projects
253 | Generated_Code/
254 |
255 | # Backup & report files from converting an old project file
256 | # to a newer Visual Studio version. Backup files are not needed,
257 | # because we have git ;-)
258 | _UpgradeReport_Files/
259 | Backup*/
260 | UpgradeLog*.XML
261 | UpgradeLog*.htm
262 | ServiceFabricBackup/
263 | *.rptproj.bak
264 |
265 | # SQL Server files
266 | *.mdf
267 | *.ldf
268 | *.ndf
269 |
270 | # Business Intelligence projects
271 | *.rdl.data
272 | *.bim.layout
273 | *.bim_*.settings
274 | *.rptproj.rsuser
275 | *- [Bb]ackup.rdl
276 | *- [Bb]ackup ([0-9]).rdl
277 | *- [Bb]ackup ([0-9][0-9]).rdl
278 |
279 | # Microsoft Fakes
280 | FakesAssemblies/
281 |
282 | # GhostDoc plugin setting file
283 | *.GhostDoc.xml
284 |
285 | # Node.js Tools for Visual Studio
286 | .ntvs_analysis.dat
287 | node_modules/
288 |
289 | # Visual Studio 6 build log
290 | *.plg
291 |
292 | # Visual Studio 6 workspace options file
293 | *.opt
294 |
295 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
296 | *.vbw
297 |
298 | # Visual Studio LightSwitch build output
299 | **/*.HTMLClient/GeneratedArtifacts
300 | **/*.DesktopClient/GeneratedArtifacts
301 | **/*.DesktopClient/ModelManifest.xml
302 | **/*.Server/GeneratedArtifacts
303 | **/*.Server/ModelManifest.xml
304 | _Pvt_Extensions
305 |
306 | # Paket dependency manager
307 | .paket/paket.exe
308 | paket-files/
309 |
310 | # FAKE - F# Make
311 | .fake/
312 |
313 | # CodeRush personal settings
314 | .cr/personal
315 |
316 | # Python Tools for Visual Studio (PTVS)
317 | __pycache__/
318 | *.pyc
319 |
320 | # Cake - Uncomment if you are using it
321 | # tools/**
322 | # !tools/packages.config
323 |
324 | # Tabs Studio
325 | *.tss
326 |
327 | # Telerik's JustMock configuration file
328 | *.jmconfig
329 |
330 | # BizTalk build output
331 | *.btp.cs
332 | *.btm.cs
333 | *.odx.cs
334 | *.xsd.cs
335 |
336 | # OpenCover UI analysis results
337 | OpenCover/
338 |
339 | # Azure Stream Analytics local run output
340 | ASALocalRun/
341 |
342 | # MSBuild Binary and Structured Log
343 | *.binlog
344 |
345 | # NVidia Nsight GPU debugger configuration file
346 | *.nvuser
347 |
348 | # MFractors (Xamarin productivity tool) working folder
349 | .mfractor/
350 |
351 | # Local History for Visual Studio
352 | .localhistory/
353 |
354 | # BeatPulse healthcheck temp database
355 | healthchecksdb
356 |
357 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
358 | MigrationBackup/
359 |
360 | # Ionide (cross platform F# VS Code tools) working folder
361 | .ionide/
362 |
363 | # Fody - auto-generated XML schema
364 | FodyWeavers.xsd
--------------------------------------------------------------------------------
/RawIntrinsics/SSSE3.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 | public static unsafe partial class SSSE3
4 | {
5 | ///
6 | /// Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst".
7 | ///
8 | /// PABSW xmm, xmm
9 | /// __m128i {SI16}
10 | /// __m128i dst {UI16}
11 | public static __m128i _mm_abs_epi16(__m128i a) => System.Runtime.Intrinsics.X86.Ssse3.Abs(a.SI16);
12 |
13 | ///
14 | /// Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst".
15 | ///
16 | /// PABSD xmm, xmm
17 | /// __m128i {SI32}
18 | /// __m128i dst {UI32}
19 | public static __m128i _mm_abs_epi32(__m128i a) => System.Runtime.Intrinsics.X86.Ssse3.Abs(a.SI32);
20 |
21 | ///
22 | /// Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst".
23 | ///
24 | /// PABSB xmm, xmm
25 | /// __m128i {SI8}
26 | /// __m128i dst {UI8}
27 | public static __m128i _mm_abs_epi8(__m128i a) => System.Runtime.Intrinsics.X86.Ssse3.Abs(a.SI8);
28 |
29 | ///
30 | /// Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst".
31 | ///
32 | /// PALIGNR xmm, xmm, imm8
33 | /// __m128i {UI8}
34 | /// __m128i {UI8}
35 | /// int {IMM}
36 | /// __m128i dst {UI8}
37 | public static __m128i _mm_alignr_epi8(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Ssse3.AlignRight(a.UI8, b.UI8, (byte)imm8);
38 |
39 | ///
40 | /// Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".
41 | ///
42 | /// PHADDW xmm, xmm
43 | /// __m128i {SI16}
44 | /// __m128i {SI16}
45 | /// __m128i dst {SI16}
46 | public static __m128i _mm_hadd_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalAdd(a.SI16, b.SI16);
47 |
48 | ///
49 | /// Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".
50 | ///
51 | /// PHADDD xmm, xmm
52 | /// __m128i {SI32}
53 | /// __m128i {SI32}
54 | /// __m128i dst {SI32}
55 | public static __m128i _mm_hadd_epi32(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalAdd(a.SI32, b.SI32);
56 |
57 | ///
58 | /// Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".
59 | ///
60 | /// PHADDSW xmm, xmm
61 | /// __m128i {SI16}
62 | /// __m128i {SI16}
63 | /// __m128i dst {SI16}
64 | public static __m128i _mm_hadds_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalAddSaturate(a.SI16, b.SI16);
65 |
66 | ///
67 | /// Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".
68 | ///
69 | /// PHSUBW xmm, xmm
70 | /// __m128i {SI16}
71 | /// __m128i {SI16}
72 | /// __m128i dst {SI16}
73 | public static __m128i _mm_hsub_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalSubtract(a.SI16, b.SI16);
74 |
75 | ///
76 | /// Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".
77 | ///
78 | /// PHSUBD xmm, xmm
79 | /// __m128i {SI32}
80 | /// __m128i {SI32}
81 | /// __m128i dst {SI32}
82 | public static __m128i _mm_hsub_epi32(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalSubtract(a.SI32, b.SI32);
83 |
84 | ///
85 | /// Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".
86 | ///
87 | /// PHSUBSW xmm, xmm
88 | /// __m128i {SI16}
89 | /// __m128i {SI16}
90 | /// __m128i dst {SI16}
91 | public static __m128i _mm_hsubs_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.HorizontalSubtractSaturate(a.SI16, b.SI16);
92 |
93 | /// <summary>
94 | /// Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst".
95 | /// </summary>
96 | /// <remarks>Instruction: PMADDUBSW xmm, xmm. Forwards to Ssse3.MultiplyAddAdjacent on the UI8 view of "a" and the SI8 view of "b".</remarks>
97 | /// <param name="a">__m128i {UI8}</param>
98 | /// <param name="b">__m128i {SI8}</param>
99 | /// <returns>__m128i dst {SI16}</returns>
100 | public static __m128i _mm_maddubs_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.MultiplyAddAdjacent(a.UI8, b.SI8);
101 |
102 | /// <summary>
103 | /// Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".
104 | /// </summary>
105 | /// <remarks>Instruction: PMULHRSW xmm, xmm. Forwards to Ssse3.MultiplyHighRoundScale on the SI16 views of "a" and "b".</remarks>
106 | /// <param name="a">__m128i {SI16}</param>
107 | /// <param name="b">__m128i {SI16}</param>
108 | /// <returns>__m128i dst {UI16}</returns>
109 | public static __m128i _mm_mulhrs_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.MultiplyHighRoundScale(a.SI16, b.SI16);
110 |
111 | /// <summary>
112 | /// Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".
113 | /// </summary>
114 | /// <remarks>Instruction: PSHUFB xmm, xmm. Forwards to Ssse3.Shuffle on the UI8 views of "a" and "b".</remarks>
115 | /// <param name="a">__m128i {UI8}</param>
116 | /// <param name="b">__m128i {UI8}</param>
117 | /// <returns>__m128i dst {UI8}</returns>
118 | public static __m128i _mm_shuffle_epi8(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.Shuffle(a.UI8, b.UI8);
119 |
120 | /// <summary>
121 | /// Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.
122 | /// </summary>
123 | /// <remarks>Instruction: PSIGNW xmm, xmm. Forwards to Ssse3.Sign on the SI16 views of "a" and "b".</remarks>
124 | /// <param name="a">__m128i {SI16}</param>
125 | /// <param name="b">__m128i {SI16}</param>
126 | /// <returns>__m128i dst {UI16}</returns>
127 | public static __m128i _mm_sign_epi16(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.Sign(a.SI16, b.SI16);
128 |
129 | /// <summary>
130 | /// Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.
131 | /// </summary>
132 | /// <remarks>Instruction: PSIGND xmm, xmm. Forwards to Ssse3.Sign on the SI32 views of "a" and "b".</remarks>
133 | /// <param name="a">__m128i {SI32}</param>
134 | /// <param name="b">__m128i {SI32}</param>
135 | /// <returns>__m128i dst {UI32}</returns>
136 | public static __m128i _mm_sign_epi32(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.Sign(a.SI32, b.SI32);
137 |
138 | /// <summary>
139 | /// Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.
140 | /// </summary>
141 | /// <remarks>Instruction: PSIGNB xmm, xmm. Forwards to Ssse3.Sign on the SI8 views of "a" and "b".</remarks>
142 | /// <param name="a">__m128i {SI8}</param>
143 | /// <param name="b">__m128i {SI8}</param>
144 | /// <returns>__m128i dst {UI8}</returns>
145 | public static __m128i _mm_sign_epi8(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Ssse3.Sign(a.SI8, b.SI8);
146 |
147 | }
148 | }
149 |
--------------------------------------------------------------------------------
/RawIntrinsics/Types.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 | public struct __m64 // 64-bit vector wrapper: the bits are stored once as bytes and reinterpreted per element type on demand.
4 | {
5 | private System.Runtime.Intrinsics.Vector64<byte> _; // Raw payload; every implicit conversion below normalizes its input to bytes.
6 | public System.Runtime.Intrinsics.Vector64<byte> UI8 => System.Runtime.Intrinsics.Vector64.AsByte(_);
7 | public System.Runtime.Intrinsics.Vector64<sbyte> SI8 => System.Runtime.Intrinsics.Vector64.AsSByte(_);
8 | public System.Runtime.Intrinsics.Vector64<ushort> UI16 => System.Runtime.Intrinsics.Vector64.AsUInt16(_);
9 | public System.Runtime.Intrinsics.Vector64<short> SI16 => System.Runtime.Intrinsics.Vector64.AsInt16(_);
10 | public System.Runtime.Intrinsics.Vector64<uint> UI32 => System.Runtime.Intrinsics.Vector64.AsUInt32(_);
11 | public System.Runtime.Intrinsics.Vector64<int> SI32 => System.Runtime.Intrinsics.Vector64.AsInt32(_);
12 | public System.Runtime.Intrinsics.Vector64<ulong> UI64 => System.Runtime.Intrinsics.Vector64.AsUInt64(_);
13 | public System.Runtime.Intrinsics.Vector64<long> SI64 => System.Runtime.Intrinsics.Vector64.AsInt64(_);
14 | public System.Runtime.Intrinsics.Vector64<float> FP32 => System.Runtime.Intrinsics.Vector64.AsSingle(_);
15 | public System.Runtime.Intrinsics.Vector64<double> FP64 => System.Runtime.Intrinsics.Vector64.AsDouble(_);
16 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<byte> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
17 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<sbyte> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
18 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<ushort> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
19 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<short> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
20 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<uint> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
21 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<int> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
22 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<ulong> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
23 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<long> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
24 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<float> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
25 | public static implicit operator __m64(System.Runtime.Intrinsics.Vector64<double> v) => new __m64 { _ = System.Runtime.Intrinsics.Vector64.AsByte(v) };
26 | }
27 |
28 | public struct __m128 // 128-bit vector wrapper (the FP32 operand type of the intrinsic wrappers): bits are stored as bytes and reinterpreted on demand.
29 | {
30 | private System.Runtime.Intrinsics.Vector128<byte> _; // Raw payload; every implicit conversion below normalizes its input to bytes.
31 | public System.Runtime.Intrinsics.Vector128<byte> UI8 => System.Runtime.Intrinsics.Vector128.AsByte(_);
32 | public System.Runtime.Intrinsics.Vector128<sbyte> SI8 => System.Runtime.Intrinsics.Vector128.AsSByte(_);
33 | public System.Runtime.Intrinsics.Vector128<ushort> UI16 => System.Runtime.Intrinsics.Vector128.AsUInt16(_);
34 | public System.Runtime.Intrinsics.Vector128<short> SI16 => System.Runtime.Intrinsics.Vector128.AsInt16(_);
35 | public System.Runtime.Intrinsics.Vector128<uint> UI32 => System.Runtime.Intrinsics.Vector128.AsUInt32(_);
36 | public System.Runtime.Intrinsics.Vector128<int> SI32 => System.Runtime.Intrinsics.Vector128.AsInt32(_);
37 | public System.Runtime.Intrinsics.Vector128<ulong> UI64 => System.Runtime.Intrinsics.Vector128.AsUInt64(_);
38 | public System.Runtime.Intrinsics.Vector128<long> SI64 => System.Runtime.Intrinsics.Vector128.AsInt64(_);
39 | public System.Runtime.Intrinsics.Vector128<float> FP32 => System.Runtime.Intrinsics.Vector128.AsSingle(_);
40 | public System.Runtime.Intrinsics.Vector128<double> FP64 => System.Runtime.Intrinsics.Vector128.AsDouble(_);
41 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<byte> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
42 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<sbyte> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
43 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<ushort> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
44 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<short> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
45 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<uint> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
46 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<int> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
47 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<ulong> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
48 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<long> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
49 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<float> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
50 | public static implicit operator __m128(System.Runtime.Intrinsics.Vector128<double> v) => new __m128 { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
51 | }
52 |
53 | public struct __m128i // 128-bit vector wrapper (the integer operand type of the intrinsic wrappers): bits are stored as bytes and reinterpreted on demand.
54 | {
55 | private System.Runtime.Intrinsics.Vector128<byte> _; // Raw payload; every implicit conversion below normalizes its input to bytes.
56 | public System.Runtime.Intrinsics.Vector128<byte> UI8 => System.Runtime.Intrinsics.Vector128.AsByte(_);
57 | public System.Runtime.Intrinsics.Vector128<sbyte> SI8 => System.Runtime.Intrinsics.Vector128.AsSByte(_);
58 | public System.Runtime.Intrinsics.Vector128<ushort> UI16 => System.Runtime.Intrinsics.Vector128.AsUInt16(_);
59 | public System.Runtime.Intrinsics.Vector128<short> SI16 => System.Runtime.Intrinsics.Vector128.AsInt16(_);
60 | public System.Runtime.Intrinsics.Vector128<uint> UI32 => System.Runtime.Intrinsics.Vector128.AsUInt32(_);
61 | public System.Runtime.Intrinsics.Vector128<int> SI32 => System.Runtime.Intrinsics.Vector128.AsInt32(_);
62 | public System.Runtime.Intrinsics.Vector128<ulong> UI64 => System.Runtime.Intrinsics.Vector128.AsUInt64(_);
63 | public System.Runtime.Intrinsics.Vector128<long> SI64 => System.Runtime.Intrinsics.Vector128.AsInt64(_);
64 | public System.Runtime.Intrinsics.Vector128<float> FP32 => System.Runtime.Intrinsics.Vector128.AsSingle(_);
65 | public System.Runtime.Intrinsics.Vector128<double> FP64 => System.Runtime.Intrinsics.Vector128.AsDouble(_);
66 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<byte> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
67 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<sbyte> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
68 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<ushort> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
69 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<short> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
70 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<uint> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
71 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<int> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
72 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<ulong> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
73 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<long> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
74 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<float> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
75 | public static implicit operator __m128i(System.Runtime.Intrinsics.Vector128<double> v) => new __m128i { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
76 | }
77 |
78 | public struct __m128d // 128-bit vector wrapper (the FP64 operand type of the intrinsic wrappers): bits are stored as bytes and reinterpreted on demand.
79 | {
80 | private System.Runtime.Intrinsics.Vector128<byte> _; // Raw payload; every implicit conversion below normalizes its input to bytes.
81 | public System.Runtime.Intrinsics.Vector128<byte> UI8 => System.Runtime.Intrinsics.Vector128.AsByte(_);
82 | public System.Runtime.Intrinsics.Vector128<sbyte> SI8 => System.Runtime.Intrinsics.Vector128.AsSByte(_);
83 | public System.Runtime.Intrinsics.Vector128<ushort> UI16 => System.Runtime.Intrinsics.Vector128.AsUInt16(_);
84 | public System.Runtime.Intrinsics.Vector128<short> SI16 => System.Runtime.Intrinsics.Vector128.AsInt16(_);
85 | public System.Runtime.Intrinsics.Vector128<uint> UI32 => System.Runtime.Intrinsics.Vector128.AsUInt32(_);
86 | public System.Runtime.Intrinsics.Vector128<int> SI32 => System.Runtime.Intrinsics.Vector128.AsInt32(_);
87 | public System.Runtime.Intrinsics.Vector128<ulong> UI64 => System.Runtime.Intrinsics.Vector128.AsUInt64(_);
88 | public System.Runtime.Intrinsics.Vector128<long> SI64 => System.Runtime.Intrinsics.Vector128.AsInt64(_);
89 | public System.Runtime.Intrinsics.Vector128<float> FP32 => System.Runtime.Intrinsics.Vector128.AsSingle(_);
90 | public System.Runtime.Intrinsics.Vector128<double> FP64 => System.Runtime.Intrinsics.Vector128.AsDouble(_);
91 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<byte> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
92 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<sbyte> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
93 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<ushort> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
94 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<short> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
95 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<uint> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
96 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<int> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
97 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<ulong> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
98 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<long> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
99 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<float> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
100 | public static implicit operator __m128d(System.Runtime.Intrinsics.Vector128<double> v) => new __m128d { _ = System.Runtime.Intrinsics.Vector128.AsByte(v) };
101 | }
102 |
103 | public struct __m256 // 256-bit vector wrapper (the FP32 operand type of the 256-bit intrinsic wrappers): bits are stored as bytes and reinterpreted on demand.
104 | {
105 | private System.Runtime.Intrinsics.Vector256<byte> _; // Raw payload; every implicit conversion below normalizes its input to bytes.
106 | public System.Runtime.Intrinsics.Vector256<byte> UI8 => System.Runtime.Intrinsics.Vector256.AsByte(_);
107 | public System.Runtime.Intrinsics.Vector256<sbyte> SI8 => System.Runtime.Intrinsics.Vector256.AsSByte(_);
108 | public System.Runtime.Intrinsics.Vector256<ushort> UI16 => System.Runtime.Intrinsics.Vector256.AsUInt16(_);
109 | public System.Runtime.Intrinsics.Vector256<short> SI16 => System.Runtime.Intrinsics.Vector256.AsInt16(_);
110 | public System.Runtime.Intrinsics.Vector256<uint> UI32 => System.Runtime.Intrinsics.Vector256.AsUInt32(_);
111 | public System.Runtime.Intrinsics.Vector256<int> SI32 => System.Runtime.Intrinsics.Vector256.AsInt32(_);
112 | public System.Runtime.Intrinsics.Vector256<ulong> UI64 => System.Runtime.Intrinsics.Vector256.AsUInt64(_);
113 | public System.Runtime.Intrinsics.Vector256<long> SI64 => System.Runtime.Intrinsics.Vector256.AsInt64(_);
114 | public System.Runtime.Intrinsics.Vector256<float> FP32 => System.Runtime.Intrinsics.Vector256.AsSingle(_);
115 | public System.Runtime.Intrinsics.Vector256<double> FP64 => System.Runtime.Intrinsics.Vector256.AsDouble(_);
116 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<byte> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
117 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<sbyte> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
118 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<ushort> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
119 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<short> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
120 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<uint> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
121 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<int> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
122 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<ulong> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
123 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<long> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
124 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<float> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
125 | public static implicit operator __m256(System.Runtime.Intrinsics.Vector256<double> v) => new __m256 { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
126 | }
127 |
128 | public struct __m256i // 256-bit vector wrapper: bits are stored once as bytes and reinterpreted per element type on demand.
129 | {
130 | private System.Runtime.Intrinsics.Vector256<byte> _; // Raw payload; every implicit conversion below normalizes its input to bytes.
131 | public System.Runtime.Intrinsics.Vector256<byte> UI8 => System.Runtime.Intrinsics.Vector256.AsByte(_);
132 | public System.Runtime.Intrinsics.Vector256<sbyte> SI8 => System.Runtime.Intrinsics.Vector256.AsSByte(_);
133 | public System.Runtime.Intrinsics.Vector256<ushort> UI16 => System.Runtime.Intrinsics.Vector256.AsUInt16(_);
134 | public System.Runtime.Intrinsics.Vector256<short> SI16 => System.Runtime.Intrinsics.Vector256.AsInt16(_);
135 | public System.Runtime.Intrinsics.Vector256<uint> UI32 => System.Runtime.Intrinsics.Vector256.AsUInt32(_);
136 | public System.Runtime.Intrinsics.Vector256<int> SI32 => System.Runtime.Intrinsics.Vector256.AsInt32(_);
137 | public System.Runtime.Intrinsics.Vector256<ulong> UI64 => System.Runtime.Intrinsics.Vector256.AsUInt64(_);
138 | public System.Runtime.Intrinsics.Vector256<long> SI64 => System.Runtime.Intrinsics.Vector256.AsInt64(_);
139 | public System.Runtime.Intrinsics.Vector256<float> FP32 => System.Runtime.Intrinsics.Vector256.AsSingle(_);
140 | public System.Runtime.Intrinsics.Vector256<double> FP64 => System.Runtime.Intrinsics.Vector256.AsDouble(_);
141 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<byte> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
142 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<sbyte> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
143 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<ushort> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
144 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<short> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
145 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<uint> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
146 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<int> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
147 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<ulong> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
148 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<long> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
149 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<float> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
150 | public static implicit operator __m256i(System.Runtime.Intrinsics.Vector256<double> v) => new __m256i { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
151 | }
152 |
153 | public struct __m256d // 256-bit vector wrapper (the FP64 operand type of the 256-bit intrinsic wrappers): bits are stored as bytes and reinterpreted on demand.
154 | {
155 | private System.Runtime.Intrinsics.Vector256<byte> _; // Raw payload; every implicit conversion below normalizes its input to bytes.
156 | public System.Runtime.Intrinsics.Vector256<byte> UI8 => System.Runtime.Intrinsics.Vector256.AsByte(_);
157 | public System.Runtime.Intrinsics.Vector256<sbyte> SI8 => System.Runtime.Intrinsics.Vector256.AsSByte(_);
158 | public System.Runtime.Intrinsics.Vector256<ushort> UI16 => System.Runtime.Intrinsics.Vector256.AsUInt16(_);
159 | public System.Runtime.Intrinsics.Vector256<short> SI16 => System.Runtime.Intrinsics.Vector256.AsInt16(_);
160 | public System.Runtime.Intrinsics.Vector256<uint> UI32 => System.Runtime.Intrinsics.Vector256.AsUInt32(_);
161 | public System.Runtime.Intrinsics.Vector256<int> SI32 => System.Runtime.Intrinsics.Vector256.AsInt32(_);
162 | public System.Runtime.Intrinsics.Vector256<ulong> UI64 => System.Runtime.Intrinsics.Vector256.AsUInt64(_);
163 | public System.Runtime.Intrinsics.Vector256<long> SI64 => System.Runtime.Intrinsics.Vector256.AsInt64(_);
164 | public System.Runtime.Intrinsics.Vector256<float> FP32 => System.Runtime.Intrinsics.Vector256.AsSingle(_);
165 | public System.Runtime.Intrinsics.Vector256<double> FP64 => System.Runtime.Intrinsics.Vector256.AsDouble(_);
166 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<byte> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
167 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<sbyte> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
168 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<ushort> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
169 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<short> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
170 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<uint> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
171 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<int> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
172 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<ulong> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
173 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<long> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
174 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<float> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
175 | public static implicit operator __m256d(System.Runtime.Intrinsics.Vector256<double> v) => new __m256d { _ = System.Runtime.Intrinsics.Vector256.AsByte(v) };
176 | }
177 |
178 | }
179 |
--------------------------------------------------------------------------------
/RawIntrinsics/FMA.cs:
--------------------------------------------------------------------------------
1 | namespace RawIntrinsics
2 | {
3 | public static unsafe partial class FMA
4 | {
5 | /// <summary>
6 | /// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".
7 | /// </summary>
8 | /// <remarks>Instruction: VFMADD132PD xmm, xmm, xmm. Forwards to Fma.MultiplyAdd on the FP64 views of "a", "b" and "c".</remarks>
9 | /// <param name="a">__m128d {FP64}</param>
10 | /// <param name="b">__m128d {FP64}</param>
11 | /// <param name="c">__m128d {FP64}</param>
12 | /// <returns>__m128d dst {FP64}</returns>
13 | public static __m128d _mm_fmadd_pd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(a.FP64, b.FP64, c.FP64);
14 |
15 | /// <summary>
16 | /// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".
17 | /// </summary>
18 | /// <remarks>Instruction: VFMADD132PS xmm, xmm, xmm. Forwards to Fma.MultiplyAdd on the FP32 views of "a", "b" and "c".</remarks>
19 | /// <param name="a">__m128 {FP32}</param>
20 | /// <param name="b">__m128 {FP32}</param>
21 | /// <param name="c">__m128 {FP32}</param>
22 | /// <returns>__m128 dst {FP32}</returns>
23 | public static __m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(a.FP32, b.FP32, c.FP32);
24 |
25 | /// <summary>
26 | /// Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
27 | /// </summary>
28 | /// <remarks>Instruction: VFMADD132SD xmm, xmm, xmm. Forwards to Fma.MultiplyAddScalar on the FP64 views of "a", "b" and "c".</remarks>
29 | /// <param name="a">__m128d {FP64}</param>
30 | /// <param name="b">__m128d {FP64}</param>
31 | /// <param name="c">__m128d {FP64}</param>
32 | /// <returns>__m128d dst {FP64}</returns>
33 | public static __m128d _mm_fmadd_sd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAddScalar(a.FP64, b.FP64, c.FP64);
34 |
35 | /// <summary>
36 | /// Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
37 | /// </summary>
38 | /// <remarks>Instruction: VFMADD132SS xmm, xmm, xmm. Forwards to Fma.MultiplyAddScalar on the FP32 views of "a", "b" and "c".</remarks>
39 | /// <param name="a">__m128 {FP32}</param>
40 | /// <param name="b">__m128 {FP32}</param>
41 | /// <param name="c">__m128 {FP32}</param>
42 | /// <returns>__m128 dst {FP32}</returns>
43 | public static __m128 _mm_fmadd_ss(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAddScalar(a.FP32, b.FP32, c.FP32);
44 |
45 | /// <summary>
46 | /// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".
47 | /// </summary>
48 | /// <remarks>Instruction: VFMADDSUB132PD xmm, xmm, xmm. Forwards to Fma.MultiplyAddSubtract on the FP64 views of "a", "b" and "c".</remarks>
49 | /// <param name="a">__m128d {FP64}</param>
50 | /// <param name="b">__m128d {FP64}</param>
51 | /// <param name="c">__m128d {FP64}</param>
52 | /// <returns>__m128d dst {FP64}</returns>
53 | public static __m128d _mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAddSubtract(a.FP64, b.FP64, c.FP64);
54 |
55 | /// <summary>
56 | /// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".
57 | /// </summary>
58 | /// <remarks>Instruction: VFMADDSUB132PS xmm, xmm, xmm. Forwards to Fma.MultiplyAddSubtract on the FP32 views of "a", "b" and "c".</remarks>
59 | /// <param name="a">__m128 {FP32}</param>
60 | /// <param name="b">__m128 {FP32}</param>
61 | /// <param name="c">__m128 {FP32}</param>
62 | /// <returns>__m128 dst {FP32}</returns>
63 | public static __m128 _mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAddSubtract(a.FP32, b.FP32, c.FP32);
64 |
65 | /// <summary>
66 | /// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".
67 | /// </summary>
68 | /// <remarks>Instruction: VFMSUB132PD xmm, xmm, xmm. Forwards to Fma.MultiplySubtract on the FP64 views of "a", "b" and "c".</remarks>
69 | /// <param name="a">__m128d {FP64}</param>
70 | /// <param name="b">__m128d {FP64}</param>
71 | /// <param name="c">__m128d {FP64}</param>
72 | /// <returns>__m128d dst {FP64}</returns>
73 | public static __m128d _mm_fmsub_pd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtract(a.FP64, b.FP64, c.FP64);
74 |
75 | /// <summary>
76 | /// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".
77 | /// </summary>
78 | /// <remarks>Instruction: VFMSUB132PS xmm, xmm, xmm. Forwards to Fma.MultiplySubtract on the FP32 views of "a", "b" and "c".</remarks>
79 | /// <param name="a">__m128 {FP32}</param>
80 | /// <param name="b">__m128 {FP32}</param>
81 | /// <param name="c">__m128 {FP32}</param>
82 | /// <returns>__m128 dst {FP32}</returns>
83 | public static __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtract(a.FP32, b.FP32, c.FP32);
84 |
85 | /// <summary>
86 | /// Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
87 | /// </summary>
88 | /// <remarks>Instruction: VFMSUB132SD xmm, xmm, xmm. Forwards to Fma.MultiplySubtractScalar on the FP64 views of "a", "b" and "c".</remarks>
89 | /// <param name="a">__m128d {FP64}</param>
90 | /// <param name="b">__m128d {FP64}</param>
91 | /// <param name="c">__m128d {FP64}</param>
92 | /// <returns>__m128d dst {FP64}</returns>
93 | public static __m128d _mm_fmsub_sd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtractScalar(a.FP64, b.FP64, c.FP64);
94 |
95 | /// <summary>
96 | /// Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
97 | /// </summary>
98 | /// <remarks>Instruction: VFMSUB132SS xmm, xmm, xmm. Forwards to Fma.MultiplySubtractScalar on the FP32 views of "a", "b" and "c".</remarks>
99 | /// <param name="a">__m128 {FP32}</param>
100 | /// <param name="b">__m128 {FP32}</param>
101 | /// <param name="c">__m128 {FP32}</param>
102 | /// <returns>__m128 dst {FP32}</returns>
103 | public static __m128 _mm_fmsub_ss(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtractScalar(a.FP32, b.FP32, c.FP32);
104 |
105 | /// <summary>
106 | /// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".
107 | /// </summary>
108 | /// <remarks>Instruction: VFMSUBADD132PD xmm, xmm, xmm. Forwards to Fma.MultiplySubtractAdd on the FP64 views of "a", "b" and "c".</remarks>
109 | /// <param name="a">__m128d {FP64}</param>
110 | /// <param name="b">__m128d {FP64}</param>
111 | /// <param name="c">__m128d {FP64}</param>
112 | /// <returns>__m128d dst {FP64}</returns>
113 | public static __m128d _mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtractAdd(a.FP64, b.FP64, c.FP64);
114 |
115 | /// <summary>
116 | /// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".
117 | /// </summary>
118 | /// <remarks>Instruction: VFMSUBADD132PS xmm, xmm, xmm. Forwards to Fma.MultiplySubtractAdd on the FP32 views of "a", "b" and "c".</remarks>
119 | /// <param name="a">__m128 {FP32}</param>
120 | /// <param name="b">__m128 {FP32}</param>
121 | /// <param name="c">__m128 {FP32}</param>
122 | /// <returns>__m128 dst {FP32}</returns>
123 | public static __m128 _mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtractAdd(a.FP32, b.FP32, c.FP32);
124 |
125 | /// <summary>
126 | /// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".
127 | /// </summary>
128 | /// <remarks>Instruction: VFNMADD132PD xmm, xmm, xmm. Forwards to Fma.MultiplyAddNegated on the FP64 views of "a", "b" and "c".</remarks>
129 | /// <param name="a">__m128d {FP64}</param>
130 | /// <param name="b">__m128d {FP64}</param>
131 | /// <param name="c">__m128d {FP64}</param>
132 | /// <returns>__m128d dst {FP64}</returns>
133 | public static __m128d _mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegated(a.FP64, b.FP64, c.FP64);
134 |
135 | /// <summary>
136 | /// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".
137 | /// </summary>
138 | /// <remarks>Instruction: VFNMADD132PS xmm, xmm, xmm. Forwards to Fma.MultiplyAddNegated on the FP32 views of "a", "b" and "c".</remarks>
139 | /// <param name="a">__m128 {FP32}</param>
140 | /// <param name="b">__m128 {FP32}</param>
141 | /// <param name="c">__m128 {FP32}</param>
142 | /// <returns>__m128 dst {FP32}</returns>
143 | public static __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegated(a.FP32, b.FP32, c.FP32);
144 |
145 | /// <summary>
146 | /// Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
147 | /// </summary>
148 | /// <remarks>Instruction: VFNMADD132SD xmm, xmm, xmm. Forwards to Fma.MultiplyAddNegatedScalar on the FP64 views of "a", "b" and "c".</remarks>
149 | /// <param name="a">__m128d {FP64}</param>
150 | /// <param name="b">__m128d {FP64}</param>
151 | /// <param name="c">__m128d {FP64}</param>
152 | /// <returns>__m128d dst {FP64}</returns>
153 | public static __m128d _mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegatedScalar(a.FP64, b.FP64, c.FP64);
154 |
155 | /// <summary>
156 | /// Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
157 | /// </summary>
158 | /// <remarks>Instruction: VFNMADD132SS xmm, xmm, xmm. Forwards to Fma.MultiplyAddNegatedScalar on the FP32 views of "a", "b" and "c".</remarks>
159 | /// <param name="a">__m128 {FP32}</param>
160 | /// <param name="b">__m128 {FP32}</param>
161 | /// <param name="c">__m128 {FP32}</param>
162 | /// <returns>__m128 dst {FP32}</returns>
163 | public static __m128 _mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegatedScalar(a.FP32, b.FP32, c.FP32);
164 |
165 | /// <summary>
166 | /// Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".
167 | /// </summary>
168 | /// <remarks>Instruction: VFNMSUB132PD xmm, xmm, xmm. Forwards to Fma.MultiplySubtractNegated on the FP64 views of "a", "b" and "c".</remarks>
169 | /// <param name="a">__m128d {FP64}</param>
170 | /// <param name="b">__m128d {FP64}</param>
171 | /// <param name="c">__m128d {FP64}</param>
172 | /// <returns>__m128d dst {FP64}</returns>
173 | public static __m128d _mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegated(a.FP64, b.FP64, c.FP64);
174 |
175 | /// <summary>
176 | /// Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".
177 | /// </summary>
178 | /// <remarks>Instruction: VFNMSUB132PS xmm, xmm, xmm. Forwards to Fma.MultiplySubtractNegated on the FP32 views of "a", "b" and "c".</remarks>
179 | /// <param name="a">__m128 {FP32}</param>
180 | /// <param name="b">__m128 {FP32}</param>
181 | /// <param name="c">__m128 {FP32}</param>
182 | /// <returns>__m128 dst {FP32}</returns>
183 | public static __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegated(a.FP32, b.FP32, c.FP32);
184 |
185 | /// <summary>
186 | /// Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
187 | /// </summary>
188 | /// <remarks>Instruction: VFNMSUB132SD xmm, xmm, xmm. Forwards to Fma.MultiplySubtractNegatedScalar on the FP64 views of "a", "b" and "c".</remarks>
189 | /// <param name="a">__m128d {FP64}</param>
190 | /// <param name="b">__m128d {FP64}</param>
191 | /// <param name="c">__m128d {FP64}</param>
192 | /// <returns>__m128d dst {FP64}</returns>
193 | public static __m128d _mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) => System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegatedScalar(a.FP64, b.FP64, c.FP64);
194 |
195 | /// <summary>Computes -(<paramref name="a"/> * <paramref name="b"/>) - <paramref name="c"/> on the lower single-precision (32-bit) floating-point elements only, stores it in the lower element of "dst", and copies the upper 3 packed elements of <paramref name="a"/> to the upper elements of "dst" (VFNMSUB132SS xmm, xmm, xmm).</summary>
196 | /// <param name="a">First multiplicand; also supplies the upper 3 elements of the result, __m128 {FP32}.</param>
197 | /// <param name="b">Second multiplicand, __m128 {FP32}.</param>
198 | /// <param name="c">Value whose lower element is subtracted from the negated product, __m128 {FP32}.</param>
199 | /// <returns>__m128 dst {FP32}</returns>
200 | public static __m128 _mm_fnmsub_ss(__m128 a, __m128 b, __m128 c)
201 | {
202 |     return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegatedScalar(a.FP32, b.FP32, c.FP32);
203 | }
204 |
205 | /// <summary>Computes (<paramref name="a"/> * <paramref name="b"/>) + <paramref name="c"/> for each packed double-precision (64-bit) floating-point element and stores the results in "dst" (VFMADD132PD ymm, ymm, ymm).</summary>
206 | /// <param name="a">First multiplicand, __m256d {FP64}.</param>
207 | /// <param name="b">Second multiplicand, __m256d {FP64}.</param>
208 | /// <param name="c">Value added to the product, __m256d {FP64}.</param>
209 | /// <returns>__m256d dst {FP64}</returns>
210 | public static __m256d _mm256_fmadd_pd(__m256d a, __m256d b, __m256d c)
211 | {
212 |     return System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(a.FP64, b.FP64, c.FP64);
213 | }
214 |
215 | /// <summary>Computes (<paramref name="a"/> * <paramref name="b"/>) + <paramref name="c"/> for each packed single-precision (32-bit) floating-point element and stores the results in "dst" (VFMADD132PS ymm, ymm, ymm).</summary>
216 | /// <param name="a">First multiplicand, __m256 {FP32}.</param>
217 | /// <param name="b">Second multiplicand, __m256 {FP32}.</param>
218 | /// <param name="c">Value added to the product, __m256 {FP32}.</param>
219 | /// <returns>__m256 dst {FP32}</returns>
220 | public static __m256 _mm256_fmadd_ps(__m256 a, __m256 b, __m256 c)
221 | {
222 |     return System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(a.FP32, b.FP32, c.FP32);
223 | }
224 |
225 | /// <summary>Multiplies the packed double-precision (64-bit) floating-point elements of <paramref name="a"/> and <paramref name="b"/>, alternately adds and subtracts the packed elements of <paramref name="c"/> to/from the products, and stores the results in "dst" (VFMADDSUB132PD ymm, ymm, ymm).</summary>
226 | /// <param name="a">First multiplicand, __m256d {FP64}.</param>
227 | /// <param name="b">Second multiplicand, __m256d {FP64}.</param>
228 | /// <param name="c">Value alternately added to and subtracted from the product, __m256d {FP64}.</param>
229 | /// <returns>__m256d dst {FP64}</returns>
230 | public static __m256d _mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c)
231 | {
232 |     return System.Runtime.Intrinsics.X86.Fma.MultiplyAddSubtract(a.FP64, b.FP64, c.FP64);
233 | }
234 |
235 | /// <summary>Multiplies the packed single-precision (32-bit) floating-point elements of <paramref name="a"/> and <paramref name="b"/>, alternately adds and subtracts the packed elements of <paramref name="c"/> to/from the products, and stores the results in "dst" (VFMADDSUB132PS ymm, ymm, ymm).</summary>
236 | /// <param name="a">First multiplicand, __m256 {FP32}.</param>
237 | /// <param name="b">Second multiplicand, __m256 {FP32}.</param>
238 | /// <param name="c">Value alternately added to and subtracted from the product, __m256 {FP32}.</param>
239 | /// <returns>__m256 dst {FP32}</returns>
240 | public static __m256 _mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c)
241 | {
242 |     return System.Runtime.Intrinsics.X86.Fma.MultiplyAddSubtract(a.FP32, b.FP32, c.FP32);
243 | }
244 |
245 | /// <summary>Computes (<paramref name="a"/> * <paramref name="b"/>) - <paramref name="c"/> for each packed double-precision (64-bit) floating-point element and stores the results in "dst" (VFMSUB132PD ymm, ymm, ymm).</summary>
246 | /// <param name="a">First multiplicand, __m256d {FP64}.</param>
247 | /// <param name="b">Second multiplicand, __m256d {FP64}.</param>
248 | /// <param name="c">Value subtracted from the product, __m256d {FP64}.</param>
249 | /// <returns>__m256d dst {FP64}</returns>
250 | public static __m256d _mm256_fmsub_pd(__m256d a, __m256d b, __m256d c)
251 | {
252 |     return System.Runtime.Intrinsics.X86.Fma.MultiplySubtract(a.FP64, b.FP64, c.FP64);
253 | }
254 |
255 | /// <summary>Computes (<paramref name="a"/> * <paramref name="b"/>) - <paramref name="c"/> for each packed single-precision (32-bit) floating-point element and stores the results in "dst" (VFMSUB132PS ymm, ymm, ymm).</summary>
256 | /// <param name="a">First multiplicand, __m256 {FP32}.</param>
257 | /// <param name="b">Second multiplicand, __m256 {FP32}.</param>
258 | /// <param name="c">Value subtracted from the product, __m256 {FP32}.</param>
259 | /// <returns>__m256 dst {FP32}</returns>
260 | public static __m256 _mm256_fmsub_ps(__m256 a, __m256 b, __m256 c)
261 | {
262 |     return System.Runtime.Intrinsics.X86.Fma.MultiplySubtract(a.FP32, b.FP32, c.FP32);
263 | }
264 |
265 | /// <summary>Multiplies the packed double-precision (64-bit) floating-point elements of <paramref name="a"/> and <paramref name="b"/>, alternately subtracts and adds the packed elements of <paramref name="c"/> from/to the products, and stores the results in "dst" (VFMSUBADD132PD ymm, ymm, ymm).</summary>
266 | /// <param name="a">First multiplicand, __m256d {FP64}.</param>
267 | /// <param name="b">Second multiplicand, __m256d {FP64}.</param>
268 | /// <param name="c">Value alternately subtracted from and added to the product, __m256d {FP64}.</param>
269 | /// <returns>__m256d dst {FP64}</returns>
270 | public static __m256d _mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c)
271 | {
272 |     return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractAdd(a.FP64, b.FP64, c.FP64);
273 | }
274 |
275 | /// <summary>Multiplies the packed single-precision (32-bit) floating-point elements of <paramref name="a"/> and <paramref name="b"/>, alternately subtracts and adds the packed elements of <paramref name="c"/> from/to the products, and stores the results in "dst" (VFMSUBADD132PS ymm, ymm, ymm).</summary>
276 | /// <param name="a">First multiplicand, __m256 {FP32}.</param>
277 | /// <param name="b">Second multiplicand, __m256 {FP32}.</param>
278 | /// <param name="c">Value alternately subtracted from and added to the product, __m256 {FP32}.</param>
279 | /// <returns>__m256 dst {FP32}</returns>
280 | public static __m256 _mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c)
281 | {
282 |     return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractAdd(a.FP32, b.FP32, c.FP32);
283 | }
284 |
285 | /// <summary>Computes -(<paramref name="a"/> * <paramref name="b"/>) + <paramref name="c"/> for each packed double-precision (64-bit) floating-point element and stores the results in "dst" (VFNMADD132PD ymm, ymm, ymm).</summary>
286 | /// <param name="a">First multiplicand, __m256d {FP64}.</param>
287 | /// <param name="b">Second multiplicand, __m256d {FP64}.</param>
288 | /// <param name="c">Value the negated product is added to, __m256d {FP64}.</param>
289 | /// <returns>__m256d dst {FP64}</returns>
290 | public static __m256d _mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c)
291 | {
292 |     return System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegated(a.FP64, b.FP64, c.FP64);
293 | }
294 |
295 | /// <summary>Computes -(<paramref name="a"/> * <paramref name="b"/>) + <paramref name="c"/> for each packed single-precision (32-bit) floating-point element and stores the results in "dst" (VFNMADD132PS ymm, ymm, ymm).</summary>
296 | /// <param name="a">First multiplicand, __m256 {FP32}.</param>
297 | /// <param name="b">Second multiplicand, __m256 {FP32}.</param>
298 | /// <param name="c">Value the negated product is added to, __m256 {FP32}.</param>
299 | /// <returns>__m256 dst {FP32}</returns>
300 | public static __m256 _mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c)
301 | {
302 |     return System.Runtime.Intrinsics.X86.Fma.MultiplyAddNegated(a.FP32, b.FP32, c.FP32);
303 | }
304 |
305 | /// <summary>Computes -(<paramref name="a"/> * <paramref name="b"/>) - <paramref name="c"/> for each packed double-precision (64-bit) floating-point element and stores the results in "dst" (VFNMSUB132PD ymm, ymm, ymm).</summary>
306 | /// <param name="a">First multiplicand, __m256d {FP64}.</param>
307 | /// <param name="b">Second multiplicand, __m256d {FP64}.</param>
308 | /// <param name="c">Value subtracted from the negated product, __m256d {FP64}.</param>
309 | /// <returns>__m256d dst {FP64}</returns>
310 | public static __m256d _mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c)
311 | {
312 |     return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegated(a.FP64, b.FP64, c.FP64);
313 | }
314 |
315 | /// <summary>Computes -(<paramref name="a"/> * <paramref name="b"/>) - <paramref name="c"/> for each packed single-precision (32-bit) floating-point element and stores the results in "dst" (VFNMSUB132PS ymm, ymm, ymm).</summary>
316 | /// <param name="a">First multiplicand, __m256 {FP32}.</param>
317 | /// <param name="b">Second multiplicand, __m256 {FP32}.</param>
318 | /// <param name="c">Value subtracted from the negated product, __m256 {FP32}.</param>
319 | /// <returns>__m256 dst {FP32}</returns>
320 | public static __m256 _mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c)
321 | {
322 |     return System.Runtime.Intrinsics.X86.Fma.MultiplySubtractNegated(a.FP32, b.FP32, c.FP32);
323 | }
324 |
325 | }
326 | }
327 |
--------------------------------------------------------------------------------
/RawIntrinsicsGenerator/Generator.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Concurrent;
3 | using System.Collections.Generic;
4 | using System.Diagnostics;
5 | using System.IO;
6 | using System.Linq;
7 | using System.Net.Http;
8 | using System.Runtime.Intrinsics;
9 | using System.Text;
10 | using System.Text.RegularExpressions;
11 | using System.Threading.Tasks;
12 | using System.Xml;
13 | using Microsoft.CodeAnalysis;
14 | using Microsoft.CodeAnalysis.CSharp;
15 | using Microsoft.CodeAnalysis.CSharp.Syntax;
16 |
17 | namespace RawIntrinsicsGenerator
18 | {
19 | public static class Generator
20 | {
21 | private const string SriDataUrl1 = @"https://raw.githubusercontent.com/dotnet/runtime/master/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/"; // Raw-content base URL of the System/Runtime/Intrinsics/X86/ source directory on the master branch of dotnet/runtime.
22 | private const string SriDataUrl2 = @"https://raw.githubusercontent.com/dotnet/runtime/master/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/"; // Same repository and branch, one directory up: System/Runtime/Intrinsics/.
23 | private const string IntelDataUrl = @"https://software.intel.com/sites/landingpage/IntrinsicsGuide/files/data-latest.xml"; // URL of the Intel Intrinsics Guide XML data file (data-latest.xml).
24 |
25 | private static readonly Regex IntelMethodSignature = new(@"///\s+?(?