├── GpuMemLatency
├── OpenCL
│ ├── lib
│ │ └── OpenCL.lib
│ ├── include
│ │ └── CL
│ │ │ ├── opencl.h
│ │ │ └── cl_gl_ext.h
│ └── README.md
├── Makefile
├── opencltest.sln
├── instruction_rate_fp16_kernel.cl
├── instruction_rate_fp64_kernel.cl
├── opencltest.h
└── latency_test.c
├── AsmGen
├── Properties
│ └── launchSettings.json
├── IUarchTestParallelBuild.cs
├── IUarchTest.cs
├── DataFiles
│ ├── CommonFunctions.c
│ ├── VsBranchHistFunction.c
│ ├── BranchhistTestBlock.c
│ ├── GccBranchHistFunction.c
│ ├── IndirectBranchTestBlock.c
│ ├── VsIndirectBranchFunction.c
│ └── GccIndirectBranchFunction.c
├── AsmGen.sln
├── AsmGen.csproj
├── tests
│ ├── RobTest.cs
│ ├── FlagsRfTest.cs
│ ├── LoadDivSchedTest.cs
│ ├── LdqStqTest.cs
│ ├── RorSchedTest.cs
│ ├── IntRfTest.cs
│ ├── JumpSchedTest.cs
│ ├── NotIntRfTest.cs
│ ├── MovImmIntRfTest.cs
│ ├── LdqTest.cs
│ ├── LoadDivNsqTest.cs
│ ├── StqTest.cs
│ ├── MixMaskIntRfTest.cs
│ ├── MaskRfTest.cs
│ ├── FaddSchedTest.cs
│ ├── FmulSchedTest.cs
│ ├── MixLoadStoreDivSchedTest.cs
│ ├── MixFaddFmulSchedTest.cs
│ ├── Fadd256SchedTest.cs
│ ├── JumpNsqTest.cs
│ ├── FaddNsqTest.cs
│ ├── MixJumpAddSchedTest.cs
│ ├── JumpAddSchedTest.cs
│ ├── MixBtsMulSchedTest.cs
│ ├── MixPdepMulSchedTest.cs
│ ├── MixLeaMulSchedTest.cs
│ ├── MixAddJump21SchedTest.cs
│ ├── MxcsrTest.cs
│ ├── MixRorBtsSchedTest.cs
│ ├── MmxRfTest.cs
│ ├── MixRorMulSchedTest.cs
│ ├── NopLoopTest.cs
│ ├── FaddIntAddSchedTest.cs
│ ├── Add256RfTest.cs
│ ├── MixIntFpRf13Test.cs
│ ├── BtsSchedTest.cs
│ ├── PdepSchedTest.cs
│ ├── MixPdepLeaSchedTest.cs
│ ├── LeaSchedTest.cs
│ ├── FpRfTest.cs
│ ├── AddSchedTest.cs
│ ├── MulSchedTest.cs
│ ├── CvtSchedTest.cs
│ ├── VecRfTest.cs
│ ├── Add512SchedTest.cs
│ ├── MxcsrFeTest.cs
│ ├── Add256SchedTest.cs
│ ├── LdmTest.cs
│ ├── MixIntFpRf12Test.cs
│ ├── Add128SchedTest.cs
│ ├── Vec256RfTest.cs
│ ├── Vec512RfTest.cs
│ ├── MixMulSchedTest.cs
│ ├── MixJumpMulSchedTest.cs
│ ├── VecStoreDataSchedTest.cs
│ ├── Mul16SchedTest.cs
│ ├── Add128SNsqTest.cs
│ └── Mul32SchedTest.cs
└── README.md
├── Makefile
├── MemoryBandwidth
├── Makefile
├── MemoryBandwidth
│ ├── MemoryBandwidth.vcxproj.filters
│ └── MemoryBandwidth.sln
└── README.md
├── MemoryLatency
├── Makefile
├── MemoryLatencyFunctions.asm
├── MemoryLatency.sln
├── README.md
├── MemoryLatency_i686.s
└── MemoryLatency_arm.s
├── CoreClockChecker
├── Makefile
├── CoreClockChecker_x86.s
├── BoostClockChecker_arm.s
├── BoostClockChecker.s
└── BoostClockChecker.c
├── .github
└── workflows
│ └── linux.yaml
├── InstructionRate
└── Makefile
├── README.md
├── clammicrobench
├── clammicrobench.vcxproj.filters
└── clammicrobench.sln
└── CoherencyLatency
└── CoherencyLatency.sln
/GpuMemLatency/OpenCL/lib/OpenCL.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChipsandCheese/Microbenchmarks/HEAD/GpuMemLatency/OpenCL/lib/OpenCL.lib
--------------------------------------------------------------------------------
/AsmGen/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "profiles": {
3 | "AsmGen": {
4 | "commandName": "Project",
5 | "commandLineArgs": "autocopy"
6 | }
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Folders to recursive make into, not everything has a Makefile. Names must match the directory names exactly (case matters on Linux).
2 | folders := MemoryLatency MemoryBandwidth InstructionRate CoreClockChecker GpuMemLatency
3 |
4 | all: $(folders)
5 |
6 | $(folders): .FORCE
7 | $(MAKE) -C $@
8 |
9 |
10 | .FORCE:
11 |
--------------------------------------------------------------------------------
/MemoryBandwidth/Makefile:
--------------------------------------------------------------------------------
1 | amd64:
2 | x86_64-linux-gnu-gcc -pthread -O3 MemoryBandwidth.c MemoryBandwidth_x86.s -o membw_amd64 -lm
3 |
4 | aarch64:
5 | aarch64-linux-gnu-gcc -pthread -O3 MemoryBandwidth.c MemoryBandwidth_arm.s -o membw_aarch64 -lm
6 |
7 | win64:
8 | x86_64-w64-mingw32-gcc-win32 -pthread -O3 MemoryBandwidth.c MemoryBandwidth_x86.s -o membw.exe -lm
9 |
--------------------------------------------------------------------------------
/MemoryLatency/Makefile:
--------------------------------------------------------------------------------
1 | amd64:
2 | x86_64-linux-gnu-gcc -O3 MemoryLatency.c MemoryLatency_x86.s -o MemoryLatency -lm
3 | aarch64:
4 | aarch64-linux-gnu-gcc -O3 MemoryLatency.c MemoryLatency_arm.s -o MemoryLatency -lm
5 | win64:
6 | x86_64-w64-mingw32-gcc -O3 MemoryLatency.c MemoryLatency_x86.s -o MemoryLatency.exe -lm
7 | win32:
8 | i686-w64-mingw32-gcc -O3 MemoryLatency.c MemoryLatency_i686.s -o MemoryLatency32.exe -lm
9 |
--------------------------------------------------------------------------------
/AsmGen/IUarchTestParallelBuild.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
3 | namespace AsmGen
4 | {
5 |     public interface IUarchTestParallelBuild : IUarchTest
6 |     {
7 |         /// <summary>
8 |         /// Generate and write out NASM files
9 |         /// </summary>
10 |         /// <returns>list of nasm filenames to include in build</returns>
11 |         public List<string> GenerateNasmFiles();
12 |
13 |     }
14 | }
15 |
--------------------------------------------------------------------------------
/CoreClockChecker/Makefile:
--------------------------------------------------------------------------------
1 | amd64:
2 | x86_64-linux-gnu-gcc -pthread -O3 CoreClockChecker.c CoreClockChecker_x86.s -o CoreClockChecker -lm
3 | win64:
4 | x86_64-w64-mingw32-gcc -pthread -O3 CoreClockChecker.c CoreClockChecker_x86.s -o CoreClockChecker -lm
5 | boostclockchecker:
6 | gcc -O3 BoostClockChecker.c BoostClockChecker.s -o BoostClockChecker
7 | win64boostclockchecker:
8 | x86_64-w64-mingw32-gcc BoostClockChecker.c BoostClockChecker.s -o BoostClockChecker.exe
9 |
--------------------------------------------------------------------------------
/.github/workflows/linux.yaml:
--------------------------------------------------------------------------------
1 | name: Build benchmarks on Ubuntu
2 | on: [push, pull_request]
3 | jobs:
4 | BuildBenchmarks:
5 | # Only Ubuntu for now.
6 | runs-on: ubuntu-latest
7 | steps:
8 | - run: sudo apt-get update
9 | - run: sudo apt-get install -y build-essential ocl-icd-opencl-dev opencl-headers
10 | - name: Check out repository code
11 | uses: actions/checkout@v3
12 | - name: Try to build all benchmarks with a Makefile
13 | run: make -j4 all
14 |
--------------------------------------------------------------------------------
/InstructionRate/Makefile:
--------------------------------------------------------------------------------
1 | x86instructionrate: x86instructionrate.s x86instructionrate.c x86instructionrate.h
2 | gcc -O3 x86instructionrate.s x86instructionrate.c x86instructionrate.h -o x86instructionrate
3 | arm_instructionrate: arminstructionrate.s arminstructionrate.c arminstructionrate.h
4 | gcc -O3 arminstructionrate.s arminstructionrate.c arminstructionrate.h -o arminstructionrate
5 | x86_instructionrate_win64:
6 | x86_64-w64-mingw32-gcc -O3 x86instructionrate.c x86instructionrate.s x86instructionrate.h -o x86instructionrate.exe
7 |
--------------------------------------------------------------------------------
/GpuMemLatency/Makefile:
--------------------------------------------------------------------------------
1 | UNAME_S := $(shell uname -s)
2 | CC = gcc
3 | CFLAGS = -O3
4 | DEPS = ../common/timings.h
5 | OBJ = opencltest.o latency_test.o bw_test.o common.o atomic_test.o instruction_rate.o timing.o
6 |
7 | ifeq ($(UNAME_S),Darwin)
8 | LDFLAGS += -framework OpenCL
9 | else
10 | LDFLAGS += -lOpenCL
11 | endif
12 |
13 | opencltest: $(OBJ)
14 | 	$(CC) $(CFLAGS) $^ -o $@ -lm $(LDFLAGS)
15 | # Link and compile steps both go through $(CC) so overriding CC on the command line affects every step.
16 | %.o: %.c $(DEPS)
17 | 	$(CC) $(CFLAGS) -c -o $@ $<
18 | # timing.c lives outside this folder; listing it as a prerequisite makes timing.o rebuild when it changes.
19 | timing.o: ../common/timing.c
20 | 	$(CC) $(CFLAGS) -c ../common/timing.c -o timing.o
21 |
--------------------------------------------------------------------------------
/MemoryLatency/MemoryLatencyFunctions.asm:
--------------------------------------------------------------------------------
1 | section .text
2 | bits 64
3 | ; Both routines read their two arguments from rcx and rdx and save/restore the r14/r15 scratch registers they use.
4 | global preplatencyarr
5 | global latencytest
6 | ; preplatencyarr: rcx = array base, rdx = element count (must be nonzero: the loop body runs before the count is checked).
7 | preplatencyarr:
8 | push r15
9 | push r14
10 | xor r15, r15 ; array index
11 | preplatencyarr_loop:
12 | mov r14, [rcx + r15 * 8] ; load the 8-byte index stored in this slot
13 | lea r14, [rcx + r14 * 8] ; turn that index into the address of the slot it names
14 | mov [rcx + r15 * 8], r14 ; overwrite the index with the address
15 | inc r15
16 | cmp rdx, r15
17 | jne preplatencyarr_loop ; stop once the index equals the element count
18 | pop r14
19 | pop r15
20 | ret
21 | ; latencytest: rcx = iteration count (must be nonzero), rdx = address holding the first pointer of the chain; returns the sum of loaded pointers in rax.
22 | latencytest:
23 | push r15
24 | mov r15, [rdx] ; first pointer of the chain
25 | xor rax, rax
26 | latencytest_loop:
27 | mov r15, [r15] ; dependent load: the next address comes from the previous load
28 | add rax, r15 ; accumulate the loaded values into the return register
29 | dec rcx
30 | jnz latencytest_loop
31 | pop r15
32 | ret
33 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Microbenchmarks
2 | Trying to figure various CPU things out
3 |
4 | Basically my playground to microbenchmark various CPU-related things like ROB/register file sizes, lock/cache coherency latency, and cache/memory performance.
5 |
6 | # Building Clammicrobench with Generated Code
7 | Get NASM (https://www.nasm.us/) and make sure it's in your path. Then things should build under Visual Studio 2019.
8 |
9 | Some microbenchmarks have the source code and assembly generated by C# code, to avoid crazy stuff like self modifying code. For clammicrobench, build/run the AsmGen project. Pass "autocopy" on the command line to have it automatically place generated ASM files for Visual Studio. Then, the clammicrobench project should build.
10 |
--------------------------------------------------------------------------------
/CoreClockChecker/CoreClockChecker_x86.s:
--------------------------------------------------------------------------------
1 | .global clktest
2 |
3 | /*
4 | %rdi = arg0 = iteration count; counted down by 20 per loop pass, and the loop only exits when it reaches exactly zero
5 | */
6 | clktest:
7 | push %rbx
8 | push %r8
9 | push %r9
10 | mov $1, %r8 /* addend for every add in the chain */
11 | mov $20, %r9 /* number of adds per loop pass, subtracted from the count each pass */
12 | xor %rbx, %rbx /* accumulator: each add depends on the previous one */
13 | clktest_loop:
14 | add %r8, %rbx
15 | add %r8, %rbx
16 | add %r8, %rbx
17 | add %r8, %rbx
18 | add %r8, %rbx
19 | add %r8, %rbx
20 | add %r8, %rbx
21 | add %r8, %rbx
22 | add %r8, %rbx
23 | add %r8, %rbx
24 | add %r8, %rbx
25 | add %r8, %rbx
26 | add %r8, %rbx
27 | add %r8, %rbx
28 | add %r8, %rbx
29 | add %r8, %rbx
30 | add %r8, %rbx
31 | add %r8, %rbx
32 | add %r8, %rbx
33 | add %r8, %rbx
34 | sub %r9, %rdi /* 20 adds done, so take 20 off the remaining count */
35 | jnz clktest_loop /* a count that is not a multiple of 20 skips past zero and keeps looping */
36 | pop %r9
37 | pop %r8
38 | pop %rbx
39 | ret
40 |
--------------------------------------------------------------------------------
/AsmGen/IUarchTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 |     /// <summary>
6 |     /// Contract for one test: identifying metadata plus generation hooks
7 |     /// that each receive a caller-supplied <see cref="StringBuilder"/>.
8 |     /// </summary>
9 |     public interface IUarchTest
10 |     {
11 |         // enough to generate global lines, function calls, and let user pick from tests
12 |         string Prefix { get; }
13 |         string Description { get; }
14 |         bool DivideTimeByCount { get; }
15 |
16 |         // assembly bodies and test blocks
17 |         void GenerateX86GccAsm(StringBuilder sb);
18 |         void GenerateX86NasmAsm(StringBuilder sb);
19 |         void GenerateArmAsm(StringBuilder sb);
20 |         void GenerateVsTestBlock(StringBuilder sb);
21 |         void GenerateTestBlock(StringBuilder sb);
22 |
23 |         // global lines for the assembly files
24 |         void GenerateAsmGlobalLines(StringBuilder sb);
25 |         void GenerateNasmGlobalLines(StringBuilder sb);
26 |
27 |         // extern lines
28 |         void GenerateVsExternLines(StringBuilder sb);
29 |         void GenerateExternLines(StringBuilder sb);
30 |     }
31 | }
32 |
--------------------------------------------------------------------------------
/AsmGen/DataFiles/CommonFunctions.c:
--------------------------------------------------------------------------------
1 | // this is a partial C file that's appended into generated code
2 | // stuff here is generic enough to work for both windows/vs and gcc
3 |
4 | // Prints the CSV header row: "x" followed by each of the xLen column labels in xCounts.
5 | void printCsvHeader(uint32_t* xCounts, uint32_t xLen) {
6 |     printf("x");
7 |     for (uint32_t testSizeIdx = 0; testSizeIdx < xLen; testSizeIdx++) {
8 |         // %u rather than %d: the labels are unsigned 32-bit values
9 |         printf(", %u", xCounts[testSizeIdx]);
10 |     }
11 |
12 |     printf("\n");
13 | }
14 |
15 | // print results in format that excel can take
16 | // arr holds yLen rows of xLen floats (row-major); xCounts labels the columns, yCounts labels the rows.
17 | void printResultFloatArr(float* arr, uint32_t *xCounts, uint32_t xLen, uint32_t *yCounts, uint32_t yLen) {
18 |     uint32_t testSizeCount = xLen;
19 |     printCsvHeader(xCounts, xLen);
20 |     for (uint32_t branchCountIdx = 0; branchCountIdx < yLen; branchCountIdx++) {
21 |         // row header
22 |         printf("%u", yCounts[branchCountIdx]);
23 |         for (uint32_t testSizeIdx = 0; testSizeIdx < testSizeCount; testSizeIdx++) {
24 |             printf(",%f", arr[branchCountIdx * testSizeCount + testSizeIdx]);
25 |         }
26 |
27 |         printf("\n");
28 |     }
29 | }
30 |
--------------------------------------------------------------------------------
/CoreClockChecker/BoostClockChecker_arm.s:
--------------------------------------------------------------------------------
1 | .text
2 | .global clktsctest
3 |
4 | /* x0 = iterations (counted down by 20 per loop pass; the loop exits only at exactly zero), return elapsed cntvct_el0 ticks in x0 */
5 | clktsctest:
6 | sub sp, sp, #0x40 /* reserve 64 bytes of stack for the register pairs saved below */
7 | stp x10, x11, [sp, #0x10]
8 | stp x12, x13, [sp, #0x20]
9 | stp x14, x15, [sp, #0x30]
10 | mov x10, 1 /* addend for every add in the chain */
11 | mov x11, 20 /* adds per loop pass, subtracted from the count each pass */
12 | mov x12, 0 /* accumulator: each add depends on the previous one */
13 | /* stackoverflow says this is a good idea */
14 | mrs x14, cntvct_el0 /* counter value before the loop */
15 | clktsctest_loop:
16 | add x12, x12, x10
17 | add x12, x12, x10
18 | add x12, x12, x10
19 | add x12, x12, x10
20 | add x12, x12, x10
21 | add x12, x12, x10
22 | add x12, x12, x10
23 | add x12, x12, x10
24 | add x12, x12, x10
25 | add x12, x12, x10
26 | add x12, x12, x10
27 | add x12, x12, x10
28 | add x12, x12, x10
29 | add x12, x12, x10
30 | add x12, x12, x10
31 | add x12, x12, x10
32 | add x12, x12, x10
33 | add x12, x12, x10
34 | add x12, x12, x10
35 | add x12, x12, x10
36 | sub x0, x0, x11 /* 20 adds done, so take 20 off the remaining count */
37 | cbnz x0, clktsctest_loop
38 | mrs x15, cntvct_el0 /* counter value after the loop */
39 | sub x0, x15, x14 /* elapsed ticks = after - before */
40 | ldp x14, x15, [sp, #0x30]
41 | ldp x12, x13, [sp, #0x20]
42 | ldp x10, x11, [sp, #0x10]
43 | add sp, sp, #0x40
44 | ret
45 |
--------------------------------------------------------------------------------
/GpuMemLatency/OpenCL/include/CL/opencl.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_H
18 | #define __OPENCL_H
19 |
20 | #ifdef __cplusplus
21 | extern "C" {
22 | #endif
23 |
24 | #include <CL/cl.h>
25 | #include <CL/cl_gl.h>
26 | #include <CL/cl_gl_ext.h>
27 | #include <CL/cl_ext.h>
28 |
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 |
33 | #endif /* __OPENCL_H */
34 |
--------------------------------------------------------------------------------
/AsmGen/AsmGen.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.2.32516.85
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AsmGen", "AsmGen.csproj", "{B8930E86-946C-4831-B088-F571E73EEDC4}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Release|Any CPU = Release|Any CPU
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {B8930E86-946C-4831-B088-F571E73EEDC4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | {B8930E86-946C-4831-B088-F571E73EEDC4}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | {B8930E86-946C-4831-B088-F571E73EEDC4}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | {B8930E86-946C-4831-B088-F571E73EEDC4}.Release|Any CPU.Build.0 = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {4433D029-CD62-44B9-862E-A8DE52DA45CE}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/CoreClockChecker/BoostClockChecker.s:
--------------------------------------------------------------------------------
1 | .global clktsctest
2 |
3 | /*
4 |  * rcx = iterations, return elapsed TSC in rax
5 |  * The count is decremented by 20 per loop pass and the loop exits only at exactly zero,
6 |  * so iterations must be a nonzero multiple of 20.
7 |  * rdi is used as the loop counter. It is saved and restored because the calling convention
8 |  * that passes the first argument in rcx (Microsoft x64) treats rdi as callee-saved.
9 |  */
10 | clktsctest:
11 | push %rdx
12 | push %rbx
13 | push %rdi
14 | push %r8
15 | push %r9
16 | push %r10
17 | mov %rcx, %rdi /* loop counter lives in rdi */
18 | mov $1, %r8 /* addend for every add in the chain */
19 | mov $20, %r9 /* adds per loop pass */
20 | xor %rbx, %rbx
21 | rdtsc /* high 32 bits in EDX, low 32 bits in EAX */
22 | shl $32, %rdx /* shift high 32 bits into upper half of RDX */
23 | add %rax, %rdx /* place full 64-bit value in rdx */
24 | mov %rdx, %r10 /* keep the starting TSC value across the loop */
25 | clktsctest_loop:
26 | add %r8, %rbx
27 | add %r8, %rbx
28 | add %r8, %rbx
29 | add %r8, %rbx
30 | add %r8, %rbx
31 | add %r8, %rbx
32 | add %r8, %rbx
33 | add %r8, %rbx
34 | add %r8, %rbx
35 | add %r8, %rbx
36 | add %r8, %rbx
37 | add %r8, %rbx
38 | add %r8, %rbx
39 | add %r8, %rbx
40 | add %r8, %rbx
41 | add %r8, %rbx
42 | add %r8, %rbx
43 | add %r8, %rbx
44 | add %r8, %rbx
45 | add %r8, %rbx
46 | sub %r9, %rdi
47 | jnz clktsctest_loop
48 | rdtsc
49 | shl $32, %rdx
50 | add %rdx, %rax /* now rax has the new value */
51 | sub %r10, %rax /* subtract old TSC value from the new one, which should be larger */
52 | pop %r10
53 | pop %r9
54 | pop %r8
55 | pop %rdi
56 | pop %rbx
57 | pop %rdx
58 | ret
59 |
--------------------------------------------------------------------------------
/AsmGen/AsmGen.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 |
7 |
8 |
9 |
10 | Always
11 |
12 |
13 | Always
14 |
15 |
16 | Always
17 |
18 |
19 | Always
20 |
21 |
22 | Always
23 |
24 |
25 | Always
26 |
27 |
28 | Always
29 |
30 |
31 | Always
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/AsmGen/tests/RobTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 |     /// <summary>
6 |     /// "Reorder Buffer Test": hands a single "nop" filler instruction to the
7 |     /// shared structure-test generators for each target assembler.
8 |     /// </summary>
9 |     public class RobTest : UarchTest
10 |     {
11 |         private string[] nops;
12 |
13 |         public RobTest(int low, int high, int step)
14 |         {
15 |             Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
16 |             Prefix = "rob";
17 |             Description = "Reorder Buffer Test";
18 |             FunctionDefinitionParameters = "uint64_t iterations, int *arr";
19 |             GetFunctionCallParameters = "structIterations, A";
20 |             DivideTimeByCount = false;
21 |             nops = new[] { "nop" };
22 |         }
23 |
24 |         // GCC-syntax x86 output
25 |         public override void GenerateX86GccAsm(StringBuilder sb) =>
26 |             UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, nops, nops, true);
27 |
28 |         // NASM-syntax x86 output
29 |         public override void GenerateX86NasmAsm(StringBuilder sb) =>
30 |             UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, nops, nops, true);
31 |
32 |         // ARM output
33 |         public override void GenerateArmAsm(StringBuilder sb) =>
34 |             UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, nops, nops, true);
35 |     }
36 | }
37 |
--------------------------------------------------------------------------------
/clammicrobench/clammicrobench.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 | Source Files
25 |
26 |
27 | Source Files
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/AsmGen/DataFiles/VsBranchHistFunction.c:
--------------------------------------------------------------------------------
1 | // partial C file that gets appended into generated code
2 |
3 | // Times one generated branch-history test function and returns the measured time per branch.
4 | //   historyLen     - number of entries in each per-branch pattern array
5 | //   branchCountIdx - index into branchCounts / branchtestFuncArr selecting the test variant
6 | //   random         - nonzero fills the patterns with random 0/1 values, zero fills them with 0
7 | // Returns 1e6 * elapsed milliseconds / iterations / branchCount.
8 | float runBranchHistTest(uint32_t historyLen, uint32_t branchCountIdx, int random) {
9 |     struct timeb start, end;
10 |     uint32_t branchCount = branchCounts[branchCountIdx];
11 |     uint64_t iterations = 160000000 / branchCount;
12 |     uint64_t(*branchtestFunc)(uint64_t, uint32_t**, uint32_t) = branchtestFuncArr[branchCountIdx];
13 |
14 |     // one pattern array per branch
15 |     uint32_t** testArrToArr = (uint32_t**)malloc(sizeof(uint32_t*) * branchCount);
16 |     for (uint32_t testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) {
17 |         uint32_t* testArr = (uint32_t*)malloc(sizeof(uint32_t) * historyLen);
18 |         // Parentheses are required: != binds tighter than &, so without them the mask collapses to 1
19 |         // and bit 0 of rand() is used instead of the intended 0x400 bit.
20 |         for (uint32_t i = 0; i < historyLen; i++) testArr[i] = random ? ((rand() & 0x400U) != 0) : 0;
21 |         testArrToArr[testArrIdx] = testArr;
22 |     }
23 |
24 |     ftime(&start);
25 |     branchtestFunc(iterations, testArrToArr, historyLen);
26 |     ftime(&end);
27 |     uint64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm);
28 |     float latency = 1e6 * (float)time_diff_ms / (float)iterations;
29 |
30 |     // give result in latency per branch
31 |     latency = latency / branchCount;
32 |
33 |     for (uint32_t testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) free(testArrToArr[testArrIdx]);
34 |     free(testArrToArr);
35 |     return latency;
36 | }
37 |
--------------------------------------------------------------------------------
/MemoryBandwidth/MemoryBandwidth/MemoryBandwidth.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 | Source Files
25 |
26 |
27 | Source Files
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/GpuMemLatency/OpenCL/include/CL/cl_gl_ext.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_CL_GL_EXT_H
18 | #define __OPENCL_CL_GL_EXT_H
19 |
20 | #ifdef __cplusplus
21 | extern "C" {
22 | #endif
23 |
24 | #include <CL/cl_gl.h>
25 |
26 | /*
27 | * cl_khr_gl_event extension
28 | */
29 | #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
30 |
31 | extern CL_API_ENTRY cl_event CL_API_CALL
32 | clCreateEventFromGLsyncKHR(cl_context context,
33 | cl_GLsync cl_GLsync,
34 | cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
35 |
36 | #ifdef __cplusplus
37 | }
38 | #endif
39 |
40 | #endif /* __OPENCL_CL_GL_EXT_H */
41 |
--------------------------------------------------------------------------------
/GpuMemLatency/opencltest.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.30503.244
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "opencltest", "opencltest.vcxproj", "{FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Debug|x86 = Debug|x86
12 | Release|x64 = Release|x64
13 | Release|x86 = Release|x86
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Debug|x64.ActiveCfg = Debug|x64
17 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Debug|x64.Build.0 = Debug|x64
18 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Debug|x86.ActiveCfg = Debug|Win32
19 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Debug|x86.Build.0 = Debug|Win32
20 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Release|x64.ActiveCfg = Release|x64
21 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Release|x64.Build.0 = Release|x64
22 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Release|x86.ActiveCfg = Release|Win32
23 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Release|x86.Build.0 = Release|Win32
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {4447E91D-E7A1-4249-87A7-E75A78167E71}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/AsmGen/DataFiles/BranchhistTestBlock.c:
--------------------------------------------------------------------------------
1 | uint32_t testSizeCount = sizeof(branchHistoryLengths) / sizeof(branchHistoryLengths[0]); // element size taken from the array itself
2 | initializeBranchHistFuncArr();
3 | srand((unsigned int)time(NULL));
4 |
5 | size_t resultSize = sizeof(float) * maxBranchCount * testSizeCount;
6 | float* randomResults = (float*)malloc(resultSize);
7 | float* predictableResults = (float*)malloc(resultSize);
8 | for (uint32_t branchCountIdx = 0; branchCountIdx < maxBranchCount; branchCountIdx++) {
9 |     for (uint32_t testSizeIdx = 0; testSizeIdx < testSizeCount; testSizeIdx++) {
10 |         uint32_t testSize = branchHistoryLengths[testSizeIdx];
11 |         uint32_t branchCount = branchCounts[branchCountIdx];
12 |         printf("Testing branch count %u history length %u\n", branchCount, testSize);
13 |         randomResults[branchCountIdx * testSizeCount + testSizeIdx] = runBranchHistTest(testSize, branchCountIdx, 1);
14 |         predictableResults[branchCountIdx * testSizeCount + testSizeIdx] = runBranchHistTest(testSize, branchCountIdx, 0);
15 |         printf("%u, %f, %f\n", testSize,
16 |             randomResults[branchCountIdx * testSizeCount + testSizeIdx],
17 |             predictableResults[branchCountIdx * testSizeCount + testSizeIdx]);
18 |     }
19 | }
20 |
21 | printf("Random:\n");
22 | printResultFloatArr(randomResults, branchHistoryLengths, testSizeCount, branchCounts, maxBranchCount);
23 | printf("\nPredictable:\n");
24 | printResultFloatArr(predictableResults, branchHistoryLengths, testSizeCount, branchCounts, maxBranchCount);
25 |
26 | free(randomResults);
27 | free(predictableResults);
28 |
--------------------------------------------------------------------------------
/AsmGen/tests/FlagsRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 |     /// <summary>
6 |     /// "Flags register file capacity" test: hands one flag-writing filler
7 |     /// instruction per target to the shared structure-test generators.
8 |     /// </summary>
9 |     public class FlagRfTest : UarchTest
10 |     {
11 |         public FlagRfTest(int low, int high, int step)
12 |         {
13 |             Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
14 |             Prefix = "flagrf";
15 |             Description = "Flags register file capacity";
16 |             FunctionDefinitionParameters = "uint64_t iterations, int *arr";
17 |             GetFunctionCallParameters = "structIterations, A";
18 |             DivideTimeByCount = false;
19 |         }
20 |
21 |         // GCC-syntax x86 output
22 |         public override void GenerateX86GccAsm(StringBuilder sb)
23 |         {
24 |             string[] filler = { "test %r15, %r14" };
25 |             UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, filler, filler, true);
26 |         }
27 |
28 |         // NASM-syntax x86 output
29 |         public override void GenerateX86NasmAsm(StringBuilder sb)
30 |         {
31 |             string[] filler = { "test r15, r14" };
32 |             UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, filler, filler, true);
33 |         }
34 |
35 |         // ARM output
36 |         public override void GenerateArmAsm(StringBuilder sb)
37 |         {
38 |             string[] filler = { "cmp x14, x15" };
39 |             UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, filler, filler, true);
40 |         }
41 |     }
42 | }
43 |
--------------------------------------------------------------------------------
/MemoryLatency/MemoryLatency.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.31229.75
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MemoryLatency", "MemoryLatency.vcxproj", "{3A98A230-A87B-432D-931D-369872DE24AF}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Debug|x86 = Debug|x86
12 | Release|x64 = Release|x64
13 | Release|x86 = Release|x86
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {3A98A230-A87B-432D-931D-369872DE24AF}.Debug|x64.ActiveCfg = Debug|x64
17 | {3A98A230-A87B-432D-931D-369872DE24AF}.Debug|x64.Build.0 = Debug|x64
18 | {3A98A230-A87B-432D-931D-369872DE24AF}.Debug|x86.ActiveCfg = Debug|Win32
19 | {3A98A230-A87B-432D-931D-369872DE24AF}.Debug|x86.Build.0 = Debug|Win32
20 | {3A98A230-A87B-432D-931D-369872DE24AF}.Release|x64.ActiveCfg = Release|x64
21 | {3A98A230-A87B-432D-931D-369872DE24AF}.Release|x64.Build.0 = Release|x64
22 | {3A98A230-A87B-432D-931D-369872DE24AF}.Release|x86.ActiveCfg = Release|Win32
23 | {3A98A230-A87B-432D-931D-369872DE24AF}.Release|x86.Build.0 = Release|Win32
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {F2D00DD2-A22B-4A3C-A2FF-9CE8CF9070D1}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/clammicrobench/clammicrobench.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.31410.357
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clammicrobench", "clammicrobench.vcxproj", "{7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Debug|x86 = Debug|x86
12 | Release|x64 = Release|x64
13 | Release|x86 = Release|x86
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Debug|x64.ActiveCfg = Debug|x64
17 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Debug|x64.Build.0 = Debug|x64
18 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Debug|x86.ActiveCfg = Debug|Win32
19 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Debug|x86.Build.0 = Debug|Win32
20 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Release|x64.ActiveCfg = Release|x64
21 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Release|x64.Build.0 = Release|x64
22 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Release|x86.ActiveCfg = Release|Win32
23 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Release|x86.Build.0 = Release|Win32
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {A4441112-E760-4CF1-9A63-6BE0A3ACB1C6}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/CoherencyLatency/CoherencyLatency.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.31025.194
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CoherencyLatency", "CoherencyLatency.vcxproj", "{6D9CCC8C-09F5-484B-8630-BE18A9CF1995}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Debug|x86 = Debug|x86
12 | Release|x64 = Release|x64
13 | Release|x86 = Release|x86
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Debug|x64.ActiveCfg = Debug|x64
17 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Debug|x64.Build.0 = Debug|x64
18 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Debug|x86.ActiveCfg = Debug|Win32
19 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Debug|x86.Build.0 = Debug|Win32
20 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Release|x64.ActiveCfg = Release|x64
21 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Release|x64.Build.0 = Release|x64
22 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Release|x86.ActiveCfg = Release|Win32
23 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Release|x86.Build.0 = Release|Win32
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {A6E60C3D-60ED-4DBF-B4AA-7C1C3A140325}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/MemoryLatency/README.md:
--------------------------------------------------------------------------------
1 | # Memory Latency Test
2 |
3 | This test measures random memory access latency within increasing array sizes, and (hopefully) shows the latency and size of caches as well as memory latency. Modes, passed as the first parameter:
4 | - (no parameter) - Uses plain C code and `current = A[current]` to measure latency
5 | - asm - Uses `mov r15, [r15]` for x86-64 or `ldr x15, [x15]` for aarch64. This can help accurately measure L1D latency, because many x86 CPUs take an extra cycle to calculate "complex" addresses, and compilers like to generate those for the plain C version above. This doesn't seem to make a difference for ARM.
6 | - tlb - Accesses just one element per 4 KB region to measure virtual to physical address translation latency (so TLBs and page walkers). Cache latency is subtracted out to isolate address translation latency.
7 |
8 | # Building and Running
9 |
10 | Make sure optimization is on, or L1D latencies may be quite a bit higher than expected.
11 |
12 | ## Windows
13 | Under WSL, do `x86_64-w64-mingw32-gcc-win32 -O3 MemoryLatency.c MemoryLatency_x86.s -o MemoryLatency.exe`
14 |
15 | Run with
16 | `MemoryLatency.exe`
17 | `MemoryLatency.exe asm`
18 | `MemoryLatency.exe tlb`
19 | ## Linux, x86-64
20 | `gcc -O3 MemoryLatency.c MemoryLatency_x86.s -o MemoryLatency`
21 |
22 | ## Linux/Android+Termux, aarch64
23 | `gcc -O3 MemoryLatency.c MemoryLatency_arm.s -o MemoryLatency`
24 |
25 | ## VS version
26 | Open solution and build. But this will be removed in the near future because cross-compiling from WSL is sufficient to produce a Windows exe, since calling conventions are lined up.
27 |
--------------------------------------------------------------------------------
/MemoryBandwidth/MemoryBandwidth/MemoryBandwidth.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.31410.357
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MemoryBandwidth", "MemoryBandwidth.vcxproj", "{E968D202-64A2-43A5-8BBD-D7D010D06564}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Debug|x86 = Debug|x86
12 | Release|x64 = Release|x64
13 | Release|x86 = Release|x86
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Debug|x64.ActiveCfg = Debug|x64
17 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Debug|x64.Build.0 = Debug|x64
18 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Debug|x86.ActiveCfg = Debug|Win32
19 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Debug|x86.Build.0 = Debug|Win32
20 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Release|x64.ActiveCfg = Release|x64
21 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Release|x64.Build.0 = Release|x64
22 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Release|x86.ActiveCfg = Release|Win32
23 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Release|x86.Build.0 = Release|Win32
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {2EA86D6F-CEE0-40A9-B4DD-AC59CCAD358F}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/GpuMemLatency/OpenCL/README.md:
--------------------------------------------------------------------------------
1 | # OpenCL™ API Headers
2 |
3 | This repository contains C language headers for the OpenCL API.
4 |
5 | The authoritative public repository for these headers is located at:
6 |
7 | https://github.com/KhronosGroup/OpenCL-Headers
8 |
9 | Issues, proposed fixes for issues, and other suggested changes should be
10 | created using Github.
11 |
12 | ## Branch Structure
13 |
14 | The OpenCL API headers in this repository are Unified headers and are designed
15 | to work with all released OpenCL versions. This differs from previous OpenCL
16 | API headers, where version-specific API headers either existed in separate
17 | branches, or in separate folders in a branch.
18 |
19 | ## Compiling for a Specific OpenCL Version
20 |
21 | By default, the OpenCL API headers in this repository are for the latest
22 | OpenCL version (currently OpenCL 2.2). To use these API headers to target
23 | a different OpenCL version, an application may `#define` the preprocessor
24 | value `CL_TARGET_OPENCL_VERSION` before including the OpenCL API headers.
25 | The `CL_TARGET_OPENCL_VERSION` is a three digit decimal value representing
26 | the OpenCL API version.
27 |
28 | For example, to enforce usage of no more than the OpenCL 1.2 APIs, you may
29 | include the OpenCL API headers as follows:
30 |
31 | ```
32 | #define CL_TARGET_OPENCL_VERSION 120
33 | #include <CL/opencl.h>
34 | ```
35 |
36 | ## Directory Structure
37 |
38 | ```
39 | README.md This file
40 | LICENSE Source license for the OpenCL API headers
41 | CL/ Unified OpenCL API headers tree
42 | ```
43 |
44 | ## License
45 |
46 | See [LICENSE](LICENSE).
47 |
48 | ---
49 |
50 | OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos.
51 |
--------------------------------------------------------------------------------
/AsmGen/DataFiles/GccBranchHistFunction.c:
--------------------------------------------------------------------------------
1 | // this is a partial C file that's appended into generated code
2 |
3 | // Run a test, return the result in time (ns) per branch
4 | // historyLen: length of random array that the test loops through
5 | // branchCountIdx: index into array of branch counts, max determined by generated header/asm
6 | // random: if 1, randomize test array contents. If 0, fill with zeroes
7 | float runBranchHistTest(uint32_t historyLen, uint32_t branchCountIdx, int random) {
8 | struct timeval startTv, endTv;
9 | struct timezone startTz, endTz;
10 | uint32_t branchCount = branchCounts[branchCountIdx];
11 | uint64_t iterations = 80000000 / branchCount;
12 | uint64_t(*branchtestFunc)(uint64_t, uint32_t**, uint32_t) __attribute((sysv_abi)) = branchtestFuncArr[branchCountIdx];
13 |
14 | uint32_t** testArrToArr = (uint32_t**)malloc(sizeof(uint32_t*) * branchCount);
15 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) {
16 | uint32_t* testArr = (uint32_t*)malloc(sizeof(uint32_t) * historyLen);
17 | for (uint32_t i = 0; i < historyLen; i++) testArr[i] = random ? rand() % 2 : 0;
18 | testArrToArr[testArrIdx] = testArr;
19 | }
20 |
21 | gettimeofday(&startTv, &startTz);
22 | branchtestFunc(iterations, testArrToArr, historyLen);
23 | gettimeofday(&endTv, &endTz);
24 | uint64_t time_diff_ms = 1000 * (endTv.tv_sec - startTv.tv_sec) + ((endTv.tv_usec - startTv.tv_usec) / 1000);
25 | float latency = 1e6 * (float)time_diff_ms / (float)iterations;
26 |
27 | // give result in latency per branch
28 | latency = latency / branchCount;
29 |
30 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) free(testArrToArr[testArrIdx]);
31 | free(testArrToArr);
32 | return latency;
33 | }
34 |
--------------------------------------------------------------------------------
/AsmGen/DataFiles/IndirectBranchTestBlock.c:
--------------------------------------------------------------------------------
1 | // generated code will have:
2 | // - indirectBranchTargetCounts = array containing # of targets per branch
3 | // - indirectBranchCounts = array containing # of branches to test
4 | // - maxIndirectBranchCount = length of ^^
5 | // - initializeIndirectBranchFuncArr = populates
6 |
7 | uint32_t testSizeCount = sizeof(indirectBranchTargetCounts) / sizeof(int);
8 | initializeIndirectBranchFuncArr();
9 | srand(time(NULL));
10 |
11 | size_t resultSize = sizeof(float) * maxIndirectBranchCount * testSizeCount;
12 | float* results = (float*)malloc(resultSize);
13 | float* refResults = (float*)malloc(resultSize);
14 | for (uint32_t branchCountIdx = 0; branchCountIdx < maxIndirectBranchCount; branchCountIdx++) {
15 | for (uint32_t targetCountIdx = 0; targetCountIdx < testSizeCount; targetCountIdx++) {
16 | uint32_t testSize = indirectBranchTargetCounts[targetCountIdx];
17 | uint32_t branchCount = indirectBranchCounts[branchCountIdx];
18 | printf("Testing branch count %d target count %d:", branchCount, testSize);
19 | results[branchCountIdx * testSizeCount + targetCountIdx] = runIndirectBranchTest(branchCountIdx, targetCountIdx, 0);
20 | refResults[branchCountIdx * testSizeCount + targetCountIdx] = runIndirectBranchTest(branchCountIdx, targetCountIdx, 2);
21 | printf("%f ns, reference %f ns\n",
22 | results[branchCountIdx * testSizeCount + targetCountIdx],
23 | refResults[branchCountIdx * testSizeCount + targetCountIdx]);
24 | }
25 | }
26 |
27 | printf("Indirect branch results:\n");
28 | printResultFloatArr(results, indirectBranchTargetCounts, testSizeCount, indirectBranchCounts, maxIndirectBranchCount);
29 | printf("Reference indirect branch results:\n");
30 | printResultFloatArr(refResults, indirectBranchTargetCounts, testSizeCount, indirectBranchCounts, maxIndirectBranchCount);
31 |
32 | free(results);
33 | free(refResults);
34 |
--------------------------------------------------------------------------------
/AsmGen/tests/LoadDivSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Load scheduler capacity test. Emits indexed loads as the filler instructions
    /// for the div-based structure test helpers.
    /// </summary>
    public class LoadDivSchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test for the filler counts produced from the given range.
        /// </summary>
        /// <param name="low">Passed to <c>GenerateCountArray</c> as the low bound.</param>
        /// <param name="high">Passed to <c>GenerateCountArray</c> as the high bound.</param>
        /// <param name="step">Passed to <c>GenerateCountArray</c> as the step.</param>
        public LoadDivSchedTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "loaddivsched";
            Description = "Load Scheduler Capacity Test, using divs to block retirement";
            FunctionDefinitionParameters = "uint64_t iterations, int count, int *arr2";
            GetFunctionCallParameters = "structIterations, list_size, B";
            DivideTimeByCount = false;
        }

        /// <summary>Emits the x86-64 functions in AT&amp;T syntax.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // The same load sequence is supplied for both filler slots.
            string[] loads = { " mov (%r8, %rdx, 4), %r15" };

            UarchTestHelpers.GenerateX86AsmDivStructureTestFuncs(sb, Counts, Prefix, loads, loads, false);
        }

        /// <summary>Emits the x86-64 functions in NASM syntax.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] loads = { " mov r15, [r8 + rdx * 4]" };

            UarchTestHelpers.GenerateX86NasmDivStructureTestFuncs(sb, Counts, Prefix, loads, loads, false);
        }

        /// <summary>Emits the aarch64 functions.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // Unlike the x86 variants, the two filler slots index with different registers (w25 and w26).
            string[] firstLoads = { " ldr w15, [x2, w25, uxtw #2]" };
            string[] secondLoads = { " ldr w15, [x2, w26, uxtw #2]" };

            UarchTestHelpers.GenerateArmAsmDivStructureTestFuncs(sb, Counts, Prefix, firstLoads, secondLoads, false);
        }
    }
}
45 |
--------------------------------------------------------------------------------
/GpuMemLatency/instruction_rate_fp16_kernel.cl:
--------------------------------------------------------------------------------
1 | #define rate_local_mem_test_size 256
2 | __kernel void fp16_add_rate_test(__global half8 *A, int count, __global half8 *ret) {
3 | int tid = get_local_id(0);
4 | int max_offset = get_local_size(0);
5 | __global half8 *local_a = A;
6 |
7 | int masked_tid = tid & (rate_local_mem_test_size - 1);
8 | half8 v0 = local_a[masked_tid];
9 | half8 v1 = local_a[masked_tid + 1];
10 | half8 v2 = local_a[masked_tid + 2];
11 | half8 v3 = local_a[masked_tid + 3];
12 | half8 v4 = v0 + v1;
13 | half8 v5 = v0 + v2;
14 | half8 v6 = v0 + v3;
15 | half8 v7 = v1 + v2;
16 | half8 acc = local_a[0];
17 |
18 | for (int i = 0; i < count; i++) {
19 | v0 += acc;
20 | v1 += acc;
21 | v2 += acc;
22 | v3 += acc;
23 | v4 += acc;
24 | v5 += acc;
25 | v6 += acc;
26 | v7 += acc;
27 | }
28 |
29 | ret[get_global_id(0)] = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7;
30 | }
31 |
32 | __kernel void fp16_fma_rate_test(__global half8 *A, int count, __global half8 *ret) {
33 | int tid = get_local_id(0);
34 | int max_offset = get_local_size(0);
35 | __global half8 *local_a = A;
36 |
37 | int masked_tid = tid & (rate_local_mem_test_size - 1);
38 | half8 v0 = local_a[masked_tid];
39 | half8 v1 = local_a[masked_tid + 1];
40 | half8 v2 = local_a[masked_tid + 2];
41 | half8 v3 = local_a[masked_tid + 3];
42 | half8 v4 = v0 + v1;
43 | half8 v5 = v0 + v2;
44 | half8 v6 = v0 + v3;
45 | half8 v7 = v1 + v2;
46 | half8 acc = local_a[0];
47 |
48 | for (int i = 0; i < count; i++) {
49 | v0 += acc * v0;
50 | v1 += acc * v1;
51 | v2 += acc * v2;
52 | v3 += acc * v3;
53 | v4 += acc * v4;
54 | v5 += acc * v5;
55 | v6 += acc * v6;
56 | v7 += acc * v7;
57 | }
58 |
59 | ret[get_global_id(0)] = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7;
60 | }
61 |
--------------------------------------------------------------------------------
/AsmGen/tests/LdqStqTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Mixed load/store queue test. The filler sequence alternates stores and loads.
    /// </summary>
    public class LdqStqTest : UarchTest
    {
        /// <summary>
        /// Sets up the test for the filler counts produced from the given range.
        /// </summary>
        /// <param name="low">Passed to <c>GenerateCountArray</c> as the low bound.</param>
        /// <param name="high">Passed to <c>GenerateCountArray</c> as the high bound.</param>
        /// <param name="step">Passed to <c>GenerateCountArray</c> as the step.</param>
        public LdqStqTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "mixldqstq";
            Description = "Mixed Load/Store Queue Test (mem ops pending retire)";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1";
            GetFunctionCallParameters = "structIterations, A, B";
        }

        /// <summary>Emits the x86-64 functions in AT&amp;T syntax.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // store, load, store, load
            string[] memOps =
            {
                " mov %r15, (%r8)",
                " mov (%rdx), %r14",
                " mov %r13, (%r8)",
                " mov (%rdx), %r12",
            };
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, memOps, memOps, true);
        }

        /// <summary>Emits the x86-64 functions in NASM syntax.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] memOps =
            {
                " mov [r8], r15",
                " mov r14, [rdx]",
                " mov [r8], r13",
                " mov r12, [rdx]",
            };
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, memOps, memOps, true);
        }

        /// <summary>Emits the aarch64 functions.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] memOps =
            {
                " str x15, [x2]",
                " ldr x14, [x1]",
                " str x13, [x2]",
                " ldr x12, [x1]",
            };
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, memOps, memOps, true);
        }
    }
}
47 |
--------------------------------------------------------------------------------
/GpuMemLatency/instruction_rate_fp64_kernel.cl:
--------------------------------------------------------------------------------
1 | #define rate_local_mem_test_size 256
2 | __kernel void fp64_add_rate_test(__global double2 *A, int count, __global double2 *ret) {
3 | int tid = get_local_id(0);
4 | int max_offset = get_local_size(0);
5 | __global double2 *local_a = A;
6 |
7 | int masked_tid = tid & (rate_local_mem_test_size - 1);
8 | double2 v0 = local_a[masked_tid];
9 | double2 v1 = local_a[masked_tid + 1];
10 | double2 v2 = local_a[masked_tid + 2];
11 | double2 v3 = local_a[masked_tid + 3];
12 | double2 v4 = v0 + v1;
13 | double2 v5 = v0 + v2;
14 | double2 v6 = v0 + v3;
15 | double2 v7 = v1 + v2;
16 | double2 acc = local_a[0];
17 |
18 | for (int i = 0; i < count; i++) {
19 | v0 += acc;
20 | v1 += acc;
21 | v2 += acc;
22 | v3 += acc;
23 | v4 += acc;
24 | v5 += acc;
25 | v6 += acc;
26 | v7 += acc;
27 | }
28 |
29 | ret[get_global_id(0)] = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7;
30 | }
31 |
32 | __kernel void fp64_fma_rate_test(__global double2 *A, int count, __global double2 *ret) {
33 | int tid = get_local_id(0);
34 | int max_offset = get_local_size(0);
35 | __global double2 *local_a = A;
36 |
37 | int masked_tid = tid & (rate_local_mem_test_size - 1);
38 | double2 v0 = local_a[masked_tid];
39 | double2 v1 = local_a[masked_tid + 1];
40 | double2 v2 = local_a[masked_tid + 2];
41 | double2 v3 = local_a[masked_tid + 3];
42 | double2 v4 = v0 + v1;
43 | double2 v5 = v0 + v2;
44 | double2 v6 = v0 + v3;
45 | double2 v7 = v1 + v2;
46 | double2 acc = local_a[0];
47 |
48 | for (int i = 0; i < count; i++) {
49 | v0 += acc * v0;
50 | v1 += acc * v1;
51 | v2 += acc * v2;
52 | v3 += acc * v3;
53 | v4 += acc * v4;
54 | v5 += acc * v5;
55 | v6 += acc * v6;
56 | v7 += acc * v7;
57 | }
58 |
59 | ret[get_global_id(0)] = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7;
60 | }
61 |
--------------------------------------------------------------------------------
/AsmGen/README.md:
--------------------------------------------------------------------------------
1 | # Microbenchmark Generator
2 | C# project to generate C and assembly for CPU structure size benchmarks that use different code for each data point, making them
3 | impractical to write by hand. For more details on methodology for out-of-order structure size measurement, see https://blog.stuffedcow.net/2013/05/measuring-rob-capacity/
4 |
5 | Branch predictor benchmarks are different:
6 | - BTB tests: Spams different numbers of unconditional jumps in a loop, spaced by different distances because branch predictors sometimes have trouble tracking branches that are too close together.
7 | - Branch history test: Generates branches that are taken or not taken in some random pattern, then increases the length of that pattern and the number of branches. Each branch is given its own pattern. This test thus tries to see how long of a pattern the branch predictor can track before getting a lot of mispredicts.
8 | - Indirect branch prediction test: Varies the number of branch targets and branches to see how many total targets the indirect branch predictor can track
9 |
10 | # Building
11 |
12 | Compile the project and run AsmGen.exe. That gives several output files. Compilation for Linux:
13 | `gcc clammicrobench.c clammicrobench_x86.s -o clammicrobench` for x86_64
14 | `gcc clammicrobench.c clammicrobench_arm.s -o clammicrobench` for aarch64
15 | `aarch64-linux-gnu-gcc clammicrobench.c clammicrobench_arm.s -o clammicrobench` to cross compile for aarch64 (for example from a fast desktop)
16 |
17 | For Windows, run `AsmGen.exe autocopy`. That copies generated files to the /clammicrobench directory, assuming it's run from the default VS output location. Then, open /clammicrobench/clammicrobench.sln and build. You need nasm in your path for that, as covered on README.md at repo root.
18 |
19 | The indirect branch test can take a while to build with nasm, so you might want to reduce the branch and target counts for that. Or just keep it commented out.
20 |
21 | # Running
22 | Generally, the syntax is `clammicrobench [test name] [list size for latency test] [iteration count]`. The last two parameters are optional.
23 |
--------------------------------------------------------------------------------
/AsmGen/tests/RorSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Rotate scheduler capacity test. The filler is a single rotate-by-one on a
    /// register that is loaded by the post-load instruction.
    /// </summary>
    public class RorSchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test for the filler counts produced from the given range.
        /// </summary>
        /// <param name="low">Passed to <c>GenerateCountArray</c> as the low bound.</param>
        /// <param name="high">Passed to <c>GenerateCountArray</c> as the high bound.</param>
        /// <param name="step">Passed to <c>GenerateCountArray</c> as the step.</param>
        public RorSchedTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "rorsched";
            Description = "Rotate Scheduler Capacity Test";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            GetFunctionCallParameters = "structIterations, A";
            DivideTimeByCount = false;
        }

        /// <summary>Emits the x86-64 functions in AT&amp;T syntax.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] rotates = { " ror $1, %r15" };

            // r15 is copied from rdi / rsi after each load, then rotated by the filler
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(
                sb,
                Counts,
                Prefix,
                rotates,
                rotates,
                false,
                postLoadInstrs1: " mov %rdi, %r15",
                postLoadInstrs2: " mov %rsi, %r15");
        }

        /// <summary>Emits the x86-64 functions in NASM syntax.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] rotates = { " ror r15, 1" };

            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(
                sb,
                Counts,
                Prefix,
                rotates,
                rotates,
                false,
                postLoadInstrs1: " mov r15, rdi",
                postLoadInstrs2: " mov r15, rsi");
        }

        /// <summary>Emits the aarch64 functions.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] rotates = { " ror x15, x15, #1" };

            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(
                sb,
                Counts,
                Prefix,
                rotates,
                rotates,
                false,
                postLoadInstrs1: " mov x15, x25",
                postLoadInstrs2: " mov x15, x26");
        }
    }
}
45 |
--------------------------------------------------------------------------------
/AsmGen/tests/IntRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Integer register file test. The filler is a rotation of four adds, each
    /// writing a different destination register.
    /// </summary>
    public class IntRfTest : UarchTest
    {
        /// <summary>
        /// Sets up the test for the filler counts produced from the given range.
        /// </summary>
        /// <param name="low">Passed to <c>GenerateCountArray</c> as the low bound.</param>
        /// <param name="high">Passed to <c>GenerateCountArray</c> as the high bound.</param>
        /// <param name="step">Passed to <c>GenerateCountArray</c> as the step.</param>
        public IntRfTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "intrf";
            Description = "Integer RF Test";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            GetFunctionCallParameters = "structIterations, A";
            DivideTimeByCount = false;
        }

        /// <summary>Emits the x86-64 functions in AT&amp;T syntax.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] adds =
            {
                " add %r11, %r15",
                " add %r11, %r14",
                " add %r11, %r13",
                " add %r11, %r12",
            };
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, adds, adds, true);
        }

        /// <summary>Emits the x86-64 functions in NASM syntax.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] adds =
            {
                " add r15, r11",
                " add r14, r11",
                " add r13, r11",
                " add r12, r11",
            };
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, adds, adds, true);
        }

        /// <summary>Emits the aarch64 functions.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] adds =
            {
                " add x15, x15, x11",
                " add x14, x14, x11",
                " add x13, x13, x11",
                " add x12, x12, x11",
            };
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, adds, adds, true);
        }
    }
}
48 |
--------------------------------------------------------------------------------
/GpuMemLatency/opencltest.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef opencltestheader
4 | #define opencltestheader
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include "../common/timing.h"
11 |
12 | #define false 0
13 | #define true 1
14 |
15 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
16 | #ifndef __APPLE__
17 | #include
18 | #else
19 | #include
20 | #endif
21 | #define MAX_SOURCE_SIZE (0x100000)
22 |
23 | #define CACHELINE_SIZE 64
24 |
25 | #ifndef _MSC_VER
26 | #define _strnicmp strncmp
27 | #endif
28 | extern cl_device_id selected_device_id;
29 | extern cl_platform_id selected_platform_id;
30 | extern cl_ulong max_global_test_size;
31 | cl_context get_context_from_user(int platform_index, int device_index);
32 | cl_program build_program(cl_context context, const char* fname);
33 | void FillPatternArr(uint32_t* pattern_arr, uint32_t list_size, uint32_t byte_increment);
34 | cl_uint getCuCount();
35 |
36 | float int_atomic_latency_test(cl_context context,
37 | cl_command_queue command_queue,
38 | cl_kernel kernel,
39 | uint32_t iterations,
40 | short local);
41 | float latency_test(cl_context context,
42 | cl_command_queue command_queue,
43 | cl_kernel kernel,
44 | uint32_t list_size,
45 | uint32_t chase_iterations,
46 | short sattolo);
47 | float bw_test(cl_context context,
48 | cl_command_queue command_queue,
49 | cl_kernel kernel,
50 | uint64_t list_size,
51 | uint32_t thread_count,
52 | uint32_t local_size,
53 | uint32_t skip,
54 | uint32_t chase_iterations);
55 | void link_bw_test(cl_context context,
56 | cl_command_queue command_queue,
57 | cl_kernel kernel,
58 | uint32_t iterations);
59 | float c2c_atomic_latency_test(cl_context context,
60 | cl_command_queue command_queue,
61 | cl_kernel kernel,
62 | uint32_t iterations);
63 |
64 | float instruction_rate_test(cl_context context,
65 | cl_command_queue command_queue,
66 | uint32_t thread_count,
67 | uint32_t local_size,
68 | uint32_t chase_iterations);
69 | #endif
70 |
--------------------------------------------------------------------------------
/AsmGen/tests/JumpSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Not-taken jump scheduler capacity test. The filler is a compare followed by a
    /// conditional branch to a trap label that is emitted after the generated functions.
    /// </summary>
    public class JumpSchedTest : UarchTest
    {
        // Label targeted by every filler branch.
        private const string TrapLabel = "jumpsched_reallybadthing:";

        /// <summary>
        /// Sets up the test for the filler counts produced from the given range.
        /// </summary>
        /// <param name="low">Passed to <c>GenerateCountArray</c> as the low bound.</param>
        /// <param name="high">Passed to <c>GenerateCountArray</c> as the high bound.</param>
        /// <param name="step">Passed to <c>GenerateCountArray</c> as the step.</param>
        public JumpSchedTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "jmpsched";
            Description = "Not-taken Jump Scheduler Capacity Test";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            GetFunctionCallParameters = "structIterations, A";
            DivideTimeByCount = false;
        }

        /// <summary>Emits the x86-64 functions in AT&amp;T syntax, followed by the trap label.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] jumps = { " cmp %rdi, %rsi\n je jumpsched_reallybadthing" };
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, jumps, jumps, false);
            AppendTrap(sb, " int3");
        }

        /// <summary>Emits the x86-64 functions in NASM syntax, followed by the trap label.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] jumps = { " cmp rdi, rsi\n je jumpsched_reallybadthing" };
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, jumps, jumps, false);
            AppendTrap(sb, " int3");
        }

        /// <summary>Emits the aarch64 functions, followed by the trap label.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] jumps = { " cmp x25, x26\n b.eq jumpsched_reallybadthing" };
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, jumps, jumps, false);
            AppendTrap(sb, " .word 0xf7f0a000");
        }

        // Writes the trap label and the single line placed under it.
        private static void AppendTrap(StringBuilder sb, string trapLine)
        {
            sb.AppendLine(TrapLabel);
            sb.AppendLine(trapLine);
        }
    }
}
50 |
--------------------------------------------------------------------------------
/AsmGen/tests/NotIntRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Integer register file test whose x86 filler is <c>not</c> rather than <c>add</c>.
    /// </summary>
    public class NotIntRfTest : UarchTest
    {
        /// <summary>
        /// Sets up the test for the filler counts produced from the given range.
        /// </summary>
        /// <param name="low">Passed to <c>GenerateCountArray</c> as the low bound.</param>
        /// <param name="high">Passed to <c>GenerateCountArray</c> as the high bound.</param>
        /// <param name="step">Passed to <c>GenerateCountArray</c> as the step.</param>
        public NotIntRfTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "notintrf";
            Description = "Integer RF Test with not (no setting flags)";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            GetFunctionCallParameters = "structIterations, A";
            DivideTimeByCount = false;
        }

        /// <summary>Emits the x86-64 functions in AT&amp;T syntax.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] nots =
            {
                " not %r15",
                " not %r14",
                " not %r13",
                " not %r12",
            };
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, nots, nots, true);
        }

        /// <summary>Emits the x86-64 functions in NASM syntax.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] nots =
            {
                " not r15",
                " not r14",
                " not r13",
                " not r12",
            };
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, nots, nots, true);
        }

        /// <summary>Emits the aarch64 functions.</summary>
        // TODO: this still emits the plain add sequence; no aarch64-specific filler has been written yet.
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] adds =
            {
                " add x15, x15, x11",
                " add x14, x14, x11",
                " add x13, x13, x11",
                " add x12, x12, x11",
            };
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, adds, adds, true);
        }
    }
}
49 |
--------------------------------------------------------------------------------
/AsmGen/tests/MovImmIntRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Integer register file test whose filler is a rotation of four move-immediate
    /// instructions, each writing a different destination register.
    /// </summary>
    public class MovImmIntRfTest : UarchTest
    {
        /// <summary>
        /// Sets up the test for the filler counts produced from the given range.
        /// </summary>
        /// <param name="low">Passed to <c>GenerateCountArray</c> as the low bound.</param>
        /// <param name="high">Passed to <c>GenerateCountArray</c> as the high bound.</param>
        /// <param name="step">Passed to <c>GenerateCountArray</c> as the step.</param>
        public MovImmIntRfTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "movimmintrf";
            this.Description = "Integer RF Test (move immediate)";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>Emits the x86-64 functions in AT&amp;T syntax.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] movImms = new string[4];
            movImms[0] = " mov $1, %r15";
            movImms[1] = " mov $2, %r14";
            movImms[2] = " mov $3, %r13";
            movImms[3] = " mov $4, %r12";
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, movImms, movImms, true);
        }

        /// <summary>Emits the x86-64 functions in NASM syntax.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] movImms = new string[4];
            movImms[0] = " mov r15, 1";
            movImms[1] = " mov r14, 2";
            movImms[2] = " mov r13, 3";
            movImms[3] = " mov r12, 4";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, movImms, movImms, true);
        }

        /// <summary>Emits the aarch64 functions.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // Previously this emitted register-register adds, which made the aarch64 build
            // a duplicate of the plain integer RF test. Use move-immediates into the same
            // four destination registers so it matches the x86 variants and the description.
            string[] movImms = new string[4];
            movImms[0] = " mov x15, 1";
            movImms[1] = " mov x14, 2";
            movImms[2] = " mov x13, 3";
            movImms[3] = " mov x12, 4";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, movImms, movImms, true);
        }
    }
}
48 |
--------------------------------------------------------------------------------
/AsmGen/tests/LdqTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Load queue test: the filler instructions are loads from a fixed address.
/// </summary>
public class LdqTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public LdqTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "ldq";
        Description = "Load Queue Test (loads pending retire)";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1";
        GetFunctionCallParameters = "structIterations, A, B";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        string[] fillerLoads =
        {
            " mov (%r8), %r15",
            " mov (%r8), %r14",
            " mov (%r8), %r13",
            " mov (%r8), %r12",
        };
        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, fillerLoads, fillerLoads, true);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] fillerLoads =
        {
            " mov r15, [r8]",
            " mov r14, [r8]",
            " mov r13, [r8]",
            " mov r12, [r8]",
        };
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, fillerLoads, fillerLoads, true);
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] fillerLoads =
        {
            " ldr x15, [x2]",
            " ldr x14, [x2]",
            " ldr x13, [x2]",
            " ldr x12, [x2]",
        };
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, fillerLoads, fillerLoads, true);
    }
}
47 | }
48 |
--------------------------------------------------------------------------------
/AsmGen/tests/LoadDivNsqTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Load scheduler capacity test that uses divs to block retirement and is meant
/// to exclude the NSQ.
/// </summary>
public class LoadDivNsqTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public LoadDivNsqTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "loaddivnsq";
        Description = "Load Scheduler Capacity Test, using divs to block retirement, excluding NSQ";
        FunctionDefinitionParameters = "uint64_t iterations, int count, int *arr2";
        GetFunctionCallParameters = "structIterations, list_size, B";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // Loads indexed by rdx, followed by loads that only use r8.
        string[] loadsUsingRdx = { " mov (%r8, %rdx, 4), %r15" };
        string[] loadsUsingR8Only = { " mov (%r8), %r15" };

        // The last entry of Counts is handed to the helper alongside the full array.
        int lastCount = Counts[Counts.Length - 1];
        UarchTestHelpers.GenerateX86AsmDivNsqTestFuncs(sb, lastCount, Counts, Prefix, loadsUsingRdx, loadsUsingR8Only, false);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        // not implemented: this calls the plain div structure helper rather than
        // an NSQ-specific one like the GCC path does
        string[] loadsUsingRdx = { " mov r15, [r8 + rdx * 4]" };

        UarchTestHelpers.GenerateX86NasmDivStructureTestFuncs(sb, Counts, Prefix, loadsUsingRdx, loadsUsingRdx, false);
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        // not implemented: this calls the plain div structure helper rather than
        // an NSQ-specific one like the x86 GCC path does
        string[] loadsUsingW25 = { " ldr w15, [x2, w25, uxtw #2]" };
        string[] loadsUsingW26 = { " ldr w15, [x2, w26, uxtw #2]" };

        UarchTestHelpers.GenerateArmAsmDivStructureTestFuncs(sb, Counts, Prefix, loadsUsingW25, loadsUsingW26, false);
    }
}
49 | }
50 |
--------------------------------------------------------------------------------
/AsmGen/tests/StqTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Store queue test: the filler instructions are stores to a fixed address.
/// </summary>
public class StqTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public StqTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "stq";
        Description = "Store Queue Test (stores pending retire)";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1";
        GetFunctionCallParameters = "structIterations, A, B";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        string[] fillerStores =
        {
            " mov %r15, (%r8)",
            " mov %r14, (%r8)",
            " mov %r13, (%r8)",
            " mov %r12, (%r8)",
        };
        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, fillerStores, fillerStores, true);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] fillerStores =
        {
            " mov [r8], r15",
            " mov [r8], r14",
            " mov [r8], r13",
            " mov [r8], r12",
        };
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, fillerStores, fillerStores, true);
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] fillerStores =
        {
            " str x15, [x2]",
            " str x14, [x2]",
            " str x13, [x2]",
            " str x12, [x2]",
        };
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, fillerStores, fillerStores, true);
    }
}
47 | }
48 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixMaskIntRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Register file test that alternates mask-register (K) adds with integer adds.
/// Per its description it targets AVX-512 x86 CPUs only.
/// </summary>
public class MixMaskIntRfTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public MixMaskIntRfTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "mixmaskintrf";
        Description = "Mixed Integer and Mask (K regs) RF Test - AVX-512 x86 CPUs only";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr";
        GetFunctionCallParameters = "structIterations, A";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        string[] maskAndIntAdds =
        {
            " kaddb %k0, %k1, %k1",
            " add %r14, %r13",
            " kaddb %k0, %k3, %k3",
            " add %r11, %r12",
        };
        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, maskAndIntAdds, maskAndIntAdds, false);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] maskAndIntAdds =
        {
            " kaddb k1, k1, k0",
            " add r13, r14",
            " kaddb k3, k3, k0",
            " add r12, r11",
        };
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, maskAndIntAdds, maskAndIntAdds, false);
    }

    /// <summary>
    /// Appends the ARM test functions to <paramref name="sb"/>. The ARM variant
    /// emits plain integer adds.
    /// </summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] intAdds =
        {
            " add x15, x15, x11",
            " add x14, x14, x11",
            " add x13, x13, x11",
            " add x12, x12, x11",
        };
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, intAdds, intAdds, true);
    }
}
47 | }
48 |
--------------------------------------------------------------------------------
/AsmGen/tests/MaskRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Mask (K) register file test. Per its description it targets AVX-512 x86 CPUs only.
/// </summary>
public class MaskRfTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public MaskRfTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "maskrf";
        Description = "Mask (K regs) RF Test - AVX-512 x86 CPUs only";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr";
        GetFunctionCallParameters = "structIterations, A";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        string[] maskAdds =
        {
            " kaddb %k0, %k1, %k1",
            " kaddb %k0, %k2, %k2",
            " kaddb %k0, %k3, %k3",
            " kaddb %k0, %k4, %k4",
        };
        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, maskAdds, maskAdds, false);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] maskAdds =
        {
            " kaddb k1, k1, k0",
            " kaddb k2, k2, k0",
            " kaddb k3, k3, k0",
            " kaddb k4, k4, k0",
        };
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, maskAdds, maskAdds, false);
    }

    /// <summary>
    /// Appends the ARM test functions to <paramref name="sb"/>. The ARM variant
    /// emits plain integer adds.
    /// </summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] intAdds =
        {
            " add x15, x15, x11",
            " add x14, x14, x11",
            " add x13, x13, x11",
            " add x12, x12, x11",
        };
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, intAdds, intAdds, true);
    }
}
47 | }
48 |
--------------------------------------------------------------------------------
/AsmGen/tests/FaddSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// FP scheduler capacity test using 32-bit scalar adds as filler.
/// </summary>
public class FaddSchedTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public FaddSchedTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "faddsched";
        Description = "FP (32-bit add) Scheduler Capacity Test";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
        GetFunctionCallParameters = "structIterations, A, fpArr";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // xmm0 is dependent on ptr chasing load
        string[] scalarAdds =
        {
            " addss %xmm0, %xmm1",
            " addss %xmm0, %xmm2",
            " addss %xmm0, %xmm3",
            " addss %xmm0, %xmm4",
        };

        UarchTestHelpers.GenerateX86AsmFpSchedTestFuncs(sb, Counts, Prefix, scalarAdds, scalarAdds);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] scalarAdds =
        {
            " addss xmm1, xmm0",
            " addss xmm2, xmm0",
            " addss xmm3, xmm0",
            " addss xmm4, xmm0",
        };
        UarchTestHelpers.GenerateX86NasmFpSchedTestFuncs(sb, Counts, Prefix, scalarAdds, scalarAdds);
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] scalarAdds =
        {
            " fadd s17, s17, s16",
            " fadd s18, s18, s16",
            " fadd s19, s19, s16",
            " fadd s20, s20, s16",
        };
        UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, Counts, Prefix, scalarAdds, scalarAdds);
    }
}
49 | }
50 |
--------------------------------------------------------------------------------
/AsmGen/tests/FmulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// FP scheduler capacity test using 32-bit scalar multiplies as filler.
/// </summary>
public class FmulSchedTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public FmulSchedTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "fmulsched";
        Description = "FP (32-bit multiply) Scheduler Capacity Test";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
        GetFunctionCallParameters = "structIterations, A, fpArr";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // xmm0 is dependent on ptr chasing load
        string[] scalarMuls =
        {
            " mulss %xmm0, %xmm1",
            " mulss %xmm0, %xmm2",
            " mulss %xmm0, %xmm3",
            " mulss %xmm0, %xmm4",
        };

        UarchTestHelpers.GenerateX86AsmFpSchedTestFuncs(sb, Counts, Prefix, scalarMuls, scalarMuls);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] scalarMuls =
        {
            " mulss xmm1, xmm0",
            " mulss xmm2, xmm0",
            " mulss xmm3, xmm0",
            " mulss xmm4, xmm0",
        };
        UarchTestHelpers.GenerateX86NasmFpSchedTestFuncs(sb, Counts, Prefix, scalarMuls, scalarMuls);
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] scalarMuls =
        {
            " fmul s17, s17, s16",
            " fmul s18, s18, s16",
            " fmul s19, s19, s16",
            " fmul s20, s20, s16",
        };
        UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, Counts, Prefix, scalarMuls, scalarMuls);
    }
}
49 | }
50 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixLoadStoreDivSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Load/store scheduler capacity test that uses divs to block retirement.
/// The filler is one load plus one store per pair.
/// </summary>
public class MixLoadStoreDivSchedTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public MixLoadStoreDivSchedTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "mixloadstoredivsched";
        Description = "Load/Store Scheduler Capacity Test, using divs to block retirement";
        FunctionDefinitionParameters = "uint64_t iterations, int count, int *arr2, int *arr3";
        GetFunctionCallParameters = "structIterations, list_size, B, A";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        string[] loadStorePair =
        {
            " mov (%r9, %rdx, 4), %r15",
            " mov %r14, (%r8, %rdx, 4)",
        };

        UarchTestHelpers.GenerateX86AsmDivStructureTestFuncs(sb, Counts, Prefix, loadStorePair, loadStorePair, false);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] loadStorePair =
        {
            " mov r15, [r9 + rdx * 4]",
            " mov [r8 + rdx * 4], r14",
        };

        UarchTestHelpers.GenerateX86NasmDivStructureTestFuncs(sb, Counts, Prefix, loadStorePair, loadStorePair, false);
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        // First set is indexed by w25, second set by w26.
        string[] pairIndexedByW25 =
        {
            " ldr w15, [x3, w25, uxtw #2]",
            " str w14, [x2, w25, uxtw #2]",
        };

        string[] pairIndexedByW26 =
        {
            " ldr w15, [x3, w26, uxtw #2]",
            " str w14, [x2, w26, uxtw #2]",
        };

        UarchTestHelpers.GenerateArmAsmDivStructureTestFuncs(sb, Counts, Prefix, pairIndexedByW25, pairIndexedByW26, false);
    }
}
48 | }
49 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixFaddFmulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// FP scheduler capacity test that alternates 32-bit scalar adds and multiplies.
/// </summary>
public class MixFaddFmulSchedTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public MixFaddFmulSchedTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "mixfaddfmulsched";
        Description = "FP (mixed 32-bit add and multiply) Scheduler Capacity Test";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
        GetFunctionCallParameters = "structIterations, A, fpArr";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // xmm0 is dependent on ptr chasing load
        string[] addMulMix =
        {
            " addss %xmm0, %xmm1",
            " mulss %xmm0, %xmm2",
            " addss %xmm0, %xmm3",
            " mulss %xmm0, %xmm4",
        };

        UarchTestHelpers.GenerateX86AsmFpSchedTestFuncs(sb, Counts, Prefix, addMulMix, addMulMix);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] addMulMix =
        {
            " addss xmm1, xmm0",
            " mulss xmm2, xmm0",
            " addss xmm3, xmm0",
            " mulss xmm4, xmm0",
        };
        UarchTestHelpers.GenerateX86NasmFpSchedTestFuncs(sb, Counts, Prefix, addMulMix, addMulMix);
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] addMulMix =
        {
            " fadd s17, s17, s16",
            " fmul s18, s18, s16",
            " fadd s19, s19, s16",
            " fmul s20, s20, s16",
        };
        UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, Counts, Prefix, addMulMix, addMulMix);
    }
}
49 | }
50 |
--------------------------------------------------------------------------------
/AsmGen/tests/Fadd256SchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Scheduler capacity test using 256-bit packed FP adds on x86 (128-bit on ARM,
/// per the test description).
/// </summary>
public class Fadd256SchedTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public Fadd256SchedTest(int low, int high, int step)
    {
        this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        this.Prefix = "fadd256sched";
        this.Description = "256-bit FADD Scheduler Capacity Test, 128-bit on ARM";
        this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
        this.GetFunctionCallParameters = "structIterations, A, fpArr";
        this.DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // ymm0 is dependent on ptr chasing load
        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " vaddps %ymm0, %ymm1, %ymm1";
        unrolledAdds[1] = " vaddps %ymm0, %ymm2, %ymm2";
        unrolledAdds[2] = " vaddps %ymm0, %ymm3, %ymm3";
        // Destination is ymm4 (not ymm3), matching the NASM variant's
        // "vaddps ymm4, ymm4, ymm0" so both builds emit the same instruction mix.
        unrolledAdds[3] = " vaddps %ymm0, %ymm4, %ymm4";

        UarchTestHelpers.GenerateX86AsmFp256SchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " vaddps ymm1, ymm1, ymm0";
        unrolledAdds[1] = " vaddps ymm2, ymm2, ymm0";
        unrolledAdds[2] = " vaddps ymm3, ymm3, ymm0";
        unrolledAdds[3] = " vaddps ymm4, ymm4, ymm0";
        UarchTestHelpers.GenerateX86NasmFp256SchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds);
    }

    /// <summary>
    /// Appends the ARM test functions to <paramref name="sb"/>, using 4 x 32-bit
    /// vector adds.
    /// </summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " fadd v20.4s, v15.4s, v16.4s";
        unrolledAdds[1] = " fadd v17.4s, v15.4s, v16.4s";
        unrolledAdds[2] = " fadd v18.4s, v15.4s, v16.4s";
        unrolledAdds[3] = " fadd v19.4s, v15.4s, v16.4s";
        UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds);
    }
}
49 | }
50 |
--------------------------------------------------------------------------------
/AsmGen/tests/JumpNsqTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Not-taken jump scheduler capacity test that is meant to exclude the NSQ.
/// </summary>
public class JumpNsqTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public JumpNsqTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "jmpnsq";
        Description = "Not-taken Jump Scheduler Capacity Test, Excluding NSQ";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr";
        GetFunctionCallParameters = "structIterations, A";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        const string trapLabel = "jumpnsq_reallybadthing";

        string[] jumpsOnLoadedValues = { " cmp %rdi, %rsi\n je " + trapLabel };

        // R14 is set to 1, so the test instruction will never set the zero flag
        string[] jumpsOnR14 = { " test %r14, %r14\n je " + trapLabel };

        // The last entry of Counts is handed to the helper alongside the full array.
        int lastCount = Counts[Counts.Length - 1];
        UarchTestHelpers.GenerateX86AsmNsqTestFuncs(sb, lastCount, Counts, Prefix, jumpsOnLoadedValues, jumpsOnR14, false);

        // Target of the jumps above: a breakpoint instruction.
        sb.AppendLine(trapLabel + ":");
        sb.AppendLine(" int3");
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        const string trapLabel = "jumpnsq_reallybadthing";

        string[] jumpsOnLoadedValues = { " cmp rdi, rsi\n je " + trapLabel };

        // R14 is set to 1, so the test instruction will never set the zero flag
        string[] jumpsOnR14 = { " test r14, r14\n je " + trapLabel };

        int lastCount = Counts[Counts.Length - 1];
        UarchTestHelpers.GenerateX86NasmNsqTestFuncs(sb, lastCount, Counts, Prefix, jumpsOnLoadedValues, jumpsOnR14, false);

        // Target of the jumps above: a breakpoint instruction.
        sb.AppendLine(trapLabel + ":");
        sb.AppendLine(" int3");
    }

    /// <summary>Appends a stub for ARM via <c>UarchTestHelpers.GenerateStub</c>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        UarchTestHelpers.GenerateStub(sb, Counts, Prefix);
    }
}
54 | }
55 |
--------------------------------------------------------------------------------
/AsmGen/tests/FaddNsqTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// FP (32-bit add) scheduler test that is meant to exclude any NSQ.
/// </summary>
public class FaddNsqTest : UarchTest
{
    // Upper bound given to the constructor; passed on to the NSQ helpers.
    private readonly int high;

    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>, and
    /// <paramref name="high"/> is also remembered for the generators.
    /// </summary>
    public FaddNsqTest(int low, int high, int step)
    {
        this.high = high;
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "faddnsq";
        Description = "FP (32-bit add) Scheduler Test, excluding any NSQ";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
        GetFunctionCallParameters = "structIterations, A, fpArr";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // xmm0 is dependent on ptr chasing load
        string setup = " cvtsi2ss %r11, %xmm3\n";
        string afterLoad = " cvtsi2ss %rdi, %xmm0";

        // Adds that consume xmm0.
        string[] addsOnXmm0 =
        {
            " addss %xmm0, %xmm1",
            " addss %xmm0, %xmm2",
        };

        // Adds that consume xmm3, which is only written by the setup instruction.
        string[] addsOnXmm3 =
        {
            " addss %xmm3, %xmm4",
            " addss %xmm3, %xmm5",
        };

        UarchTestHelpers.GenerateX86AsmNsqTestFuncs(sb, high, Counts, Prefix, addsOnXmm0, addsOnXmm3, false, setup, afterLoad);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string setup = " movq xmm3, r11\n xorps xmm1, xmm1\n xorps xmm2, xmm2\n xorps xmm4, xmm4\n xorps xmm5, xmm5\n";
        string afterLoad = " cvtsi2ss xmm0, rdi";

        // Adds that consume xmm0.
        string[] addsOnXmm0 =
        {
            " addss xmm1, xmm0",
            " addss xmm2, xmm0",
        };

        // Adds that consume xmm3, which is only written by the setup instructions.
        string[] addsOnXmm3 =
        {
            " addss xmm4, xmm3",
            " addss xmm5, xmm3",
        };

        UarchTestHelpers.GenerateX86NasmNsqTestFuncs(sb, high, Counts, Prefix, addsOnXmm0, addsOnXmm3, false, setup, afterLoad);
    }

    /// <summary>Appends a stub for ARM via <c>UarchTestHelpers.GenerateStub</c>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        UarchTestHelpers.GenerateStub(sb, Counts, Prefix);
    }
}
56 | }
57 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixJumpAddSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Scheduler capacity test mixing not-taken jumps with adds.
/// </summary>
public class MixJmpAddSched : UarchTest
{
    // Label the filler jumps target. It is specific to this test so that the
    // generated assembly does not define the same symbol as JumpAddSchedTest,
    // which emits "jumpsched_reallybadthing_jadd".
    private const string TrapLabel = "mixjmpaddsched_reallybadthing";

    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public MixJmpAddSched(int low, int high, int step)
    {
        this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        this.Prefix = "mixjmpaddsched";
        this.Description = "Not-taken Jump + Add Scheduler Capacity Test";
        this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
        this.GetFunctionCallParameters = "structIterations, A";
        this.DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        string postLoadInstr1 = " add %rdi, %r11";
        string postLoadInstr2 = " add %rsi, %r11";
        string[] unrolledJumps = new string[2];
        unrolledJumps[0] = $" cmp %rdi, %rsi\n je {TrapLabel}";
        unrolledJumps[1] = " add %r11, %r15";

        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);

        // Jump target: a breakpoint instruction.
        sb.AppendLine(TrapLabel + ":");
        sb.AppendLine(" int3");
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] unrolledJumps = new string[2];
        unrolledJumps[0] = $" cmp rdi, rsi\n je {TrapLabel}";
        unrolledJumps[1] = " add r15, rdi";
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false);

        // Jump target: a breakpoint instruction.
        sb.AppendLine(TrapLabel + ":");
        sb.AppendLine(" int3");
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] unrolledJumps = new string[2];
        unrolledJumps[0] = " add w14, w13, w25";
        unrolledJumps[1] = $" cbz w14, {TrapLabel}";
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false);

        // Jump target: a raw instruction word.
        sb.AppendLine(TrapLabel + ":");
        sb.AppendLine(" .word 0xf7f0a000");
    }
}
54 | }
55 |
--------------------------------------------------------------------------------
/AsmGen/tests/JumpAddSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Scheduler capacity test mixing not-taken jumps with adds.
/// </summary>
public class JumpAddSchedTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public JumpAddSchedTest(int low, int high, int step)
    {
        Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        Prefix = "jmpaddsched";
        Description = "Not-taken Jump + Add Scheduler Capacity Test";
        FunctionDefinitionParameters = "uint64_t iterations, int *arr";
        GetFunctionCallParameters = "structIterations, A";
        DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        const string trapLabel = "jumpsched_reallybadthing_jadd";

        string[] jumpAndAdd =
        {
            " cmp %rdi, %rsi\n je " + trapLabel,
            " add %r11, %r15",
        };

        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(
            sb,
            Counts,
            Prefix,
            jumpAndAdd,
            jumpAndAdd,
            false,
            postLoadInstrs1: " add %rdi, %r11",
            postLoadInstrs2: " add %rsi, %r11");

        // Jump target: a breakpoint instruction.
        sb.AppendLine(trapLabel + ":");
        sb.AppendLine(" int3");
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        const string trapLabel = "jumpsched_reallybadthing_jadd";

        string[] jumpAndAdd =
        {
            " cmp rdi, rsi\n je " + trapLabel,
            " add r15, rdi",
        };
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, jumpAndAdd, jumpAndAdd, false);

        // Jump target: a breakpoint instruction.
        sb.AppendLine(trapLabel + ":");
        sb.AppendLine(" int3");
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        const string trapLabel = "jumpsched_reallybadthing_jadd";

        string[] jumpAndAdd =
        {
            " cmp x25, x26\n b.eq " + trapLabel,
            " add w14, w13, w25",
        };
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, jumpAndAdd, jumpAndAdd, false);

        // Jump target: a raw instruction word.
        sb.AppendLine(trapLabel + ":");
        sb.AppendLine(" .word 0xf7f0a000");
    }
}
54 | }
55 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixBtsMulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
/// <summary>
/// Scheduler capacity test mixing integer multiplies with BTS (ROR on ARM).
/// </summary>
public class MixMulBtsSchedTest : UarchTest
{
    /// <summary>
    /// Configures the test; the filler counts come from
    /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
    /// </summary>
    public MixMulBtsSchedTest(int low, int high, int step)
    {
        this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        this.Prefix = "mixmulbtssched";
        this.Description = "Mixed Multiply/BTS Scheduler Capacity Test";
        this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
        this.GetFunctionCallParameters = "structIterations, A";
        this.DivideTimeByCount = false;
    }

    /// <summary>Appends the GCC (AT&amp;T syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // First filler set sources rdi/edi.
        string[] instrs1 = new string[2];
        instrs1[0] = " bts %rdi, %r15";
        instrs1[1] = " imul %edi, %r12d";

        // Second filler set sources rsi/esi, mirroring the NASM variant. Previously
        // this array used rdi for the bts and was never handed to the helper.
        string[] instrs2 = new string[2];
        instrs2[0] = " bts %rsi, %r15";
        instrs2[1] = " imul %esi, %r11d";

        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false);
    }

    /// <summary>Appends the NASM (Intel syntax) x86 test functions to <paramref name="sb"/>.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string postLoadInstr1 = " mov r15, rdi";
        string postLoadInstr2 = " mov r15, rsi";
        string[] instrs = new string[2];
        instrs[0] = " bts r15, rdi";
        instrs[1] = " imul r12d, edi";
        string[] instrs1 = new string[2];
        instrs1[0] = " bts r15, rsi";
        instrs1[1] = " imul r11d, esi";
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
    }

    /// <summary>Appends the ARM test functions to <paramref name="sb"/>.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string postLoadInstr1 = " mov x15, x25";
        string postLoadInstr2 = " mov x15, x26";
        string[] instrs = new string[2];
        instrs[0] = " ror x15, x15, #1";
        instrs[1] = " mul x12, x12, x25";
        string[] instrs1 = new string[2];
        instrs1[0] = " ror x15, x15, #1";
        instrs1[1] = " mul x11, x11, x26";

        // Pass the x26-based set as the second filler set; it used to be built
        // but the first set was passed twice.
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
    }
}
54 | }
55 |
--------------------------------------------------------------------------------
/AsmGen/DataFiles/VsIndirectBranchFunction.c:
--------------------------------------------------------------------------------
1 | // similar but for indirect branch test
2 | // needs indirectBranchTestFuncArr generated
3 | // mode:
4 | // 0 - cycle through targets
5 | // 1 - random target selection
6 | // 2 - jump to middle
7 | float runIndirectBranchTest(uint32_t branchCountIdx, uint32_t targetCountIdx, uint32_t mode) {
8 | struct timeb start, end;
9 | uint32_t branchCount = indirectBranchCounts[branchCountIdx];
10 | uint32_t targetCount = indirectBranchTargetCounts[targetCountIdx];
11 | uint64_t iterations = 80000000 / branchCount;
12 | uint64_t(*branchtestFunc)(uint64_t, uint32_t**, uint32_t, uint64_t**) = indirectBranchTestFuncArr[branchCountIdx][targetCountIdx];
13 |
14 | // generate an array containing jump target indexes for every branch
15 | uint32_t** testArrToArr = (uint32_t**)malloc(sizeof(uint32_t*) * branchCount);
16 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) {
17 | uint32_t* testArr = (uint32_t*)malloc(sizeof(uint32_t) * targetCount);
18 | if (mode == 1)
19 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = rand() % targetCount;
20 | else if (mode == 0)
21 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = i;
22 | else if (mode == 2)
23 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = targetCount / 2;
24 | testArrToArr[testArrIdx] = testArr;
25 | }
26 |
27 | // each branch needs a jump table
28 | uint64_t** jumpTables = (uint64_t**)malloc(sizeof(uint64_t*) * branchCount);
29 | for (int jumpTableIdx = 0; jumpTableIdx < branchCount; jumpTableIdx++)
30 | {
31 | uint64_t* jumpTable = (uint64_t*)malloc(sizeof(uint64_t) * targetCount);
32 | jumpTables[jumpTableIdx] = jumpTable;
33 | }
34 |
35 | ftime(&start);
36 | // uint64_t iterations, uint32_t **arr, uint32_t arrLen, uint64_t **scratch
37 | branchtestFunc(iterations, testArrToArr, targetCount, jumpTables);
38 | ftime(&end);
39 | uint64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm);
40 | float latency = 1e6 * (float)time_diff_ms / (float)iterations;
41 |
42 | // give result in latency per branch
43 | latency = latency / branchCount;
44 |
45 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) free(testArrToArr[testArrIdx]);
46 | free(testArrToArr);
47 | for (int jumpTableIdx = 0; jumpTableIdx < branchCount; jumpTableIdx++) free(jumpTables[jumpTableIdx]);
48 | free(jumpTables);
49 | return latency;
50 | }
51 |
--------------------------------------------------------------------------------
/CoreClockChecker/BoostClockChecker.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | extern uint64_t clktsctest(uint64_t iterations) __attribute((ms_abi));
10 |
// Samples clktsctest() repeatedly after an initial sleep, then calibrates the TSC against
// wall-clock time with one long run and prints a CSV of cumulative time, estimated
// clock (GHz) and raw TSC delta per sample.
//
// Options (each takes a value in the following argument):
//   -samples N      number of samples to record (default 100)
//   -iterations N   iterations passed to clktsctest per sample (default 500000)
//   -sleep N        seconds to sleep before sampling (default 5)
// Unrecognized arguments are ignored. Returns 0 on success, 1 on a usage or runtime error.
int main(int argc, char *argv[]) {
    struct timeval startTv, endTv;
    uint64_t iterations = 500000, samples = 100;
    unsigned int sleepSeconds = 5;
    time_t time_diff_ms;

    for (int argIdx = 1; argIdx < argc; argIdx++) {
        if (*(argv[argIdx]) != '-') continue;

        char *arg = argv[argIdx] + 1;
        int isSamples = strncmp(arg, "samples", 7) == 0;
        int isIterations = !isSamples && strncmp(arg, "iterations", 10) == 0;
        int isSleep = !isSamples && !isIterations && strncmp(arg, "sleep", 5) == 0;
        if (!isSamples && !isIterations && !isSleep) continue;

        // argv[argc] is NULL, so an option in the last position has no value to parse
        if (argIdx + 1 >= argc) {
            fprintf(stderr, "Option -%s requires a value\n", arg);
            return 1;
        }

        argIdx++;
        if (isSamples) samples = atol(argv[argIdx]);
        else if (isIterations) iterations = atol(argv[argIdx]);
        else sleepSeconds = atoi(argv[argIdx]);
    }

    sleep(sleepSeconds);

    uint64_t *measuredTscs = malloc(samples * sizeof(uint64_t));
    // malloc(0) may legitimately return NULL, so only treat NULL as failure when samples were requested
    if (measuredTscs == NULL && samples > 0) {
        fprintf(stderr, "Could not allocate memory for %llu samples\n", (unsigned long long)samples);
        return 1;
    }

    for (uint64_t sampleIdx = 0; sampleIdx < samples; sampleIdx++) {
        uint64_t elapsedTsc = clktsctest(iterations);
        measuredTscs[sampleIdx] = elapsedTsc;
    }

    // uint64_t and time_t are not guaranteed to be unsigned long, so cast for printing
    fprintf(stderr, "Used %llu samples\n", (unsigned long long)samples);
    fprintf(stderr, "Used %llu iterations\n", (unsigned long long)iterations);
    // figure out TSC to real time ratio
    fprintf(stderr, "Checking TSC ratio...\n");
    uint64_t iterationsHi = 8e9; // should be a couple seconds at least?
    gettimeofday(&startTv, NULL);
    uint64_t referenceElapsedTsc = clktsctest(iterationsHi);
    gettimeofday(&endTv, NULL);
    time_diff_ms = 1000 * (endTv.tv_sec - startTv.tv_sec) + ((endTv.tv_usec - startTv.tv_usec) / 1000);
    if (time_diff_ms <= 0) {
        // the ratio below would divide by zero (or go negative) and poison every result
        fprintf(stderr, "Reference run was too short to measure, cannot determine TSC ratio\n");
        free(measuredTscs);
        return 1;
    }

    float tsc_per_ms = (float)referenceElapsedTsc / (float)time_diff_ms;
    float tsc_per_ns = tsc_per_ms / 1e6;
    fprintf(stderr, "TSC = %llu, elapsed ms = %lld\n", (unsigned long long)referenceElapsedTsc, (long long)time_diff_ms);
    fprintf(stderr, "TSC per ms: %f, TSC per ns: %f\n", tsc_per_ms, tsc_per_ns);

    printf("Time (ms), Clk (GHz), TSC\n");
    float elapsedTime = 0;
    for (uint64_t sampleIdx = 0; sampleIdx < samples; sampleIdx++) {
        // tsc / (tsc / ms) = ms
        float elapsedTimeMs = measuredTscs[sampleIdx] / tsc_per_ms;
        elapsedTime += elapsedTimeMs;
        // ns per iteration, then inverted to iterations per ns
        float latency = 1e6 * elapsedTimeMs / (float)iterations;
        float addsPerNs = 1 / latency;
        printf("%f,%f,%llu\n", elapsedTime, addsPerNs, (unsigned long long)measuredTscs[sampleIdx]);
    }

    free(measuredTscs);
    return 0;
}
68 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixPdepMulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MixPdepMulSchedTest : UarchTest
6 | {
7 | public MixPdepMulSchedTest(int low, int high, int step)
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mixpdepmulsched";
11 | this.Description = "Mixed Multiply/PDEP Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | string[] instrs1 = new string[2];
20 | instrs1[0] = " pdep %rdi, %r14, %r15";
21 | instrs1[1] = " imul %edi, %r12d";
22 | string[] instrs2 = new string[2];
23 | instrs2[0] = " pdep %rsi, %r14, %r15";
24 | instrs2[1] = " imul %esi, %r11d";
25 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs1, false);
26 | }
27 |
28 | public override void GenerateX86NasmAsm(StringBuilder sb)
29 | {
30 | string postLoadInstr1 = " mov r15, rdi";
31 | string postLoadInstr2 = " mov r15, rsi";
32 | string[] instrs = new string[2];
33 | instrs[0] = " pdep r15, rdi, r14";
34 | instrs[1] = " imul r12d, edi";
35 | string[] instrs1 = new string[2];
36 | instrs1[0] = " pdep r15, rsi, r14";
37 | instrs1[1] = " imul r11d, esi";
38 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
39 | }
40 |
41 | public override void GenerateArmAsm(StringBuilder sb)
42 | {
43 | // todo, or not. no lea on aarch64
44 | string postLoadInstr1 = " mov x15, x25";
45 | string postLoadInstr2 = " mov x15, x26";
46 | string[] instrs = new string[2];
47 | instrs[0] = " ror x15, x15, #1";
48 | instrs[1] = " mul x12, x12, x25";
49 | string[] instrs1 = new string[2];
50 | instrs1[0] = " ror x15, x15, #1";
51 | instrs1[1] = " mul x11, x11, x26";
52 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
53 | }
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixLeaMulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MixLeaMulSchedTest : UarchTest
6 | {
7 | public MixLeaMulSchedTest(int low, int high, int step)
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mixleamulsched";
11 | this.Description = "Mixed Multiply/lea Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | string[] instrs1 = new string[2];
20 | instrs1[0] = " lea (%rdx,%rdi,8), %r15";
21 | instrs1[1] = " imul %edi, %r12d";
22 | string[] instrs2 = new string[2];
23 | instrs2[0] = " lea (%rdx,%rsi,8), %r15";
24 | instrs2[1] = " imul %esi, %r11d";
25 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs1, false);
26 | }
27 |
28 | public override void GenerateX86NasmAsm(StringBuilder sb)
29 | {
30 | string postLoadInstr1 = " mov r15, rdi";
31 | string postLoadInstr2 = " mov r15, rsi";
32 | string[] instrs = new string[2];
33 | instrs[0] = " lea r15, [rdx + rdi * 8]";
34 | instrs[1] = " imul r12d, edi";
35 | string[] instrs1 = new string[2];
36 | instrs1[0] = " lea r15, [rdx + rsi * 8]";
37 | instrs1[1] = " imul r11d, esi";
38 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
39 | }
40 |
41 | public override void GenerateArmAsm(StringBuilder sb)
42 | {
43 | // todo, or not. no lea on aarch64
44 | string postLoadInstr1 = " mov x15, x25";
45 | string postLoadInstr2 = " mov x15, x26";
46 | string[] instrs = new string[2];
47 | instrs[0] = " ror x15, x15, #1";
48 | instrs[1] = " mul x12, x12, x25";
49 | string[] instrs1 = new string[2];
50 | instrs1[0] = " ror x15, x15, #1";
51 | instrs1[1] = " mul x11, x11, x26";
52 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
53 | }
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/AsmGen/DataFiles/GccIndirectBranchFunction.c:
--------------------------------------------------------------------------------
1 | // similar but for indirect branch test
2 | // needs indirectBranchTestFuncArr generated
3 | // mode:
4 | // 0 - cycle through targets
5 | // 1 - random target selection
6 | // 2 - jump to middle
7 | float runIndirectBranchTest(uint32_t branchCountIdx, uint32_t targetCountIdx, uint32_t mode) {
8 | struct timeval startTv, endTv;
9 | struct timezone startTz, endTz;
10 | uint32_t branchCount = indirectBranchCounts[branchCountIdx];
11 | uint32_t targetCount = indirectBranchTargetCounts[targetCountIdx];
12 | uint64_t iterations = 80000000 / branchCount;
13 | uint64_t(*branchtestFunc)(uint64_t, uint32_t**, uint32_t, uint64_t **) __attribute((sysv_abi)) = indirectBranchTestFuncArr[branchCountIdx][targetCountIdx];
14 |
15 | // generate an array containing jump target indexes for every branch
16 | uint32_t** testArrToArr = (uint32_t**)malloc(sizeof(uint32_t*) * branchCount);
17 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) {
18 | uint32_t* testArr = (uint32_t*)malloc(sizeof(uint32_t) * targetCount);
19 | if (mode == 1)
20 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = rand() % targetCount;
21 | else if (mode == 0)
22 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = i;
23 | else if (mode == 2)
24 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = targetCount / 2;
25 | testArrToArr[testArrIdx] = testArr;
26 | }
27 |
28 | // each branch needs a jump table
29 | uint64_t** jumpTables = (uint64_t**)malloc(sizeof(uint64_t*) * branchCount);
30 | for (int jumpTableIdx = 0; jumpTableIdx < branchCount; jumpTableIdx++)
31 | {
32 | uint64_t* jumpTable = (uint64_t*)malloc(sizeof(uint64_t) * targetCount);
33 | jumpTables[jumpTableIdx] = jumpTable;
34 | }
35 |
36 | gettimeofday(&startTv, &startTz);
37 | // uint64_t iterations, uint32_t **arr, uint32_t arrLen, uint64_t **scratch
38 | branchtestFunc(iterations, testArrToArr, targetCount, jumpTables);
39 | gettimeofday(&endTv, &endTz);
40 | uint64_t time_diff_ms = 1000 * (endTv.tv_sec - startTv.tv_sec) + ((endTv.tv_usec - startTv.tv_usec) / 1000);
41 | float latency = 1e6 * (float)time_diff_ms / (float)iterations;
42 |
43 | // give result in latency per branch
44 | latency = latency / branchCount;
45 |
46 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) free(testArrToArr[testArrIdx]);
47 | free(testArrToArr);
48 | for (int jumpTableIdx = 0; jumpTableIdx < branchCount; jumpTableIdx++) free(jumpTables[jumpTableIdx]);
49 | free(jumpTables);
50 | return latency;
51 | }
52 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixAddJump21SchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MixAddJmp21Sched : UarchTest
6 | {
7 | public MixAddJmp21Sched(int low, int high, int step)
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mixaddjmp21sched";
11 | this.Description = "Not-taken Jump + Add Scheduler Capacity Test, 1:2 ratio";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | string postLoadInstr1 = " add %rdi, %r11";
20 | string postLoadInstr2 = " add %rsi, %r11";
21 | string[] unrolledJumps = new string[3];
22 | unrolledJumps[0] = $" cmp %rdi, %rsi\n je jumpsched21_reallybadthing_jadd";
23 | unrolledJumps[1] = " add %r11, %r15";
24 | unrolledJumps[2] = " add %r11, %r14";
25 |
26 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
27 |
28 | sb.AppendLine("jumpsched21_reallybadthing_jadd:");
29 | sb.AppendLine(" int3");
30 | }
31 |
32 | public override void GenerateX86NasmAsm(StringBuilder sb)
33 | {
34 | string[] unrolledJumps = new string[3];
35 | unrolledJumps[0] = " cmp rdi, rsi\n je jumpsched21_reallybadthing_jadd";
36 | unrolledJumps[1] = " add r15, rdi";
37 | unrolledJumps[2] = " add r14, rdi";
38 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false);
39 |
40 | sb.AppendLine("jumpsched21_reallybadthing_jadd:");
41 | sb.AppendLine(" int3");
42 | }
43 |
44 | public override void GenerateArmAsm(StringBuilder sb)
45 | {
46 | string[] unrolledJumps = new string[3];
47 | //string initInstrs = "jumpsched_reallybadthing_jadd:";
48 | unrolledJumps[0] = " add x14, x13, x25";
49 | unrolledJumps[1] = " add x12, x13, x25";
50 | unrolledJumps[2] = " cbz w12, jumpsched21_reallybadthing_jadd";
51 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false);
52 |
53 | sb.AppendLine("jumpsched21_reallybadthing_jadd:");
54 | sb.AppendLine(" .word 0xf7f0a000");
55 | }
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/AsmGen/tests/MxcsrTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MxcsrTest : UarchTest
6 | {
7 | public MxcsrTest(int low, int high, int step)
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mxcsrrename";
11 | this.Description = "MXCSR renamed registers";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1";
13 | this.GetFunctionCallParameters = "structIterations, A, B";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | string[] setMxcsrInstrs = new string[2];
20 | setMxcsrInstrs[0] = " mov $0x1f80, %r15\n mov %r15, (%r8)\n ldmxcsr (%r8)\n addss %xmm0, %xmm1"; // default
21 | setMxcsrInstrs[1] = " mov $0x9fc0, %r15\n mov %r15, (%r8)\n ldmxcsr (%r8)\n addss %xmm0, %xmm1"; // set denormals are zero, flush to zero
22 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, setMxcsrInstrs, setMxcsrInstrs, false);
23 | }
24 |
25 | public override void GenerateX86NasmAsm(StringBuilder sb)
26 | {
27 | string[] setMxcsrInstrs = new string[2];
28 | setMxcsrInstrs[0] = " mov r15, 0x1f80\n mov [r8], r15\n ldmxcsr [r8]\n addss xmm0, xmm1"; // default
29 | setMxcsrInstrs[1] = " mov r15, 0x9fc0\n mov [r8], r15\n ldmxcsr [r8]\n addss xmm0, xmm1"; // set denormals are zero, flush to zero
30 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, setMxcsrInstrs, setMxcsrInstrs, false);
31 | }
32 |
33 | // todo
34 | public override void GenerateArmAsm(StringBuilder sb)
35 | {
36 | // read FPCR into x15, set x14 = flush denormals to zero enabled, x15 = flush denormals to zero disabled
37 | // x12 = mask with all bits set except bit 24 (flush to zero) - bitwise AND to unset bit 24
38 | // x13 = just bit 24 set with all other bits zero - bitwise OR to set bit 24
39 | string initInstrs = " mrs x15, fpcr\n mov x13, 1\n lsl x13, x13, 24\n neg x12, x13\n orr x14, x15, x13\n and x15, x15, x12";
40 | string[] setFpcrInstrs = new string[2];
41 | setFpcrInstrs[0] = " msr fpcr, x15\n fadd s2, s2, s3\n";
42 | setFpcrInstrs[1] = " msr fpcr, x14\n fadd s4, s4, s5\n";
43 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, setFpcrInstrs, setFpcrInstrs, false, initInstrs: initInstrs);
44 | }
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixRorBtsSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MixRorBtsSchedTest : UarchTest
6 | {
7 | public MixRorBtsSchedTest(int low, int high, int step)
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mixrorbtssched";
11 | this.Description = "Mixed BTS/ROR Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | string postLoadInstr1 = " mov %rdi, %r15";
20 | string postLoadInstr2 = " mov %rsi, %r15";
21 | string[] instrs1 = new string[2];
22 | instrs1[0] = " ror $1, %r15";
23 | instrs1[1] = " bts %rdi, %r12";
24 | string[] instrs2 = new string[2];
25 | instrs2[0] = " ror $1, %r15";
26 | instrs2[1] = " bts %rsi, %r11";
27 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
28 | }
29 |
30 | public override void GenerateX86NasmAsm(StringBuilder sb)
31 | {
32 | string postLoadInstr1 = " mov r15, rdi";
33 | string postLoadInstr2 = " mov r15, rsi";
34 | string[] instrs = new string[2];
35 | instrs[0] = " ror r15, 1";
36 | instrs[1] = " bts r12d, edi";
37 | string[] instrs1 = new string[2];
38 | instrs1[0] = " ror r15, 1";
39 | instrs1[1] = " bts r11d, esi";
40 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
41 | }
42 |
43 | public override void GenerateArmAsm(StringBuilder sb)
44 | {
45 | // todo
46 | string postLoadInstr1 = " mov x15, x25";
47 | string postLoadInstr2 = " mov x15, x26";
48 | string[] instrs = new string[2];
49 | instrs[0] = " ror x15, x15, #1";
50 | instrs[1] = " mul x12, x12, x25";
51 | string[] instrs1 = new string[2];
52 | instrs1[0] = " ror x15, x15, #1";
53 | instrs1[1] = " mul x11, x11, x26";
54 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
55 | }
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/AsmGen/tests/MmxRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MmxRfTest : UarchTest
6 | {
7 | public MmxRfTest(int low, int high, int step)
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mmxrf";
11 | this.Description = "64-bit MMX RF Capacity Test. x86 only";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
13 | this.GetFunctionCallParameters = "structIterations, A, B";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | string initInstrs = " movq (%rdx), %mm0\n" +
20 | " movq 8(%rdx), %mm1\n" +
21 | " movq 16(%rdx), %mm2\n" +
22 | " movq 24(%rdx), %mm3\n" +
23 | " movq 32(%rdx), %mm4\n";
24 |
25 | string[] unrolledAdds = new string[4];
26 | unrolledAdds[0] = " paddw %mm0, %mm1";
27 | unrolledAdds[1] = " paddw %mm0, %mm2";
28 | unrolledAdds[2] = " paddw %mm0, %mm3";
29 | unrolledAdds[3] = " paddw %mm0, %mm4";
30 |
31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, initInstrs: initInstrs);
32 | }
33 |
34 | public override void GenerateX86NasmAsm(StringBuilder sb)
35 | {
36 | string initInstrs = " movq mm0, [rdx]\n" +
37 | " movq mm1, [rdx + 8]\n" +
38 | " movq mm2, [rdx + 16]\n" +
39 | " movq mm3, [rdx + 24]\n" +
40 | " movq mm4, [rdx + 32]\n";
41 |
42 | string[] unrolledAdds = new string[4];
43 | unrolledAdds[0] = " paddw mm1, mm0";
44 | unrolledAdds[1] = " paddw mm2, mm0";
45 | unrolledAdds[2] = " paddw mm3, mm0";
46 | unrolledAdds[3] = " paddw mm4, mm0";
47 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, initInstrs: initInstrs);
48 | }
49 |
50 | public override void GenerateArmAsm(StringBuilder sb)
51 | {
52 | string[] unrolledAdds = new string[4];
53 | unrolledAdds[0] = " add v15.2s, v15.2s, v19.2s";
54 | unrolledAdds[1] = " add v16.2s, v16.2s, v19.2s";
55 | unrolledAdds[2] = " add v17.2s, v17.2s, v19.2s";
56 | unrolledAdds[3] = " add v18.2s, v18.2s, v19.2s";
57 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds);
58 | }
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixRorMulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MixMulRorSchedTest : UarchTest
6 | {
7 | public MixMulRorSchedTest(int low, int high, int step)
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mixmulrorsched";
11 | this.Description = "Mixed Multiply/Rotate Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | string postLoadInstr1 = " mov %rdi, %r15";
20 | string postLoadInstr2 = " mov %rsi, %r15";
21 | string[] instrs1 = new string[2];
22 | instrs1[0] = " ror $1, %r15";
23 | instrs1[1] = " imul %edi, %r12d";
24 | string[] instrs2 = new string[2];
25 | instrs2[0] = " ror $1, %r15";
26 | instrs2[1] = " imul %esi, %r11d";
27 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
28 | }
29 |
30 | public override void GenerateX86NasmAsm(StringBuilder sb)
31 | {
32 | string postLoadInstr1 = " mov r15, rdi";
33 | string postLoadInstr2 = " mov r15, rsi";
34 | string[] instrs = new string[2];
35 | instrs[0] = " ror r15, 1";
36 | instrs[1] = " imul r12d, edi";
37 | string[] instrs1 = new string[2];
38 | instrs1[0] = " ror r15, 1";
39 | instrs1[1] = " imul r11d, esi";
40 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
41 | }
42 |
43 | public override void GenerateArmAsm(StringBuilder sb)
44 | {
45 | string postLoadInstr1 = " mov x15, x25";
46 | string postLoadInstr2 = " mov x15, x26";
47 | string[] instrs = new string[2];
48 | instrs[0] = " ror x15, x15, #1";
49 | instrs[1] = " mul x12, x12, x25";
50 | string[] instrs1 = new string[2];
51 | instrs1[0] = " ror x15, x15, #1";
52 | instrs1[1] = " mul x11, x11, x26";
53 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/AsmGen/tests/NopLoopTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class NopLoopTest : UarchTest
6 | {
7 | ///
8 | ///
9 | ///
10 | /// must be greater than 2
11 | ///
12 | ///
13 | public NopLoopTest(int high, int step)
14 | {
15 | this.Counts = UarchTestHelpers.GenerateCountArray(3, high, step);
16 | this.Prefix = "noploop";
17 | this.Description = $"NOP throughput for various loop sizes";
18 | this.FunctionDefinitionParameters = "uint64_t iterations";
19 | this.GetFunctionCallParameters = "structIterations";
20 | this.DivideTimeByCount = true;
21 | }
22 |
23 | public override void GenerateX86GccAsm(StringBuilder sb)
24 | {
25 | for (int i = 0; i < Counts.Length; i++)
26 | {
27 | string funcName = this.Prefix + this.Counts[i];
28 | sb.AppendLine(funcName + ":");
29 |
30 | // count dec, jnz as instructions in the loop
31 | for (int nopIdx = 0; nopIdx < this.Counts[i] - 2; nopIdx++) sb.AppendLine(" nop");
32 | sb.AppendLine(" dec %rdi");
33 | sb.AppendLine(" jnz " + funcName);
34 | sb.AppendLine(" ret");
35 | }
36 | }
37 |
38 | public override void GenerateX86NasmAsm(StringBuilder sb)
39 | {
40 | for (int i = 0; i < Counts.Length; i++)
41 | {
42 | string funcName = this.Prefix + this.Counts[i];
43 | sb.AppendLine(funcName + ":");
44 |
45 | // count dec, jnz as instructions in the loop
46 | for (int nopIdx = 0; nopIdx < this.Counts[i] - 2; nopIdx++) sb.AppendLine(" nop");
47 | sb.AppendLine(" dec rcx");
48 | sb.AppendLine(" jnz " + funcName);
49 | sb.AppendLine(" ret");
50 | }
51 | }
52 |
53 | public override void GenerateArmAsm(StringBuilder sb)
54 | {
55 | for (int i = 0; i < Counts.Length; i++)
56 | {
57 | string funcName = this.Prefix + this.Counts[i];
58 | sb.AppendLine(funcName + ":");
59 |
60 | // count dec, jnz as instructions in the loop
61 | for (int nopIdx = 0; nopIdx < this.Counts[i] - 2; nopIdx++) sb.AppendLine(" nop");
62 | sb.AppendLine(" sub x0, x0, 1");
63 | sb.AppendLine(" cbnz x0, " + funcName);
64 | sb.AppendLine(" ret");
65 | }
66 | }
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/AsmGen/tests/FaddIntAddSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class FaddIntAddSchedTest : UarchTest
6 | {
7 | public FaddIntAddSchedTest(int low, int high, int step)
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mixfaddintaddsched";
11 | this.Description = "Mixed FP/Integer Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
13 | this.GetFunctionCallParameters = "structIterations, A, fpArr";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | // xmm0 is dependent on ptr chasing load
20 | string[] unrolledAdds = new string[4];
21 | unrolledAdds[0] = " addss %xmm0, %xmm1";
22 | unrolledAdds[1] = " add %edi, %r11d";
23 | unrolledAdds[2] = " addss %xmm0, %xmm3";
24 | unrolledAdds[3] = " add %edi, %r12d";
25 |
26 | string[] unrolledAdds1 = new string[4];
27 | unrolledAdds1[0] = " addss %xmm0, %xmm1";
28 | unrolledAdds1[1] = " add %esi, %r14d";
29 | unrolledAdds1[2] = " addss %xmm0, %xmm3";
30 | unrolledAdds1[3] = " add %esi, %r15d";
31 |
32 | string rdicvt = "cvtsi2ss %rdi, %xmm0";
33 | string rsicvt = "cvtsi2ss %rsi, %xmm0";
34 |
35 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds,
36 | includePtrChasingLoads: false, postLoadInstrs1: rdicvt, postLoadInstrs2: rsicvt);
37 | }
38 |
39 | // todo....
40 | public override void GenerateX86NasmAsm(StringBuilder sb)
41 | {
42 | string[] unrolledAdds = new string[4];
43 | unrolledAdds[0] = " addss xmm1, xmm0";
44 | unrolledAdds[1] = " add r11d, edi";
45 | unrolledAdds[2] = " addss xmm3, xmm0";
46 | unrolledAdds[3] = " add r12d, edi";
47 | UarchTestHelpers.GenerateX86NasmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds);
48 | }
49 |
50 | public override void GenerateArmAsm(StringBuilder sb)
51 | {
52 | string[] unrolledAdds = new string[4];
53 | unrolledAdds[0] = " fadd s17, s17, s16";
54 | unrolledAdds[1] = " fadd s18, s18, s16";
55 | unrolledAdds[2] = " fadd s19, s19, s16";
56 | unrolledAdds[3] = " fadd s20, s20, s16";
57 | UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds);
58 | }
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/AsmGen/tests/Add256RfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class Add256RfTest : UarchTest // Register file capacity test using 256-bit integer adds on x86 and 128-bit fadd on ARM.
6 | {
7 | public Add256RfTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "add256rf";
11 | this.Description = "256-bit Integer Add RF Capacity Test - 128-bit fadd on ARM";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
13 | this.GetFunctionCallParameters = "structIterations, A, B";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string initInstrs = " vmovdqu (%r8), %ymm0\n" + // load ymm0 from memory at r8, then copy it into ymm1..ymm4
20 | " vmovdqa %ymm0, %ymm1\n" +
21 | " vmovdqa %ymm0, %ymm2\n" +
22 | " vmovdqa %ymm0, %ymm3\n" +
23 | " vmovdqa %ymm0, %ymm4\n";
24 |
25 | string[] unrolledAdds = new string[4];
26 | unrolledAdds[0] = " vpaddd %ymm0, %ymm1, %ymm1"; // AT&T operand order: the last operand is the destination
27 | unrolledAdds[1] = " vpaddd %ymm0, %ymm2, %ymm2";
28 | unrolledAdds[2] = " vpaddd %ymm0, %ymm3, %ymm3";
29 | unrolledAdds[3] = " vpaddd %ymm0, %ymm4, %ymm4"; // destination is ymm4, matching the NASM variant below (was ymm3, so ymm3 was written twice and ymm4 never)
30 |
31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, initInstrs: initInstrs);
32 | }
33 |
34 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
35 | {
36 | string initInstrs = " vmovdqu ymm0, [r8]\n" + // same setup as the GCC variant, in Intel operand order
37 | " vmovdqa ymm1, ymm0\n" +
38 | " vmovdqa ymm2, ymm0\n" +
39 | " vmovdqa ymm3, ymm0\n" +
40 | " vmovdqa ymm4, ymm0\n";
41 |
42 | string[] unrolledAdds = new string[4];
43 | unrolledAdds[0] = " vpaddd ymm1, ymm1, ymm0";
44 | unrolledAdds[1] = " vpaddd ymm2, ymm2, ymm0";
45 | unrolledAdds[2] = " vpaddd ymm3, ymm3, ymm0";
46 | unrolledAdds[3] = " vpaddd ymm4, ymm4, ymm0";
47 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, initInstrs: initInstrs);
48 | }
49 |
50 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb; uses 128-bit fadd as the Description states.
51 | {
52 | string[] unrolledAdds = new string[4];
53 | unrolledAdds[0] = " fadd v15.4s, v15.4s, v19.4s"; // NOTE(review): no initInstrs are passed here, so v15-v19 are not initialized by this method - confirm the helper handles it
54 | unrolledAdds[1] = " fadd v16.4s, v16.4s, v19.4s";
55 | unrolledAdds[2] = " fadd v17.4s, v17.4s, v19.4s";
56 | unrolledAdds[3] = " fadd v18.4s, v18.4s, v19.4s";
57 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds);
58 | }
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixIntFpRf13Test.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MixIntFp13RfTest : UarchTest // Mixed integer/FP register file test: one integer add per three scalar FP adds.
6 | {
7 | public MixIntFp13RfTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mixintfp13rf";
11 | this.Description = "Mix of integer and FP register file, 1:3 ratio";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
13 | this.GetFunctionCallParameters = "structIterations, A, fpArr";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string initInstrs = " movss (%r8), %xmm1\n" + // load five consecutive floats from memory at r8 into xmm1..xmm5
20 | " movss 4(%r8), %xmm2\n" +
21 | " movss 8(%r8), %xmm3\n" +
22 | " movss 12(%r8), %xmm4\n" +
23 | " movss 16(%r8), %xmm5\n";
24 |
25 | string[] instrs = new string[4];
26 | instrs[0] = "add %r15, %r14"; // the single integer instruction of the 1:3 mix
27 | instrs[1] = "addss %xmm1, %xmm2";
28 | instrs[2] = "addss %xmm1, %xmm3";
29 | instrs[3] = "addss %xmm1, %xmm4";
30 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true, initInstrs); // NOTE(review): the positional 'true' is presumably includePtrChasingLoads - confirm against the helper signature
31 | }
32 |
33 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
34 | {
35 | string initInstrs = " movss xmm1, [r8]\n" + // same setup as the GCC variant, in Intel operand order
36 | " movss xmm2, [r8 + 4]\n" +
37 | " movss xmm3, [r8 + 8]\n" +
38 | " movss xmm4, [r8 + 12]\n" +
39 | " movss xmm5, [r8 + 16]\n";
40 |
41 | string[] instrs = new string[4];
42 | instrs[0] = "add r14, r15"; // the single integer instruction of the 1:3 mix
43 | instrs[1] = "addss xmm2, xmm1";
44 | instrs[2] = "addss xmm3, xmm1";
45 | instrs[3] = "addss xmm4, xmm1";
46 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true, initInstrs);
47 | }
48 |
49 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb.
50 | {
51 | string initInstrs = " ldr s17, [x2]\n" + // load five consecutive floats from memory at x2 into s17..s21
52 | " ldr s18, [x2, 4]\n" +
53 | " ldr s19, [x2, 8]\n" +
54 | " ldr s20, [x2, 12]\n" +
55 | " ldr s21, [x2, 16]\n";
56 |
57 | string[] instrs = new string[4];
58 | instrs[0] = " add x15, x15, x11"; // the single integer instruction of the 1:3 mix
59 | instrs[1] = " fadd s18, s18, s17";
60 | instrs[2] = " fadd s19, s19, s17";
61 | instrs[3] = " fadd s20, s20, s17";
62 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true, initInstrs);
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/AsmGen/tests/BtsSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class BtsSchedTest : UarchTest // Scheduler capacity test built from bts (bit test and set) instructions.
6 | {
7 | public BtsSchedTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "btssched";
11 | this.Description = "Bit Test + Set CF Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string postLoadInstr1 = " mov %rdi, %r15"; // copies rdi into r15, the register every bts below operates on
20 | string postLoadInstr2 = " mov %rsi, %r15"; // same, but from rsi
21 | string[] instrs = new string[4];
22 | instrs[0] = " bts %r14, %r15";
23 | instrs[1] = " bts %r13, %r15";
24 | instrs[2] = " bts %r12, %r15";
25 | instrs[3] = " bts %r11, %r15";
26 |
27 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
28 | }
29 |
30 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
31 | {
32 | // todo: placeholder - this emits plain adds, not bts like the GCC variant
33 | string[] unrolledAdds = new string[4];
34 | unrolledAdds[0] = " add r15, rdi";
35 | unrolledAdds[1] = " add r14, rdi";
36 | unrolledAdds[2] = " add r13, rdi";
37 | unrolledAdds[3] = " add r12, rdi";
38 |
39 | string[] unrolledAdds1 = new string[4];
40 | unrolledAdds1[0] = " add r15, rsi";
41 | unrolledAdds1[1] = " add r14, rsi";
42 | unrolledAdds1[2] = " add r13, rsi";
43 | unrolledAdds1[3] = " add r12, rsi";
44 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
45 | }
46 |
47 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb; it emits integer adds, there is no bts here.
48 | {
49 | string[] unrolledAdds = new string[4];
50 | unrolledAdds[0] = " add x15, x15, x25";
51 | unrolledAdds[1] = " add x14, x14, x25";
52 | unrolledAdds[2] = " add x13, x13, x25";
53 | unrolledAdds[3] = " add x12, x12, x25";
54 |
55 | string[] unrolledAdds1 = new string[4];
56 | unrolledAdds1[0] = " add x15, x15, x26";
57 | unrolledAdds1[1] = " add x14, x14, x26";
58 | unrolledAdds1[2] = " add x13, x13, x26";
59 | unrolledAdds1[3] = " add x12, x12, x26";
60 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/AsmGen/tests/PdepSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class PdepSchedTest : UarchTest // Scheduler capacity test built from pdep instructions.
6 | {
7 | public PdepSchedTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "pdepsched";
11 | this.Description = "PDEP Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string[] instrs1 = new string[4]; // first list: every pdep takes rdi as an input
20 | instrs1[0] = " pdep %rdi, %r14, %r15";
21 | instrs1[1] = " pdep %rdi, %r13, %r15";
22 | instrs1[2] = " pdep %rdi, %r12, %r15";
23 | instrs1[3] = " pdep %rdi, %r11, %r15";
24 |
25 | string[] instrs2 = new string[4]; // second list: same instructions with rsi as the input
26 | instrs2[0] = " pdep %rsi, %r14, %r15";
27 | instrs2[1] = " pdep %rsi, %r13, %r15";
28 | instrs2[2] = " pdep %rsi, %r12, %r15";
29 | instrs2[3] = " pdep %rsi, %r11, %r15";
30 |
31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false); // pass the rsi list second, like the NASM variant (instrs1 used to be passed twice, leaving instrs2 unused)
32 | }
33 |
34 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
35 | {
36 | string[] instrs = new string[4]; // first list: every pdep takes rdi as an input
37 | instrs[0] = " pdep r15, rdi, r14";
38 | instrs[1] = " pdep r15, rdi, r13";
39 | instrs[2] = " pdep r15, rdi, r12";
40 | instrs[3] = " pdep r15, rdi, r11";
41 |
42 | string[] instrs1 = new string[4]; // second list: same instructions with rsi as the input
43 | instrs1[0] = " pdep r15, rsi, r14";
44 | instrs1[1] = " pdep r15, rsi, r13";
45 | instrs1[2] = " pdep r15, rsi, r12";
46 | instrs1[3] = " pdep r15, rsi, r11";
47 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false);
48 | }
49 |
50 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb; it emits integer adds, there is no pdep here.
51 | {
52 | string[] unrolledAdds = new string[4];
53 | unrolledAdds[0] = " add x15, x15, x25";
54 | unrolledAdds[1] = " add x14, x14, x25";
55 | unrolledAdds[2] = " add x13, x13, x25";
56 | unrolledAdds[3] = " add x12, x12, x25";
57 |
58 | string[] unrolledAdds1 = new string[4];
59 | unrolledAdds1[0] = " add x15, x15, x26";
60 | unrolledAdds1[1] = " add x14, x14, x26";
61 | unrolledAdds1[2] = " add x13, x13, x26";
62 | unrolledAdds1[3] = " add x12, x12, x26";
63 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
64 | }
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixPdepLeaSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class PdepLeaSchedTest : UarchTest // Scheduler capacity test that alternates pdep and lea instructions.
6 | {
7 | public PdepLeaSchedTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mixpdepleasched";
11 | this.Description = "Mixed PDEP/LEA Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string[] instrs1 = new string[4]; // first list: pdep/lea alternating, every instruction takes rdi as an input
20 | instrs1[0] = " pdep %rdi, %r14, %r15";
21 | instrs1[1] = " lea (%rdx,%rdi,8), %r13";
22 | instrs1[2] = " pdep %rdi, %r12, %r15";
23 | instrs1[3] = " lea (%rdx,%rdi,8), %r11";
24 |
25 | string[] instrs2 = new string[4]; // second list: same instructions with rsi as the input
26 | instrs2[0] = " pdep %rsi, %r14, %r15";
27 | instrs2[1] = " lea (%rdx,%rsi,8), %r13";
28 | instrs2[2] = " pdep %rsi, %r12, %r15";
29 | instrs2[3] = " lea (%rdx,%rsi,8), %r11";
30 |
31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false); // pass the rsi list second (instrs1 used to be passed twice, leaving instrs2 unused)
32 | }
33 |
34 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
35 | {
36 | // todo: placeholder - this emits only pdep, no lea, unlike the GCC variant
37 | string[] instrs = new string[4];
38 | instrs[0] = " pdep r15, rdi, r14";
39 | instrs[1] = " pdep r15, rdi, r13";
40 | instrs[2] = " pdep r15, rdi, r12";
41 | instrs[3] = " pdep r15, rdi, r11";
42 |
43 | string[] instrs1 = new string[4];
44 | instrs1[0] = " pdep r15, rsi, r14";
45 | instrs1[1] = " pdep r15, rsi, r13";
46 | instrs1[2] = " pdep r15, rsi, r12";
47 | instrs1[3] = " pdep r15, rsi, r11";
48 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false);
49 | }
50 |
51 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb; it emits integer adds, there is no pdep or lea here.
52 | {
53 | string[] unrolledAdds = new string[4];
54 | unrolledAdds[0] = " add x15, x15, x25";
55 | unrolledAdds[1] = " add x14, x14, x25";
56 | unrolledAdds[2] = " add x13, x13, x25";
57 | unrolledAdds[3] = " add x12, x12, x25";
58 |
59 | string[] unrolledAdds1 = new string[4];
60 | unrolledAdds1[0] = " add x15, x15, x26";
61 | unrolledAdds1[1] = " add x14, x14, x26";
62 | unrolledAdds1[2] = " add x13, x13, x26";
63 | unrolledAdds1[3] = " add x12, x12, x26";
64 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/AsmGen/tests/LeaSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class LeaSchedTest : UarchTest // Scheduler capacity test built from lea with a base + index*8 address.
6 | {
7 | public LeaSchedTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "leasched";
11 | this.Description = "lea [r+r*8] Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string[] instrs1 = new string[4]; // first list: every lea uses rdi as the index register
20 | instrs1[0] = " lea (%rdx,%rdi,8), %r15";
21 | instrs1[1] = " lea (%rdx,%rdi,8), %r14";
22 | instrs1[2] = " lea (%rdx,%rdi,8), %r13";
23 | instrs1[3] = " lea (%rdx,%rdi,8), %r12";
24 |
25 | string[] instrs2 = new string[4]; // second list: same instructions with rsi as the index register
26 | instrs2[0] = " lea (%rdx,%rsi,8), %r15";
27 | instrs2[1] = " lea (%rdx,%rsi,8), %r14";
28 | instrs2[2] = " lea (%rdx,%rsi,8), %r13";
29 | instrs2[3] = " lea (%rdx,%rsi,8), %r12";
30 |
31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false); // pass the rsi list second (instrs1 used to be passed twice, leaving instrs2 unused)
32 | }
33 |
34 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
35 | {
36 | // todo: placeholder - this emits plain adds, not lea like the GCC variant
37 | string[] unrolledAdds = new string[4];
38 | unrolledAdds[0] = " add r15, rdi";
39 | unrolledAdds[1] = " add r14, rdi";
40 | unrolledAdds[2] = " add r13, rdi";
41 | unrolledAdds[3] = " add r12, rdi";
42 |
43 | string[] unrolledAdds1 = new string[4];
44 | unrolledAdds1[0] = " add r15, rsi";
45 | unrolledAdds1[1] = " add r14, rsi";
46 | unrolledAdds1[2] = " add r13, rsi";
47 | unrolledAdds1[3] = " add r12, rsi";
48 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
49 | }
50 |
51 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb; it emits integer adds.
52 | {
53 | string[] unrolledAdds = new string[4];
54 | unrolledAdds[0] = " add x15, x15, x25";
55 | unrolledAdds[1] = " add x14, x14, x25";
56 | unrolledAdds[2] = " add x13, x13, x25";
57 | unrolledAdds[3] = " add x12, x12, x25";
58 |
59 | string[] unrolledAdds1 = new string[4];
60 | unrolledAdds1[0] = " add x15, x15, x26";
61 | unrolledAdds1[1] = " add x14, x14, x26";
62 | unrolledAdds1[2] = " add x13, x13, x26";
63 | unrolledAdds1[3] = " add x12, x12, x26";
64 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/AsmGen/tests/FpRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class FpRfTest : UarchTest // FP register file test built from scalar FP adds.
6 | {
7 | public FpRfTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "fprf";
11 | this.Description = "FP (64-bit scalar) RF Test"; // NOTE(review): the variants below emit addss / fadd on s registers - confirm the "64-bit" wording
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
13 | this.GetFunctionCallParameters = "structIterations, A, fpArr";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string initInstrs = " movss (%r8), %xmm1\n" + // load five consecutive floats from memory at r8 into xmm1..xmm5
20 | " movss 4(%r8), %xmm2\n" +
21 | " movss 8(%r8), %xmm3\n" +
22 | " movss 12(%r8), %xmm4\n" +
23 | " movss 16(%r8), %xmm5\n";
24 |
25 | string[] unrolledAdds = new string[4];
26 | unrolledAdds[0] = " addss %xmm1, %xmm2"; // each add accumulates xmm1 into a different destination register
27 | unrolledAdds[1] = " addss %xmm1, %xmm3";
28 | unrolledAdds[2] = " addss %xmm1, %xmm4";
29 | unrolledAdds[3] = " addss %xmm1, %xmm5";
30 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs);
31 | }
32 |
33 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
34 | {
35 | string initInstrs = " movss xmm1, [r8]\n" + // same setup as the GCC variant, in Intel operand order
36 | " movss xmm2, [r8 + 4]\n" +
37 | " movss xmm3, [r8 + 8]\n" +
38 | " movss xmm4, [r8 + 12]\n" +
39 | " movss xmm5, [r8 + 16]\n";
40 |
41 | string[] unrolledAdds = new string[4];
42 | unrolledAdds[0] = " addss xmm2, xmm1";
43 | unrolledAdds[1] = " addss xmm3, xmm1";
44 | unrolledAdds[2] = " addss xmm4, xmm1";
45 | unrolledAdds[3] = " addss xmm5, xmm1";
46 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs);
47 | }
48 |
49 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb.
50 | {
51 | string initInstrs = " ldr s17, [x2]\n" + // load five consecutive floats from memory at x2 into s17..s21
52 | " ldr s18, [x2, 4]\n" +
53 | " ldr s19, [x2, 8]\n" +
54 | " ldr s20, [x2, 12]\n" +
55 | " ldr s21, [x2, 16]\n";
56 |
57 | string[] unrolledAdds = new string[4];
58 | unrolledAdds[0] = " fadd s18, s18, s17"; // each add accumulates s17 into a different destination register
59 | unrolledAdds[1] = " fadd s19, s19, s17";
60 | unrolledAdds[2] = " fadd s20, s20, s17";
61 | unrolledAdds[3] = " fadd s21, s21, s17";
62 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs);
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/MemoryLatency/MemoryLatency_i686.s:
--------------------------------------------------------------------------------
1 | .text
2 |
3 | .global @latencytest@8
4 | .global @preplatencyarr@8
5 | .global @stlftest@8
6 | .global @matchedstlftest@8
7 | .global latencytest
8 | .global preplatencyarr
9 | .global stlftest
10 | .global matchedstlftest
11 |
12 | /* fastcall specified in source file, so
13 | ecx = ptr to arr
14 | edx = arr len
15 | convert values in array from array indexes to pointers
16 | there has to be a way to make C do this but high level
17 | programming languages suck and make simple things harder than they should be
18 | */
19 | preplatencyarr: /* two labels for one entry point: the plain name and the @name@8 form, both declared .global above */
20 | @preplatencyarr@8:
21 | push %eax /* eax and esi are used as scratch below and restored before ret */
22 | push %esi
23 | xor %esi, %esi /* esi = array index */
24 | preplatencyarr_loop:
25 | mov (%ecx,%esi,4), %eax /* load target array index into eax */
26 | lea (%ecx,%eax,4), %eax /* calculate target address -> eax */
27 | mov %eax, (%ecx,%esi,4) /* replace array index with target address */
28 | inc %esi
29 | cmp %esi, %edx
30 | jne preplatencyarr_loop /* repeat until esi == edx (array length); NOTE(review): the check is at the bottom, so a length of 0 is not treated as empty */
31 | pop %esi
32 | pop %eax
33 | ret
34 |
35 | /* ecx = iterations
36 | edx = ptr to arr
37 | do pointer chasing for specified iteration count
38 | */
39 | latencytest: /* two labels for one entry point: the plain name and the @name@8 form */
40 | @latencytest@8:
41 | push %esi
42 | mov (%edx), %esi /* esi = first pointer, read from the start of the array */
43 | xor %eax, %eax /* eax = running sum, left in eax at ret */
44 | latencytest_loop:
45 | mov (%esi), %esi /* dependent load: follow the pointer */
46 | add %esi, %eax /* fold each loaded pointer into the sum */
47 | dec %ecx
48 | jnz latencytest_loop /* repeat until the iteration count in ecx reaches zero */
49 | pop %esi
50 | ret
51 |
52 | /* ecx = iterations
53 | edx = ptr to array. first two 32-bit ints in array are store and load offsets respectively
54 | mismatch load and store sizes by using 16-bit loads and 32-bit stores
55 | */
56 | stlftest: /* two labels for one entry point: the plain name and the @name@8 form */
57 | @stlftest@8:
58 | push %esi
59 | push %edi
60 | mov (%edx), %eax /* just get some value into eax (the store value) */
61 | mov (%edx), %esi /* esi = store offset (first int in the array) */
62 | mov 4(%edx), %edi /* edi = load offset (second int in the array) */
63 | add %edx, %esi /* esi = store ptr */
64 | add %edx, %edi /* edi = load ptr */
65 | stlftest_loop:
66 | mov %eax, (%esi) /* 32-bit store */
67 | mov (%edi), %ax /* 16-bit load that possibly gets forwarded result */
68 | mov %eax, (%esi)
69 | mov (%edi), %ax
70 | mov %eax, (%esi)
71 | mov (%edi), %ax
72 | mov %eax, (%esi)
73 | mov (%edi), %ax
74 | mov %eax, (%esi)
75 | mov (%edi), %ax
76 | sub $5, %ecx /* five store/load pairs per pass through the loop */
77 | jg stlftest_loop /* continue while the remaining count is still positive */
78 | pop %edi
79 | pop %esi
80 | ret
81 |
82 | matchedstlftest: /* same structure as stlftest, but the loads are 32-bit like the stores */
83 | @matchedstlftest@8:
84 | push %esi
85 | push %edi
86 | mov (%edx), %eax /* just get some value into eax (the store value) */
87 | mov (%edx), %esi /* esi = store offset (first int in the array) */
88 | mov 4(%edx), %edi /* edi = load offset (second int in the array) */
89 | add %edx, %esi /* esi = store ptr */
90 | add %edx, %edi /* edi = load ptr */
91 | matchedstlftest_loop:
92 | mov %eax, (%esi) /* 32-bit store */
93 | mov (%edi), %eax /* 32-bit load, same size as the store */
94 | mov %eax, (%esi)
95 | mov (%edi), %eax
96 | mov %eax, (%esi)
97 | mov (%edi), %eax
98 | mov %eax, (%esi)
99 | mov (%edi), %eax
100 | mov %eax, (%esi)
101 | mov (%edi), %eax
102 | sub $5, %ecx /* five store/load pairs per pass through the loop */
103 | jg matchedstlftest_loop /* continue while the remaining count is still positive */
104 | pop %edi
105 | pop %esi
106 | ret
107 |
--------------------------------------------------------------------------------
/AsmGen/tests/AddSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class AddSchedTest : UarchTest // Scheduler capacity test built from integer add instructions.
6 | {
7 | public AddSchedTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "addsched";
11 | this.Description = "Integer (add) Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string[] unrolledAdds = new string[4]; // first list: every add takes rdi as its source
20 | unrolledAdds[0] = " add %rdi, %r15";
21 | unrolledAdds[1] = " add %rdi, %r14";
22 | unrolledAdds[2] = " add %rdi, %r13";
23 | unrolledAdds[3] = " add %rdi, %r12";
24 |
25 | string[] unrolledAdds1 = new string[4]; // second list: same adds with rsi as the source
26 | unrolledAdds1[0] = " add %rsi, %r15";
27 | unrolledAdds1[1] = " add %rsi, %r14";
28 | unrolledAdds1[2] = " add %rsi, %r13";
29 | unrolledAdds1[3] = " add %rsi, %r12";
30 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
31 | }
32 |
33 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
34 | {
35 | string[] unrolledAdds = new string[4]; // first list: every add takes rdi as its source
36 | unrolledAdds[0] = " add r15, rdi";
37 | unrolledAdds[1] = " add r14, rdi";
38 | unrolledAdds[2] = " add r13, rdi";
39 | unrolledAdds[3] = " add r12, rdi";
40 |
41 | string[] unrolledAdds1 = new string[4]; // second list: same adds with rsi as the source
42 | unrolledAdds1[0] = " add r15, rsi";
43 | unrolledAdds1[1] = " add r14, rsi";
44 | unrolledAdds1[2] = " add r13, rsi";
45 | unrolledAdds1[3] = " add r12, rsi";
46 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
47 | }
48 |
49 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb.
50 | {
51 | string[] unrolledAdds = new string[4]; // first list: every add takes x25 as its second source
52 | unrolledAdds[0] = " add x15, x15, x25";
53 | unrolledAdds[1] = " add x14, x14, x25";
54 | unrolledAdds[2] = " add x13, x13, x25";
55 | unrolledAdds[3] = " add x12, x12, x25";
56 |
57 | string[] unrolledAdds1 = new string[4]; // second list: same adds with x26 as the second source
58 | unrolledAdds1[0] = " add x15, x15, x26";
59 | unrolledAdds1[1] = " add x14, x14, x26";
60 | unrolledAdds1[2] = " add x13, x13, x26";
61 | unrolledAdds1[3] = " add x12, x12, x26";
62 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/AsmGen/tests/MulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class MulSchedTest : UarchTest // Scheduler capacity test built from 64-bit integer multiplies.
6 | {
7 | public MulSchedTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "mulsched";
11 | this.Description = "Integer (64-bit mul) Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
18 | {
19 | string[] unrolledMuls = new string[4]; // first list: every imul takes rdi as its source
20 | unrolledMuls[0] = " imul %rdi, %r15";
21 | unrolledMuls[1] = " imul %rdi, %r14";
22 | unrolledMuls[2] = " imul %rdi, %r13";
23 | unrolledMuls[3] = " imul %rdi, %r12";
24 |
25 | string[] unrolledMuls1 = new string[4]; // second list: same multiplies with rsi as the source
26 | unrolledMuls1[0] = " imul %rsi, %r15";
27 | unrolledMuls1[1] = " imul %rsi, %r14";
28 | unrolledMuls1[2] = " imul %rsi, %r13";
29 | unrolledMuls1[3] = " imul %rsi, %r12";
30 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
31 | }
32 |
33 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
34 | {
35 | string[] unrolledMuls = new string[4]; // first list: every imul takes rdi as its source
36 | unrolledMuls[0] = " imul r15, rdi";
37 | unrolledMuls[1] = " imul r14, rdi";
38 | unrolledMuls[2] = " imul r13, rdi";
39 | unrolledMuls[3] = " imul r12, rdi";
40 |
41 | string[] unrolledMuls1 = new string[4]; // second list: same multiplies with rsi as the source
42 | unrolledMuls1[0] = " imul r15, rsi";
43 | unrolledMuls1[1] = " imul r14, rsi";
44 | unrolledMuls1[2] = " imul r13, rsi";
45 | unrolledMuls1[3] = " imul r12, rsi";
46 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false); // pass the rsi list second, like the GCC variant (unrolledMuls used to be passed twice, leaving unrolledMuls1 unused)
47 | }
48 |
49 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb.
50 | {
51 | string[] unrolledAdds = new string[4]; // first list: every mul takes x25 as its second source
52 | unrolledAdds[0] = " mul x10, x10, x25";
53 | unrolledAdds[1] = " mul x14, x14, x25";
54 | unrolledAdds[2] = " mul x13, x13, x25";
55 | unrolledAdds[3] = " mul x12, x12, x25";
56 |
57 | string[] unrolledAdds1 = new string[4]; // second list: same multiplies with x26 as the second source
58 | unrolledAdds1[0] = " mul x10, x10, x26";
59 | unrolledAdds1[1] = " mul x14, x14, x26";
60 | unrolledAdds1[2] = " mul x13, x13, x26";
61 | unrolledAdds1[3] = " mul x12, x12, x26";
62 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/MemoryBandwidth/README.md:
--------------------------------------------------------------------------------
1 | # Memory Bandwidth Benchmark
2 | This is a C and assembly project that tests memory bandwidth. There's a version in this directory for Linux that uses POSIX threads for multithreading. There's a Windows version in the MemoryBandwidth subdirectory that uses Windows threading APIs. The Windows version requires Visual Studio and nasm in the path to compile.
3 |
4 | To compile the Linux version, run `make amd64` or `make aarch64`, depending on the target architecture.
5 |
6 | # Example usage
7 |
8 | Testing single threaded bandwidth: `MemoryBandwidth.exe` or `./membw_amd64` or `./membw_aarch64`
9 |
10 | # General parameters
11 | `-threads` - How many threads to spawn. If you spawn more than one (i.e. with `-threads 4`) you might want to specify `-private` or `-shared`
12 |
13 | `-private` - A separate test array is allocated for each thread. Each thread will access its own block of data, with the total amount of test data equal to the test size. For example, with a test size of 16 KB and 4 threads, each thread is given a 4 KB array. With this mode, test results will reflect combined cache capacity. If you have four cores, each with a private 32 KB L1D, expect to see L1D bandwidth for test sizes up to 4 * 32 KB = 128 KB. This is usually the best mode to use because memory bandwidth results won't be inflated by request combining.
14 |
15 | `-shared` - A single test array is accessed by all threads. For example, with 4 threads and a 16 KB test size, a single 16 KB array will be allocated and all four threads will hit it. Useful for seeing small shared caches, where the sum of private cache capacity is very close to (or exceeds) shared cache capacity. This mode often gives erroneously high memory bandwidth results because requests to the same cachelines from multiple cores may be combined. Of course using this mode with anything other than read-only access patterns is....stupid.
16 |
17 | `-method` - What test to run. Methods will vary depending on what platform you're targeting and what version (Windows or Linux) you're using. There's some naming inconsistency here that I have to clean up. Good luck. If you don't specify it, it should pick the best read-only test function to use on your system. But a few options:
18 | - `asm` (Linux only) - Uses a default read-only test function with a handwritten, unrolled assembly loop. On x86, AVX is used. NEON is used on aarch64.
19 | - `avx512` (Linux, x86-64 only) - Uses AVX-512 instructions
20 | - `write` (Linux) - Tests write bandwidth instead of read bandwidth. Will use AVX-512 if available
21 | - `copy` (Linux) - Copies one half of the array to the other
22 | - `scalar` - Plain C code that should work on any system. Only option available if you're on a weird (not x86 or aarch64) platform. Unsuitable for testing cache bandwidth because compilers are really really bad at autovectorization
23 | - `instr8`, `instr4` - Tests instruction-side bandwidth (as opposed to data side) by filling an array with NOPs and a return at the end, marking it executable, and calling it as if it were a function. On x86-64, `instr8` uses 8 byte NOPs, while `instr4` uses 4 byte NOPs.
24 |
--------------------------------------------------------------------------------
/AsmGen/tests/CvtSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class CvtSchedTest : UarchTest // Scheduler capacity test built from integer-to-float conversions (cvtsi2ss on x86, scvtf on ARM).
6 | {
7 | public CvtSchedTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "cvtsched";
11 | this.Description = "I2F (cvtsi2ss) Scheduler Capacity Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | }
15 |
16 | public override void GenerateX86GccAsm(StringBuilder sb) // Appends the AT&T-syntax x86 variant to sb.
17 | {
18 | string[] unrolledInstrs = new string[4]; // first list: every conversion reads rdi
19 | unrolledInstrs[0] = " cvtsi2ss %rdi, %xmm1";
20 | unrolledInstrs[1] = " cvtsi2ss %rdi, %xmm2";
21 | unrolledInstrs[2] = " cvtsi2ss %rdi, %xmm3";
22 | unrolledInstrs[3] = " cvtsi2ss %rdi, %xmm4";
23 |
24 | string[] unrolledInstrs1 = new string[4]; // second list: same conversions reading rsi
25 | unrolledInstrs1[0] = " cvtsi2ss %rsi, %xmm1";
26 | unrolledInstrs1[1] = " cvtsi2ss %rsi, %xmm2";
27 | unrolledInstrs1[2] = " cvtsi2ss %rsi, %xmm3";
28 | unrolledInstrs1[3] = " cvtsi2ss %rsi, %xmm4";
29 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledInstrs, unrolledInstrs1, false); // pass the rsi list second, like the NASM variant (unrolledInstrs used to be passed twice, leaving unrolledInstrs1 unused)
30 | }
31 |
32 | public override void GenerateX86NasmAsm(StringBuilder sb) // Appends the NASM-syntax x86 variant to sb.
33 | {
34 | string[] unrolledInstrs = new string[4]; // first list: every conversion reads rdi
35 | unrolledInstrs[0] = " cvtsi2ss xmm1, rdi";
36 | unrolledInstrs[1] = " cvtsi2ss xmm2, rdi";
37 | unrolledInstrs[2] = " cvtsi2ss xmm3, rdi";
38 | unrolledInstrs[3] = " cvtsi2ss xmm4, rdi";
39 |
40 | string[] unrolledInstrs1 = new string[4]; // second list: same conversions reading rsi
41 | unrolledInstrs1[0] = " cvtsi2ss xmm1, rsi";
42 | unrolledInstrs1[1] = " cvtsi2ss xmm2, rsi";
43 | unrolledInstrs1[2] = " cvtsi2ss xmm3, rsi";
44 | unrolledInstrs1[3] = " cvtsi2ss xmm4, rsi";
45 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledInstrs, unrolledInstrs1, false);
46 | }
47 |
48 | public override void GenerateArmAsm(StringBuilder sb) // Appends the ARM variant to sb.
49 | {
50 | string[] unrolledInstrs = new string[4]; // first list: every conversion reads w25 and writes s0
51 | unrolledInstrs[0] = " scvtf s0, w25";
52 | unrolledInstrs[1] = " scvtf s0, w25";
53 | unrolledInstrs[2] = " scvtf s0, w25";
54 | unrolledInstrs[3] = " scvtf s0, w25";
55 |
56 | string[] unrolledInstrs1 = new string[4]; // second list: same conversions reading w26
57 | unrolledInstrs1[0] = " scvtf s0, w26";
58 | unrolledInstrs1[1] = " scvtf s0, w26";
59 | unrolledInstrs1[2] = " scvtf s0, w26";
60 | unrolledInstrs1[3] = " scvtf s0, w26";
61 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledInstrs, unrolledInstrs1, false);
62 | }
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/AsmGen/tests/VecRfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 | public class VecRfTest : UarchTest
6 | {
7 | public VecRfTest(int low, int high, int step) // low/high/step are forwarded to GenerateCountArray to build Counts
8 | {
9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
10 | this.Prefix = "vec128rf"; // the prefix carries the 128-bit width; the class name does not
11 | this.Description = "Vector (128-bit packed int) RF Test";
12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
13 | this.GetFunctionCallParameters = "structIterations, A";
14 | this.DivideTimeByCount = false;
15 | }
16 |
17 | public override void GenerateX86GccAsm(StringBuilder sb)
18 | {
19 | // it's ok, the ptr chasing arr should be way bigger than this
20 | string initInstrs = " movdqu (%rdx), %xmm1\n" +
21 | " movdqu 16(%rdx), %xmm2\n" +
22 | " movdqu 32(%rdx), %xmm3\n" +
23 | " movdqu 48(%rdx), %xmm4\n" +
24 | " movdqu 64(%rdx), %xmm5\n";
25 |
26 | string[] unrolledAdds = new string[4];
27 | unrolledAdds[0] = " paddq %xmm1, %xmm2";
28 | unrolledAdds[1] = " paddq %xmm1, %xmm3";
29 | unrolledAdds[2] = " paddq %xmm1, %xmm4";
30 | unrolledAdds[3] = " paddq %xmm1, %xmm5";
31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs);
32 | }
33 |
34 | public override void GenerateX86NasmAsm(StringBuilder sb)
35 | {
36 | string initInstrs = " movdqu xmm1, [rdx]\n" +
37 | " movdqu xmm2, [rdx + 16]\n" +
38 | " movdqu xmm3, [rdx + 32]\n" +
39 | " movdqu xmm4, [rdx + 48]\n" +
40 | " movdqu xmm5, [rdx + 64]\n";
41 |
42 | string[] unrolledAdds = new string[4];
43 | unrolledAdds[0] = " paddq xmm2, xmm1";
44 | unrolledAdds[1] = " paddq xmm3, xmm1";
45 | unrolledAdds[2] = " paddq xmm4, xmm1";
46 | unrolledAdds[3] = " paddq xmm5, xmm1";
47 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs);
48 | }
49 |
50 | public override void GenerateArmAsm(StringBuilder sb)
51 | {
52 | string initInstrs = " ldr q0, [x1]\n" +
53 | " ldr q1, [x1, #0x10]\n" +
54 | " ldr q2, [x1, #0x20]\n" +
55 | " ldr q3, [x1, #0x30]\n" +
56 | " ldr q4, [x1, #0x40]\n";
57 |
58 | string[] unrolledAdds = new string[4];
59 | unrolledAdds[0] = " add v1.4s, v1.4s, v0.4s";
60 | unrolledAdds[1] = " add v2.4s, v2.4s, v0.4s";
61 | unrolledAdds[2] = " add v3.4s, v3.4s, v0.4s";
62 | unrolledAdds[3] = " add v4.4s, v4.4s, v0.4s";
63 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs);
64 | }
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/AsmGen/tests/Add512SchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Scheduler capacity test whose filler instructions are 512-bit integer adds (vpaddd on zmm
    /// registers). The ARM variant uses 128-bit adds.
    /// </summary>
    public class Add512SchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public Add512SchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "add512sched";
            this.Description = "512-bit Integer Add Scheduler Capacity Test (AVX-512 only)";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
            this.GetFunctionCallParameters = "structIterations, A, B";
            this.DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // zmm0 is rebuilt from rdi / rsi after each load, so every add below that reads
            // zmm0 depends on the pointer chasing load.
            string postLoadInstr1 = " movq %rdi, %xmm0\n vpbroadcastd %xmm0, %zmm0\n";
            string postLoadInstr2 = " movq %rsi, %xmm0\n vpbroadcastd %xmm0, %zmm0\n";

            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " vpaddd %zmm0, %zmm1, %zmm1";
            unrolledAdds[1] = " vpaddd %zmm0, %zmm2, %zmm2";
            unrolledAdds[2] = " vpaddd %zmm0, %zmm3, %zmm3";
            // Destination was %zmm3 (copy/paste slip); each add accumulates into its own
            // register, matching the NASM variant.
            unrolledAdds[3] = " vpaddd %zmm0, %zmm4, %zmm4";

            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            // Uses zmm registers so this variant really is 512-bit, like the GCC one
            // (it previously emitted the 256-bit ymm sequence).
            string postLoadInstr1 = " movq xmm0, rdi\n vpbroadcastd zmm0, xmm0\n";
            string postLoadInstr2 = " movq xmm0, rsi\n vpbroadcastd zmm0, xmm0\n";

            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " vpaddd zmm1, zmm1, zmm0";
            unrolledAdds[1] = " vpaddd zmm2, zmm2, zmm0";
            unrolledAdds[2] = " vpaddd zmm3, zmm3, zmm0";
            unrolledAdds[3] = " vpaddd zmm4, zmm4, zmm0";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Generates the ARM variant (128-bit vector adds).</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // NOTE(review): q18 is loaded twice in the init sequence; harmless, left as-is.
            string initInstrs = " ldr q18, [x1]\n ldr q18, [x1]\n ldr q19, [x2]\n ldr q20, [x2]\n ldr q21, [x2]\n";
            // v17 lane 0 is refreshed from w25 / w26 after each load; the adds below read v17.
            string postLoadInstr1 = " mov v17.s[0], w25\n";
            string postLoadInstr2 = " mov v17.s[0], w26\n";
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add v18.4s, v18.4s, v17.4s";
            unrolledAdds[1] = " add v19.4s, v19.4s, v17.4s";
            unrolledAdds[2] = " add v20.4s, v20.4s, v17.4s";
            unrolledAdds[3] = " add v21.4s, v21.4s, v17.4s";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs, postLoadInstr1, postLoadInstr2);
        }
    }
}
58 |
--------------------------------------------------------------------------------
/AsmGen/tests/MxcsrFeTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Test described as abusing the lack of MXCSR renaming to measure frontend queue capacity.
    /// The x86 variants are marked as unfinished by the original author.
    /// </summary>
    public class MxcsrFeTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public MxcsrFeTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mxcsrfe";
            this.Description = "Abuse lack of MXCSR rename to measure frontend queue capacity";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>
        /// Emits one hand-written function per count: it stores two MXCSR values at (%rsi) and
        /// 8(%rsi), then runs a loop of <c>Counts[i]</c> nops that counts %rdi down to zero.
        /// </summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // NOTE(review): the emitted code never executes ldmxcsr, and it overwrites r15
            // without saving it. Looks unfinished - confirm before relying on this variant.
            for (int i = 0; i < this.Counts.Length; i++)
            {
                string funcName = this.Prefix + this.Counts[i];
                sb.AppendLine("\n" + funcName + ":");
                sb.AppendLine(" mov $0x1f80, %r15");
                sb.AppendLine(" mov %r15, (%rsi)");
                sb.AppendLine(" mov $0x9fc0, %r15");
                sb.AppendLine(" mov %r15, 8(%rsi)");
                sb.AppendLine(funcName + "start:");
                for (int nopIdx = 0; nopIdx < this.Counts[i]; nopIdx++)
                {
                    sb.AppendLine(" nop");
                }
                sb.AppendLine(" dec %rdi"); // iteration count
                sb.AppendLine(" jne " + funcName + "start");
                sb.AppendLine(" ret");
            }
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant (marked todo by the author).</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            // todo
            string[] setMxcsrInstrs = new string[2];
            setMxcsrInstrs[0] = " mov r15, 0x1f80\n mov [r8], r15\n ldmxcsr [r8]\n addss xmm0, xmm1"; // default
            setMxcsrInstrs[1] = " mov r15, 0x9fc0\n mov [r8], r15\n ldmxcsr [r8]\n addss xmm0, xmm1"; // set denormals are zero, flush to zero
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, setMxcsrInstrs, setMxcsrInstrs, false);
        }

        /// <summary>
        /// Generates the ARM variant, which alternates FPCR writes with flush-to-zero (bit 24)
        /// cleared and set, each followed by an fadd. Marked todo by the author.
        /// </summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // read FPCR into x15, set x14 = flush denormals to zero enabled, x15 = flush denormals to zero disabled
            // x13 = just bit 24 set with all other bits zero - bitwise OR to set bit 24
            // x12 = mask with all bits set except bit 24 (flush to zero) - bitwise AND to unset bit 24
            // The mask is built with mvn (bitwise NOT). The previous neg produced -(1 << 24),
            // which keeps bit 24 and clears bits 0..23 instead.
            string initInstrs = " mrs x15, fpcr\n mov x13, 1\n lsl x13, x13, 24\n mvn x12, x13\n orr x14, x15, x13\n and x15, x15, x12";
            string[] setFpcrInstrs = new string[2];
            setFpcrInstrs[0] = " msr fpcr, x15\n fadd s2, s2, s3\n";
            setFpcrInstrs[1] = " msr fpcr, x14\n fadd s4, s4, s5\n";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, setFpcrInstrs, setFpcrInstrs, false, initInstrs: initInstrs);
        }
    }
}
61 |
--------------------------------------------------------------------------------
/GpuMemLatency/latency_test.c:
--------------------------------------------------------------------------------
1 | #include "opencltest.h"
2 |
3 | float latency_test(cl_context context,
4 | cl_command_queue command_queue,
5 | cl_kernel kernel,
6 | uint32_t list_size,
7 | uint32_t chase_iterations,
8 | short sattolo)
9 | {
10 | size_t global_item_size = 1, local_item_size = 1;
11 | cl_int ret;
12 | float latency;
13 | int64_t time_diff_ms;
14 | uint32_t result;
15 | uint32_t stride = 1211;
16 | uint32_t element_count = list_size / CACHELINE_SIZE;
17 | uint32_t increment = CACHELINE_SIZE / sizeof(uint32_t);
18 | uint32_t* A = (uint32_t*)malloc(sizeof(uint32_t) * list_size);
19 | if (sattolo) {
20 | FillPatternArr((uint32_t*)A, list_size, CACHELINE_SIZE);
21 | }
22 | else {
23 | for (int i = 0; i < list_size; i++)
24 | {
25 | A[i] = (i + stride) % list_size;
26 | }
27 | }
28 |
29 | // copy array to device
30 | cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, list_size * sizeof(uint32_t), NULL, &ret);
31 | clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, list_size * sizeof(uint32_t), A, 0, NULL, NULL);
32 |
33 | cl_mem result_obj = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(uint32_t), NULL, &ret);
34 | clEnqueueWriteBuffer(command_queue, result_obj, CL_TRUE, 0, sizeof(uint32_t), &result, 0, NULL, NULL);
35 | clFinish(command_queue);
36 |
37 | // Set kernel arguments
38 | ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&a_mem_obj);
39 | if (ret != CL_SUCCESS)
40 | {
41 | fprintf(stderr, "Failed to set list as kernel arg. clSetKernelArg returned %d\n", ret);
42 | latency = 0;
43 | goto cleanup;
44 | }
45 |
46 | ret = clSetKernelArg(kernel, 1, sizeof(cl_int), (void*)&chase_iterations);
47 | ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&result_obj);
48 |
49 | start_timing();
50 | // Execute the OpenCL kernel. launch a single thread
51 | ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
52 | if (ret != CL_SUCCESS)
53 | {
54 | fprintf(stderr, "Failed to submit kernel to command queue. clEnqueueNDRangeKernel returned %d\n", ret);
55 | latency = 0;
56 | goto cleanup;
57 | }
58 |
59 | ret = clFinish(command_queue); // returns success even when TDR happens?
60 | if (ret != CL_SUCCESS)
61 | {
62 | printf("Failed to finish command queue. clFinish returned %d\n", ret);
63 | latency = 0;
64 | goto cleanup;
65 | }
66 |
67 | time_diff_ms = end_timing();
68 | latency = 1e6 * (float)time_diff_ms / (float)chase_iterations;
69 |
70 | ret = clEnqueueReadBuffer(command_queue, result_obj, CL_TRUE, 0, sizeof(uint32_t), &result, 0, NULL, NULL);
71 | clFinish(command_queue);
72 |
73 | //fprintf(stderr, "Finished reading result. Sum: %d\n", result[0]);
74 |
75 | cleanup:
76 | clFlush(command_queue);
77 | clFinish(command_queue);
78 | clReleaseMemObject(a_mem_obj);
79 | clReleaseMemObject(result_obj);
80 | free(A);
81 | return latency;
82 | }
83 |
--------------------------------------------------------------------------------
/AsmGen/tests/Add256SchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Scheduler capacity test whose filler instructions are 256-bit integer adds (vpaddd on ymm
    /// registers). The ARM variant uses 128-bit adds, as the description states.
    /// </summary>
    public class Add256SchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public Add256SchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "add256sched";
            this.Description = "256-bit Integer Add Scheduler Capacity Test (128-bit on ARM)";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
            this.GetFunctionCallParameters = "structIterations, A, B";
            this.DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " movq %rdi, %xmm0\n vpbroadcastd %xmm0, %ymm0\n";
            string postLoadInstr2 = " movq %rsi, %xmm0\n vpbroadcastd %xmm0, %ymm0\n";
            // ymm0 is dependent on ptr chasing load
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " vpaddd %ymm0, %ymm1, %ymm1";
            unrolledAdds[1] = " vpaddd %ymm0, %ymm2, %ymm2";
            unrolledAdds[2] = " vpaddd %ymm0, %ymm3, %ymm3";
            // Destination was %ymm3 (copy/paste slip); each add accumulates into its own
            // register, matching the NASM variant.
            unrolledAdds[3] = " vpaddd %ymm0, %ymm4, %ymm4";

            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " movq xmm0, rdi\n vpbroadcastd ymm0, xmm0\n";
            string postLoadInstr2 = " movq xmm0, rsi\n vpbroadcastd ymm0, xmm0\n";

            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " vpaddd ymm1, ymm1, ymm0";
            unrolledAdds[1] = " vpaddd ymm2, ymm2, ymm0";
            unrolledAdds[2] = " vpaddd ymm3, ymm3, ymm0";
            unrolledAdds[3] = " vpaddd ymm4, ymm4, ymm0";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Generates the ARM variant (128-bit vector adds).</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // NOTE(review): q18 is loaded twice in the init sequence; harmless, left as-is.
            string initInstrs = " ldr q18, [x1]\n ldr q18, [x1]\n ldr q19, [x2]\n ldr q20, [x2]\n ldr q21, [x2]\n";
            // v17 lane 0 is refreshed from w25 / w26 after each load; the adds below read v17.
            string postLoadInstr1 = " mov v17.s[0], w25\n";
            string postLoadInstr2 = " mov v17.s[0], w26\n";
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add v18.4s, v18.4s, v17.4s";
            unrolledAdds[1] = " add v19.4s, v19.4s, v17.4s";
            unrolledAdds[2] = " add v20.4s, v20.4s, v17.4s";
            unrolledAdds[3] = " add v21.4s, v21.4s, v17.4s";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs, postLoadInstr1, postLoadInstr2);
        }
    }
}
58 |
--------------------------------------------------------------------------------
/AsmGen/tests/LdmTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Integer add scheduler capacity test in which the filler adds read a register (r11 / x11)
    /// that is itself updated by a post-load add, rather than reading the load result directly.
    /// </summary>
    public class LdmTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public LdmTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "ldm";
            this.Description = "Integer (add) without Load Dependency Matrix Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // r11 accumulates rdi / rsi after each load; the filler adds read only r11.
            string postLoadInstr1 = " add %rdi, %r11";
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add %r11, %r15";
            unrolledAdds[1] = " add %r11, %r14";
            unrolledAdds[2] = " add %r11, %r13";
            unrolledAdds[3] = " add %r11, %r12";

            string postLoadInstr2 = " add %rsi, %r11";
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(
                sb,
                this.Counts,
                this.Prefix,
                unrolledAdds,
                unrolledAdds,
                false,
                postLoadInstrs1: postLoadInstr1,
                postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " add r11, rdi";
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add r15, r11";
            unrolledAdds[1] = " add r14, r11";
            unrolledAdds[2] = " add r13, r11";
            unrolledAdds[3] = " add r12, r11";

            string postLoadInstr2 = " add r11, rsi";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(
                sb,
                this.Counts,
                this.Prefix,
                unrolledAdds,
                unrolledAdds,
                false,
                postLoadInstrs1: postLoadInstr1,
                postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Generates the ARM variant.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " add x11, x11, x25";
            string postLoadInstr2 = " add x11, x11, x26";
            // The filler adds read x11, mirroring r11 in the x86 variants. They previously read
            // x25 directly, which left the post-load x11 adds unused and made the second
            // half independent of the x26 load.
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add x15, x15, x11";
            unrolledAdds[1] = " add x14, x14, x11";
            unrolledAdds[2] = " add x13, x13, x11";
            unrolledAdds[3] = " add x12, x12, x11";

            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(
                sb,
                this.Counts,
                this.Prefix,
                unrolledAdds,
                unrolledAdds,
                false,
                postLoadInstrs1: postLoadInstr1,
                postLoadInstrs2: postLoadInstr2);
        }
    }
}
81 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixIntFpRf12Test.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Register file test that mixes one integer add with two scalar FP adds
    /// (a 1:2 ratio), repeated twice per six-entry instruction list.
    /// </summary>
    public class MixIntFp12RfTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public MixIntFp12RfTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "mixintfp12rf";
            Description = "Mix of integer and FP register file, 1:2 ratio";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
            GetFunctionCallParameters = "structIterations, A, fpArr";
            DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // Seeds xmm1..xmm5 with five consecutive floats read through r8.
            string seedFpRegs = string.Concat(
                " movss (%r8), %xmm1\n",
                " movss 4(%r8), %xmm2\n",
                " movss 8(%r8), %xmm3\n",
                " movss 12(%r8), %xmm4\n",
                " movss 16(%r8), %xmm5\n");

            // One integer add followed by two FP adds, twice.
            string[] mixedAdds =
            {
                "add %r15, %r14",
                "addss %xmm1, %xmm2",
                "addss %xmm1, %xmm3",
                "add %r15, %r12",
                "addss %xmm1, %xmm4",
                "addss %xmm1, %xmm5",
            };

            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, mixedAdds, mixedAdds, true, seedFpRegs);
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string seedFpRegs = string.Concat(
                " movss xmm1, [r8]\n",
                " movss xmm2, [r8 + 4]\n",
                " movss xmm3, [r8 + 8]\n",
                " movss xmm4, [r8 + 12]\n",
                " movss xmm5, [r8 + 16]\n");

            string[] mixedAdds =
            {
                "add r14, r15",
                "addss xmm2, xmm1",
                "addss xmm3, xmm1",
                "add r12, r15",
                "addss xmm4, xmm1",
                "addss xmm5, xmm1",
            };

            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, mixedAdds, mixedAdds, true, seedFpRegs);
        }

        /// <summary>Generates the ARM variant.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // Seeds s17..s21 with five consecutive floats read through x2.
            string seedFpRegs = string.Concat(
                " ldr s17, [x2]\n",
                " ldr s18, [x2, 4]\n",
                " ldr s19, [x2, 8]\n",
                " ldr s20, [x2, 12]\n",
                " ldr s21, [x2, 16]\n");

            string[] mixedAdds =
            {
                " add x15, x15, x11",
                " fadd s18, s18, s17",
                " fadd s19, s19, s17",
                " add x13, x13, x11",
                " fadd s20, s20, s17",
                " fadd s21, s21, s17",
            };

            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, mixedAdds, mixedAdds, true, seedFpRegs);
        }
    }
}
72 |
--------------------------------------------------------------------------------
/AsmGen/tests/Add128SchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Scheduler capacity test whose filler instructions are 128-bit integer adds
    /// (paddd on x86, add on .4s vectors on ARM).
    /// </summary>
    public class Add128SchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public Add128SchedTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "add128sched";
            Description = "128-bit Integer Add Scheduler Capacity Test";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
            GetFunctionCallParameters = "structIterations, A, B";
            DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // xmm0 is dependent on the pointer chasing load: it is reloaded from rdi / rsi.
            string afterFirstLoad = " movq %rdi, %xmm0";
            string afterSecondLoad = " movq %rsi, %xmm0";

            string[] packedAdds =
            {
                " paddd %xmm0, %xmm1",
                " paddd %xmm0, %xmm2",
                " paddd %xmm0, %xmm3",
                " paddd %xmm0, %xmm4",
            };

            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(
                sb,
                Counts,
                Prefix,
                packedAdds,
                packedAdds,
                includePtrChasingLoads: false,
                postLoadInstrs1: afterFirstLoad,
                postLoadInstrs2: afterSecondLoad);
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            // Unlike the GCC variant, this one also broadcasts lane 0 with pshufd.
            string afterFirstLoad = " movq xmm0, rdi\n pshufd xmm0, xmm0, 0\n";
            string afterSecondLoad = " movq xmm0, rsi\n pshufd xmm0, xmm0, 0\n";

            string[] packedAdds =
            {
                " paddd xmm1, xmm0",
                " paddd xmm2, xmm0",
                " paddd xmm3, xmm0",
                " paddd xmm4, xmm0",
            };

            // NOTE(review): this call does not pass the pointer-chasing-loads flag that the other
            // variants set to false, so the helper's default applies - confirm that is intended.
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(
                sb,
                Counts,
                Prefix,
                packedAdds,
                packedAdds,
                postLoadInstrs1: afterFirstLoad,
                postLoadInstrs2: afterSecondLoad);
        }

        /// <summary>Generates the ARM variant.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string loadVectors = " ldr q18, [x1]\n ldr q18, [x1]\n ldr q19, [x2]\n ldr q20, [x2]\n ldr q21, [x2]\n";
            // v17 lane 0 is refreshed from w25 / w26 after each load; the adds below read v17.
            string afterFirstLoad = " mov v17.s[0], w25\n";
            string afterSecondLoad = " mov v17.s[0], w26\n";

            string[] packedAdds =
            {
                " add v18.4s, v18.4s, v17.4s",
                " add v19.4s, v19.4s, v17.4s",
                " add v20.4s, v20.4s, v17.4s",
                " add v21.4s, v21.4s, v17.4s",
            };

            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, packedAdds, packedAdds, false, loadVectors, afterFirstLoad, afterSecondLoad);
        }
    }
}
65 |
--------------------------------------------------------------------------------
/AsmGen/tests/Vec256RfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Register file test whose filler instructions are 256-bit packed FP adds (vaddps on ymm
    /// registers). The description marks it as x86 only; the ARM variant emits 128-bit integer adds.
    /// </summary>
    public class Vec256RfTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public Vec256RfTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "vec256rf";
            Description = "Vector (256-bit packed fp) RF Test - x86 only";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
            GetFunctionCallParameters = "structIterations, A, fpArr";
            DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // Seeds ymm1..ymm5 from five consecutive 32-byte slots at r8. The original author
            // notes this is fine because the pointer chasing array should be far larger.
            string seedRegisters = string.Concat(
                " vmovups (%r8), %ymm1\n",
                " vmovups 32(%r8), %ymm2\n",
                " vmovups 64(%r8), %ymm3\n",
                " vmovups 96(%r8), %ymm4\n",
                " vmovups 128(%r8), %ymm5\n");

            string[] packedAdds =
            {
                " vaddps %ymm1, %ymm2, %ymm2",
                " vaddps %ymm1, %ymm3, %ymm3",
                " vaddps %ymm1, %ymm4, %ymm4",
                " vaddps %ymm1, %ymm5, %ymm5",
            };

            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, packedAdds, packedAdds, false, seedRegisters);
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string seedRegisters = string.Concat(
                " vmovups ymm1, [r8]\n",
                " vmovups ymm2, [r8 + 32]\n",
                " vmovups ymm3, [r8 + 64]\n",
                " vmovups ymm4, [r8 + 96]\n",
                " vmovups ymm5, [r8 + 128]\n");

            string[] packedAdds =
            {
                " vaddps ymm2, ymm2, ymm1",
                " vaddps ymm3, ymm3, ymm1",
                " vaddps ymm4, ymm4, ymm1",
                " vaddps ymm5, ymm5, ymm1",
            };

            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, packedAdds, packedAdds, false, seedRegisters);
        }

        /// <summary>Generates the ARM variant (128-bit vector integer adds).</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string seedRegisters = string.Concat(
                " ldr q0, [x1]\n",
                " ldr q1, [x1, #0x10]\n",
                " ldr q2, [x1, #0x20]\n",
                " ldr q3, [x1, #0x30]\n",
                " ldr q4, [x1, #0x40]\n");

            string[] packedAdds =
            {
                " add v1.4s, v1.4s, v0.4s",
                " add v2.4s, v2.4s, v0.4s",
                " add v3.4s, v3.4s, v0.4s",
                " add v4.4s, v4.4s, v0.4s",
            };

            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, packedAdds, packedAdds, false, seedRegisters);
        }
    }
}
67 |
--------------------------------------------------------------------------------
/AsmGen/tests/Vec512RfTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Register file test whose filler instructions are 512-bit packed FP adds (vaddps on zmm
    /// registers). The description marks it as x86 only; the ARM variant emits 128-bit integer adds.
    /// </summary>
    public class Vec512RfTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public Vec512RfTest(int low, int high, int step)
        {
            Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            Prefix = "vec512rf";
            Description = "Vector (512-bit packed fp) RF Test - x86 only";
            FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr";
            GetFunctionCallParameters = "structIterations, A, fpArr";
            DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // Seeds zmm1..zmm5 from five consecutive 64-byte slots at r8. The original author
            // notes this is fine because the pointer chasing array should be far larger.
            string seedRegisters = string.Concat(
                " vmovups (%r8), %zmm1\n",
                " vmovups 64(%r8), %zmm2\n",
                " vmovups 128(%r8), %zmm3\n",
                " vmovups 192(%r8), %zmm4\n",
                " vmovups 256(%r8), %zmm5\n");

            string[] packedAdds =
            {
                " vaddps %zmm1, %zmm2, %zmm2",
                " vaddps %zmm1, %zmm3, %zmm3",
                " vaddps %zmm1, %zmm4, %zmm4",
                " vaddps %zmm1, %zmm5, %zmm5",
            };

            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, packedAdds, packedAdds, false, seedRegisters);
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string seedRegisters = string.Concat(
                " vmovups zmm1, [r8]\n",
                " vmovups zmm2, [r8 + 64]\n",
                " vmovups zmm3, [r8 + 128]\n",
                " vmovups zmm4, [r8 + 192]\n",
                " vmovups zmm5, [r8 + 256]\n");

            string[] packedAdds =
            {
                " vaddps zmm2, zmm2, zmm1",
                " vaddps zmm3, zmm3, zmm1",
                " vaddps zmm4, zmm4, zmm1",
                " vaddps zmm5, zmm5, zmm1",
            };

            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, packedAdds, packedAdds, false, seedRegisters);
        }

        /// <summary>Generates the ARM variant (128-bit vector integer adds).</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string seedRegisters = string.Concat(
                " ldr q0, [x1]\n",
                " ldr q1, [x1, #0x10]\n",
                " ldr q2, [x1, #0x20]\n",
                " ldr q3, [x1, #0x30]\n",
                " ldr q4, [x1, #0x40]\n");

            string[] packedAdds =
            {
                " add v1.4s, v1.4s, v0.4s",
                " add v2.4s, v2.4s, v0.4s",
                " add v3.4s, v3.4s, v0.4s",
                " add v4.4s, v4.4s, v0.4s",
            };

            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, Counts, Prefix, packedAdds, packedAdds, false, seedRegisters);
        }
    }
}
67 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixMulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    // only applicable to Zhaoxin Lujiazui
    /// <summary>
    /// Scheduler capacity test whose filler instructions alternate between narrow and 64-bit
    /// integer multiplies (16-bit and 64-bit on x86, 32-bit and 64-bit on ARM).
    /// </summary>
    public class MixMulSchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public MixMulSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mixmulschedtest";
            this.Description = "Mixed Integer (64/16-bit mul) Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // Passed as both post-load sequences: sets r15 and r13, the 16-bit multiply
            // destinations, back to 1.
            string resetMulsInstr = "mov $1, %r15\n mov $1, %r13";
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " imul %di, %r15w";
            unrolledMuls[1] = " imul %rdi, %r14";
            unrolledMuls[2] = " imul %di, %r13w";
            unrolledMuls[3] = " imul %rdi, %r12";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " imul %si, %r15w";
            unrolledMuls1[1] = " imul %rsi, %r14";
            unrolledMuls1[2] = " imul %si, %r13w";
            unrolledMuls1[3] = " imul %rsi, %r12";
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false, postLoadInstrs1: resetMulsInstr, postLoadInstrs2: resetMulsInstr);
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            // NOTE(review): unlike the GCC variant, no post-load reset of r15 / r13 is passed
            // here - confirm whether that difference is intentional.
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " imul r15w, di";
            unrolledMuls[1] = " imul r14, rdi";
            unrolledMuls[2] = " imul r13w, di";
            unrolledMuls[3] = " imul r12, rdi";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " imul r15w, si";
            unrolledMuls1[1] = " imul r14, rsi";
            unrolledMuls1[2] = " imul r13w, si";
            unrolledMuls1[3] = " imul r12, rsi";
            // The second list is the rsi-based one, as in the GCC and ARM variants. It was
            // previously built but never passed (the rdi list was passed twice).
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
        }

        /// <summary>Generates the ARM variant.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " mul w15, w15, w25";
            unrolledMuls[1] = " mul x14, x14, x25";
            unrolledMuls[2] = " mul w13, w13, w25";
            unrolledMuls[3] = " mul x12, x12, x25";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " mul w15, w15, w26";
            unrolledMuls1[1] = " mul x14, x14, x26";
            unrolledMuls1[2] = " mul w13, w13, w26";
            unrolledMuls1[3] = " mul x12, x12, x26";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
        }
    }
}
68 |
--------------------------------------------------------------------------------
/AsmGen/tests/MixJumpMulSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
namespace AsmGen
{
    /// <summary>
    /// Scheduler capacity test that interleaves compare-and-branch pairs with integer multiplies.
    /// Every branch targets a shared trap label that each generator appends after the test functions.
    /// </summary>
    public class MixJmpMulSchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata; the count array comes from
        /// <c>UarchTestHelpers.GenerateCountArray(low, high, step)</c>.
        /// </summary>
        public MixJmpMulSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mixmuljmpsched";
            this.Description = "Mixed integer multiply and not-taken Jump Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>Generates the GCC (AT&amp;T syntax) x86 variant.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] unrolledJumps = new string[6];
            unrolledJumps[0] = " cmp %rdi, %rsi\n je muljmpsched_reallybadthing";
            unrolledJumps[1] = " imul %edi, %r12d";
            unrolledJumps[2] = " cmp %rdi, %rsi\n je muljmpsched_reallybadthing";
            unrolledJumps[3] = " imul %edi, %r13d";
            unrolledJumps[4] = " cmp %rdi, %rsi\n je muljmpsched_reallybadthing";
            unrolledJumps[5] = " imul %edi, %r14d";

            string[] unrolledJumps1 = new string[2];
            unrolledJumps1[0] = " cmp %rdi, %rsi\n je muljmpsched_reallybadthing";
            unrolledJumps1[1] = " imul %esi, %r11d";

            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps1, false);

            // Shared branch target: a breakpoint trap.
            sb.AppendLine("muljmpsched_reallybadthing:");
            sb.AppendLine(" int3");
        }

        /// <summary>Generates the NASM (Intel syntax) x86 variant.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            // Index 0 is the compare + branch, index 1 the multiply. Both assignments
            // previously targeted index 0, dropping the branch and leaving index 1 null.
            string[] unrolledJumps = new string[2];
            unrolledJumps[0] = " cmp rdi, rsi\n je muljmpsched_reallybadthing";
            unrolledJumps[1] = " imul r12d, edi";

            string[] unrolledJumps1 = new string[2];
            unrolledJumps1[0] = " cmp rdi, rsi\n je muljmpsched_reallybadthing";
            unrolledJumps1[1] = " imul r11d, esi";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps1, false);

            sb.AppendLine("muljmpsched_reallybadthing:");
            sb.AppendLine(" int3");
        }

        /// <summary>Generates the ARM variant.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledJumps = new string[2];
            unrolledJumps[0] = " cmp x25, x26\n b.eq muljmpsched_reallybadthing";
            unrolledJumps[1] = " mul x12, x12, x25";

            string[] unrolledJumps1 = new string[2];
            unrolledJumps1[0] = " cmp x25, x26\n b.eq muljmpsched_reallybadthing";
            unrolledJumps1[1] = " mul x14, x14, x26";
            // The second list is the x26-based one, as in the x86 variants. It was
            // previously built but never passed (the x25 list was passed twice).
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps1, false);

            // Shared branch target: a raw instruction word. NOTE(review): presumably an
            // undefined encoding used as a trap - confirm.
            sb.AppendLine("muljmpsched_reallybadthing:");
            sb.AppendLine(" .word 0xf7f0a000");
        }
    }
}
68 |
--------------------------------------------------------------------------------
/MemoryLatency/MemoryLatency_arm.s:
--------------------------------------------------------------------------------
1 | .text
2 |
3 | .global latencytest
4 | .global preplatencyarr
5 | .global stlftest
6 | .global stlftest32
7 | .global matchedstlftest
8 |
9 | /* x0 = ptr to arr (64-bit elements, each initially holding an array index)
10 | x1 = arr len, in elements. NOTE(review): a length of 0 is not checked before the first store - confirm callers never pass 0
11 | convert values in array from array indexes to pointers, in place: arr[i] = arr + arr[i] * 8 */
12 | preplatencyarr:
13 | sub sp, sp, #0x20 /* reserve 32 bytes of stack */
14 | stp x14, x15, [sp, #0x10] /* save the two scratch registers used below */
15 | mov x15, 0 /* x15 = element index */
16 | preplatencyarr_loop:
17 | ldr x14, [x0, w15, uxtw #3] /* x14 = arr[index] */
18 | lsl x14, x14, 3 /* index -> byte offset (times 8) */
19 | add x14, x14, x0 /* byte offset -> address inside arr */
20 | str x14, [x0, w15, uxtw #3] /* overwrite the index with the pointer */
21 | add w15, w15, 1 /* next element; the 32-bit write zero-extends into x15 */
22 | cmp x15, x1
23 | b.ne preplatencyarr_loop /* repeat until index == arr len */
24 | ldp x14, x15, [sp, #0x10] /* restore scratch registers */
25 | add sp, sp, #0x20
26 | ret
27 |
28 | /* x0 = iteration count (decremented to zero; NOTE(review): 0 would wrap around - confirm callers pass at least 1)
29 | x1 = ptr to arr, whose elements are pointers to follow
30 | do pointer chasing for specified iteration count; returns in x0 the sum of the loaded values */
31 | latencytest:
32 | sub sp, sp, #0x20 /* reserve 32 bytes of stack */
33 | stp x14, x15, [sp, #0x10] /* save the two scratch registers used below */
34 | mov x14, 0 /* x14 = running sum */
35 | ldr x15, [x1] /* x15 = first pointer, taken from arr[0] */
36 | latencytest_loop:
37 | ldr x15, [x15] /* the address of each load is the result of the previous load */
38 | add x14, x14, x15 /* accumulate every loaded value */
39 | sub x0, x0, 1
40 | cbnz x0, latencytest_loop
41 | mov x0, x14 /* return value = accumulated sum */
42 | ldp x14, x15, [sp, #0x10] /* restore scratch registers */
43 | add sp, sp, #0x20
44 | ret
45 |
46 | /* x0 = iteration count (consumed 5 per loop pass; the loop ends once it is <= 0)
47 | x1 = ptr to arr. first 32-bit int = store offset, second = load offset. 64-bit store followed by 32-bit load */
48 | stlftest:
49 | sub sp, sp, #0x40
50 | stp x14, x15, [sp, #0x10]
51 | stp x12, x13, [sp, #0x20] /* x12 = store ptr, x13 = load ptr */
52 | ldr x15, [x1] /* initial 64-bit value to store */
53 | ldr w12, [x1] /* store offset (32-bit write zero-extends into x12) */
54 | ldr w13, [x1, 4] /* load offset */
55 | add x12, x12, x1 /* store ptr = arr + store offset */
56 | add x13, x13, x1 /* load ptr = arr + load offset */
57 | stlftest_loop:
58 | str x15, [x12] /* 64-bit store ... */
59 | ldr w15, [x13] /* ... then a 32-bit load whose result is the next store's data */
60 | str x15, [x12]
61 | ldr w15, [x13]
62 | str x15, [x12]
63 | ldr w15, [x13]
64 | str x15, [x12]
65 | ldr w15, [x13]
66 | str x15, [x12]
67 | ldr w15, [x13]
68 | sub x0, x0, 5 /* five store/load pairs per pass */
69 | cmp x0, 0
70 | b.gt stlftest_loop
71 | ldp x12, x13, [sp, #0x20] /* restore from the slot x12/x13 were saved to (was #0x10, the x14/x15 slot) */
72 | ldp x14, x15, [sp, #0x10]
73 | add sp, sp, #0x40
74 | ret
75 |
76 | stlftest32: /* x0 = iteration count, x1 = ptr to arr (first 32-bit int = store offset, second = load offset). 32-bit store followed by 16-bit load */
77 | sub sp, sp, #0x40
78 | stp x14, x15, [sp, #0x10]
79 | stp x12, x13, [sp, #0x20] /* x12 = store ptr, x13 = load ptr */
80 | ldr x15, [x1] /* initial value; only its low 32 bits are stored below */
81 | ldr w12, [x1] /* store offset (32-bit write zero-extends into x12) */
82 | ldr w13, [x1, 4] /* load offset */
83 | add x12, x12, x1 /* store ptr = arr + store offset */
84 | add x13, x13, x1 /* load ptr = arr + load offset */
85 | stlftest32_loop:
86 | str w15, [x12] /* 32-bit store ... */
87 | ldrh w15, [x13] /* ... then a 16-bit load whose result is the next store's data */
88 | str w15, [x12]
89 | ldrh w15, [x13]
90 | str w15, [x12]
91 | ldrh w15, [x13]
92 | str w15, [x12]
93 | ldrh w15, [x13]
94 | str w15, [x12]
95 | ldrh w15, [x13]
96 | sub x0, x0, 5 /* five store/load pairs per pass */
97 | cmp x0, 0
98 | b.gt stlftest32_loop
99 | ldp x12, x13, [sp, #0x20] /* restore from the slot x12/x13 were saved to (was #0x10, the x14/x15 slot) */
100 | ldp x14, x15, [sp, #0x10]
101 | add sp, sp, #0x40
102 | ret
103 |
104 | matchedstlftest: /* x0 = iteration count, x1 = ptr to arr (first 32-bit int = store offset, second = load offset). 64-bit store followed by 64-bit load */
105 | sub sp, sp, #0x40
106 | stp x14, x15, [sp, #0x10]
107 | stp x12, x13, [sp, #0x20] /* x12 = store ptr, x13 = load ptr */
108 | ldr x15, [x1] /* initial 64-bit value to store */
109 | ldr w12, [x1] /* store offset (32-bit write zero-extends into x12) */
110 | ldr w13, [x1, 4] /* load offset */
111 | add x12, x12, x1 /* store ptr = arr + store offset */
112 | add x13, x13, x1 /* load ptr = arr + load offset */
113 | matchedstlftest_loop:
114 | str x15, [x12] /* 64-bit store ... */
115 | ldr x15, [x13] /* ... then a same-width load whose result is the next store's data */
116 | str x15, [x12]
117 | ldr x15, [x13]
118 | str x15, [x12]
119 | ldr x15, [x13]
120 | str x15, [x12]
121 | ldr x15, [x13]
122 | str x15, [x12]
123 | ldr x15, [x13]
124 | sub x0, x0, 5 /* five store/load pairs per pass */
125 | cmp x0, 0
126 | b.gt matchedstlftest_loop
127 | ldp x12, x13, [sp, #0x20] /* restore from the slot x12/x13 were saved to (was #0x10, the x14/x15 slot) */
128 | ldp x14, x15, [sp, #0x10]
129 | add sp, sp, #0x40
130 | ret
131 |
--------------------------------------------------------------------------------
/AsmGen/tests/VecStoreDataSchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 |     /// <summary>
6 |     /// Generator for the "Store 128-bit Data Scheduler Capacity Test": each variant builds lists of
7 |     /// store instructions and hands them to the matching UarchTestHelpers generator.
8 |     /// </summary>
9 |     public class VecStoreDataSchedTest : UarchTest
10 |     {
11 |         /// <summary>Sets the instruction counts to sweep and the metadata of the generated functions.</summary>
12 |         public VecStoreDataSchedTest(int low, int high, int step)
13 |         {
14 |             Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
15 |             Prefix = "vecstoredatasched";
16 |             Description = "Store 128-bit Data Scheduler Capacity Test";
17 |             FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1";
18 |             GetFunctionCallParameters = "structIterations, A, B";
19 |             DivideTimeByCount = false;
20 |         }
21 |
22 |         /// <summary>Appends the x86-64 test functions in GNU assembler syntax.</summary>
23 |         public override void GenerateX86GccAsm(StringBuilder sb)
24 |         {
25 |             // xmm1, the data written by every store, is reloaded using rdi or rsi as the index.
26 |             string reloadViaRdi = " movups (%rdx, %rdi, 2), %xmm1";
27 |             string reloadViaRsi = " movups (%rdx, %rsi, 2), %xmm1";
28 |             string[] stores =
29 |             {
30 |                 " movups %xmm1, (%r8)",
31 |                 " movups %xmm1, (%r8, %r14, 8)",
32 |                 " movups %xmm1, (%r8, %r13, 8)",
33 |                 " movups %xmm1, (%r8, %r12, 8)",
34 |             };
35 |
36 |             UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, Counts, Prefix, stores, stores, false, postLoadInstrs1: reloadViaRdi, postLoadInstrs2: reloadViaRsi);
37 |         }
38 |
39 |         /// <summary>Appends the x86-64 test functions in NASM syntax.</summary>
40 |         public override void GenerateX86NasmAsm(StringBuilder sb)
41 |         {
42 |             string setup = " vpcmpeqd xmm1, xmm1, xmm1\n vpxor xmm0, xmm0, xmm1";
43 |             // xmm0, the data written by every store, is produced from rdi or rsi.
44 |             string convertFromRdi = " cvtsi2ss xmm0, rdi";
45 |             string convertFromRsi = " cvtsi2ss xmm0, rsi";
46 |             string[] stores =
47 |             {
48 |                 " movups [r8], xmm0",
49 |                 " movups [r8 + r14 * 8], xmm0",
50 |                 " movups [r8 + r13 * 8], xmm0",
51 |                 " movups [r8 + r12 * 8], xmm0",
52 |             };
53 |
54 |             UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, Counts, Prefix, stores, stores, false, setup, convertFromRdi, convertFromRsi);
55 |         }
56 |
57 |         /// <summary>Appends the AArch64 test functions (still marked todo; it emits 32-bit stores).</summary>
58 |         public override void GenerateArmAsm(StringBuilder sb)
59 |         {
60 |             // todo
61 |             string[] storesIndexedByW25 =
62 |             {
63 |                 " str w15, [x2, w25, uxtw #2]",
64 |                 " str w15, [x2, w25, uxtw #2]",
65 |                 " str w15, [x2, w25, uxtw #2]",
66 |                 " str w15, [x2, w25, uxtw #2]",
67 |             };
68 |
69 |             string[] storesIndexedByW26 =
70 |             {
71 |                 " str w15, [x2, w26, uxtw #2]",
72 |                 " str w15, [x2, w26, uxtw #2]",
73 |                 " str w15, [x2, w26, uxtw #2]",
74 |                 " str w15, [x2, w26, uxtw #2]",
75 |             };
76 |             UarchTestHelpers.GenerateArmAsmDivStructureTestFuncs(sb, Counts, Prefix, storesIndexedByW25, storesIndexedByW26, false);
77 |         }
78 |     }
79 | }
80 |
--------------------------------------------------------------------------------
/AsmGen/tests/Mul16SchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 |     /// <summary>
6 |     /// Generator for the "Integer (16-bit mul) Scheduler Capacity Test". Every variant builds two
7 |     /// multiply lists, one reading the first source register (di / w25) and one reading the
8 |     /// second (si / w26), and passes both to the matching UarchTestHelpers generator.
9 |     /// </summary>
10 |     public class Mul16SchedTest : UarchTest
11 |     {
12 |         /// <summary>Sets the instruction counts to sweep and the metadata of the generated functions.</summary>
13 |         public Mul16SchedTest(int low, int high, int step)
14 |         {
15 |             this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
16 |             this.Prefix = "mul16sched";
17 |             this.Description = "Integer (16-bit mul) Scheduler Capacity Test";
18 |             this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
19 |             this.GetFunctionCallParameters = "structIterations, A";
20 |             this.DivideTimeByCount = false;
21 |         }
22 |
23 |         /// <summary>Appends the x86-64 test functions in GNU assembler syntax.</summary>
24 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
25 |         public override void GenerateX86GccAsm(StringBuilder sb)
26 |         {
27 |             // trying to unsuccessfully counter some weird behavior on zhaoxin
28 |             string resetMulsInstr = "mov $11, %r15\n mov $13, %r14\n mov $15, %r13\n mov $17, %r12\n";
29 |             string[] unrolledMuls = new string[4];
30 |             unrolledMuls[0] = " imul %di, %r15w";
31 |             unrolledMuls[1] = " imul %di, %r14w";
32 |             unrolledMuls[2] = " imul %di, %r13w";
33 |             unrolledMuls[3] = " imul %di, %r12w";
34 |
35 |             string[] unrolledMuls1 = new string[4];
36 |             unrolledMuls1[0] = " imul %si, %r15w";
37 |             unrolledMuls1[1] = " imul %si, %r14w";
38 |             unrolledMuls1[2] = " imul %si, %r13w";
39 |             unrolledMuls1[3] = " imul %si, %r12w";
40 |             UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false, postLoadInstrs1: resetMulsInstr, postLoadInstrs2: resetMulsInstr);
41 |         }
42 |
43 |         /// <summary>Appends the x86-64 test functions in NASM syntax.</summary>
44 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
45 |         public override void GenerateX86NasmAsm(StringBuilder sb)
46 |         {
47 |             string[] unrolledMuls = new string[4];
48 |             unrolledMuls[0] = " imul r15w, di";
49 |             unrolledMuls[1] = " imul r14w, di";
50 |             unrolledMuls[2] = " imul r13w, di";
51 |             unrolledMuls[3] = " imul r12w, di";
52 |
53 |             string[] unrolledMuls1 = new string[4];
54 |             unrolledMuls1[0] = " imul r15w, si";
55 |             unrolledMuls1[1] = " imul r14w, si";
56 |             unrolledMuls1[2] = " imul r13w, si";
57 |             unrolledMuls1[3] = " imul r12w, si";
58 |             // Pass the si-based list as the second set, as the GCC and ARM variants do;
59 |             // repeating the di-based list left unrolledMuls1 unused.
60 |             UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
61 |         }
62 |
63 |         /// <summary>Appends the AArch64 test functions, which use 32-bit multiplies.</summary>
64 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
65 |         public override void GenerateArmAsm(StringBuilder sb)
66 |         {
67 |             string[] unrolledMuls = new string[4];
68 |             unrolledMuls[0] = " mul w15, w15, w25";
69 |             unrolledMuls[1] = " mul w14, w14, w25";
70 |             unrolledMuls[2] = " mul w13, w13, w25";
71 |             unrolledMuls[3] = " mul w12, w12, w25";
72 |
73 |             string[] unrolledMuls1 = new string[4];
74 |             unrolledMuls1[0] = " mul w15, w15, w26";
75 |             unrolledMuls1[1] = " mul w14, w14, w26";
76 |             unrolledMuls1[2] = " mul w13, w13, w26";
77 |             unrolledMuls1[3] = " mul w12, w12, w26";
78 |             UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
79 |         }
80 |     }
81 | }
82 |
--------------------------------------------------------------------------------
/AsmGen/tests/Add128SNsqTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 |     /// <summary>
6 |     /// Generator for the "128-bit Integer Add Scheduler Capacity Test, excluding NSQ". The x86
7 |     /// variants build one list of adds that read xmm0 (written from rdi by the post-load
8 |     /// instruction) and one list of adds that do not, and pass both to the NSQ helper generators.
9 |     /// </summary>
10 |     public class Add128NsqTest : UarchTest
11 |     {
12 |         // Constructor's "high" argument, forwarded to the NSQ helper generators.
13 |         private readonly int high;
14 |
15 |         /// <summary>Sets the instruction counts to sweep and the metadata of the generated functions.</summary>
16 |         public Add128NsqTest(int low, int high, int step)
17 |         {
18 |             this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
19 |             this.Prefix = "add128nsq";
20 |             this.Description = "128-bit Integer Add Scheduler Capacity Test, excluding NSQ";
21 |             this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
22 |             this.GetFunctionCallParameters = "structIterations, A, B";
23 |             this.DivideTimeByCount = false;
24 |             this.high = high;
25 |         }
26 |
27 |         /// <summary>Appends the x86-64 test functions in GNU assembler syntax (source operand first).</summary>
28 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
29 |         public override void GenerateX86GccAsm(StringBuilder sb)
30 |         {
31 |             string initInstrs = " pxor %xmm3, %xmm3\n pxor %xmm4, %xmm4\n movq %r15, %xmm5\n";
32 |             string postLoadInstr = " movq %rdi, %xmm0\n";
33 |             //string postLoadInstr2 = " movq %rsi, %xmm0\n pshufd $0, %xmm0, %xmm0\n";
34 |             // xmm0 is dependent on ptr chasing load
35 |             string[] depAdds = new string[2];
36 |             depAdds[0] = " paddd %xmm0, %xmm1";
37 |             depAdds[1] = " paddd %xmm0, %xmm2";
38 |
39 |             string[] indepAdds = new string[2];
40 |             indepAdds[0] = " paddd %xmm3, %xmm5";
41 |             indepAdds[1] = " paddd %xmm4, %xmm5";
42 |
43 |             UarchTestHelpers.GenerateX86AsmNsqTestFuncs(sb, this.high, this.Counts, this.Prefix, depAdds, indepAdds, false, initInstrs, postLoadInstr);
44 |         }
45 |
46 |         /// <summary>Appends the x86-64 test functions in NASM syntax (destination operand first).</summary>
47 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
48 |         public override void GenerateX86NasmAsm(StringBuilder sb)
49 |         {
50 |             // Destination comes first here, so seeding xmm5 from r15 (as the GCC variant does)
51 |             // is "movq xmm5, r15"; the reversed form overwrote r15 and left xmm5 unset.
52 |             string initInstrs = " pxor xmm3, xmm3\n pxor xmm4, xmm4\n movq xmm5, r15\n";
53 |             string postLoadInstr = " movq xmm0, rdi";
54 |             //string postLoadInstr2 = " movq xmm0, rsi";
55 |
56 |             string[] depAdds = new string[2];
57 |             depAdds[0] = " paddd xmm1, xmm0";
58 |             depAdds[1] = " paddd xmm2, xmm0";
59 |
60 |             // Both independent adds accumulate into xmm5, matching the GCC variant. The previous
61 |             // xmm6 destination was never initialised and is nonvolatile in the Microsoft x64 ABI.
62 |             string[] indepAdds = new string[2];
63 |             indepAdds[0] = " paddd xmm5, xmm3";
64 |             indepAdds[1] = " paddd xmm5, xmm4";
65 |             UarchTestHelpers.GenerateX86NasmNsqTestFuncs(sb, this.high, this.Counts, this.Prefix, depAdds, indepAdds, false, initInstrs, postLoadInstr);
66 |         }
67 |
68 |         /// <summary>Appends the AArch64 test functions (still marked todo; it calls the plain structure-test helper).</summary>
69 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
70 |         public override void GenerateArmAsm(StringBuilder sb)
71 |         {
72 |             // todo
73 |             string initInstrs = " ldr q18, [x1]\n ldr q18, [x1]\n ldr q19, [x2]\n ldr q20, [x2]\n ldr q21, [x2]\n";
74 |             string postLoadInstr1 = " mov v17.s[0], w25\n";
75 |             string postLoadInstr2 = " mov v17.s[0], w26\n";
76 |             string[] unrolledAdds = new string[4];
77 |             unrolledAdds[0] = " add v18.4s, v18.4s, v17.4s";
78 |             unrolledAdds[1] = " add v19.4s, v19.4s, v17.4s";
79 |             unrolledAdds[2] = " add v20.4s, v20.4s, v17.4s";
80 |             unrolledAdds[3] = " add v21.4s, v21.4s, v17.4s";
81 |             UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs, postLoadInstr1, postLoadInstr2);
82 |         }
83 |     }
84 | }
85 |
--------------------------------------------------------------------------------
/AsmGen/tests/Mul32SchedTest.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace AsmGen
4 | {
5 |     /// <summary>
6 |     /// Generator for the "Integer (32-bit mul) Scheduler Capacity Test". Every variant builds two
7 |     /// multiply lists, one reading the first source register (edi / w25) and one reading the
8 |     /// second (esi / w26), and passes both to the matching UarchTestHelpers generator.
9 |     /// </summary>
10 |     public class Mul32SchedTest : UarchTest
11 |     {
12 |         /// <summary>Sets the instruction counts to sweep and the metadata of the generated functions.</summary>
13 |         public Mul32SchedTest(int low, int high, int step)
14 |         {
15 |             this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
16 |             this.Prefix = "mul32sched";
17 |             this.Description = "Integer (32-bit mul) Scheduler Capacity Test";
18 |             this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
19 |             this.GetFunctionCallParameters = "structIterations, A";
20 |             this.DivideTimeByCount = false;
21 |         }
22 |
23 |         /// <summary>Appends the x86-64 test functions in GNU assembler syntax.</summary>
24 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
25 |         public override void GenerateX86GccAsm(StringBuilder sb)
26 |         {
27 |             // trying to unsuccessfully counter some weird behavior on zhaoxin
28 |             string resetMulsInstr = "mov $11, %r15\n mov $13, %r14\n mov $15, %r13\n mov $17, %r12\n";
29 |             string[] unrolledMuls = new string[4];
30 |             unrolledMuls[0] = " imul %edi, %r15d";
31 |             unrolledMuls[1] = " imul %edi, %r14d";
32 |             unrolledMuls[2] = " imul %edi, %r13d";
33 |             unrolledMuls[3] = " imul %edi, %r12d";
34 |
35 |             string[] unrolledMuls1 = new string[4];
36 |             unrolledMuls1[0] = " imul %esi, %r15d";
37 |             unrolledMuls1[1] = " imul %esi, %r14d";
38 |             unrolledMuls1[2] = " imul %esi, %r13d";
39 |             unrolledMuls1[3] = " imul %esi, %r12d";
40 |             UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false, postLoadInstrs1: resetMulsInstr, postLoadInstrs2: resetMulsInstr);
41 |         }
42 |
43 |         /// <summary>Appends the x86-64 test functions in NASM syntax.</summary>
44 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
45 |         public override void GenerateX86NasmAsm(StringBuilder sb)
46 |         {
47 |             string[] unrolledMuls = new string[4];
48 |             unrolledMuls[0] = " imul r15d, edi";
49 |             unrolledMuls[1] = " imul r14d, edi";
50 |             unrolledMuls[2] = " imul r13d, edi";
51 |             unrolledMuls[3] = " imul r12d, edi";
52 |
53 |             string[] unrolledMuls1 = new string[4];
54 |             unrolledMuls1[0] = " imul r15d, esi";
55 |             unrolledMuls1[1] = " imul r14d, esi";
56 |             unrolledMuls1[2] = " imul r13d, esi";
57 |             unrolledMuls1[3] = " imul r12d, esi";
58 |             // Pass the esi-based list as the second set, as the GCC and ARM variants do;
59 |             // repeating the edi-based list left unrolledMuls1 unused.
60 |             UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
61 |         }
62 |
63 |         /// <summary>Appends the AArch64 test functions.</summary>
64 |         /// <param name="sb">Builder that receives the generated assembly text.</param>
65 |         public override void GenerateArmAsm(StringBuilder sb)
66 |         {
67 |             string[] unrolledMuls = new string[4];
68 |             unrolledMuls[0] = " mul w15, w15, w25";
69 |             unrolledMuls[1] = " mul w14, w14, w25";
70 |             unrolledMuls[2] = " mul w13, w13, w25";
71 |             unrolledMuls[3] = " mul w12, w12, w25";
72 |
73 |             string[] unrolledMuls1 = new string[4];
74 |             unrolledMuls1[0] = " mul w15, w15, w26";
75 |             unrolledMuls1[1] = " mul w14, w14, w26";
76 |             unrolledMuls1[2] = " mul w13, w13, w26";
77 |             unrolledMuls1[3] = " mul w12, w12, w26";
78 |             UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
79 |         }
80 |     }
81 | }
82 |
--------------------------------------------------------------------------------