├── GpuMemLatency ├── OpenCL │ ├── lib │ │ └── OpenCL.lib │ ├── include │ │ └── CL │ │ │ ├── opencl.h │ │ │ └── cl_gl_ext.h │ └── README.md ├── Makefile ├── opencltest.sln ├── instruction_rate_fp16_kernel.cl ├── instruction_rate_fp64_kernel.cl ├── opencltest.h └── latency_test.c ├── AsmGen ├── Properties │ └── launchSettings.json ├── IUarchTestParallelBuild.cs ├── IUarchTest.cs ├── DataFiles │ ├── CommonFunctions.c │ ├── VsBranchHistFunction.c │ ├── BranchhistTestBlock.c │ ├── GccBranchHistFunction.c │ ├── IndirectBranchTestBlock.c │ ├── VsIndirectBranchFunction.c │ └── GccIndirectBranchFunction.c ├── AsmGen.sln ├── AsmGen.csproj ├── tests │ ├── RobTest.cs │ ├── FlagsRfTest.cs │ ├── LoadDivSchedTest.cs │ ├── LdqStqTest.cs │ ├── RorSchedTest.cs │ ├── IntRfTest.cs │ ├── JumpSchedTest.cs │ ├── NotIntRfTest.cs │ ├── MovImmIntRfTest.cs │ ├── LdqTest.cs │ ├── LoadDivNsqTest.cs │ ├── StqTest.cs │ ├── MixMaskIntRfTest.cs │ ├── MaskRfTest.cs │ ├── FaddSchedTest.cs │ ├── FmulSchedTest.cs │ ├── MixLoadStoreDivSchedTest.cs │ ├── MixFaddFmulSchedTest.cs │ ├── Fadd256SchedTest.cs │ ├── JumpNsqTest.cs │ ├── FaddNsqTest.cs │ ├── MixJumpAddSchedTest.cs │ ├── JumpAddSchedTest.cs │ ├── MixBtsMulSchedTest.cs │ ├── MixPdepMulSchedTest.cs │ ├── MixLeaMulSchedTest.cs │ ├── MixAddJump21SchedTest.cs │ ├── MxcsrTest.cs │ ├── MixRorBtsSchedTest.cs │ ├── MmxRfTest.cs │ ├── MixRorMulSchedTest.cs │ ├── NopLoopTest.cs │ ├── FaddIntAddSchedTest.cs │ ├── Add256RfTest.cs │ ├── MixIntFpRf13Test.cs │ ├── BtsSchedTest.cs │ ├── PdepSchedTest.cs │ ├── MixPdepLeaSchedTest.cs │ ├── LeaSchedTest.cs │ ├── FpRfTest.cs │ ├── AddSchedTest.cs │ ├── MulSchedTest.cs │ ├── CvtSchedTest.cs │ ├── VecRfTest.cs │ ├── Add512SchedTest.cs │ ├── MxcsrFeTest.cs │ ├── Add256SchedTest.cs │ ├── LdmTest.cs │ ├── MixIntFpRf12Test.cs │ ├── Add128SchedTest.cs │ ├── Vec256RfTest.cs │ ├── Vec512RfTest.cs │ ├── MixMulSchedTest.cs │ ├── MixJumpMulSchedTest.cs │ ├── VecStoreDataSchedTest.cs │ ├── Mul16SchedTest.cs │ ├── 
Add128SNsqTest.cs │ └── Mul32SchedTest.cs └── README.md ├── Makefile ├── MemoryBandwidth ├── Makefile ├── MemoryBandwidth │ ├── MemoryBandwidth.vcxproj.filters │ └── MemoryBandwidth.sln └── README.md ├── MemoryLatency ├── Makefile ├── MemoryLatencyFunctions.asm ├── MemoryLatency.sln ├── README.md ├── MemoryLatency_i686.s └── MemoryLatency_arm.s ├── CoreClockChecker ├── Makefile ├── CoreClockChecker_x86.s ├── BoostClockChecker_arm.s ├── BoostClockChecker.s └── BoostClockChecker.c ├── .github └── workflows │ └── linux.yaml ├── InstructionRate └── Makefile ├── README.md ├── clammicrobench ├── clammicrobench.vcxproj.filters └── clammicrobench.sln └── CoherencyLatency └── CoherencyLatency.sln /GpuMemLatency/OpenCL/lib/OpenCL.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChipsandCheese/Microbenchmarks/HEAD/GpuMemLatency/OpenCL/lib/OpenCL.lib -------------------------------------------------------------------------------- /AsmGen/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "AsmGen": { 4 | "commandName": "Project", 5 | "commandLineArgs": "autocopy" 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Folders to recursive make into, not everything has a Makefile 2 | folders := MemoryLatency MemoryBandwidth instructionrate CoreClockChecker GpuMemLatency 3 | 4 | all: $(folders) 5 | 6 | $(folders): .FORCE 7 | $(MAKE) -C $@ 8 | 9 | 10 | .FORCE: 11 | -------------------------------------------------------------------------------- /MemoryBandwidth/Makefile: -------------------------------------------------------------------------------- 1 | amd64: 2 | x86_64-linux-gnu-gcc -pthread -O3 MemoryBandwidth.c MemoryBandwidth_x86.s -o membw_amd64 -lm 3 | 4 | aarch64: 5 | 
aarch64-linux-gnu-gcc -pthread -O3 MemoryBandwidth.c MemoryBandwidth_arm.s -o membw_aarch64 -lm 6 | 7 | win64: 8 | x86_64-w64-mingw32-gcc-win32 -pthread -O3 MemoryBandwidth.c MemoryBandwidth_x86.s -o membw.exe -lm 9 | -------------------------------------------------------------------------------- /MemoryLatency/Makefile: -------------------------------------------------------------------------------- 1 | amd64: 2 | x86_64-linux-gnu-gcc -O3 MemoryLatency.c MemoryLatency_x86.s -o MemoryLatency -lm 3 | aarch64: 4 | aarch64-linux-gnu-gcc -O3 MemoryLatency.c MemoryLatency_arm.s -o MemoryLatency -lm 5 | win64: 6 | x86_64-w64-mingw32-gcc -O3 MemoryLatency.c MemoryLatency_x86.s -o MemoryLatency.exe -lm 7 | win32: 8 | i686-w64-mingw32-gcc -O3 MemoryLatency.c MemoryLatency_i686.s -o MemoryLatency32.exe -lm 9 | -------------------------------------------------------------------------------- /AsmGen/IUarchTestParallelBuild.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace AsmGen 4 | { 5 | public interface IUarchTestParallelBuild : IUarchTest 6 | { 7 | /// 8 | /// Generate and write out NASM files 9 | /// 10 | /// list of nasm filenames to include in build 11 | public List GenerateNasmFiles(); 12 | 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /CoreClockChecker/Makefile: -------------------------------------------------------------------------------- 1 | amd64: 2 | x86_64-linux-gnu-gcc -pthread -O3 CoreClockChecker.c CoreClockChecker_x86.s -o CoreClockChecker -lm 3 | win64: 4 | x86_64-w64-mingw32-gcc -pthread -O3 CoreClockChecker.c CoreClockChecker_x86.s -o CoreClockChecker -lm 5 | boostclockchecker: 6 | gcc -O3 BoostClockChecker.c BoostClockChecker.s -o BoostClockChecker 7 | win64boostclockchecker: 8 | x86_64-w64-mingw32-gcc BoostClockChecker.c BoostClockChecker.s -o BoostClockChecker.exe 9 | 
-------------------------------------------------------------------------------- /.github/workflows/linux.yaml: -------------------------------------------------------------------------------- 1 | name: Build benchmarks on Ubuntu 2 | on: [push, pull_request] 3 | jobs: 4 | BuildBenchmarks: 5 | # Only Ubuntu for now. 6 | runs-on: ubuntu-latest 7 | steps: 8 | - run: sudo apt-get update 9 | - run: sudo apt-get install -y build-essential ocl-icd-opencl-dev opencl-headers 10 | - name: Check out repository code 11 | uses: actions/checkout@v3 12 | - name: Try to build all benchmarks with a Makefile 13 | run: make -j4 all 14 | -------------------------------------------------------------------------------- /InstructionRate/Makefile: -------------------------------------------------------------------------------- 1 | x86instructionrate: x86instructionrate.s x86instructionrate.c x86instructionrate.h 2 | gcc -O3 x86instructionrate.s x86instructionrate.c x86instructionrate.h -o x86instructionrate 3 | arm_instructionrate: arminstructionrate.s arminstructionrate.c arminstructionrate.h 4 | gcc -O3 arminstructionrate.s arminstructionrate.c arminstructionrate.h -o arminstructionrate 5 | x86_instructionrate_win64: 6 | x86_64-w64-mingw32-gcc -O3 x86instructionrate.c x86instructionrate.s x86instructionrate.h -o x86instructionrate.exe 7 | -------------------------------------------------------------------------------- /GpuMemLatency/Makefile: -------------------------------------------------------------------------------- 1 | UNAME_S := $(shell uname -s) 2 | CC = gcc 3 | CFLAGS = -O3 4 | DEPS = ../common/timings.h 5 | OBJ = opencltest.o latency_test.o bw_test.o common.o atomic_test.o instruction_rate.o timing.o 6 | 7 | ifeq ($(UNAME_S),Darwin) 8 | LDFLAGS += -framework OpenCL 9 | else 10 | LDFLAGS += -lOpenCL 11 | endif 12 | 13 | opencltest: $(OBJ) 14 | gcc $(CFLAGS) $^ -o $@ -lm $(LDFLAGS) 15 | 16 | %.o: %.c $(DEPS) 17 | $(CC) $(CFLAGS) -c -o $@ $< 18 | 19 | timing.o: 20 | $(CC) 
$(CFLAGS) -c ../common/timing.c -o timing.o 21 | -------------------------------------------------------------------------------- /MemoryLatency/MemoryLatencyFunctions.asm: -------------------------------------------------------------------------------- 1 | section .text 2 | bits 64 3 | 4 | global preplatencyarr 5 | global latencytest 6 | 7 | preplatencyarr: 8 | push r15 9 | push r14 10 | xor r15, r15 ; array index 11 | preplatencyarr_loop: 12 | mov r14, [rcx + r15 * 8] 13 | lea r14, [rcx + r14 * 8] 14 | mov [rcx + r15 * 8], r14 15 | inc r15 16 | cmp rdx, r15 17 | jne preplatencyarr_loop 18 | pop r14 19 | pop r15 20 | ret 21 | 22 | latencytest: 23 | push r15 24 | mov r15, [rdx] 25 | xor rax, rax 26 | latencytest_loop: 27 | mov r15, [r15] 28 | add rax, r15 29 | dec rcx 30 | jnz latencytest_loop 31 | pop r15 32 | ret 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Microbenchmarks 2 | Trying to figure various CPU things out 3 | 4 | Basically my playground to microbenchmark various CPU-related things like ROB/register file sizes, lock/cache coherency latency, and cache/memory performance. 5 | 6 | # Building Clammicrobench with Generated Code 7 | Get NASM (https://www.nasm.us/) and make sure it's in your path. Then things should build under Visual Studio 2019. 8 | 9 | Some microbenchmarks have the source code and assembly generated by C# code, to avoid crazy stuff like self modifying code. For clammicrobench, build/run the AsmGen project. Pass "autocopy" on the command line to have it automatically place generated ASM files for Visual Studio. Then, the clammicrobench project should build. 
10 | -------------------------------------------------------------------------------- /CoreClockChecker/CoreClockChecker_x86.s: -------------------------------------------------------------------------------- 1 | .global clktest 2 | 3 | /* 4 | %rdi = arg0 = iteration count 5 | */ 6 | clktest: 7 | push %rbx 8 | push %r8 9 | push %r9 10 | mov $1, %r8 11 | mov $20, %r9 12 | xor %rbx, %rbx 13 | clktest_loop: 14 | add %r8, %rbx 15 | add %r8, %rbx 16 | add %r8, %rbx 17 | add %r8, %rbx 18 | add %r8, %rbx 19 | add %r8, %rbx 20 | add %r8, %rbx 21 | add %r8, %rbx 22 | add %r8, %rbx 23 | add %r8, %rbx 24 | add %r8, %rbx 25 | add %r8, %rbx 26 | add %r8, %rbx 27 | add %r8, %rbx 28 | add %r8, %rbx 29 | add %r8, %rbx 30 | add %r8, %rbx 31 | add %r8, %rbx 32 | add %r8, %rbx 33 | add %r8, %rbx 34 | sub %r9, %rdi 35 | jnz clktest_loop 36 | pop %r9 37 | pop %r8 38 | pop %rbx 39 | ret 40 | -------------------------------------------------------------------------------- /AsmGen/IUarchTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public interface IUarchTest 6 | { 7 | // enough to generate global lines, function calls, and let user pick from tests 8 | public string Prefix { get; } 9 | public string Description { get; } 10 | public bool DivideTimeByCount { get; } 11 | public void GenerateX86GccAsm(StringBuilder sb); 12 | public void GenerateX86NasmAsm(StringBuilder sb); 13 | public void GenerateArmAsm(StringBuilder sb); 14 | public void GenerateVsTestBlock(StringBuilder sb); 15 | public void GenerateTestBlock(StringBuilder sb); 16 | 17 | public void GenerateAsmGlobalLines(StringBuilder sb); 18 | public void GenerateNasmGlobalLines(StringBuilder sb); 19 | 20 | public void GenerateVsExternLines(StringBuilder sb); 21 | public void GenerateExternLines(StringBuilder sb); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- 
/AsmGen/DataFiles/CommonFunctions.c: -------------------------------------------------------------------------------- 1 | // this is a partial C file that's appended into generated code 2 | // stuff here is generic enough to work for both windows/vs and gcc 3 | // print the CSV header row: "x" followed by each of the xLen values in xCounts 4 | void printCsvHeader(uint32_t* xCounts, uint32_t xLen) { 5 | printf("x"); 6 | for (uint32_t testSizeIdx = 0; testSizeIdx < xLen; testSizeIdx++) { 7 | printf(", %u", xCounts[testSizeIdx]); 8 | } 9 | 10 | printf("\n"); 11 | } 12 | 13 | // print results in format that excel can take: a header row of xCounts, then one row per yCounts entry; arr holds yLen rows of xLen floats (row-major). Counts are uint32_t, so they are printed with %u 14 | void printResultFloatArr(float* arr, uint32_t *xCounts, uint32_t xLen, uint32_t *yCounts, uint32_t yLen) { 15 | uint32_t testSizeCount = xLen; 16 | printCsvHeader(xCounts, xLen); 17 | for (uint32_t branchCountIdx = 0; branchCountIdx < yLen; branchCountIdx++) { 18 | // row header 19 | printf("%u", yCounts[branchCountIdx]); 20 | for (uint32_t testSizeIdx = 0; testSizeIdx < testSizeCount; testSizeIdx++) { 21 | printf(",%f", arr[branchCountIdx * testSizeCount + testSizeIdx]); 22 | } 23 | 24 | printf("\n"); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /CoreClockChecker/BoostClockChecker_arm.s: -------------------------------------------------------------------------------- 1 | .text 2 | .global clktsctest 3 | 4 | /* x0 = iterations, return elapsed TSC in x0 */ 5 | clktsctest: 6 | sub sp, sp, #0x40 7 | stp x10, x11, [sp, #0x10] 8 | stp x12, x13, [sp, #0x20] 9 | stp x14, x15, [sp, #0x30] 10 | mov x10, 1 11 | mov x11, 20 12 | mov x12, 0 13 | /* stackoverflow says this is a good idea */ 14 | mrs x14, cntvct_el0 15 | clktsctest_loop: 16 | add x12, x12, x10 17 | add x12, x12, x10 18 | add x12, x12, x10 19 | add x12, x12, x10 20 | add x12, x12, x10 21 | add x12, x12, x10 22 | add x12, x12, x10 23 | add x12, x12, x10 24 | add x12, x12, x10 25 | add x12, x12, x10 26 | add x12, x12, x10 27 | add x12, x12, x10 28 | add x12, x12, x10 29 | add x12, x12, x10 30 | add x12, x12, x10 31 | add 
x12, x12, x10 32 | add x12, x12, x10 33 | add x12, x12, x10 34 | add x12, x12, x10 35 | add x12, x12, x10 36 | sub x0, x0, x11 37 | cbnz x0, clktsctest_loop 38 | mrs x15, cntvct_el0 39 | sub x0, x15, x14 40 | ldp x14, x15, [sp, #0x30] 41 | ldp x12, x13, [sp, #0x20] 42 | ldp x10, x11, [sp, #0x10] 43 | add sp, sp, #0x40 44 | ret 45 | -------------------------------------------------------------------------------- /GpuMemLatency/OpenCL/include/CL/opencl.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2008-2020 The Khronos Group Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | 17 | #ifndef __OPENCL_H 18 | #define __OPENCL_H 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif /* __OPENCL_H */ 34 | -------------------------------------------------------------------------------- /AsmGen/AsmGen.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.2.32516.85 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AsmGen", "AsmGen.csproj", "{B8930E86-946C-4831-B088-F571E73EEDC4}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {B8930E86-946C-4831-B088-F571E73EEDC4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {B8930E86-946C-4831-B088-F571E73EEDC4}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {B8930E86-946C-4831-B088-F571E73EEDC4}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {B8930E86-946C-4831-B088-F571E73EEDC4}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {4433D029-CD62-44B9-862E-A8DE52DA45CE} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /CoreClockChecker/BoostClockChecker.s: -------------------------------------------------------------------------------- 1 | .global clktsctest 2 | 3 | /* rcx = iterations, return elapsed TSC in rax */ 4 | 
clktsctest: 5 | push %rdx 6 | push %rbx 7 | push %r8 8 | push %r9 9 | push %r10 10 | mov %rcx, %rdi /* rdi is the loop counter. NOTE(review): the count is taken from rcx; a System V caller would pass it in rdi, which this overwrites - confirm for the native gcc build */ 11 | mov $1, %r8 12 | mov $20, %r9 13 | xor %rbx, %rbx 14 | rdtsc /* high 32 bits in EDX, low 32 bits in EAX */ 15 | shl $32, %rdx /* shift high 32 bits into upper half of RDX */ 16 | add %rax, %rdx /* place full 64-bit value in rdx */ 17 | mov %rdx, %r10 18 | clktsctest_loop: 19 | add %r8, %rbx 20 | add %r8, %rbx 21 | add %r8, %rbx 22 | add %r8, %rbx 23 | add %r8, %rbx 24 | add %r8, %rbx 25 | add %r8, %rbx 26 | add %r8, %rbx 27 | add %r8, %rbx 28 | add %r8, %rbx 29 | add %r8, %rbx 30 | add %r8, %rbx 31 | add %r8, %rbx 32 | add %r8, %rbx 33 | add %r8, %rbx 34 | add %r8, %rbx 35 | add %r8, %rbx 36 | add %r8, %rbx 37 | add %r8, %rbx 38 | add %r8, %rbx 39 | sub %r9, %rdi /* 20 adds per pass, so count down by 20; a count that is not a multiple of 20 steps past zero without ending the loop */ 40 | jnz clktsctest_loop 41 | rdtsc 42 | shl $32, %rdx 43 | add %rdx, %rax /* now rax has the new value */ 44 | sub %r10, %rax /* subtract old TSC value from the new one, which should be larger */ 45 | pop %r10 46 | pop %r9 47 | pop %r8 48 | pop %rbx 49 | pop %rdx 50 | ret 51 | -------------------------------------------------------------------------------- /AsmGen/AsmGen.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | 7 | 8 | 9 | 10 | Always 11 | 12 | 13 | Always 14 | 15 | 16 | Always 17 | 18 | 19 | Always 20 | 21 | 22 | Always 23 | 24 | 25 | Always 26 | 27 | 28 | Always 29 | 30 | 31 | Always 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /AsmGen/tests/RobTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class RobTest : UarchTest 6 | { 7 | private string[] nops; 8 | 9 | public RobTest(int low, int high, int step) 10 | { 11 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 12 | this.Prefix = "rob"; 13 | this.Description = "Reorder Buffer Test"; 14 | 
this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 15 | this.GetFunctionCallParameters = "structIterations, A"; 16 | this.DivideTimeByCount = false; 17 | this.nops = new string[] { "nop" }; 18 | } 19 | 20 | public override void GenerateX86GccAsm(StringBuilder sb) 21 | { 22 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, nops, nops, true); 23 | } 24 | 25 | public override void GenerateX86NasmAsm(StringBuilder sb) 26 | { 27 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, nops, nops, true); 28 | } 29 | 30 | public override void GenerateArmAsm(StringBuilder sb) 31 | { 32 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, nops, nops, true); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /clammicrobench/clammicrobench.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Source Files 25 | 26 | 27 | Source Files 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /AsmGen/DataFiles/VsBranchHistFunction.c: -------------------------------------------------------------------------------- 1 | // partial C file that gets appended into generated code 2 | // Builds branchCounts[branchCountIdx] pattern arrays of historyLen entries (random 0/1 when `random` is nonzero, all 0 otherwise), times the matching branchtestFuncArr function with ftime, and returns the elapsed time in ns divided by iterations and by branch count 3 | float runBranchHistTest(uint32_t historyLen, uint32_t branchCountIdx, int random) { 4 | struct timeb start, end; 5 | uint32_t branchCount = branchCounts[branchCountIdx]; 6 | uint64_t iterations = 160000000 / branchCount; 7 | 
uint64_t(*branchtestFunc)(uint64_t, uint32_t**, uint32_t) = branchtestFuncArr[branchCountIdx]; 8 | 9 | uint32_t** testArrToArr = (uint32_t**)malloc(sizeof(uint32_t*) * branchCount); 10 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) { 11 | uint32_t* testArr = (uint32_t*)malloc(sizeof(uint32_t) * historyLen); 12 | for (uint32_t i = 0; i < historyLen; i++) testArr[i] = random ? ((rand() & 0x400U) != 0) : 0; /* parenthesized: != binds tighter than &, so the bare form tested bit 0 instead of bit 10 */ 13 | testArrToArr[testArrIdx] = testArr; 14 | } 15 | 16 | ftime(&start); 17 | branchtestFunc(iterations, testArrToArr, historyLen); 18 | ftime(&end); 19 | uint64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm); 20 | float latency = 1e6 * (float)time_diff_ms / (float)iterations; 21 | 22 | // give result in latency per branch 23 | latency = latency / branchCount; 24 | 25 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) free(testArrToArr[testArrIdx]); 26 | free(testArrToArr); 27 | return latency; 28 | } 29 | -------------------------------------------------------------------------------- /MemoryBandwidth/MemoryBandwidth/MemoryBandwidth.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Source Files 25 | 26 | 27 | Source Files 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /GpuMemLatency/OpenCL/include/CL/cl_gl_ext.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * 
Copyright (c) 2008-2020 The Khronos Group Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | 17 | #ifndef __OPENCL_CL_GL_EXT_H 18 | #define __OPENCL_CL_GL_EXT_H 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | #include 25 | 26 | /* 27 | * cl_khr_gl_event extension 28 | */ 29 | #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D 30 | 31 | extern CL_API_ENTRY cl_event CL_API_CALL 32 | clCreateEventFromGLsyncKHR(cl_context context, 33 | cl_GLsync cl_GLsync, 34 | cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; 35 | 36 | #ifdef __cplusplus 37 | } 38 | #endif 39 | 40 | #endif /* __OPENCL_CL_GL_EXT_H */ 41 | -------------------------------------------------------------------------------- /GpuMemLatency/opencltest.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30503.244 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "opencltest", "opencltest.vcxproj", "{FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | 
EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Debug|x64.ActiveCfg = Debug|x64 17 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Debug|x64.Build.0 = Debug|x64 18 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Debug|x86.ActiveCfg = Debug|Win32 19 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Debug|x86.Build.0 = Debug|Win32 20 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Release|x64.ActiveCfg = Release|x64 21 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Release|x64.Build.0 = Release|x64 22 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Release|x86.ActiveCfg = Release|Win32 23 | {FA51D7F4-F6E0-4CB5-9CDD-AD39A3519F78}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {4447E91D-E7A1-4249-87A7-E75A78167E71} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /AsmGen/DataFiles/BranchhistTestBlock.c: -------------------------------------------------------------------------------- 1 | uint32_t testSizeCount = sizeof(branchHistoryLengths) / sizeof(int); 2 | initializeBranchHistFuncArr(); 3 | srand(time(NULL)); 4 | 5 | size_t resultSize = sizeof(float) * maxBranchCount * testSizeCount; 6 | float* randomResults = (float*)malloc(resultSize); 7 | float* predictableResults = (float*)malloc(resultSize); 8 | for (uint32_t branchCountIdx = 0; branchCountIdx < maxBranchCount; branchCountIdx++) { 9 | for (uint32_t testSizeIdx = 0; testSizeIdx < testSizeCount; testSizeIdx++) { 10 | uint32_t testSize = branchHistoryLengths[testSizeIdx]; 11 | uint32_t branchCount = branchCounts[branchCountIdx]; 12 | printf("Testing branch count %d history length %d\n", branchCount, testSize); 13 | randomResults[branchCountIdx * testSizeCount + testSizeIdx] = runBranchHistTest(testSize, branchCountIdx, 
1); 14 | predictableResults[branchCountIdx * testSizeCount + testSizeIdx] = runBranchHistTest(testSize, branchCountIdx, 0); 15 | printf("%d, %f, %f\n", testSize, 16 | randomResults[branchCountIdx * testSizeCount + testSizeIdx], 17 | predictableResults[branchCountIdx * testSizeCount + testSizeIdx]); 18 | } 19 | } 20 | 21 | printf("Random:\n"); 22 | printResultFloatArr(randomResults, branchHistoryLengths, testSizeCount, branchCounts, maxBranchCount); 23 | printf("\nPredictable:\n"); 24 | printResultFloatArr(predictableResults, branchHistoryLengths, testSizeCount, branchCounts, maxBranchCount); 25 | 26 | free(randomResults); 27 | free(predictableResults); 28 | -------------------------------------------------------------------------------- /AsmGen/tests/FlagsRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class FlagRfTest : UarchTest 6 | { 7 | public FlagRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "flagrf"; 11 | this.Description = "Flags register file capacity"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] nops = new string[1]; 20 | nops[0] = "test %r15, %r14"; 21 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, nops, nops, true); 22 | } 23 | 24 | public override void GenerateX86NasmAsm(StringBuilder sb) 25 | { 26 | string[] nops = new string[1]; 27 | nops[0] = "test r15, r14"; 28 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, nops, nops, true); 29 | } 30 | 31 | public override void GenerateArmAsm(StringBuilder sb) 32 | { 33 | string[] nops = new string[1]; 34 | nops[0] = "cmp x14, x15"; 
35 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, nops, nops, true); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /MemoryLatency/MemoryLatency.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.31229.75 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MemoryLatency", "MemoryLatency.vcxproj", "{3A98A230-A87B-432D-931D-369872DE24AF}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {3A98A230-A87B-432D-931D-369872DE24AF}.Debug|x64.ActiveCfg = Debug|x64 17 | {3A98A230-A87B-432D-931D-369872DE24AF}.Debug|x64.Build.0 = Debug|x64 18 | {3A98A230-A87B-432D-931D-369872DE24AF}.Debug|x86.ActiveCfg = Debug|Win32 19 | {3A98A230-A87B-432D-931D-369872DE24AF}.Debug|x86.Build.0 = Debug|Win32 20 | {3A98A230-A87B-432D-931D-369872DE24AF}.Release|x64.ActiveCfg = Release|x64 21 | {3A98A230-A87B-432D-931D-369872DE24AF}.Release|x64.Build.0 = Release|x64 22 | {3A98A230-A87B-432D-931D-369872DE24AF}.Release|x86.ActiveCfg = Release|Win32 23 | {3A98A230-A87B-432D-931D-369872DE24AF}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {F2D00DD2-A22B-4A3C-A2FF-9CE8CF9070D1} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /clammicrobench/clammicrobench.sln: 
-------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.31410.357 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clammicrobench", "clammicrobench.vcxproj", "{7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Debug|x64.ActiveCfg = Debug|x64 17 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Debug|x64.Build.0 = Debug|x64 18 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Debug|x86.ActiveCfg = Debug|Win32 19 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Debug|x86.Build.0 = Debug|Win32 20 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Release|x64.ActiveCfg = Release|x64 21 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Release|x64.Build.0 = Release|x64 22 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Release|x86.ActiveCfg = Release|Win32 23 | {7E8CF2BA-57A7-4B42-B721-97E02BF9A8B8}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {A4441112-E760-4CF1-9A63-6BE0A3ACB1C6} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /CoherencyLatency/CoherencyLatency.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.31025.194 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 
| Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CoherencyLatency", "CoherencyLatency.vcxproj", "{6D9CCC8C-09F5-484B-8630-BE18A9CF1995}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Debug|x64.ActiveCfg = Debug|x64 17 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Debug|x64.Build.0 = Debug|x64 18 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Debug|x86.ActiveCfg = Debug|Win32 19 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Debug|x86.Build.0 = Debug|Win32 20 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Release|x64.ActiveCfg = Release|x64 21 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Release|x64.Build.0 = Release|x64 22 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Release|x86.ActiveCfg = Release|Win32 23 | {6D9CCC8C-09F5-484B-8630-BE18A9CF1995}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {A6E60C3D-60ED-4DBF-B4AA-7C1C3A140325} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /MemoryLatency/README.md: -------------------------------------------------------------------------------- 1 | # Memory Latency Test 2 | 3 | This test measures random memory access latency within increasing array sizes, and (hopefully) shows the latency and size of caches as well as memory latency. Modes, passed as the first parameter: 4 | - (no parameter) - Uses plain C code and `current = A[current]` to measure latency 5 | - asm - Uses `mov r15, [r15]` for x86-64 or `ldr x15, [x15]`. 
This can help accurately measure L1D latency, because many x86 CPUs take an extra cycle to calculate "complex" addresses, and compilers tend to generate such addressing for the plain C version above. This doesn't seem to make a difference for ARM. 6 | - tlb - Accesses just one element per 4 KB region to measure virtual to physical address translation latency (so TLBs and page walkers). Cache latency is subtracted out to isolate address translation latency. 7 | 8 | # Building and Running 9 | 10 | Make sure optimization is on, or L1D latencies may be quite a bit higher than expected. 11 | 12 | ## Windows 13 | Under WSL, run `x86_64-w64-mingw32-gcc-win32 -O3 MemoryLatency.c MemoryLatency_x86.s -o MemoryLatency.exe` 14 | 15 | Run with: 16 | `MemoryLatency.exe` 17 | `MemoryLatency.exe asm` 18 | `MemoryLatency.exe tlb` 19 | ## Linux, x86-64 20 | `gcc -O3 MemoryLatency.c MemoryLatency_x86.s -o MemoryLatency` 21 | 22 | ## Linux/Android+Termux, aarch64 23 | `gcc -O3 MemoryLatency.c MemoryLatency_arm.s -o MemoryLatency` 24 | 25 | ## VS version 26 | Open the solution and build. This version will be removed in the near future because cross-compiling from WSL is sufficient to produce a Windows exe, since calling conventions are lined up. 
27 | -------------------------------------------------------------------------------- /MemoryBandwidth/MemoryBandwidth/MemoryBandwidth.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.31410.357 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MemoryBandwidth", "MemoryBandwidth.vcxproj", "{E968D202-64A2-43A5-8BBD-D7D010D06564}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Debug|x64.ActiveCfg = Debug|x64 17 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Debug|x64.Build.0 = Debug|x64 18 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Debug|x86.ActiveCfg = Debug|Win32 19 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Debug|x86.Build.0 = Debug|Win32 20 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Release|x64.ActiveCfg = Release|x64 21 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Release|x64.Build.0 = Release|x64 22 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Release|x86.ActiveCfg = Release|Win32 23 | {E968D202-64A2-43A5-8BBD-D7D010D06564}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {2EA86D6F-CEE0-40A9-B4DD-AC59CCAD358F} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /GpuMemLatency/OpenCL/README.md: -------------------------------------------------------------------------------- 1 | # OpenCLTM API Headers 2 | 3 | This repository 
contains C language headers for the OpenCL API. 4 | 5 | The authoritative public repository for these headers is located at: 6 | 7 | https://github.com/KhronosGroup/OpenCL-Headers 8 | 9 | Issues, proposed fixes for issues, and other suggested changes should be 10 | created using GitHub. 11 | 12 | ## Branch Structure 13 | 14 | The OpenCL API headers in this repository are Unified headers and are designed 15 | to work with all released OpenCL versions. This differs from previous OpenCL 16 | API headers, where version-specific API headers either existed in separate 17 | branches, or in separate folders in a branch. 18 | 19 | ## Compiling for a Specific OpenCL Version 20 | 21 | By default, the OpenCL API headers in this repository are for the latest 22 | OpenCL version (currently OpenCL 2.2). To use these API headers to target 23 | a different OpenCL version, an application may `#define` the preprocessor 24 | value `CL_TARGET_OPENCL_VERSION` before including the OpenCL API headers. 25 | The `CL_TARGET_OPENCL_VERSION` is a three-digit decimal value representing 26 | the OpenCL API version. 27 | 28 | For example, to enforce usage of no more than the OpenCL 1.2 APIs, you may 29 | include the OpenCL API headers as follows: 30 | 31 | ``` 32 | #define CL_TARGET_OPENCL_VERSION 120 33 | #include <CL/opencl.h> 34 | ``` 35 | 36 | ## Directory Structure 37 | 38 | ``` 39 | README.md This file 40 | LICENSE Source license for the OpenCL API headers 41 | CL/ Unified OpenCL API headers tree 42 | ``` 43 | 44 | ## License 45 | 46 | See [LICENSE](LICENSE). 47 | 48 | --- 49 | 50 | OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos. 
51 | -------------------------------------------------------------------------------- /AsmGen/DataFiles/GccBranchHistFunction.c: -------------------------------------------------------------------------------- 1 | // this is a partial C file that's appended into generated code 2 | 3 | // Run a test, return the result in time (ns) per branch 4 | // historyLen: length of random array that the test loops through 5 | // branchCountIdx: index into array of branch counts, max determined by generated header/asm 6 | // random: if 1, randomize test array contents. If 0, fill with zeroes 7 | float runBranchHistTest(uint32_t historyLen, uint32_t branchCountIdx, int random) { 8 | struct timeval startTv, endTv; 9 | struct timezone startTz, endTz; 10 | uint32_t branchCount = branchCounts[branchCountIdx]; 11 | uint64_t iterations = 80000000 / branchCount; 12 | uint64_t(*branchtestFunc)(uint64_t, uint32_t**, uint32_t) __attribute((sysv_abi)) = branchtestFuncArr[branchCountIdx]; 13 | 14 | uint32_t** testArrToArr = (uint32_t**)malloc(sizeof(uint32_t*) * branchCount); 15 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) { 16 | uint32_t* testArr = (uint32_t*)malloc(sizeof(uint32_t) * historyLen); 17 | for (uint32_t i = 0; i < historyLen; i++) testArr[i] = random ? 
rand() % 2 : 0; 18 | testArrToArr[testArrIdx] = testArr; 19 | } 20 | 21 | gettimeofday(&startTv, &startTz); 22 | branchtestFunc(iterations, testArrToArr, historyLen); 23 | gettimeofday(&endTv, &endTz); 24 | uint64_t time_diff_ms = 1000 * (endTv.tv_sec - startTv.tv_sec) + ((endTv.tv_usec - startTv.tv_usec) / 1000); 25 | float latency = 1e6 * (float)time_diff_ms / (float)iterations; 26 | 27 | // give result in latency per branch 28 | latency = latency / branchCount; 29 | 30 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) free(testArrToArr[testArrIdx]); 31 | free(testArrToArr); 32 | return latency; 33 | } 34 | -------------------------------------------------------------------------------- /AsmGen/DataFiles/IndirectBranchTestBlock.c: -------------------------------------------------------------------------------- 1 | // generated code will have: 2 | // - indirectBranchTargetCounts = array containing # of targets per branch 3 | // - indirectBranchCounts = array containing # of branches to test 4 | // - maxIndirectBranchCount = length of ^^ 5 | // - initializeIndirectBranchFuncArr = populates 6 | 7 | uint32_t testSizeCount = sizeof(indirectBranchTargetCounts) / sizeof(int); 8 | initializeIndirectBranchFuncArr(); 9 | srand(time(NULL)); 10 | 11 | size_t resultSize = sizeof(float) * maxIndirectBranchCount * testSizeCount; 12 | float* results = (float*)malloc(resultSize); 13 | float* refResults = (float*)malloc(resultSize); 14 | for (uint32_t branchCountIdx = 0; branchCountIdx < maxIndirectBranchCount; branchCountIdx++) { 15 | for (uint32_t targetCountIdx = 0; targetCountIdx < testSizeCount; targetCountIdx++) { 16 | uint32_t testSize = indirectBranchTargetCounts[targetCountIdx]; 17 | uint32_t branchCount = indirectBranchCounts[branchCountIdx]; 18 | printf("Testing branch count %d target count %d:", branchCount, testSize); 19 | results[branchCountIdx * testSizeCount + targetCountIdx] = runIndirectBranchTest(branchCountIdx, targetCountIdx, 0); 20 | 
refResults[branchCountIdx * testSizeCount + targetCountIdx] = runIndirectBranchTest(branchCountIdx, targetCountIdx, 2); 21 | printf("%f ns, reference %f ns\n", 22 | results[branchCountIdx * testSizeCount + targetCountIdx], 23 | refResults[branchCountIdx * testSizeCount + targetCountIdx]); 24 | } 25 | } 26 | 27 | printf("Indirect branch results:\n"); 28 | printResultFloatArr(results, indirectBranchTargetCounts, testSizeCount, indirectBranchCounts, maxIndirectBranchCount); 29 | printf("Reference indirect branch results:\n"); 30 | printResultFloatArr(refResults, indirectBranchTargetCounts, testSizeCount, indirectBranchCounts, maxIndirectBranchCount); 31 | 32 | free(results); 33 | free(refResults); 34 | -------------------------------------------------------------------------------- /AsmGen/tests/LoadDivSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class LoadDivSchedTest : UarchTest 6 | { 7 | public LoadDivSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "loaddivsched"; 11 | this.Description = "Load Scheduler Capacity Test, using divs to block retirement"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int count, int *arr2"; 13 | this.GetFunctionCallParameters = "structIterations, list_size, B"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] dependentLoads = new string[1]; 20 | dependentLoads[0] = " mov (%r8, %rdx, 4), %r15"; 21 | 22 | UarchTestHelpers.GenerateX86AsmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentLoads, dependentLoads, false); 23 | } 24 | 25 | public override void GenerateX86NasmAsm(StringBuilder sb) 26 | { 27 | string[] dependentLoads = new string[1]; 28 | dependentLoads[0] = " mov r15, [r8 + rdx * 4]"; 29 | 30 | 
UarchTestHelpers.GenerateX86NasmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentLoads, dependentLoads, false); 31 | } 32 | 33 | public override void GenerateArmAsm(StringBuilder sb) 34 | { 35 | string[] dependentLoads = new string[1]; 36 | dependentLoads[0] = " ldr w15, [x2, w25, uxtw #2]"; 37 | 38 | string[] dependentLoads1 = new string[1]; 39 | dependentLoads1[0] = " ldr w15, [x2, w26, uxtw #2]"; 40 | 41 | UarchTestHelpers.GenerateArmAsmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentLoads, dependentLoads1, false); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /GpuMemLatency/instruction_rate_fp16_kernel.cl: -------------------------------------------------------------------------------- 1 | #define rate_local_mem_test_size 256 2 | __kernel void fp16_add_rate_test(__global half8 *A, int count, __global half8 *ret) { 3 | int tid = get_local_id(0); 4 | int max_offset = get_local_size(0); 5 | __global half8 *local_a = A; 6 | 7 | int masked_tid = tid & (rate_local_mem_test_size - 1); 8 | half8 v0 = local_a[masked_tid]; 9 | half8 v1 = local_a[masked_tid + 1]; 10 | half8 v2 = local_a[masked_tid + 2]; 11 | half8 v3 = local_a[masked_tid + 3]; 12 | half8 v4 = v0 + v1; 13 | half8 v5 = v0 + v2; 14 | half8 v6 = v0 + v3; 15 | half8 v7 = v1 + v2; 16 | half8 acc = local_a[0]; 17 | 18 | for (int i = 0; i < count; i++) { 19 | v0 += acc; 20 | v1 += acc; 21 | v2 += acc; 22 | v3 += acc; 23 | v4 += acc; 24 | v5 += acc; 25 | v6 += acc; 26 | v7 += acc; 27 | } 28 | 29 | ret[get_global_id(0)] = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7; 30 | } 31 | 32 | __kernel void fp16_fma_rate_test(__global half8 *A, int count, __global half8 *ret) { 33 | int tid = get_local_id(0); 34 | int max_offset = get_local_size(0); 35 | __global half8 *local_a = A; 36 | 37 | int masked_tid = tid & (rate_local_mem_test_size - 1); 38 | half8 v0 = local_a[masked_tid]; 39 | half8 v1 = local_a[masked_tid + 1]; 40 | half8 v2 = 
local_a[masked_tid + 2]; 41 | half8 v3 = local_a[masked_tid + 3]; 42 | half8 v4 = v0 + v1; 43 | half8 v5 = v0 + v2; 44 | half8 v6 = v0 + v3; 45 | half8 v7 = v1 + v2; 46 | half8 acc = local_a[0]; 47 | 48 | for (int i = 0; i < count; i++) { 49 | v0 += acc * v0; 50 | v1 += acc * v1; 51 | v2 += acc * v2; 52 | v3 += acc * v3; 53 | v4 += acc * v4; 54 | v5 += acc * v5; 55 | v6 += acc * v6; 56 | v7 += acc * v7; 57 | } 58 | 59 | ret[get_global_id(0)] = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7; 60 | } 61 | -------------------------------------------------------------------------------- /AsmGen/tests/LdqStqTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class LdqStqTest : UarchTest 6 | { 7 | public LdqStqTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixldqstq"; 11 | this.Description = "Mixed Load/Store Queue Test (mem ops pending retire)"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1"; 13 | this.GetFunctionCallParameters = "structIterations, A, B"; 14 | } 15 | 16 | public override void GenerateX86GccAsm(StringBuilder sb) 17 | { 18 | string[] instrs = new string[4]; 19 | instrs[0] = " mov %r15, (%r8)"; 20 | instrs[1] = " mov (%rdx), %r14"; 21 | instrs[2] = " mov %r13, (%r8)"; 22 | instrs[3] = " mov (%rdx), %r12"; 23 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true); 24 | } 25 | 26 | public override void GenerateX86NasmAsm(StringBuilder sb) 27 | { 28 | string[] instrs = new string[4]; 29 | instrs[0] = " mov [r8], r15"; 30 | instrs[1] = " mov r14, [rdx]"; 31 | instrs[2] = " mov [r8], r13"; 32 | instrs[3] = " mov r12, [rdx]"; 33 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true); 34 | } 35 | 36 | public override void GenerateArmAsm(StringBuilder sb) 
37 | { 38 | string[] instrs = new string[4]; 39 | instrs[0] = " str x15, [x2]"; 40 | instrs[1] = " ldr x14, [x1]"; 41 | instrs[2] = " str x13, [x2]"; 42 | instrs[3] = " ldr x12, [x1]"; 43 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true); 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /GpuMemLatency/instruction_rate_fp64_kernel.cl: -------------------------------------------------------------------------------- 1 | #define rate_local_mem_test_size 256 2 | __kernel void fp64_add_rate_test(__global double2 *A, int count, __global double2 *ret) { 3 | int tid = get_local_id(0); 4 | int max_offset = get_local_size(0); 5 | __global double2 *local_a = A; 6 | 7 | int masked_tid = tid & (rate_local_mem_test_size - 1); 8 | double2 v0 = local_a[masked_tid]; 9 | double2 v1 = local_a[masked_tid + 1]; 10 | double2 v2 = local_a[masked_tid + 2]; 11 | double2 v3 = local_a[masked_tid + 3]; 12 | double2 v4 = v0 + v1; 13 | double2 v5 = v0 + v2; 14 | double2 v6 = v0 + v3; 15 | double2 v7 = v1 + v2; 16 | double2 acc = local_a[0]; 17 | 18 | for (int i = 0; i < count; i++) { 19 | v0 += acc; 20 | v1 += acc; 21 | v2 += acc; 22 | v3 += acc; 23 | v4 += acc; 24 | v5 += acc; 25 | v6 += acc; 26 | v7 += acc; 27 | } 28 | 29 | ret[get_global_id(0)] = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7; 30 | } 31 | 32 | __kernel void fp64_fma_rate_test(__global double2 *A, int count, __global double2 *ret) { 33 | int tid = get_local_id(0); 34 | int max_offset = get_local_size(0); 35 | __global double2 *local_a = A; 36 | 37 | int masked_tid = tid & (rate_local_mem_test_size - 1); 38 | double2 v0 = local_a[masked_tid]; 39 | double2 v1 = local_a[masked_tid + 1]; 40 | double2 v2 = local_a[masked_tid + 2]; 41 | double2 v3 = local_a[masked_tid + 3]; 42 | double2 v4 = v0 + v1; 43 | double2 v5 = v0 + v2; 44 | double2 v6 = v0 + v3; 45 | double2 v7 = v1 + v2; 46 | double2 acc = local_a[0]; 47 | 48 | for (int i 
= 0; i < count; i++) { 49 | v0 += acc * v0; 50 | v1 += acc * v1; 51 | v2 += acc * v2; 52 | v3 += acc * v3; 53 | v4 += acc * v4; 54 | v5 += acc * v5; 55 | v6 += acc * v6; 56 | v7 += acc * v7; 57 | } 58 | 59 | ret[get_global_id(0)] = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7; 60 | } 61 | -------------------------------------------------------------------------------- /AsmGen/README.md: -------------------------------------------------------------------------------- 1 | # Microbenchmark Generator 2 | C# project to generate C and assembly for CPU structure size benchmarks that use different code for each data point, making them 3 | impractical to write by hand. For more details on methodology for out-of-order structure size measurement, see https://blog.stuffedcow.net/2013/05/measuring-rob-capacity/ 4 | 5 | Branch predictor benchmarks are different: 6 | - BTB tests: Spams different numbers of unconditional jumps in a loop, spaced by different distances because branch predictors sometimes have trouble tracking branches that are too close together. 7 | - Branch history test: Generates branches that are taken or not taken in some random pattern, then increases the length of that pattern and the number of branches. Each branch is given its own pattern. This test thus tries to see how long of a pattern the branch predictor can track before getting a lot of mispredicts. 8 | - Indirect branch prediction test: Varies the number of branch targets and branches to see how many total targets the indirect branch predictor can track 9 | 10 | # Building 11 | 12 | Compile the project and run AsmGen.exe. That gives several output files. 
Compilation for Linux: 13 | `gcc clammicrobench.c clammicrobench_x86.s -o clammicrobench` for x86_64 14 | `gcc clammicrobench.c clammicrobench_arm.s -o clammicrobench` for aarch64 15 | `aarch64-linux-gnu-gcc clammicrobench.c clammicrobench_arm.s -o clammicrobench` to cross compile for aarch64 (for example from a fast desktop) 16 | 17 | For Windows, run `AsmGen.exe autocopy`. That copies generated files to the /clammicrobench directory, assuming it's run from the default VS output location. Then, open /clammicrobench/clammicrobench.sln and build. You need nasm in your path for that, as covered on README.md at repo root. 18 | 19 | The indirect branch test can take a while to build with nasm, so you might want to reduce the branch and target counts for that. Or just keep it commented out. 20 | 21 | # Running 22 | Generally, the syntax is `clammicrobench [test name] [list size for latency test] [iteration count]`. The last two parameters are optional. 23 | -------------------------------------------------------------------------------- /AsmGen/tests/RorSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class RorSchedTest : UarchTest 6 | { 7 | public RorSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "rorsched"; 11 | this.Description = "Rotate Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string postLoadInstr1 = " mov %rdi, %r15"; 20 | string postLoadInstr2 = " mov %rsi, %r15"; 21 | string[] rors = new string[1]; 22 | rors[0] = " ror $1, %r15"; 23 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, rors, rors, 
false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 24 | } 25 | 26 | public override void GenerateX86NasmAsm(StringBuilder sb) 27 | { 28 | string postLoadInstr1 = " mov r15, rdi"; 29 | string postLoadInstr2 = " mov r15, rsi"; 30 | string[] rors = new string[1]; 31 | rors[0] = " ror r15, 1"; 32 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, rors, rors, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 33 | } 34 | 35 | public override void GenerateArmAsm(StringBuilder sb) 36 | { 37 | string postLoadInstr1 = " mov x15, x25"; 38 | string postLoadInstr2 = " mov x15, x26"; 39 | string[] rors = new string[1]; 40 | rors[0] = " ror x15, x15, #1"; 41 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, rors, rors, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /AsmGen/tests/IntRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class IntRfTest : UarchTest 6 | { 7 | public IntRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "intrf"; 11 | this.Description = "Integer RF Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledAdds = new string[4]; 20 | unrolledAdds[0] = " add %r11, %r15"; 21 | unrolledAdds[1] = " add %r11, %r14"; 22 | unrolledAdds[2] = " add %r11, %r13"; 23 | unrolledAdds[3] = " add %r11, %r12"; 24 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 
25 | } 26 | 27 | public override void GenerateX86NasmAsm(StringBuilder sb) 28 | { 29 | string[] unrolledAdds = new string[4]; 30 | unrolledAdds[0] = " add r15, r11"; 31 | unrolledAdds[1] = " add r14, r11"; 32 | unrolledAdds[2] = " add r13, r11"; 33 | unrolledAdds[3] = " add r12, r11"; 34 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 35 | } 36 | 37 | public override void GenerateArmAsm(StringBuilder sb) 38 | { 39 | string[] unrolledAdds = new string[4]; 40 | unrolledAdds[0] = " add x15, x15, x11"; 41 | unrolledAdds[1] = " add x14, x14, x11"; 42 | unrolledAdds[2] = " add x13, x13, x11"; 43 | unrolledAdds[3] = " add x12, x12, x11"; 44 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /GpuMemLatency/opencltest.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef opencltestheader 4 | #define opencltestheader 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "../common/timing.h" 11 | 12 | #define false 0 13 | #define true 1 14 | 15 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS 16 | #ifndef __APPLE__ 17 | #include 18 | #else 19 | #include 20 | #endif 21 | #define MAX_SOURCE_SIZE (0x100000) 22 | 23 | #define CACHELINE_SIZE 64 24 | #ifndef _MSC_VER 25 | #include <strings.h> 26 | #define _strnicmp strncasecmp // non-MSVC: map to the POSIX case-insensitive compare so matching behaves the same as _strnicmp on Windows 27 | #endif 28 | extern cl_device_id selected_device_id; 29 | extern cl_platform_id selected_platform_id; 30 | extern cl_ulong max_global_test_size; 31 | cl_context get_context_from_user(int platform_index, int device_index); 32 | cl_program build_program(cl_context context, const char* fname); 33 | void FillPatternArr(uint32_t* pattern_arr, uint32_t list_size, uint32_t byte_increment); 34 | cl_uint getCuCount(); 35 | 36 | float int_atomic_latency_test(cl_context 
context, 37 | cl_command_queue command_queue, 38 | cl_kernel kernel, 39 | uint32_t iterations, 40 | short local); 41 | float latency_test(cl_context context, 42 | cl_command_queue command_queue, 43 | cl_kernel kernel, 44 | uint32_t list_size, 45 | uint32_t chase_iterations, 46 | short sattolo); 47 | float bw_test(cl_context context, 48 | cl_command_queue command_queue, 49 | cl_kernel kernel, 50 | uint64_t list_size, 51 | uint32_t thread_count, 52 | uint32_t local_size, 53 | uint32_t skip, 54 | uint32_t chase_iterations); 55 | void link_bw_test(cl_context context, 56 | cl_command_queue command_queue, 57 | cl_kernel kernel, 58 | uint32_t iterations); 59 | float c2c_atomic_latency_test(cl_context context, 60 | cl_command_queue command_queue, 61 | cl_kernel kernel, 62 | uint32_t iterations); 63 | 64 | float instruction_rate_test(cl_context context, 65 | cl_command_queue command_queue, 66 | uint32_t thread_count, 67 | uint32_t local_size, 68 | uint32_t chase_iterations); 69 | #endif 70 | -------------------------------------------------------------------------------- /AsmGen/tests/JumpSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class JumpSchedTest : UarchTest 6 | { 7 | public JumpSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "jmpsched"; 11 | this.Description = "Not-taken Jump Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledJumps = new string[1]; 20 | unrolledJumps[0] = $" cmp %rdi, %rsi\n je jumpsched_reallybadthing"; 21 | 22 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, 
unrolledJumps, unrolledJumps, false); 23 | 24 | sb.AppendLine("jumpsched_reallybadthing:"); 25 | sb.AppendLine(" int3"); 26 | } 27 | 28 | public override void GenerateX86NasmAsm(StringBuilder sb) 29 | { 30 | string[] unrolledJumps = new string[1]; 31 | unrolledJumps[0] = " cmp rdi, rsi\n je jumpsched_reallybadthing"; 32 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false); 33 | 34 | sb.AppendLine("jumpsched_reallybadthing:"); 35 | sb.AppendLine(" int3"); 36 | } 37 | 38 | public override void GenerateArmAsm(StringBuilder sb) 39 | { 40 | string[] unrolledJumps = new string[1]; 41 | unrolledJumps[0] = " cmp x25, x26\n b.eq jumpsched_reallybadthing"; 42 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs( 43 | sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false); 44 | 45 | sb.AppendLine("jumpsched_reallybadthing:"); 46 | sb.AppendLine(" .word 0xf7f0a000"); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /AsmGen/tests/NotIntRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class NotIntRfTest : UarchTest 6 | { 7 | public NotIntRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "notintrf"; 11 | this.Description = "Integer RF Test with not (no setting flags)"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledAdds = new string[4]; 20 | unrolledAdds[0] = " not %r15"; 21 | unrolledAdds[1] = " not %r14"; 22 | unrolledAdds[2] = " not %r13"; 23 | unrolledAdds[3] = " not %r12"; 24 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, 
this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 25 | } 26 | 27 | public override void GenerateX86NasmAsm(StringBuilder sb) 28 | { 29 | string[] unrolledAdds = new string[4]; 30 | unrolledAdds[0] = " not r15"; 31 | unrolledAdds[1] = " not r14"; 32 | unrolledAdds[2] = " not r13"; 33 | unrolledAdds[3] = " not r12"; 34 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 35 | } 36 | 37 | // aarch64: MVN (bitwise NOT of a register) mirrors the x86 "not" used above and does not set flags 38 | public override void GenerateArmAsm(StringBuilder sb) 39 | { 40 | string[] unrolledAdds = new string[4]; 41 | unrolledAdds[0] = " mvn x15, x15"; 42 | unrolledAdds[1] = " mvn x14, x14"; 43 | unrolledAdds[2] = " mvn x13, x13"; 44 | unrolledAdds[3] = " mvn x12, x12"; 45 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /AsmGen/tests/MovImmIntRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MovImmIntRfTest : UarchTest 6 | { 7 | public MovImmIntRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "movimmintrf"; 11 | this.Description = "Integer RF Test (move immediate)"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledAdds = new string[4]; 20 | unrolledAdds[0] = " mov $1, %r15"; 21 | unrolledAdds[1] = " mov $2, %r14"; 22 | unrolledAdds[2] = " mov $3, %r13"; 23 | unrolledAdds[3] = " mov $4, %r12"; 24 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, 
unrolledAdds, true); 25 | } 26 | 27 | public override void GenerateX86NasmAsm(StringBuilder sb) 28 | { 29 | string[] unrolledAdds = new string[4]; 30 | unrolledAdds[0] = " mov r15, 1"; 31 | unrolledAdds[1] = " mov r14, 2"; 32 | unrolledAdds[2] = " mov r13, 3"; 33 | unrolledAdds[3] = " mov r12, 4"; 34 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 35 | } 36 | // aarch64: move-immediate into the same four registers, mirroring the x86 variants above 37 | public override void GenerateArmAsm(StringBuilder sb) 38 | { 39 | string[] unrolledAdds = new string[4]; 40 | unrolledAdds[0] = " mov x15, #1"; 41 | unrolledAdds[1] = " mov x14, #2"; 42 | unrolledAdds[2] = " mov x13, #3"; 43 | unrolledAdds[3] = " mov x12, #4"; 44 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /AsmGen/tests/LdqTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class LdqTest : UarchTest 6 | { 7 | public LdqTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "ldq"; 11 | this.Description = "Load Queue Test (loads pending retire)"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1"; 13 | this.GetFunctionCallParameters = "structIterations, A, B"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledLoads = new string[4]; 20 | unrolledLoads[0] = " mov (%r8), %r15"; 21 | unrolledLoads[1] = " mov (%r8), %r14"; 22 | unrolledLoads[2] = " mov (%r8), %r13"; 23 | unrolledLoads[3] = " mov (%r8), %r12"; 24 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledLoads, unrolledLoads, true); 25 | } 26 | 27 | 
public override void GenerateX86NasmAsm(StringBuilder sb) 28 | { 29 | string[] unrolledLoads = new string[4]; 30 | unrolledLoads[0] = " mov r15, [r8]"; 31 | unrolledLoads[1] = " mov r14, [r8]"; 32 | unrolledLoads[2] = " mov r13, [r8]"; 33 | unrolledLoads[3] = " mov r12, [r8]"; 34 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledLoads, unrolledLoads, true); 35 | } 36 | 37 | public override void GenerateArmAsm(StringBuilder sb) 38 | { 39 | string[] unrolledLoads = new string[4]; 40 | unrolledLoads[0] = " ldr x15, [x2]"; 41 | unrolledLoads[1] = " ldr x14, [x2]"; 42 | unrolledLoads[2] = " ldr x13, [x2]"; 43 | unrolledLoads[3] = " ldr x12, [x2]"; 44 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledLoads, unrolledLoads, true); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /AsmGen/tests/LoadDivNsqTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class LoadDivNsqTest : UarchTest 6 | { 7 | public LoadDivNsqTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "loaddivnsq"; 11 | this.Description = "Load Scheduler Capacity Test, using divs to block retirement, excluding NSQ"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int count, int *arr2"; 13 | this.GetFunctionCallParameters = "structIterations, list_size, B"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] dependentLoads = new string[1]; 20 | dependentLoads[0] = " mov (%r8, %rdx, 4), %r15"; 21 | 22 | string[] indepLoads = new string[1]; 23 | indepLoads[0] = " mov (%r8), %r15"; 24 | 25 | UarchTestHelpers.GenerateX86AsmDivNsqTestFuncs(sb, this.Counts[this.Counts.Length - 1], this.Counts, this.Prefix, 
dependentLoads, indepLoads, false); 26 | } 27 | 28 | public override void GenerateX86NasmAsm(StringBuilder sb) 29 | { 30 | // not implemented 31 | string[] dependentLoads = new string[1]; 32 | dependentLoads[0] = " mov r15, [r8 + rdx * 4]"; 33 | 34 | UarchTestHelpers.GenerateX86NasmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentLoads, dependentLoads, false); 35 | } 36 | 37 | public override void GenerateArmAsm(StringBuilder sb) 38 | { 39 | // not implemented 40 | string[] dependentLoads = new string[1]; 41 | dependentLoads[0] = " ldr w15, [x2, w25, uxtw #2]"; 42 | 43 | string[] dependentLoads1 = new string[1]; 44 | dependentLoads1[0] = " ldr w15, [x2, w26, uxtw #2]"; 45 | 46 | UarchTestHelpers.GenerateArmAsmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentLoads, dependentLoads1, false); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /AsmGen/tests/StqTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class StqTest : UarchTest 6 | { 7 | public StqTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "stq"; 11 | this.Description = "Store Queue Test (stores pending retire)"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1"; 13 | this.GetFunctionCallParameters = "structIterations, A, B"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledLoads = new string[4]; 20 | unrolledLoads[0] = " mov %r15, (%r8)"; 21 | unrolledLoads[1] = " mov %r14, (%r8)"; 22 | unrolledLoads[2] = " mov %r13, (%r8)"; 23 | unrolledLoads[3] = " mov %r12, (%r8)"; 24 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledLoads, unrolledLoads, true); 25 | } 26 | 27 | public override 
void GenerateX86NasmAsm(StringBuilder sb) 28 | { 29 | string[] unrolledLoads = new string[4]; 30 | unrolledLoads[0] = " mov [r8], r15"; 31 | unrolledLoads[1] = " mov [r8], r14"; 32 | unrolledLoads[2] = " mov [r8], r13"; 33 | unrolledLoads[3] = " mov [r8], r12"; 34 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledLoads, unrolledLoads, true); 35 | } 36 | 37 | public override void GenerateArmAsm(StringBuilder sb) 38 | { 39 | string[] unrolledLoads = new string[4]; 40 | unrolledLoads[0] = " str x15, [x2]"; 41 | unrolledLoads[1] = " str x14, [x2]"; 42 | unrolledLoads[2] = " str x13, [x2]"; 43 | unrolledLoads[3] = " str x12, [x2]"; 44 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledLoads, unrolledLoads, true); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /AsmGen/tests/MixMaskIntRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixMaskIntRfTest : UarchTest 6 | { 7 | public MixMaskIntRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixmaskintrf"; 11 | this.Description = "Mixed Integer and Mask (K regs) RF Test - AVX-512 x86 CPUs only"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledAdds = new string[4]; 20 | unrolledAdds[0] = " kaddb %k0, %k1, %k1"; 21 | unrolledAdds[1] = " add %r14, %r13"; 22 | unrolledAdds[2] = " kaddb %k0, %k3, %k3"; 23 | unrolledAdds[3] = " add %r11, %r12"; 24 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false); 25 | } 26 | 27 
| public override void GenerateX86NasmAsm(StringBuilder sb) 28 | { 29 | string[] unrolledAdds = new string[4]; 30 | unrolledAdds[0] = " kaddb k1, k1, k0"; 31 | unrolledAdds[1] = " add r13, r14"; 32 | unrolledAdds[2] = " kaddb k3, k3, k0"; 33 | unrolledAdds[3] = " add r12, r11"; 34 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false); 35 | } 36 | 37 | public override void GenerateArmAsm(StringBuilder sb) 38 | { 39 | string[] unrolledAdds = new string[4]; 40 | unrolledAdds[0] = " add x15, x15, x11"; 41 | unrolledAdds[1] = " add x14, x14, x11"; 42 | unrolledAdds[2] = " add x13, x13, x11"; 43 | unrolledAdds[3] = " add x12, x12, x11"; 44 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /AsmGen/tests/MaskRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MaskRfTest : UarchTest 6 | { 7 | public MaskRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "maskrf"; 11 | this.Description = "Mask (K regs) RF Test - AVX-512 x86 CPUs only"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledAdds = new string[4]; 20 | unrolledAdds[0] = " kaddb %k0, %k1, %k1"; 21 | unrolledAdds[1] = " kaddb %k0, %k2, %k2"; 22 | unrolledAdds[2] = " kaddb %k0, %k3, %k3"; 23 | unrolledAdds[3] = " kaddb %k0, %k4, %k4"; 24 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false); 25 | } 26 | 27 | 
public override void GenerateX86NasmAsm(StringBuilder sb) 28 | { 29 | string[] unrolledAdds = new string[4]; 30 | unrolledAdds[0] = " kaddb k1, k1, k0"; 31 | unrolledAdds[1] = " kaddb k2, k2, k0"; 32 | unrolledAdds[2] = " kaddb k3, k3, k0"; 33 | unrolledAdds[3] = " kaddb k4, k4, k0"; 34 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false); 35 | } 36 | 37 | public override void GenerateArmAsm(StringBuilder sb) 38 | { 39 | string[] unrolledAdds = new string[4]; 40 | unrolledAdds[0] = " add x15, x15, x11"; 41 | unrolledAdds[1] = " add x14, x14, x11"; 42 | unrolledAdds[2] = " add x13, x13, x11"; 43 | unrolledAdds[3] = " add x12, x12, x11"; 44 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, true); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /AsmGen/tests/FaddSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class FaddSchedTest : UarchTest 6 | { 7 | public FaddSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "faddsched"; 11 | this.Description = "FP (32-bit add) Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | // xmm0 is dependent on ptr chasing load 20 | string[] unrolledAdds = new string[4]; 21 | unrolledAdds[0] = " addss %xmm0, %xmm1"; 22 | unrolledAdds[1] = " addss %xmm0, %xmm2"; 23 | unrolledAdds[2] = " addss %xmm0, %xmm3"; 24 | unrolledAdds[3] = " addss %xmm0, %xmm4"; 25 | 26 | UarchTestHelpers.GenerateX86AsmFpSchedTestFuncs(sb, 
this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 27 | } 28 | 29 | public override void GenerateX86NasmAsm(StringBuilder sb) 30 | { 31 | string[] unrolledAdds = new string[4]; 32 | unrolledAdds[0] = " addss xmm1, xmm0"; 33 | unrolledAdds[1] = " addss xmm2, xmm0"; 34 | unrolledAdds[2] = " addss xmm3, xmm0"; 35 | unrolledAdds[3] = " addss xmm4, xmm0"; 36 | UarchTestHelpers.GenerateX86NasmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 37 | } 38 | 39 | public override void GenerateArmAsm(StringBuilder sb) 40 | { 41 | string[] unrolledAdds = new string[4]; 42 | unrolledAdds[0] = " fadd s17, s17, s16"; 43 | unrolledAdds[1] = " fadd s18, s18, s16"; 44 | unrolledAdds[2] = " fadd s19, s19, s16"; 45 | unrolledAdds[3] = " fadd s20, s20, s16"; 46 | UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /AsmGen/tests/FmulSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class FmulSchedTest : UarchTest 6 | { 7 | public FmulSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "fmulsched"; 11 | this.Description = "FP (32-bit multiply) Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | // xmm0 is dependent on ptr chasing load 20 | string[] unrolledAdds = new string[4]; 21 | unrolledAdds[0] = " mulss %xmm0, %xmm1"; 22 | unrolledAdds[1] = " mulss %xmm0, %xmm2"; 23 | unrolledAdds[2] = " mulss %xmm0, %xmm3"; 24 | unrolledAdds[3] = " mulss %xmm0, %xmm4"; 
25 | 26 | UarchTestHelpers.GenerateX86AsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 27 | } 28 | 29 | public override void GenerateX86NasmAsm(StringBuilder sb) 30 | { 31 | string[] unrolledAdds = new string[4]; 32 | unrolledAdds[0] = " mulss xmm1, xmm0"; 33 | unrolledAdds[1] = " mulss xmm2, xmm0"; 34 | unrolledAdds[2] = " mulss xmm3, xmm0"; 35 | unrolledAdds[3] = " mulss xmm4, xmm0"; 36 | UarchTestHelpers.GenerateX86NasmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 37 | } 38 | 39 | public override void GenerateArmAsm(StringBuilder sb) 40 | { 41 | string[] unrolledAdds = new string[4]; 42 | unrolledAdds[0] = " fmul s17, s17, s16"; 43 | unrolledAdds[1] = " fmul s18, s18, s16"; 44 | unrolledAdds[2] = " fmul s19, s19, s16"; 45 | unrolledAdds[3] = " fmul s20, s20, s16"; 46 | UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /AsmGen/tests/MixLoadStoreDivSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixLoadStoreDivSchedTest : UarchTest 6 | { 7 | public MixLoadStoreDivSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixloadstoredivsched"; 11 | this.Description = "Load/Store Scheduler Capacity Test, using divs to block retirement"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int count, int *arr2, int *arr3"; 13 | this.GetFunctionCallParameters = "structIterations, list_size, B, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] dependentLoads = new string[2]; 20 | dependentLoads[0] = " mov (%r9, %rdx, 4), %r15"; 21 | dependentLoads[1] = " mov 
%r14, (%r8, %rdx, 4)"; 22 | 23 | UarchTestHelpers.GenerateX86AsmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentLoads, dependentLoads, false); 24 | } 25 | 26 | public override void GenerateX86NasmAsm(StringBuilder sb) 27 | { 28 | string[] dependentLoads = new string[2]; 29 | dependentLoads[0] = " mov r15, [r9 + rdx * 4]"; 30 | dependentLoads[1] = " mov [r8 + rdx * 4], r14"; 31 | 32 | UarchTestHelpers.GenerateX86NasmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentLoads, dependentLoads, false); 33 | } 34 | 35 | public override void GenerateArmAsm(StringBuilder sb) 36 | { 37 | string[] dependentLoads = new string[2]; 38 | dependentLoads[0] = " ldr w15, [x3, w25, uxtw #2]"; 39 | dependentLoads[1] = " str w14, [x2, w25, uxtw #2]"; 40 | 41 | string[] dependentLoads1 = new string[2]; 42 | dependentLoads1[0] = " ldr w15, [x3, w26, uxtw #2]"; 43 | dependentLoads1[1] = " str w14, [x2, w26, uxtw #2]"; 44 | 45 | UarchTestHelpers.GenerateArmAsmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentLoads, dependentLoads1, false); 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /AsmGen/tests/MixFaddFmulSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixFaddFmulSchedTest : UarchTest 6 | { 7 | public MixFaddFmulSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixfaddfmulsched"; 11 | this.Description = "FP (mixed 32-bit add and multiply) Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | // xmm0 is dependent on ptr chasing load 20 | 
string[] unrolledAdds = new string[4]; 21 | unrolledAdds[0] = " addss %xmm0, %xmm1"; 22 | unrolledAdds[1] = " mulss %xmm0, %xmm2"; 23 | unrolledAdds[2] = " addss %xmm0, %xmm3"; 24 | unrolledAdds[3] = " mulss %xmm0, %xmm4"; 25 | 26 | UarchTestHelpers.GenerateX86AsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 27 | } 28 | 29 | public override void GenerateX86NasmAsm(StringBuilder sb) 30 | { 31 | string[] unrolledAdds = new string[4]; 32 | unrolledAdds[0] = " addss xmm1, xmm0"; 33 | unrolledAdds[1] = " mulss xmm2, xmm0"; 34 | unrolledAdds[2] = " addss xmm3, xmm0"; 35 | unrolledAdds[3] = " mulss xmm4, xmm0"; 36 | UarchTestHelpers.GenerateX86NasmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 37 | } 38 | 39 | public override void GenerateArmAsm(StringBuilder sb) 40 | { 41 | string[] unrolledAdds = new string[4]; 42 | unrolledAdds[0] = " fadd s17, s17, s16"; 43 | unrolledAdds[1] = " fmul s18, s18, s16"; 44 | unrolledAdds[2] = " fadd s19, s19, s16"; 45 | unrolledAdds[3] = " fmul s20, s20, s16"; 46 | UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /AsmGen/tests/Fadd256SchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class Fadd256SchedTest : UarchTest 6 | { 7 | public Fadd256SchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "fadd256sched"; 11 | this.Description = "256-bit FADD Scheduler Capacity Test, 128-bit on ARM"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void 
GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | // ymm0 is dependent on ptr chasing load 20 | string[] unrolledAdds = new string[4]; 21 | unrolledAdds[0] = " vaddps %ymm0, %ymm1, %ymm1"; 22 | unrolledAdds[1] = " vaddps %ymm0, %ymm2, %ymm2"; 23 | unrolledAdds[2] = " vaddps %ymm0, %ymm3, %ymm3"; 24 | unrolledAdds[3] = " vaddps %ymm0, %ymm4, %ymm4"; /* AT&T order is src, src, dest: ymm4 must accumulate into itself like the other three (and like the NASM variant); writing ymm3 here clobbered the previous instruction's result */ 25 | 26 | UarchTestHelpers.GenerateX86AsmFp256SchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 27 | } 28 | 29 | public override void GenerateX86NasmAsm(StringBuilder sb) 30 | { 31 | string[] unrolledAdds = new string[4]; 32 | unrolledAdds[0] = " vaddps ymm1, ymm1, ymm0"; 33 | unrolledAdds[1] = " vaddps ymm2, ymm2, ymm0"; 34 | unrolledAdds[2] = " vaddps ymm3, ymm3, ymm0"; 35 | unrolledAdds[3] = " vaddps ymm4, ymm4, ymm0"; 36 | UarchTestHelpers.GenerateX86NasmFp256SchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 37 | } 38 | 39 | public override void GenerateArmAsm(StringBuilder sb) 40 | { 41 | string[] unrolledAdds = new string[4]; 42 | unrolledAdds[0] = " fadd v20.4s, v15.4s, v16.4s"; 43 | unrolledAdds[1] = " fadd v17.4s, v15.4s, v16.4s"; 44 | unrolledAdds[2] = " fadd v18.4s, v15.4s, v16.4s"; 45 | unrolledAdds[3] = " fadd v19.4s, v15.4s, v16.4s"; 46 | UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /AsmGen/tests/JumpNsqTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class JumpNsqTest : UarchTest 6 | { 7 | public JumpNsqTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "jmpnsq"; 11 | this.Description = "Not-taken Jump Scheduler Capacity Test, Excluding NSQ"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int 
*arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string ohnoLabelName = "jumpnsq_reallybadthing"; 20 | string[] dependentJumps = new string[1]; 21 | dependentJumps[0] = $" cmp %rdi, %rsi\n je {ohnoLabelName}"; 22 | 23 | // R14 is set to 1, so the test instruction will never set the zero flag 24 | string[] independentJumps = new string[1]; 25 | independentJumps[0] = $" test %r14, %r14\n je {ohnoLabelName}"; 26 | 27 | UarchTestHelpers.GenerateX86AsmNsqTestFuncs(sb, this.Counts[this.Counts.Length - 1], this.Counts, this.Prefix, dependentJumps, independentJumps, false); 28 | 29 | sb.AppendLine(ohnoLabelName + ":"); 30 | sb.AppendLine(" int3"); 31 | } 32 | 33 | public override void GenerateX86NasmAsm(StringBuilder sb) 34 | { 35 | string ohnoLabelName = "jumpnsq_reallybadthing"; 36 | string[] dependentJumps = new string[1]; 37 | dependentJumps[0] = $" cmp rdi, rsi\n je {ohnoLabelName}"; 38 | 39 | // R14 is set to 1, so the test instruction will never set the zero flag 40 | string[] independentJumps = new string[1]; 41 | independentJumps[0] = $" test r14, r14\n je {ohnoLabelName}"; 42 | 43 | UarchTestHelpers.GenerateX86NasmNsqTestFuncs(sb, this.Counts[this.Counts.Length - 1], this.Counts, this.Prefix, dependentJumps, independentJumps, false); 44 | 45 | sb.AppendLine(ohnoLabelName + ":"); 46 | sb.AppendLine(" int3"); 47 | } 48 | 49 | public override void GenerateArmAsm(StringBuilder sb) 50 | { 51 | UarchTestHelpers.GenerateStub(sb, this.Counts, this.Prefix); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /AsmGen/tests/FaddNsqTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class FaddNsqTest : UarchTest 6 | { 7 | private int high; 8 | 9 | public FaddNsqTest(int low, int 
high, int step) 10 | { 11 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 12 | this.Prefix = "faddnsq"; 13 | this.Description = "FP (32-bit add) Scheduler Test, excluding any NSQ"; 14 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 15 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 16 | this.DivideTimeByCount = false; 17 | this.high = high; 18 | } 19 | 20 | public override void GenerateX86GccAsm(StringBuilder sb) 21 | { 22 | // xmm0 is dependent on ptr chasing load 23 | string initInstrs = " cvtsi2ss %r11, %xmm3\n"; 24 | string postLoadInstr = " cvtsi2ss %rdi, %xmm0"; 25 | string[] depAdds = new string[2]; 26 | depAdds[0] = " addss %xmm0, %xmm1"; 27 | depAdds[1] = " addss %xmm0, %xmm2"; 28 | 29 | string[] indepAdds = new string[2]; 30 | indepAdds[0] = " addss %xmm3, %xmm4"; 31 | indepAdds[1] = " addss %xmm3, %xmm5"; 32 | 33 | UarchTestHelpers.GenerateX86AsmNsqTestFuncs(sb, this.high, this.Counts, this.Prefix, depAdds, indepAdds, false, initInstrs, postLoadInstr); 34 | } 35 | 36 | public override void GenerateX86NasmAsm(StringBuilder sb) 37 | { 38 | string initInstrs = " movq xmm3, r11\n xorps xmm1, xmm1\n xorps xmm2, xmm2\n xorps xmm4, xmm4\n xorps xmm5, xmm5\n"; 39 | string postLoadInstr = " cvtsi2ss xmm0, rdi"; 40 | string[] depAdds = new string[2]; 41 | depAdds[0] = " addss xmm1, xmm0"; 42 | depAdds[1] = " addss xmm2, xmm0"; 43 | 44 | string[] indepAdds = new string[2]; 45 | indepAdds[0] = " addss xmm4, xmm3"; 46 | indepAdds[1] = " addss xmm5, xmm3"; 47 | 48 | UarchTestHelpers.GenerateX86NasmNsqTestFuncs(sb, this.high, this.Counts, this.Prefix, depAdds, indepAdds, false, initInstrs, postLoadInstr); 49 | } 50 | 51 | public override void GenerateArmAsm(StringBuilder sb) 52 | { 53 | UarchTestHelpers.GenerateStub(sb, this.Counts, this.Prefix); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /AsmGen/tests/MixJumpAddSchedTest.cs: 
-------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixJmpAddSched : UarchTest 6 | { 7 | public MixJmpAddSched(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixjmpaddsched"; 11 | this.Description = "Not-taken Jump + Add Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string postLoadInstr1 = " add %rdi, %r11"; 20 | string postLoadInstr2 = " add %rsi, %r11"; 21 | string[] unrolledJumps = new string[2]; 22 | unrolledJumps[0] = $" cmp %rdi, %rsi\n je jumpsched_reallybadthing_jadd"; 23 | unrolledJumps[1] = " add %r11, %r15"; 24 | 25 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 26 | 27 | sb.AppendLine("jumpsched_reallybadthing_jadd:"); 28 | sb.AppendLine(" int3"); 29 | } 30 | 31 | public override void GenerateX86NasmAsm(StringBuilder sb) 32 | { 33 | string[] unrolledJumps = new string[2]; 34 | unrolledJumps[0] = " cmp rdi, rsi\n je jumpsched_reallybadthing_jadd"; 35 | unrolledJumps[1] = " add r15, rdi"; 36 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false); 37 | 38 | sb.AppendLine("jumpsched_reallybadthing_jadd:"); 39 | sb.AppendLine(" int3"); 40 | } 41 | 42 | public override void GenerateArmAsm(StringBuilder sb) 43 | { 44 | string[] unrolledJumps = new string[2]; 45 | //string initInstrs = "jumpsched_reallybadthing_jadd:"; 46 | unrolledJumps[0] = " add w14, w13, w25"; 47 | unrolledJumps[1] = " cbz w14, jumpsched_reallybadthing_jadd"; 48 | 
UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false); 49 | 50 | sb.AppendLine("jumpsched_reallybadthing_jadd:"); 51 | sb.AppendLine(" .word 0xf7f0a000"); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /AsmGen/tests/JumpAddSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class JumpAddSchedTest : UarchTest 6 | { 7 | public JumpAddSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "jmpaddsched"; 11 | this.Description = "Not-taken Jump + Add Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string postLoadInstr1 = " add %rdi, %r11"; 20 | string postLoadInstr2 = " add %rsi, %r11"; 21 | string[] unrolledJumps = new string[2]; 22 | unrolledJumps[0] = $" cmp %rdi, %rsi\n je jumpsched_reallybadthing_jadd"; 23 | unrolledJumps[1] = " add %r11, %r15"; 24 | 25 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 26 | 27 | sb.AppendLine("jumpsched_reallybadthing_jadd:"); 28 | sb.AppendLine(" int3"); 29 | } 30 | 31 | public override void GenerateX86NasmAsm(StringBuilder sb) 32 | { 33 | string[] unrolledJumps = new string[2]; 34 | unrolledJumps[0] = " cmp rdi, rsi\n je jumpsched_reallybadthing_jadd"; 35 | unrolledJumps[1] = " add r15, rdi"; 36 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false); 37 | 38 | 
sb.AppendLine("jumpsched_reallybadthing_jadd:"); 39 | sb.AppendLine(" int3"); 40 | } 41 | 42 | public override void GenerateArmAsm(StringBuilder sb) 43 | { 44 | string[] unrolledJumps = new string[2]; 45 | //string initInstrs = "jumpsched_reallybadthing_jadd:"; 46 | unrolledJumps[0] = " cmp x25, x26\n b.eq jumpsched_reallybadthing_jadd"; 47 | unrolledJumps[1] = " add w14, w13, w25"; 48 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false); 49 | 50 | sb.AppendLine("jumpsched_reallybadthing_jadd:"); 51 | sb.AppendLine(" .word 0xf7f0a000"); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /AsmGen/tests/MixBtsMulSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixMulBtsSchedTest : UarchTest 6 | { 7 | public MixMulBtsSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixmulbtssched"; 11 | this.Description = "Mixed Multiply/BTS Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | /* instrs1 uses rdi/edi and instrs2 uses rsi/esi, mirroring the two sequences the NASM variant of this test passes */ string[] instrs1 = new string[2]; 20 | instrs1[0] = " bts %rdi, %r15"; 21 | instrs1[1] = " imul %edi, %r12d"; 22 | string[] instrs2 = new string[2]; 23 | instrs2[0] = " bts %rsi, %r15"; /* was %rdi: the second sequence must read rsi, as the NASM variant's "bts r15, rsi" does */ 24 | instrs2[1] = " imul %esi, %r11d"; 25 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false); /* previously passed instrs1 twice, leaving instrs2 unused */ 26 | } 27 | 28 | public override void GenerateX86NasmAsm(StringBuilder sb) 29 | { 30 | string postLoadInstr1 = " mov r15, rdi"; 31 | string postLoadInstr2 = " mov r15, rsi"; 32 | string[] instrs = new string[2]; 33 | 
instrs[0] = " bts r15, rdi"; 34 | instrs[1] = " imul r12d, edi"; 35 | string[] instrs1 = new string[2]; 36 | instrs1[0] = " bts r15, rsi"; 37 | instrs1[1] = " imul r11d, esi"; 38 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 39 | } 40 | 41 | public override void GenerateArmAsm(StringBuilder sb) 42 | { 43 | /* ARM stand-in for the BTS/multiply mix: ror replaces bts; instrs multiplies by x25 and instrs1 by x26 */ string postLoadInstr1 = " mov x15, x25"; 44 | string postLoadInstr2 = " mov x15, x26"; 45 | string[] instrs = new string[2]; 46 | instrs[0] = " ror x15, x15, #1"; 47 | instrs[1] = " mul x12, x12, x25"; 48 | string[] instrs1 = new string[2]; 49 | instrs1[0] = " ror x15, x15, #1"; 50 | instrs1[1] = " mul x11, x11, x26"; 51 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); /* previously passed instrs twice, so the x26 sequence in instrs1 was never emitted; now matches the NASM call above */ 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /AsmGen/DataFiles/VsIndirectBranchFunction.c: -------------------------------------------------------------------------------- 1 | // similar but for indirect branch test 2 | // needs indirectBranchTestFuncArr generated 3 | // mode: 4 | // 0 - cycle through targets 5 | // 1 - random target selection 6 | // 2 - jump to middle 7 | float runIndirectBranchTest(uint32_t branchCountIdx, uint32_t targetCountIdx, uint32_t mode) { 8 | struct timeb start, end; 9 | uint32_t branchCount = indirectBranchCounts[branchCountIdx]; 10 | uint32_t targetCount = indirectBranchTargetCounts[targetCountIdx]; 11 | uint64_t iterations = 80000000 / branchCount; 12 | uint64_t(*branchtestFunc)(uint64_t, uint32_t**, uint32_t, uint64_t**) = indirectBranchTestFuncArr[branchCountIdx][targetCountIdx]; 13 | 14 | // generate an array containing jump target indexes for every branch 15 | uint32_t** testArrToArr = (uint32_t**)malloc(sizeof(uint32_t*) * branchCount); 16 | for (int 
testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) { 17 | uint32_t* testArr = (uint32_t*)malloc(sizeof(uint32_t) * targetCount); 18 | if (mode == 1) 19 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = rand() % targetCount; 20 | else if (mode == 0) 21 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = i; 22 | else if (mode == 2) 23 | for (uint32_t i = 0; i < targetCount; i++) testArr[i] = targetCount / 2; 24 | testArrToArr[testArrIdx] = testArr; 25 | } 26 | 27 | // each branch needs a jump table 28 | uint64_t** jumpTables = (uint64_t**)malloc(sizeof(uint64_t*) * branchCount); 29 | for (int jumpTableIdx = 0; jumpTableIdx < branchCount; jumpTableIdx++) 30 | { 31 | uint64_t* jumpTable = (uint64_t*)malloc(sizeof(uint64_t) * targetCount); 32 | jumpTables[jumpTableIdx] = jumpTable; 33 | } 34 | 35 | ftime(&start); 36 | // uint64_t iterations, uint32_t **arr, uint32_t arrLen, uint64_t **scratch 37 | branchtestFunc(iterations, testArrToArr, targetCount, jumpTables); 38 | ftime(&end); 39 | uint64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm); 40 | float latency = 1e6 * (float)time_diff_ms / (float)iterations; 41 | 42 | // give result in latency per branch 43 | latency = latency / branchCount; 44 | 45 | for (int testArrIdx = 0; testArrIdx < branchCount; testArrIdx++) free(testArrToArr[testArrIdx]); 46 | free(testArrToArr); 47 | for (int jumpTableIdx = 0; jumpTableIdx < branchCount; jumpTableIdx++) free(jumpTables[jumpTableIdx]); 48 | free(jumpTables); 49 | return latency; 50 | } 51 | -------------------------------------------------------------------------------- /CoreClockChecker/BoostClockChecker.c: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the seven header names were missing from this copy; they are reconstructed from the identifiers used below (printf, malloc/atol, uint64_t, strncmp, sleep, gettimeofday, time_t) - confirm set and order against the upstream file */ #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <stdint.h> 4 | #include <string.h> 5 | #include <unistd.h> 6 | #include <sys/time.h> 7 | #include <time.h> 8 | 9 | extern uint64_t clktsctest(uint64_t iterations) __attribute((ms_abi)); 10 | 11 | int main(int argc, char *argv[]) { 12 | struct timeval startTv, 
endTv; 13 | uint64_t iterations = 500000, samples = 100; 14 | unsigned int sleepSeconds = 5; 15 | time_t time_diff_ms; 16 | 17 | for (int argIdx = 1; argIdx < argc; argIdx++) { 18 | if (*(argv[argIdx]) == '-') { 19 | char *arg = argv[argIdx] + 1; 20 | if (strncmp(arg, "samples", 7) == 0) { 21 | argIdx++; 22 | samples = atol(argv[argIdx]); 23 | } else if (strncmp(arg, "iterations", 10) == 0) { 24 | argIdx++; 25 | iterations = atol(argv[argIdx]); 26 | } else if (strncmp(arg, "sleep", 5) == 0) { 27 | argIdx++; 28 | sleepSeconds = atoi(argv[argIdx]); 29 | } 30 | } 31 | } 32 | 33 | sleep(sleepSeconds); 34 | 35 | uint64_t *measuredTscs = malloc(samples * sizeof(uint64_t)); 36 | for (uint64_t sampleIdx = 0; sampleIdx < samples; sampleIdx++) { 37 | uint64_t elapsedTsc = clktsctest(iterations); 38 | measuredTscs[sampleIdx] = elapsedTsc; 39 | } 40 | 41 | fprintf(stderr, "Used %lu samples\n", samples); 42 | fprintf(stderr, "Used %lu iterations\n", iterations); 43 | // figure out TSC to real time ratio 44 | fprintf(stderr, "Checking TSC ratio...\n"); 45 | uint64_t iterationsHi = 8e9; // should be a couple seconds at least? 
46 | gettimeofday(&startTv, NULL); 47 | uint64_t referenceElapsedTsc = clktsctest(iterationsHi); 48 | gettimeofday(&endTv, NULL); 49 | time_diff_ms = 1000 * (endTv.tv_sec - startTv.tv_sec) + ((endTv.tv_usec - startTv.tv_usec) / 1000); 50 | float tsc_per_ms = (float)referenceElapsedTsc / (float)time_diff_ms; 51 | float tsc_per_ns = tsc_per_ms / 1e6; 52 | fprintf(stderr, "TSC = %lu, elapsed ms = %lu\n", referenceElapsedTsc, time_diff_ms); 53 | fprintf(stderr, "TSC per ms: %f, TSC per ns: %f\n", tsc_per_ms, tsc_per_ns); 54 | 55 | printf("Time (ms), Clk (GHz), TSC\n"); 56 | float elapsedTime = 0; 57 | for (uint64_t sampleIdx = 0; sampleIdx < samples; sampleIdx++) { 58 | // (tsc / ms) * tsc = 1 / ms 59 | float elapsedTimeMs = measuredTscs[sampleIdx] / tsc_per_ms; 60 | elapsedTime += elapsedTimeMs; 61 | float latency = 1e6 * elapsedTimeMs / (float)iterations; 62 | float addsPerNs = 1 / latency; 63 | printf("%f,%f,%lu\n", elapsedTime, addsPerNs, measuredTscs[sampleIdx]); 64 | } 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /AsmGen/tests/MixPdepMulSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixPdepMulSchedTest : UarchTest 6 | { 7 | public MixPdepMulSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixpdepmulsched"; 11 | this.Description = "Mixed Multiply/PDEP Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] instrs1 = new string[2]; 20 | instrs1[0] = " pdep %rdi, %r14, %r15"; 21 | instrs1[1] = " imul %edi, %r12d"; 22 | string[] instrs2 = new string[2]; 23 | 
instrs2[0] = " pdep %rsi, %r14, %r15"; 24 | instrs2[1] = " imul %esi, %r11d"; 25 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs1, false); 26 | } 27 | 28 | public override void GenerateX86NasmAsm(StringBuilder sb) 29 | { 30 | string postLoadInstr1 = " mov r15, rdi"; 31 | string postLoadInstr2 = " mov r15, rsi"; 32 | string[] instrs = new string[2]; 33 | instrs[0] = " pdep r15, rdi, r14"; 34 | instrs[1] = " imul r12d, edi"; 35 | string[] instrs1 = new string[2]; 36 | instrs1[0] = " pdep r15, rsi, r14"; 37 | instrs1[1] = " imul r11d, esi"; 38 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 39 | } 40 | 41 | public override void GenerateArmAsm(StringBuilder sb) 42 | { 43 | // todo, or not. no lea on aarch64 44 | string postLoadInstr1 = " mov x15, x25"; 45 | string postLoadInstr2 = " mov x15, x26"; 46 | string[] instrs = new string[2]; 47 | instrs[0] = " ror x15, x15, #1"; 48 | instrs[1] = " mul x12, x12, x25"; 49 | string[] instrs1 = new string[2]; 50 | instrs1[0] = " ror x15, x15, #1"; 51 | instrs1[1] = " mul x11, x11, x26"; 52 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /AsmGen/tests/MixLeaMulSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixLeaMulSchedTest : UarchTest 6 | { 7 | public MixLeaMulSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixleamulsched"; 11 | this.Description = "Mixed Multiply/lea Scheduler Capacity Test"; 12 | 
this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] instrs1 = new string[2]; 20 | instrs1[0] = " lea (%rdx,%rdi,8), %r15"; 21 | instrs1[1] = " imul %edi, %r12d"; 22 | string[] instrs2 = new string[2]; 23 | instrs2[0] = " lea (%rdx,%rsi,8), %r15"; 24 | instrs2[1] = " imul %esi, %r11d"; 25 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs1, false); 26 | } 27 | 28 | public override void GenerateX86NasmAsm(StringBuilder sb) 29 | { 30 | string postLoadInstr1 = " mov r15, rdi"; 31 | string postLoadInstr2 = " mov r15, rsi"; 32 | string[] instrs = new string[2]; 33 | instrs[0] = " lea r15, [rdx + rdi * 8]"; 34 | instrs[1] = " imul r12d, edi"; 35 | string[] instrs1 = new string[2]; 36 | instrs1[0] = " lea r15, [rdx + rsi * 8]"; 37 | instrs1[1] = " imul r11d, esi"; 38 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 39 | } 40 | 41 | public override void GenerateArmAsm(StringBuilder sb) 42 | { 43 | // todo, or not. 
no lea on aarch64 44 | string postLoadInstr1 = " mov x15, x25"; 45 | string postLoadInstr2 = " mov x15, x26"; 46 | string[] instrs = new string[2]; 47 | instrs[0] = " ror x15, x15, #1"; 48 | instrs[1] = " mul x12, x12, x25"; 49 | string[] instrs1 = new string[2]; 50 | instrs1[0] = " ror x15, x15, #1"; 51 | instrs1[1] = " mul x11, x11, x26"; 52 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /AsmGen/DataFiles/GccIndirectBranchFunction.c: -------------------------------------------------------------------------------- 
// Times one generated indirect branch test function and returns the average
// time per branch (wall-clock milliseconds scaled by 1e6, i.e. nanoseconds).
//
// needs indirectBranchTestFuncArr generated (along with indirectBranchCounts
// and indirectBranchTargetCounts, which the two index parameters select from)
//
// mode selects how each branch's array of target indexes is filled:
//   0 - cycle through targets
//   1 - random target selection
//   2 - jump to middle
// Any other mode leaves every index at 0: the arrays are zero-initialised, so
// the test function never sees indeterminate indexes.
//
// Returns 0 if either selected count is 0 or an allocation fails.
float runIndirectBranchTest(uint32_t branchCountIdx, uint32_t targetCountIdx, uint32_t mode) {
    struct timeval startTv, endTv;
    uint32_t branchCount = indirectBranchCounts[branchCountIdx];
    uint32_t targetCount = indirectBranchTargetCounts[targetCountIdx];
    uint64_t iterations, time_diff_ms;
    uint32_t** testArrToArr = NULL;
    uint64_t** jumpTables = NULL;
    float latency = 0;

    // both counts are used as divisors below
    if (branchCount == 0 || targetCount == 0) return 0;

    iterations = 80000000 / branchCount;
    uint64_t(*branchtestFunc)(uint64_t, uint32_t**, uint32_t, uint64_t **) __attribute((sysv_abi)) = indirectBranchTestFuncArr[branchCountIdx][targetCountIdx];

    // calloc keeps unfilled slots NULL so the cleanup path can free them blindly
    testArrToArr = (uint32_t**)calloc(branchCount, sizeof(uint32_t*));
    jumpTables = (uint64_t**)calloc(branchCount, sizeof(uint64_t*));
    if (testArrToArr == NULL || jumpTables == NULL) goto cleanup;

    // generate an array containing jump target indexes for every branch
    for (uint32_t branchIdx = 0; branchIdx < branchCount; branchIdx++) {
        uint32_t* testArr = (uint32_t*)calloc(targetCount, sizeof(uint32_t));
        if (testArr == NULL) goto cleanup;
        testArrToArr[branchIdx] = testArr;

        if (mode == 1)
            for (uint32_t i = 0; i < targetCount; i++) testArr[i] = rand() % targetCount;
        else if (mode == 0)
            for (uint32_t i = 0; i < targetCount; i++) testArr[i] = i;
        else if (mode == 2)
            for (uint32_t i = 0; i < targetCount; i++) testArr[i] = targetCount / 2;
    }

    // each branch needs a jump table; it is handed over as the scratch
    // argument and is not filled in here
    for (uint32_t branchIdx = 0; branchIdx < branchCount; branchIdx++) {
        jumpTables[branchIdx] = (uint64_t*)malloc(sizeof(uint64_t) * targetCount);
        if (jumpTables[branchIdx] == NULL) goto cleanup;
    }

    // the timezone argument of gettimeofday is obsolescent, so pass NULL
    gettimeofday(&startTv, NULL);
    // uint64_t iterations, uint32_t **arr, uint32_t arrLen, uint64_t **scratch
    branchtestFunc(iterations, testArrToArr, targetCount, jumpTables);
    gettimeofday(&endTv, NULL);
    time_diff_ms = 1000 * (endTv.tv_sec - startTv.tv_sec) + ((endTv.tv_usec - startTv.tv_usec) / 1000);
    latency = 1e6 * (float)time_diff_ms / (float)iterations;

    // give result in latency per branch
    latency = latency / branchCount;

cleanup:
    if (testArrToArr != NULL) {
        for (uint32_t branchIdx = 0; branchIdx < branchCount; branchIdx++) free(testArrToArr[branchIdx]);
        free(testArrToArr);
    }
    if (jumpTables != NULL) {
        for (uint32_t branchIdx = 0; branchIdx < branchCount; branchIdx++) free(jumpTables[branchIdx]);
        free(jumpTables);
    }
    return latency;
}
-------------------------------------------------------------------------------- /AsmGen/tests/MixAddJump21SchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixAddJmp21Sched : UarchTest 6 | { 7 | public MixAddJmp21Sched(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixaddjmp21sched"; 11 | this.Description = "Not-taken Jump + Add Scheduler Capacity Test, 1:2 ratio"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 
15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string postLoadInstr1 = " add %rdi, %r11"; 20 | string postLoadInstr2 = " add %rsi, %r11"; 21 | string[] unrolledJumps = new string[3]; 22 | unrolledJumps[0] = $" cmp %rdi, %rsi\n je jumpsched21_reallybadthing_jadd"; 23 | unrolledJumps[1] = " add %r11, %r15"; 24 | unrolledJumps[2] = " add %r11, %r14"; 25 | 26 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 27 | 28 | sb.AppendLine("jumpsched21_reallybadthing_jadd:"); 29 | sb.AppendLine(" int3"); 30 | } 31 | 32 | public override void GenerateX86NasmAsm(StringBuilder sb) 33 | { 34 | string[] unrolledJumps = new string[3]; 35 | unrolledJumps[0] = " cmp rdi, rsi\n je jumpsched21_reallybadthing_jadd"; 36 | unrolledJumps[1] = " add r15, rdi"; 37 | unrolledJumps[2] = " add r14, rdi"; 38 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false); 39 | 40 | sb.AppendLine("jumpsched21_reallybadthing_jadd:"); 41 | sb.AppendLine(" int3"); 42 | } 43 | 44 | public override void GenerateArmAsm(StringBuilder sb) 45 | { 46 | string[] unrolledJumps = new string[3]; 47 | //string initInstrs = "jumpsched_reallybadthing_jadd:"; 48 | unrolledJumps[0] = " add x14, x13, x25"; 49 | unrolledJumps[1] = " add x12, x13, x25"; 50 | unrolledJumps[2] = " cbz w12, jumpsched21_reallybadthing_jadd"; 51 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps, false); 52 | 53 | sb.AppendLine("jumpsched21_reallybadthing_jadd:"); 54 | sb.AppendLine(" .word 0xf7f0a000"); 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /AsmGen/tests/MxcsrTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | 
namespace AsmGen 4 | { 5 | public class MxcsrTest : UarchTest 6 | { 7 | public MxcsrTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mxcsrrename"; 11 | this.Description = "MXCSR renamed registers"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1"; 13 | this.GetFunctionCallParameters = "structIterations, A, B"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] setMxcsrInstrs = new string[2]; 20 | setMxcsrInstrs[0] = " mov $0x1f80, %r15\n mov %r15, (%r8)\n ldmxcsr (%r8)\n addss %xmm0, %xmm1"; // default 21 | setMxcsrInstrs[1] = " mov $0x9fc0, %r15\n mov %r15, (%r8)\n ldmxcsr (%r8)\n addss %xmm0, %xmm1"; // set denormals are zero, flush to zero 22 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, setMxcsrInstrs, setMxcsrInstrs, false); 23 | } 24 | 25 | public override void GenerateX86NasmAsm(StringBuilder sb) 26 | { 27 | string[] setMxcsrInstrs = new string[2]; 28 | setMxcsrInstrs[0] = " mov r15, 0x1f80\n mov [r8], r15\n ldmxcsr [r8]\n addss xmm0, xmm1"; // default 29 | setMxcsrInstrs[1] = " mov r15, 0x9fc0\n mov [r8], r15\n ldmxcsr [r8]\n addss xmm0, xmm1"; // set denormals are zero, flush to zero 30 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, setMxcsrInstrs, setMxcsrInstrs, false); 31 | } 32 | 33 | // todo 34 | public override void GenerateArmAsm(StringBuilder sb) 35 | { 36 | // read FPCR into x15, set x14 = flush denormals to zero enabled, x15 = flush denormals to zero disabled 37 | // x12 = mask with all bits set except bit 24 (flush to zero) - bitwise AND to unset bit 24 38 | // x13 = just bit 24 set with all other bits zero - bitwise OR to set bit 24 39 | string initInstrs = " mrs x15, fpcr\n mov x13, 1\n lsl x13, x13, 24\n neg x12, x13\n orr x14, x15, x13\n and x15, x15, x12"; 40 | string[] 
setFpcrInstrs = new string[2]; 41 | setFpcrInstrs[0] = " msr fpcr, x15\n fadd s2, s2, s3\n"; 42 | setFpcrInstrs[1] = " msr fpcr, x14\n fadd s4, s4, s5\n"; 43 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, setFpcrInstrs, setFpcrInstrs, false, initInstrs: initInstrs); 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /AsmGen/tests/MixRorBtsSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | 
namespace AsmGen
{
    /// <summary>
    /// Scheduler capacity test whose filler alternates a rotate (ROR) with a
    /// bit-test-and-set (BTS). Emits one test function per entry in Counts.
    /// </summary>
    public class MixRorBtsSchedTest : UarchTest
    {
        /// <param name="low">Forwarded to GenerateCountArray as the first bound.</param>
        /// <param name="high">Forwarded to GenerateCountArray as the second bound.</param>
        /// <param name="step">Forwarded to GenerateCountArray as the step.</param>
        public MixRorBtsSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mixrorbtssched";
            this.Description = "Mixed BTS/ROR Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>Appends the x86-64 test functions in GNU (AT&amp;T) syntax to <paramref name="sb"/>.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " mov %rdi, %r15";
            string postLoadInstr2 = " mov %rsi, %r15";

            // first filler block takes its inputs from rdi
            string[] instrs1 = new string[2];
            instrs1[0] = " ror $1, %r15";
            instrs1[1] = " bts %rdi, %r12";

            // second filler block takes its inputs from rsi
            string[] instrs2 = new string[2];
            instrs2[0] = " ror $1, %r15";
            instrs2[1] = " bts %rsi, %r11";

            // The second block must be instrs2, as in the NASM variant below;
            // instrs1 used to be passed twice, leaving instrs2 unused.
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Appends the x86-64 test functions in NASM (Intel) syntax to <paramref name="sb"/>.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " mov r15, rdi";
            string postLoadInstr2 = " mov r15, rsi";

            // NOTE(review): BTS uses 32-bit operands here but 64-bit operands in
            // the GCC variant - confirm whether the difference is intended.
            string[] instrs = new string[2];
            instrs[0] = " ror r15, 1";
            instrs[1] = " bts r12d, edi";

            string[] instrs1 = new string[2];
            instrs1[0] = " ror r15, 1";
            instrs1[1] = " bts r11d, esi";

            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>
        /// Appends the aarch64 test functions to <paramref name="sb"/>. The filler
        /// here is ROR + MUL rather than ROR + BTS (still marked todo).
        /// </summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // todo
            string postLoadInstr1 = " mov x15, x25";
            string postLoadInstr2 = " mov x15, x26";

            // first filler block takes its inputs from x25
            string[] instrs = new string[2];
            instrs[0] = " ror x15, x15, #1";
            instrs[1] = " mul x12, x12, x25";

            // second filler block takes its inputs from x26
            string[] instrs1 = new string[2];
            instrs1[0] = " ror x15, x15, #1";
            instrs1[1] = " mul x11, x11, x26";

            // The second block must be instrs1; instrs used to be passed twice.
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/MmxRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MmxRfTest : UarchTest 6 | { 7 | public MmxRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mmxrf"; 11 | this.Description = "64-bit MMX RF Capacity Test. 
x86 only"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2"; 13 | this.GetFunctionCallParameters = "structIterations, A, B"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string initInstrs = " movq (%rdx), %mm0\n" + 20 | " movq 8(%rdx), %mm1\n" + 21 | " movq 16(%rdx), %mm2\n" + 22 | " movq 24(%rdx), %mm3\n" + 23 | " movq 32(%rdx), %mm4\n"; 24 | 25 | string[] unrolledAdds = new string[4]; 26 | unrolledAdds[0] = " paddw %mm0, %mm1"; 27 | unrolledAdds[1] = " paddw %mm0, %mm2"; 28 | unrolledAdds[2] = " paddw %mm0, %mm3"; 29 | unrolledAdds[3] = " paddw %mm0, %mm4"; 30 | 31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, initInstrs: initInstrs); 32 | } 33 | 34 | public override void GenerateX86NasmAsm(StringBuilder sb) 35 | { 36 | string initInstrs = " movq mm0, [rdx]\n" + 37 | " movq mm1, [rdx + 8]\n" + 38 | " movq mm2, [rdx + 16]\n" + 39 | " movq mm3, [rdx + 24]\n" + 40 | " movq mm4, [rdx + 32]\n"; 41 | 42 | string[] unrolledAdds = new string[4]; 43 | unrolledAdds[0] = " paddw mm1, mm0"; 44 | unrolledAdds[1] = " paddw mm2, mm0"; 45 | unrolledAdds[2] = " paddw mm3, mm0"; 46 | unrolledAdds[3] = " paddw mm4, mm0"; 47 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, initInstrs: initInstrs); 48 | } 49 | 50 | public override void GenerateArmAsm(StringBuilder sb) 51 | { 52 | string[] unrolledAdds = new string[4]; 53 | unrolledAdds[0] = " add v15.2s, v15.2s, v19.2s"; 54 | unrolledAdds[1] = " add v16.2s, v16.2s, v19.2s"; 55 | unrolledAdds[2] = " add v17.2s, v17.2s, v19.2s"; 56 | unrolledAdds[3] = " add v18.2s, v18.2s, v19.2s"; 57 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- 
/AsmGen/tests/MixRorMulSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | 
namespace AsmGen
{
    /// <summary>
    /// Scheduler capacity test whose filler alternates a rotate (ROR) with an
    /// integer multiply. Emits one test function per entry in Counts.
    /// </summary>
    public class MixMulRorSchedTest : UarchTest
    {
        /// <param name="low">Forwarded to GenerateCountArray as the first bound.</param>
        /// <param name="high">Forwarded to GenerateCountArray as the second bound.</param>
        /// <param name="step">Forwarded to GenerateCountArray as the step.</param>
        public MixMulRorSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mixmulrorsched";
            this.Description = "Mixed Multiply/Rotate Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>Appends the x86-64 test functions in GNU (AT&amp;T) syntax to <paramref name="sb"/>.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " mov %rdi, %r15";
            string postLoadInstr2 = " mov %rsi, %r15";

            // first filler block takes its inputs from rdi/edi
            string[] instrs1 = new string[2];
            instrs1[0] = " ror $1, %r15";
            instrs1[1] = " imul %edi, %r12d";

            // second filler block takes its inputs from rsi/esi
            string[] instrs2 = new string[2];
            instrs2[0] = " ror $1, %r15";
            instrs2[1] = " imul %esi, %r11d";

            // The second block must be instrs2, as in the NASM variant below;
            // instrs1 used to be passed twice, leaving instrs2 unused.
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Appends the x86-64 test functions in NASM (Intel) syntax to <paramref name="sb"/>.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " mov r15, rdi";
            string postLoadInstr2 = " mov r15, rsi";

            string[] instrs = new string[2];
            instrs[0] = " ror r15, 1";
            instrs[1] = " imul r12d, edi";

            string[] instrs1 = new string[2];
            instrs1[0] = " ror r15, 1";
            instrs1[1] = " imul r11d, esi";

            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>Appends the aarch64 test functions to <paramref name="sb"/>.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " mov x15, x25";
            string postLoadInstr2 = " mov x15, x26";

            // first filler block takes its inputs from x25
            string[] instrs = new string[2];
            instrs[0] = " ror x15, x15, #1";
            instrs[1] = " mul x12, x12, x25";

            // second filler block takes its inputs from x26
            string[] instrs1 = new string[2];
            instrs1[0] = " ror x15, x15, #1";
            instrs1[1] = " mul x11, x11, x26";

            // The second block must be instrs1; instrs used to be passed twice.
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/NopLoopTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class NopLoopTest : UarchTest 6 | { 7 | /// 8 | /// 9 | /// 10 | /// must be greater than 2 11 | /// 12 | /// 13 | public NopLoopTest(int high, int step) 14 | { 15 | this.Counts = UarchTestHelpers.GenerateCountArray(3, high, step); 16 | this.Prefix = "noploop"; 17 | this.Description = $"NOP throughput for various loop sizes"; 18 | this.FunctionDefinitionParameters = "uint64_t iterations"; 19 | this.GetFunctionCallParameters = "structIterations"; 20 | this.DivideTimeByCount = true; 21 | } 22 | 23 | public override void GenerateX86GccAsm(StringBuilder sb) 24 | { 25 | for (int i = 0; i < Counts.Length; i++) 26 | { 27 | string funcName = this.Prefix + this.Counts[i]; 28 | sb.AppendLine(funcName + ":"); 29 | 30 | // count dec, jnz as instructions in the loop 31 | for (int nopIdx = 0; nopIdx < this.Counts[i] - 2; nopIdx++) sb.AppendLine(" nop"); 32 | sb.AppendLine(" dec %rdi"); 33 | sb.AppendLine(" jnz " + funcName); 34 | sb.AppendLine(" ret"); 35 | } 36 | } 37 | 38 | public override void GenerateX86NasmAsm(StringBuilder sb) 39 | { 40 | for (int i = 0; i < Counts.Length; i++) 41 | { 42 | string funcName = this.Prefix + this.Counts[i]; 43 | sb.AppendLine(funcName + ":"); 44 | 45 | // count dec, jnz as instructions in the loop 46 | for (int nopIdx = 0; nopIdx < this.Counts[i] - 2; nopIdx++) sb.AppendLine(" nop"); 47 | 
sb.AppendLine(" dec rcx"); 48 | sb.AppendLine(" jnz " + funcName); 49 | sb.AppendLine(" ret"); 50 | } 51 | } 52 | 53 | public override void GenerateArmAsm(StringBuilder sb) 54 | { 55 | for (int i = 0; i < Counts.Length; i++) 56 | { 57 | string funcName = this.Prefix + this.Counts[i]; 58 | sb.AppendLine(funcName + ":"); 59 | 60 | // count dec, jnz as instructions in the loop 61 | for (int nopIdx = 0; nopIdx < this.Counts[i] - 2; nopIdx++) sb.AppendLine(" nop"); 62 | sb.AppendLine(" sub x0, x0, 1"); 63 | sb.AppendLine(" cbnz x0, " + funcName); 64 | sb.AppendLine(" ret"); 65 | } 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /AsmGen/tests/FaddIntAddSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class FaddIntAddSchedTest : UarchTest 6 | { 7 | public FaddIntAddSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixfaddintaddsched"; 11 | this.Description = "Mixed FP/Integer Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | // xmm0 is dependent on ptr chasing load 20 | string[] unrolledAdds = new string[4]; 21 | unrolledAdds[0] = " addss %xmm0, %xmm1"; 22 | unrolledAdds[1] = " add %edi, %r11d"; 23 | unrolledAdds[2] = " addss %xmm0, %xmm3"; 24 | unrolledAdds[3] = " add %edi, %r12d"; 25 | 26 | string[] unrolledAdds1 = new string[4]; 27 | unrolledAdds1[0] = " addss %xmm0, %xmm1"; 28 | unrolledAdds1[1] = " add %esi, %r14d"; 29 | unrolledAdds1[2] = " addss %xmm0, %xmm3"; 30 | unrolledAdds1[3] = " add %esi, %r15d"; 31 | 32 | string rdicvt = "cvtsi2ss %rdi, %xmm0"; 33 | 
string rsicvt = "cvtsi2ss %rsi, %xmm0"; 34 | 35 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, 36 | includePtrChasingLoads: false, postLoadInstrs1: rdicvt, postLoadInstrs2: rsicvt); 37 | } 38 | 39 | // todo.... 40 | public override void GenerateX86NasmAsm(StringBuilder sb) 41 | { 42 | string[] unrolledAdds = new string[4]; 43 | unrolledAdds[0] = " addss xmm1, xmm0"; 44 | unrolledAdds[1] = " add r11d, edi"; 45 | unrolledAdds[2] = " addss xmm3, xmm0"; 46 | unrolledAdds[3] = " add r12d, edi"; 47 | UarchTestHelpers.GenerateX86NasmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 48 | } 49 | 50 | public override void GenerateArmAsm(StringBuilder sb) 51 | { 52 | string[] unrolledAdds = new string[4]; 53 | unrolledAdds[0] = " fadd s17, s17, s16"; 54 | unrolledAdds[1] = " fadd s18, s18, s16"; 55 | unrolledAdds[2] = " fadd s19, s19, s16"; 56 | unrolledAdds[3] = " fadd s20, s20, s16"; 57 | UarchTestHelpers.GenerateArmAsmFpSchedTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /AsmGen/tests/Add256RfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | 
namespace AsmGen
{
    /// <summary>
    /// Register file capacity test built from 256-bit integer adds (vpaddd on
    /// ymm registers) on x86 and 128-bit vector fadd on ARM. Emits one test
    /// function per entry in Counts.
    /// </summary>
    public class Add256RfTest : UarchTest
    {
        /// <param name="low">Forwarded to GenerateCountArray as the first bound.</param>
        /// <param name="high">Forwarded to GenerateCountArray as the second bound.</param>
        /// <param name="step">Forwarded to GenerateCountArray as the step.</param>
        public Add256RfTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "add256rf";
            this.Description = "256-bit Integer Add RF Capacity Test - 128-bit fadd on ARM";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
            this.GetFunctionCallParameters = "structIterations, A, B";
            this.DivideTimeByCount = false;
        }

        /// <summary>Appends the x86-64 test functions in GNU (AT&amp;T) syntax to <paramref name="sb"/>.</summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // load ymm0 from memory, then copy it into the four accumulators
            string initInstrs = " vmovdqu (%r8), %ymm0\n" +
                " vmovdqa %ymm0, %ymm1\n" +
                " vmovdqa %ymm0, %ymm2\n" +
                " vmovdqa %ymm0, %ymm3\n" +
                " vmovdqa %ymm0, %ymm4\n";

            // AT&T operand order: the destination is the last operand
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " vpaddd %ymm0, %ymm1, %ymm1";
            unrolledAdds[1] = " vpaddd %ymm0, %ymm2, %ymm2";
            unrolledAdds[2] = " vpaddd %ymm0, %ymm3, %ymm3";
            // the destination used to be %ymm3; the NASM variant below
            // accumulates into ymm4, so this one must as well
            unrolledAdds[3] = " vpaddd %ymm0, %ymm4, %ymm4";

            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, initInstrs: initInstrs);
        }

        /// <summary>Appends the x86-64 test functions in NASM (Intel) syntax to <paramref name="sb"/>.</summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            // load ymm0 from memory, then copy it into the four accumulators
            string initInstrs = " vmovdqu ymm0, [r8]\n" +
                " vmovdqa ymm1, ymm0\n" +
                " vmovdqa ymm2, ymm0\n" +
                " vmovdqa ymm3, ymm0\n" +
                " vmovdqa ymm4, ymm0\n";

            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " vpaddd ymm1, ymm1, ymm0";
            unrolledAdds[1] = " vpaddd ymm2, ymm2, ymm0";
            unrolledAdds[2] = " vpaddd ymm3, ymm3, ymm0";
            unrolledAdds[3] = " vpaddd ymm4, ymm4, ymm0";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, initInstrs: initInstrs);
        }

        /// <summary>Appends the aarch64 test functions (128-bit vector fadd) to <paramref name="sb"/>.</summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " fadd v15.4s, v15.4s, v19.4s";
            unrolledAdds[1] = " fadd v16.4s, v16.4s, v19.4s";
            unrolledAdds[2] = " fadd v17.4s, v17.4s, v19.4s";
            unrolledAdds[3] = " fadd v18.4s, v18.4s, v19.4s";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/MixIntFpRf13Test.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class MixIntFp13RfTest : UarchTest 6 | { 7 | public MixIntFp13RfTest(int low, int high, 
int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixintfp13rf"; 11 | this.Description = "Mix of integer and FP register file, 1:3 ratio"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string initInstrs = " movss (%r8), %xmm1\n" + 20 | " movss 4(%r8), %xmm2\n" + 21 | " movss 8(%r8), %xmm3\n" + 22 | " movss 12(%r8), %xmm4\n" + 23 | " movss 16(%r8), %xmm5\n"; 24 | 25 | string[] instrs = new string[4]; 26 | instrs[0] = "add %r15, %r14"; 27 | instrs[1] = "addss %xmm1, %xmm2"; 28 | instrs[2] = "addss %xmm1, %xmm3"; 29 | instrs[3] = "addss %xmm1, %xmm4"; 30 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true, initInstrs); 31 | } 32 | 33 | public override void GenerateX86NasmAsm(StringBuilder sb) 34 | { 35 | string initInstrs = " movss xmm1, [r8]\n" + 36 | " movss xmm2, [r8 + 4]\n" + 37 | " movss xmm3, [r8 + 8]\n" + 38 | " movss xmm4, [r8 + 12]\n" + 39 | " movss xmm5, [r8 + 16]\n"; 40 | 41 | string[] instrs = new string[4]; 42 | instrs[0] = "add r14, r15"; 43 | instrs[1] = "addss xmm2, xmm1"; 44 | instrs[2] = "addss xmm3, xmm1"; 45 | instrs[3] = "addss xmm4, xmm1"; 46 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true, initInstrs); 47 | } 48 | 49 | public override void GenerateArmAsm(StringBuilder sb) 50 | { 51 | string initInstrs = " ldr s17, [x2]\n" + 52 | " ldr s18, [x2, 4]\n" + 53 | " ldr s19, [x2, 8]\n" + 54 | " ldr s20, [x2, 12]\n" + 55 | " ldr s21, [x2, 16]\n"; 56 | 57 | string[] instrs = new string[4]; 58 | instrs[0] = " add x15, x15, x11"; 59 | instrs[1] = " fadd s18, s18, s17"; 60 | instrs[2] = " fadd s19, s19, s17"; 61 | instrs[3] = " fadd s20, s20, s17"; 62 | 
UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true, initInstrs); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /AsmGen/tests/BtsSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class BtsSchedTest : UarchTest 6 | { 7 | public BtsSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "btssched"; 11 | this.Description = "Bit Test + Set CF Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string postLoadInstr1 = " mov %rdi, %r15"; 20 | string postLoadInstr2 = " mov %rsi, %r15"; 21 | string[] instrs = new string[4]; 22 | instrs[0] = " bts %r14, %r15"; 23 | instrs[1] = " bts %r13, %r15"; 24 | instrs[2] = " bts %r12, %r15"; 25 | instrs[3] = " bts %r11, %r15"; 26 | 27 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 28 | } 29 | 30 | public override void GenerateX86NasmAsm(StringBuilder sb) 31 | { 32 | // todo im tired 33 | string[] unrolledAdds = new string[4]; 34 | unrolledAdds[0] = " add r15, rdi"; 35 | unrolledAdds[1] = " add r14, rdi"; 36 | unrolledAdds[2] = " add r13, rdi"; 37 | unrolledAdds[3] = " add r12, rdi"; 38 | 39 | string[] unrolledAdds1 = new string[4]; 40 | unrolledAdds1[0] = " add r15, rsi"; 41 | unrolledAdds1[1] = " add r14, rsi"; 42 | unrolledAdds1[2] = " add r13, rsi"; 43 | unrolledAdds1[3] = " add r12, rsi"; 44 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, 
unrolledAdds1, false); 45 | } 46 | 47 | public override void GenerateArmAsm(StringBuilder sb) 48 | { 49 | string[] unrolledAdds = new string[4]; 50 | unrolledAdds[0] = " add x15, x15, x25"; 51 | unrolledAdds[1] = " add x14, x14, x25"; 52 | unrolledAdds[2] = " add x13, x13, x25"; 53 | unrolledAdds[3] = " add x12, x12, x25"; 54 | 55 | string[] unrolledAdds1 = new string[4]; 56 | unrolledAdds1[0] = " add x15, x15, x26"; 57 | unrolledAdds1[1] = " add x14, x14, x26"; 58 | unrolledAdds1[2] = " add x13, x13, x26"; 59 | unrolledAdds1[3] = " add x12, x12, x26"; 60 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false); 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /AsmGen/tests/PdepSchedTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    // Scheduler capacity test that uses PDEP as the filler instruction on x86.
    // The ARM variant falls back to plain integer adds.
    public class PdepSchedTest : UarchTest
    {
        // low/high/step are forwarded to UarchTestHelpers.GenerateCountArray to build
        // the list of filler-instruction counts.
        public PdepSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "pdepsched";
            this.Description = "PDEP Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        // AT&T-syntax (GCC) variant. Two instruction sets are built: one sourcing %rdi
        // and one sourcing %rsi.
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] instrs1 = new string[4];
            instrs1[0] = " pdep %rdi, %r14, %r15";
            instrs1[1] = " pdep %rdi, %r13, %r15";
            instrs1[2] = " pdep %rdi, %r12, %r15";
            instrs1[3] = " pdep %rdi, %r11, %r15";

            string[] instrs2 = new string[4];
            instrs2[0] = " pdep %rsi, %r14, %r15";
            instrs2[1] = " pdep %rsi, %r13, %r15";
            instrs2[2] = " pdep %rsi, %r12, %r15";
            instrs2[3] = " pdep %rsi, %r11, %r15";

            // Previously instrs1 was passed for both slots, leaving instrs2 unused.
            // Pass the %rsi set second, as the NASM variant below and the sibling
            // scheduler tests (rdi set first, rsi set second) do.
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false);
        }

        // Intel-syntax (NASM) variant of the same instruction sequence.
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] instrs = new string[4];
            instrs[0] = " pdep r15, rdi, r14";
            instrs[1] = " pdep r15, rdi, r13";
            instrs[2] = " pdep r15, rdi, r12";
            instrs[3] = " pdep r15, rdi, r11";

            string[] instrs1 = new string[4];
            instrs1[0] = " pdep r15, rsi, r14";
            instrs1[1] = " pdep r15, rsi, r13";
            instrs1[2] = " pdep r15, rsi, r12";
            instrs1[3] = " pdep r15, rsi, r11";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false);
        }

        // AArch64 variant: emits integer adds (x25-sourced set, then x26-sourced set)
        // rather than a PDEP equivalent.
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add x15, x15, x25";
            unrolledAdds[1] = " add x14, x14, x25";
            unrolledAdds[2] = " add x13, x13, x25";
            unrolledAdds[3] = " add x12, x12, x25";

            string[] unrolledAdds1 = new string[4];
            unrolledAdds1[0] = " add x15, x15, x26";
            unrolledAdds1[1] = " add x14, x14, x26";
            unrolledAdds1[2] = " add x13, x13, x26";
            unrolledAdds1[3] = " add x12, x12, x26";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/MixPdepLeaSchedTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    // Scheduler capacity test that alternates PDEP and scaled-index LEA as filler
    // instructions in the GCC variant. The NASM and ARM variants do not yet emit
    // that mix (see the notes on those methods).
    public class PdepLeaSchedTest : UarchTest
    {
        // low/high/step are forwarded to UarchTestHelpers.GenerateCountArray to build
        // the list of filler-instruction counts.
        public PdepLeaSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mixpdepleasched";
            this.Description = "Mixed PDEP/LEA Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        // AT&T-syntax (GCC) variant: PDEP and LEA alternate, first sourcing %rdi
        // (instrs1) and then %rsi (instrs2).
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] instrs1 = new string[4];
            instrs1[0] = " pdep %rdi, %r14, %r15";
            instrs1[1] = " lea (%rdx,%rdi,8), %r13";
            instrs1[2] = " pdep %rdi, %r12, %r15";
            instrs1[3] = " lea (%rdx,%rdi,8), %r11";

            string[] instrs2 = new string[4];
            instrs2[0] = " pdep %rsi, %r14, %r15";
            instrs2[1] = " lea (%rdx,%rsi,8), %r13";
            instrs2[2] = " pdep %rsi, %r12, %r15";
            instrs2[3] = " lea (%rdx,%rsi,8), %r11";

            // Previously instrs1 was passed for both slots, leaving instrs2 unused.
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false);
        }

        // Intel-syntax (NASM) variant.
        // TODO: still emits PDEP only; it does not mirror the PDEP/LEA mix of the GCC variant.
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] instrs = new string[4];
            instrs[0] = " pdep r15, rdi, r14";
            instrs[1] = " pdep r15, rdi, r13";
            instrs[2] = " pdep r15, rdi, r12";
            instrs[3] = " pdep r15, rdi, r11";

            string[] instrs1 = new string[4];
            instrs1[0] = " pdep r15, rsi, r14";
            instrs1[1] = " pdep r15, rsi, r13";
            instrs1[2] = " pdep r15, rsi, r12";
            instrs1[3] = " pdep r15, rsi, r11";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs1, false);
        }

        // AArch64 variant: emits integer adds (x25-sourced set, then x26-sourced set).
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add x15, x15, x25";
            unrolledAdds[1] = " add x14, x14, x25";
            unrolledAdds[2] = " add x13, x13, x25";
            unrolledAdds[3] = " add x12, x12, x25";

            string[] unrolledAdds1 = new string[4];
            unrolledAdds1[0] = " add x15, x15, x26";
            unrolledAdds1[1] = " add x14, x14, x26";
            unrolledAdds1[2] = " add x13, x13, x26";
            unrolledAdds1[3] = " add x12, x12, x26";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/LeaSchedTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    // Scheduler capacity test that uses a scaled-index LEA (base + index*8) as the
    // filler instruction in the GCC variant. The NASM and ARM variants currently
    // emit plain integer adds instead.
    public class LeaSchedTest : UarchTest
    {
        // low/high/step are forwarded to UarchTestHelpers.GenerateCountArray to build
        // the list of filler-instruction counts.
        public LeaSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "leasched";
            this.Description = "lea [r+r*8] Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        // AT&T-syntax (GCC) variant: one LEA set indexed by %rdi and one indexed by %rsi.
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] instrs1 = new string[4];
            instrs1[0] = " lea (%rdx,%rdi,8), %r15";
            instrs1[1] = " lea (%rdx,%rdi,8), %r14";
            instrs1[2] = " lea (%rdx,%rdi,8), %r13";
            instrs1[3] = " lea (%rdx,%rdi,8), %r12";

            string[] instrs2 = new string[4];
            instrs2[0] = " lea (%rdx,%rsi,8), %r15";
            instrs2[1] = " lea (%rdx,%rsi,8), %r14";
            instrs2[2] = " lea (%rdx,%rsi,8), %r13";
            instrs2[3] = " lea (%rdx,%rsi,8), %r12";

            // Previously instrs1 was passed for both slots, leaving instrs2 unused.
            // Pass the %rsi set second, as the other variants in this class do with
            // their rdi/rsi (x25/x26) sets.
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs1, instrs2, false);
        }

        // Intel-syntax (NASM) variant.
        // TODO: placeholder that emits integer adds, not LEA.
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add r15, rdi";
            unrolledAdds[1] = " add r14, rdi";
            unrolledAdds[2] = " add r13, rdi";
            unrolledAdds[3] = " add r12, rdi";

            string[] unrolledAdds1 = new string[4];
            unrolledAdds1[0] = " add r15, rsi";
            unrolledAdds1[1] = " add r14, rsi";
            unrolledAdds1[2] = " add r13, rsi";
            unrolledAdds1[3] = " add r12, rsi";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
        }

        // AArch64 variant: emits integer adds (x25-sourced set, then x26-sourced set).
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add x15, x15, x25";
            unrolledAdds[1] = " add x14, x14, x25";
            unrolledAdds[2] = " add x13, x13, x25";
            unrolledAdds[3] = " add x12, x12, x25";

            string[] unrolledAdds1 = new string[4];
            unrolledAdds1[0] = " add x15, x15, x26";
            unrolledAdds1[1] = " add x14, x14, x26";
            unrolledAdds1[2] = " add x13, x13, x26";
            unrolledAdds1[3] = " add x12, x12, x26";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/FpRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class FpRfTest : UarchTest 6 | { 7 | public FpRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "fprf"; 11 | this.Description = "FP (64-bit scalar) RF Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string initInstrs = " movss (%r8), %xmm1\n" + 20 | " movss 4(%r8), %xmm2\n" + 21 | " movss 8(%r8), %xmm3\n" + 22 | " movss 12(%r8), %xmm4\n" + 23 | " movss 16(%r8), %xmm5\n"; 24 | 25 | string[] unrolledAdds = new string[4]; 26 | unrolledAdds[0] = " addss %xmm1, %xmm2"; 27 | unrolledAdds[1] = " addss %xmm1, %xmm3"; 28 | unrolledAdds[2] = " addss %xmm1, %xmm4"; 29 | unrolledAdds[3] = " addss %xmm1, %xmm5"; 30 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs); 31 | } 32 | 
33 | public override void GenerateX86NasmAsm(StringBuilder sb) 34 | { 35 | string initInstrs = " movss xmm1, [r8]\n" + 36 | " movss xmm2, [r8 + 4]\n" + 37 | " movss xmm3, [r8 + 8]\n" + 38 | " movss xmm4, [r8 + 12]\n" + 39 | " movss xmm5, [r8 + 16]\n"; 40 | 41 | string[] unrolledAdds = new string[4]; 42 | unrolledAdds[0] = " addss xmm2, xmm1"; 43 | unrolledAdds[1] = " addss xmm3, xmm1"; 44 | unrolledAdds[2] = " addss xmm4, xmm1"; 45 | unrolledAdds[3] = " addss xmm5, xmm1"; 46 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs); 47 | } 48 | 49 | public override void GenerateArmAsm(StringBuilder sb) 50 | { 51 | string initInstrs = " ldr s17, [x2]\n" + 52 | " ldr s18, [x2, 4]\n" + 53 | " ldr s19, [x2, 8]\n" + 54 | " ldr s20, [x2, 12]\n" + 55 | " ldr s21, [x2, 16]\n"; 56 | 57 | string[] unrolledAdds = new string[4]; 58 | unrolledAdds[0] = " fadd s18, s18, s17"; 59 | unrolledAdds[1] = " fadd s19, s19, s17"; 60 | unrolledAdds[2] = " fadd s20, s20, s17"; 61 | unrolledAdds[3] = " fadd s21, s21, s17"; 62 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /MemoryLatency/MemoryLatency_i686.s: --------------------------------------------------------------------------------
.text

/* Each routine is exported under two names: the decorated @name@8 form and the
   plain name. Both labels refer to the same entry point. */
.global @latencytest@8
.global @preplatencyarr@8
.global @stlftest@8
.global @matchedstlftest@8
.global latencytest
.global preplatencyarr
.global stlftest
.global matchedstlftest

/* preplatencyarr
   fastcall is specified in the C source file, so
     ecx = ptr to arr
     edx = arr len (number of 32-bit elements)
   Converts every value in the array from an array index into a pointer
   (arr + index * 4), in place, so latencytest can chase pointers directly.
   The loop tests for termination only after incrementing the index, so the
   length is expected to be non-zero.
   eax and esi are saved and restored.
*/
preplatencyarr:
@preplatencyarr@8:
  push %eax
  push %esi
  xor %esi, %esi                /* esi = array index */
preplatencyarr_loop:
  mov (%ecx,%esi,4), %eax       /* load target array index into eax */
  lea (%ecx,%eax,4), %eax       /* calculate target address -> eax */
  mov %eax, (%ecx,%esi,4)       /* replace array index with target address */
  inc %esi
  cmp %esi, %edx
  jne preplatencyarr_loop       /* continue until index == arr len */
  pop %esi
  pop %eax
  ret

/* latencytest
     ecx = iterations
     edx = ptr to arr (already converted by preplatencyarr)
   Does pointer chasing for the specified iteration count. Each loaded pointer
   is also added into eax, which is what is left in eax on return.
   The counter is decremented before it is tested, so the iteration count is
   expected to be non-zero.
*/
latencytest:
@latencytest@8:
  push %esi
  mov (%edx), %esi              /* esi = first pointer in the chain */
  xor %eax, %eax                /* eax = running sum of loaded pointers */
latencytest_loop:
  mov (%esi), %esi              /* dependent load: next pointer in the chain */
  add %esi, %eax
  dec %ecx
  jnz latencytest_loop
  pop %esi
  ret

/* stlftest
     ecx = iterations
     edx = ptr to array. The first two 32-bit ints in the array are the store
           and load byte offsets respectively, relative to the array base.
   Store-to-load forwarding test with mismatched sizes: 32-bit stores followed
   by 16-bit loads.
   The loop body is unrolled into 5 store/load pairs and the counter drops by 5
   per pass, so the loop runs until the counter is <= 0.
*/
stlftest:
@stlftest@8:
  push %esi
  push %edi
  mov (%edx), %eax              /* just get some value into eax (the store value) */
  mov (%edx), %esi
  mov 4(%edx), %edi
  add %edx, %esi                /* esi = store ptr */
  add %edx, %edi                /* edi = load ptr */
stlftest_loop:
  mov %eax, (%esi)              /* 32-bit store */
  mov (%edi), %ax               /* 16-bit load that possibly gets forwarded result */
  mov %eax, (%esi)
  mov (%edi), %ax
  mov %eax, (%esi)
  mov (%edi), %ax
  mov %eax, (%esi)
  mov (%edi), %ax
  mov %eax, (%esi)
  mov (%edi), %ax
  sub $5, %ecx                  /* 5 store/load pairs per pass */
  jg stlftest_loop
  pop %edi
  pop %esi
  ret

/* matchedstlftest
   Same register usage and array layout as stlftest, but the load is 32 bits
   wide, matching the size of the store.
*/
matchedstlftest:
@matchedstlftest@8:
  push %esi
  push %edi
  mov (%edx), %eax              /* just get some value into eax (the store value) */
  mov (%edx), %esi
  mov 4(%edx), %edi
  add %edx, %esi                /* esi = store ptr */
  add %edx, %edi                /* edi = load ptr */
matchedstlftest_loop:
  mov %eax, (%esi)              /* 32-bit store */
  mov (%edi), %eax              /* 32-bit load */
  mov %eax, (%esi)
  mov (%edi), %eax
  mov %eax, (%esi)
  mov (%edi), %eax
  mov %eax, (%esi)
  mov (%edi), %eax
  mov %eax, (%esi)
  mov (%edi), %eax
  sub $5, %ecx                  /* 5 store/load pairs per pass */
  jg matchedstlftest_loop
  pop %edi
  pop %esi
  ret
-------------------------------------------------------------------------------- /AsmGen/tests/AddSchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class AddSchedTest : UarchTest 6 | { 7 | public AddSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "addsched"; 11 | this.Description = "Integer (add) Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledAdds = new string[4]; 20 | unrolledAdds[0] = " add %rdi, %r15"; 21 | unrolledAdds[1] = " add %rdi, %r14"; 22 | unrolledAdds[2] = " add %rdi, %r13"; 23 | unrolledAdds[3] = " add %rdi, %r12"; 24 | 25 | string[] unrolledAdds1 = new string[4]; 26 | unrolledAdds1[0] = " add %rsi, %r15"; 27 | unrolledAdds1[1] = " add %rsi, %r14"; 28 | unrolledAdds1[2] = " add %rsi, %r13"; 29 | unrolledAdds1[3] = " add %rsi, %r12"; 30 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false); 31 | } 32 | 33 | public override void GenerateX86NasmAsm(StringBuilder sb) 34 | { 35 | string[] unrolledAdds = new string[4]; 36 | unrolledAdds[0] = " add r15, rdi"; 37 | unrolledAdds[1] = " add r14, rdi"; 38 | unrolledAdds[2] = " add r13, rdi"; 39 | unrolledAdds[3] = " add r12, rdi"; 40 | 41 | string[] unrolledAdds1 = new string[4]; 42 | unrolledAdds1[0] = " add r15, rsi"; 43 | unrolledAdds1[1] = " add r14, rsi"; 44 | 
unrolledAdds1[2] = " add r13, rsi"; 45 | unrolledAdds1[3] = " add r12, rsi"; 46 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false); 47 | } 48 | 49 | public override void GenerateArmAsm(StringBuilder sb) 50 | { 51 | string[] unrolledAdds = new string[4]; 52 | unrolledAdds[0] = " add x15, x15, x25"; 53 | unrolledAdds[1] = " add x14, x14, x25"; 54 | unrolledAdds[2] = " add x13, x13, x25"; 55 | unrolledAdds[3] = " add x12, x12, x25"; 56 | 57 | string[] unrolledAdds1 = new string[4]; 58 | unrolledAdds1[0] = " add x15, x15, x26"; 59 | unrolledAdds1[1] = " add x14, x14, x26"; 60 | unrolledAdds1[2] = " add x13, x13, x26"; 61 | unrolledAdds1[3] = " add x12, x12, x26"; 62 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /AsmGen/tests/MulSchedTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    // Scheduler capacity test that uses 64-bit integer multiplies as filler instructions
    // (imul on x86, mul on AArch64).
    public class MulSchedTest : UarchTest
    {
        // low/high/step are forwarded to UarchTestHelpers.GenerateCountArray to build
        // the list of filler-instruction counts.
        public MulSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mulsched";
            this.Description = "Integer (64-bit mul) Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        // AT&T-syntax (GCC) variant: one multiply set sourcing %rdi, one sourcing %rsi.
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " imul %rdi, %r15";
            unrolledMuls[1] = " imul %rdi, %r14";
            unrolledMuls[2] = " imul %rdi, %r13";
            unrolledMuls[3] = " imul %rdi, %r12";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " imul %rsi, %r15";
            unrolledMuls1[1] = " imul %rsi, %r14";
            unrolledMuls1[2] = " imul %rsi, %r13";
            unrolledMuls1[3] = " imul %rsi, %r12";
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
        }

        // Intel-syntax (NASM) variant of the same instruction sequence.
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " imul r15, rdi";
            unrolledMuls[1] = " imul r14, rdi";
            unrolledMuls[2] = " imul r13, rdi";
            unrolledMuls[3] = " imul r12, rdi";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " imul r15, rsi";
            unrolledMuls1[1] = " imul r14, rsi";
            unrolledMuls1[2] = " imul r13, rsi";
            unrolledMuls1[3] = " imul r12, rsi";

            // Previously unrolledMuls was passed for both slots, leaving the rsi set
            // unused. Pass unrolledMuls1 second, as the GCC and ARM variants do.
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
        }

        // AArch64 variant: one multiply set sourcing x25, one sourcing x26.
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " mul x10, x10, x25";
            unrolledAdds[1] = " mul x14, x14, x25";
            unrolledAdds[2] = " mul x13, x13, x25";
            unrolledAdds[3] = " mul x12, x12, x25";

            string[] unrolledAdds1 = new string[4];
            unrolledAdds1[0] = " mul x10, x10, x26";
            unrolledAdds1[1] = " mul x14, x14, x26";
            unrolledAdds1[2] = " mul x13, x13, x26";
            unrolledAdds1[3] = " mul x12, x12, x26";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds1, false);
        }
    }
}
-------------------------------------------------------------------------------- /MemoryBandwidth/README.md: -------------------------------------------------------------------------------- 1 | # Memory Bandwidth Benchmark 2 | This is a C and assembly project that tests memory bandwidth. There's a version in this directory for Linux that uses POSIX threads for multithreading. 
There's a Windows version in the MemoryBandwidth subdirectory that uses Windows threading APIs. The Windows version requires Visual Studio and nasm in the path to compile. 3 | 4 | To compile the Linux version, do `make amd64` or `make aarch64`, depending on the target architecture. 5 | 6 | # Example usage 7 | 8 | Testing single-threaded bandwidth: `MemoryBandwidth.exe` or `./membw_amd64` or `./membw_aarch64` 9 | 10 | # General parameters 11 | `-threads` - How many threads to spawn. If you spawn more than one (e.g. with `-threads 4`) you might want to specify `-private` or `-shared`. 12 | 13 | `-private` - A separate test array is allocated for each thread. Each thread will access its own block of data, with the total amount of test data equal to the test size. For example, with a test size of 16 KB and 4 threads, each thread is given a 4 KB array. With this mode, test results will reflect combined cache capacity. If you have four cores, each with a private 32 KB L1D, expect to see L1D-level bandwidth for test sizes up to 4 * 32 KB = 128 KB. This is usually the best mode to use because memory bandwidth results won't be inflated by request combining. 14 | 15 | `-shared` - A single test array is accessed by all threads. For example, with 4 threads and a 16 KB test size, a single 16 KB array will be allocated and all four threads will hit it. Useful for seeing small shared caches, where the sum of private cache capacity is very close to (or exceeds) shared cache capacity. This mode often gives erroneously high memory bandwidth results because requests to the same cache lines from multiple cores may be combined. Of course using this mode with anything other than read-only access patterns is... stupid. 16 | 17 | `-method` - What test to run. Methods will vary depending on what platform you're targeting and what version (Windows or Linux) you're using. There's some naming inconsistency here that I have to clean up. Good luck. 
If you don't specify it, it should pick the best read-only test function to use on your system. But a few options: 18 | - `asm` (Linux only) - Uses a default read-only test function with a handwritten, unrolled assembly loop. On x86, AVX is used. NEON is used on aarch64. 19 | - `avx512` (Linux, x86-64 only) - Uses AVX-512 instructions 20 | - `write` (Linux) - Tests write bandwidth instead of read bandwidth. Will use AVX-512 if available 21 | - `copy` (Linux) - Copies one half of the array to the other 22 | - `scalar` - Plain C code that should work on any system. Only option available if you're on a weird (not x86 or aarch64) platform. Unsuitable for testing cache bandwidth because compilers are really really bad at autovectorization 23 | - `instr8`, `instr4` - Tests instruction-side bandwidth (as opposed to data side) by filling an array with NOPs and a return at the end, marking it executable, and calling it as if it were a function. On x86-64, `instr8` uses 8 byte NOPs, while `instr4` uses 4 byte NOPs. 
24 | -------------------------------------------------------------------------------- /AsmGen/tests/CvtSchedTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    // Scheduler capacity test that uses integer-to-float conversion as the filler
    // instruction (cvtsi2ss on x86, scvtf on AArch64).
    public class CvtSchedTest : UarchTest
    {
        // low/high/step are forwarded to UarchTestHelpers.GenerateCountArray to build
        // the list of filler-instruction counts.
        // NOTE(review): unlike the sibling tests, DivideTimeByCount is not assigned here;
        // confirm whether relying on the base-class default is intended.
        public CvtSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "cvtsched";
            this.Description = "I2F (cvtsi2ss) Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
        }

        // AT&T-syntax (GCC) variant: one conversion set sourcing %rdi, one sourcing %rsi.
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string[] unrolledInstrs = new string[4];
            unrolledInstrs[0] = " cvtsi2ss %rdi, %xmm1";
            unrolledInstrs[1] = " cvtsi2ss %rdi, %xmm2";
            unrolledInstrs[2] = " cvtsi2ss %rdi, %xmm3";
            unrolledInstrs[3] = " cvtsi2ss %rdi, %xmm4";

            string[] unrolledInstrs1 = new string[4];
            unrolledInstrs1[0] = " cvtsi2ss %rsi, %xmm1";
            unrolledInstrs1[1] = " cvtsi2ss %rsi, %xmm2";
            unrolledInstrs1[2] = " cvtsi2ss %rsi, %xmm3";
            unrolledInstrs1[3] = " cvtsi2ss %rsi, %xmm4";

            // Previously unrolledInstrs was passed for both slots, leaving the %rsi set
            // unused. Pass unrolledInstrs1 second, as the NASM and ARM variants do.
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledInstrs, unrolledInstrs1, false);
        }

        // Intel-syntax (NASM) variant of the same instruction sequence.
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] unrolledInstrs = new string[4];
            unrolledInstrs[0] = " cvtsi2ss xmm1, rdi";
            unrolledInstrs[1] = " cvtsi2ss xmm2, rdi";
            unrolledInstrs[2] = " cvtsi2ss xmm3, rdi";
            unrolledInstrs[3] = " cvtsi2ss xmm4, rdi";

            string[] unrolledInstrs1 = new string[4];
            unrolledInstrs1[0] = " cvtsi2ss xmm1, rsi";
            unrolledInstrs1[1] = " cvtsi2ss xmm2, rsi";
            unrolledInstrs1[2] = " cvtsi2ss xmm3, rsi";
            unrolledInstrs1[3] = " cvtsi2ss xmm4, rsi";
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledInstrs, unrolledInstrs1, false);
        }

        // AArch64 variant: scvtf from w25 (first set) and w26 (second set).
        // All four instructions in each set write s0.
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledInstrs = new string[4];
            unrolledInstrs[0] = " scvtf s0, w25";
            unrolledInstrs[1] = " scvtf s0, w25";
            unrolledInstrs[2] = " scvtf s0, w25";
            unrolledInstrs[3] = " scvtf s0, w25";

            string[] unrolledInstrs1 = new string[4];
            unrolledInstrs1[0] = " scvtf s0, w26";
            unrolledInstrs1[1] = " scvtf s0, w26";
            unrolledInstrs1[2] = " scvtf s0, w26";
            unrolledInstrs1[3] = " scvtf s0, w26";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledInstrs, unrolledInstrs1, false);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/VecRfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class VecRfTest : UarchTest 6 | { 7 | public VecRfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "vec128rf"; 11 | this.Description = "Vector (128-bit packed int) RF Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | // it's ok, the ptr chasing arr should be way bigger than this 20 | string initInstrs = " movdqu (%rdx), %xmm1\n" + 21 | " movdqu 16(%rdx), %xmm2\n" + 22 | " movdqu 32(%rdx), %xmm3\n" + 23 | " movdqu 48(%rdx), %xmm4\n" + 24 | " movdqu 64(%rdx), %xmm5\n"; 25 | 26 | string[] unrolledAdds = new string[4]; 27 | unrolledAdds[0] = " paddq %xmm1, %xmm2"; 28 | unrolledAdds[1] = " paddq %xmm1, %xmm3"; 29 | unrolledAdds[2] = " paddq %xmm1, %xmm4"; 30 | unrolledAdds[3] = " paddq %xmm1, %xmm5"; 
/// <summary>
/// Scheduler capacity test whose filler instructions are 512-bit packed integer adds.
/// Every filler add reads register 0 (zmm0 on x86, v17 on ARM), which the post-load
/// instructions populate from rdi/rsi (w25/w26 on ARM) - presumably the results of the
/// pointer chasing loads emitted by UarchTestHelpers; confirm against the helper.
/// NOTE(review): the ARM variant emits 128-bit NEON adds, so only the x86 paths are
/// actually 512 bits wide, as the description string says.
/// </summary>
public class Add512SchedTest : UarchTest
{
    /// <summary>Sets up the test for filler counts produced by GenerateCountArray(low, high, step).</summary>
    public Add512SchedTest(int low, int high, int step)
    {
        this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        this.Prefix = "add512sched";
        this.Description = "512-bit Integer Add Scheduler Capacity Test (AVX-512 only)";
        this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
        this.GetFunctionCallParameters = "structIterations, A, B";
        this.DivideTimeByCount = false;
    }

    /// <summary>Emits the AT&amp;T-syntax (GCC) variant.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // zmm0 is dependent on ptr chasing load: broadcast the low dword of rdi/rsi into all lanes
        string postLoadInstr1 = " movq %rdi, %xmm0\n vpbroadcastd %xmm0, %zmm0\n";
        string postLoadInstr2 = " movq %rsi, %xmm0\n vpbroadcastd %xmm0, %zmm0\n";

        // Each add accumulates into its own register; the last one previously wrote zmm3
        // instead of zmm4, which broke the pattern used by the other three.
        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " vpaddd %zmm0, %zmm1, %zmm1";
        unrolledAdds[1] = " vpaddd %zmm0, %zmm2, %zmm2";
        unrolledAdds[2] = " vpaddd %zmm0, %zmm3, %zmm3";
        unrolledAdds[3] = " vpaddd %zmm0, %zmm4, %zmm4";

        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
    }

    /// <summary>Emits the Intel-syntax (NASM) variant.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        // Use zmm registers so this path tests 512-bit adds like the GCC path does
        // (it previously used ymm registers, i.e. 256-bit adds).
        string postLoadInstr1 = " movq xmm0, rdi\n vpbroadcastd zmm0, xmm0\n";
        string postLoadInstr2 = " movq xmm0, rsi\n vpbroadcastd zmm0, xmm0\n";

        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " vpaddd zmm1, zmm1, zmm0";
        unrolledAdds[1] = " vpaddd zmm2, zmm2, zmm0";
        unrolledAdds[2] = " vpaddd zmm3, zmm3, zmm0";
        unrolledAdds[3] = " vpaddd zmm4, zmm4, zmm0";
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
    }

    /// <summary>Emits the AArch64 variant (128-bit NEON adds on v18-v21, all reading v17).</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string initInstrs = " ldr q18, [x1]\n ldr q18, [x1]\n ldr q19, [x2]\n ldr q20, [x2]\n ldr q21, [x2]\n";
        // v17 lane 0 is written from w25/w26 so that the adds below depend on it
        string postLoadInstr1 = " mov v17.s[0], w25\n";
        string postLoadInstr2 = " mov v17.s[0], w26\n";
        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " add v18.4s, v18.4s, v17.4s";
        unrolledAdds[1] = " add v19.4s, v19.4s, v17.4s";
        unrolledAdds[2] = " add v20.4s, v20.4s, v17.4s";
        unrolledAdds[3] = " add v21.4s, v21.4s, v17.4s";
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs, postLoadInstr1, postLoadInstr2);
    }
}
setMxcsrInstrs[1] = " mov r15, 0x9fc0\n mov [r8], r15\n ldmxcsr [r8]\n addss xmm0, xmm1"; // set denormals are zero, flush to zero 44 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, setMxcsrInstrs, setMxcsrInstrs, false); 45 | } 46 | 47 | // todo 48 | public override void GenerateArmAsm(StringBuilder sb) 49 | { 50 | // read FPCR into x15, set x14 = flush denormals to zero enabled, x15 = flush denormals to zero disabled 51 | // x12 = mask with all bits set except bit 24 (flush to zero) - bitwise AND to unset bit 24 52 | // x13 = just bit 24 set with all other bits zero - bitwise OR to set bit 24 53 | string initInstrs = " mrs x15, fpcr\n mov x13, 1\n lsl x13, x13, 24\n neg x12, x13\n orr x14, x15, x13\n and x15, x15, x12"; 54 | string[] setFpcrInstrs = new string[2]; 55 | setFpcrInstrs[0] = " msr fpcr, x15\n fadd s2, s2, s3\n"; 56 | setFpcrInstrs[1] = " msr fpcr, x14\n fadd s4, s4, s5\n"; 57 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, setFpcrInstrs, setFpcrInstrs, false, initInstrs: initInstrs); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /GpuMemLatency/latency_test.c: -------------------------------------------------------------------------------- 1 | #include "opencltest.h" 2 | 3 | float latency_test(cl_context context, 4 | cl_command_queue command_queue, 5 | cl_kernel kernel, 6 | uint32_t list_size, 7 | uint32_t chase_iterations, 8 | short sattolo) 9 | { 10 | size_t global_item_size = 1, local_item_size = 1; 11 | cl_int ret; 12 | float latency; 13 | int64_t time_diff_ms; 14 | uint32_t result; 15 | uint32_t stride = 1211; 16 | uint32_t element_count = list_size / CACHELINE_SIZE; 17 | uint32_t increment = CACHELINE_SIZE / sizeof(uint32_t); 18 | uint32_t* A = (uint32_t*)malloc(sizeof(uint32_t) * list_size); 19 | if (sattolo) { 20 | FillPatternArr((uint32_t*)A, list_size, CACHELINE_SIZE); 21 | } 22 | else { 23 | for (int i = 0; i < 
list_size; i++) 24 | { 25 | A[i] = (i + stride) % list_size; 26 | } 27 | } 28 | 29 | // copy array to device 30 | cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, list_size * sizeof(uint32_t), NULL, &ret); 31 | clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, list_size * sizeof(uint32_t), A, 0, NULL, NULL); 32 | 33 | cl_mem result_obj = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(uint32_t), NULL, &ret); 34 | clEnqueueWriteBuffer(command_queue, result_obj, CL_TRUE, 0, sizeof(uint32_t), &result, 0, NULL, NULL); 35 | clFinish(command_queue); 36 | 37 | // Set kernel arguments 38 | ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&a_mem_obj); 39 | if (ret != CL_SUCCESS) 40 | { 41 | fprintf(stderr, "Failed to set list as kernel arg. clSetKernelArg returned %d\n", ret); 42 | latency = 0; 43 | goto cleanup; 44 | } 45 | 46 | ret = clSetKernelArg(kernel, 1, sizeof(cl_int), (void*)&chase_iterations); 47 | ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&result_obj); 48 | 49 | start_timing(); 50 | // Execute the OpenCL kernel. launch a single thread 51 | ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL); 52 | if (ret != CL_SUCCESS) 53 | { 54 | fprintf(stderr, "Failed to submit kernel to command queue. clEnqueueNDRangeKernel returned %d\n", ret); 55 | latency = 0; 56 | goto cleanup; 57 | } 58 | 59 | ret = clFinish(command_queue); // returns success even when TDR happens? 60 | if (ret != CL_SUCCESS) 61 | { 62 | printf("Failed to finish command queue. clFinish returned %d\n", ret); 63 | latency = 0; 64 | goto cleanup; 65 | } 66 | 67 | time_diff_ms = end_timing(); 68 | latency = 1e6 * (float)time_diff_ms / (float)chase_iterations; 69 | 70 | ret = clEnqueueReadBuffer(command_queue, result_obj, CL_TRUE, 0, sizeof(uint32_t), &result, 0, NULL, NULL); 71 | clFinish(command_queue); 72 | 73 | //fprintf(stderr, "Finished reading result. 
Sum: %d\n", result[0]); 74 | 75 | cleanup: 76 | clFlush(command_queue); 77 | clFinish(command_queue); 78 | clReleaseMemObject(a_mem_obj); 79 | clReleaseMemObject(result_obj); 80 | free(A); 81 | return latency; 82 | } 83 | -------------------------------------------------------------------------------- /AsmGen/tests/Add256SchedTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class Add256SchedTest : UarchTest 6 | { 7 | public Add256SchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "add256sched"; 11 | this.Description = "256-bit Integer Add Scheduler Capacity Test (128-bit on ARM)"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2"; 13 | this.GetFunctionCallParameters = "structIterations, A, B"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string postLoadInstr1 = " movq %rdi, %xmm0\n vpbroadcastd %xmm0, %ymm0\n"; 20 | string postLoadInstr2 = " movq %rsi, %xmm0\n vpbroadcastd %xmm0, %ymm0\n"; 21 | // ymm0 is dependent on ptr chasing load 22 | string[] unrolledAdds = new string[4]; 23 | unrolledAdds[0] = " vpaddd %ymm0, %ymm1, %ymm1"; 24 | unrolledAdds[1] = " vpaddd %ymm0, %ymm2, %ymm2"; 25 | unrolledAdds[2] = " vpaddd %ymm0, %ymm3, %ymm3"; 26 | unrolledAdds[3] = " vpaddd %ymm0, %ymm4, %ymm3"; 27 | 28 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2); 29 | } 30 | 31 | public override void GenerateX86NasmAsm(StringBuilder sb) 32 | { 33 | string postLoadInstr1 = " movq xmm0, rdi\n vpbroadcastd ymm0, xmm0\n"; 34 | string postLoadInstr2 = " movq xmm0, rsi\n vpbroadcastd ymm0, xmm0\n"; 35 | 36 | string[] unrolledAdds = new string[4]; 37 | 
/// <summary>
/// Integer add scheduler capacity test in which the filler adds depend on an ALU result
/// (r11 / x11) rather than directly on a loaded value. The post-load instruction folds
/// rdi/rsi (x25/x26 on ARM) into r11/x11, and every filler add then reads that register.
/// </summary>
public class LdmTest : UarchTest
{
    /// <summary>Sets up the test for filler counts produced by GenerateCountArray(low, high, step).</summary>
    public LdmTest(int low, int high, int step)
    {
        this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        this.Prefix = "ldm";
        this.Description = "Integer (add) without Load Dependency Matrix Scheduler Capacity Test";
        this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
        this.GetFunctionCallParameters = "structIterations, A";
        this.DivideTimeByCount = false;
    }

    /// <summary>Emits the AT&amp;T-syntax (GCC) variant.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        string postLoadInstr1 = " add %rdi, %r11";
        string postLoadInstr2 = " add %rsi, %r11";

        // all fillers read r11, which the post-load add just wrote
        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " add %r11, %r15";
        unrolledAdds[1] = " add %r11, %r14";
        unrolledAdds[2] = " add %r11, %r13";
        unrolledAdds[3] = " add %r11, %r12";

        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(
            sb,
            this.Counts,
            this.Prefix,
            unrolledAdds,
            unrolledAdds,
            false,
            postLoadInstrs1: postLoadInstr1,
            postLoadInstrs2: postLoadInstr2);
    }

    /// <summary>Emits the Intel-syntax (NASM) variant.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string postLoadInstr1 = " add r11, rdi";
        string postLoadInstr2 = " add r11, rsi";

        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " add r15, r11";
        unrolledAdds[1] = " add r14, r11";
        unrolledAdds[2] = " add r13, r11";
        unrolledAdds[3] = " add r12, r11";

        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(
            sb,
            this.Counts,
            this.Prefix,
            unrolledAdds,
            unrolledAdds,
            false,
            postLoadInstrs1: postLoadInstr1,
            postLoadInstrs2: postLoadInstr2);
    }

    /// <summary>Emits the AArch64 variant.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string postLoadInstr1 = " add x11, x11, x25";
        string postLoadInstr2 = " add x11, x11, x26";

        // Fillers read x11 (the post-load add result) to mirror the x86 variants.
        // They previously read x25 directly, leaving x11 unused and making the
        // fillers depend straight on the register the post-load add consumes.
        string[] unrolledAdds = new string[4];
        unrolledAdds[0] = " add x15, x15, x11";
        unrolledAdds[1] = " add x14, x14, x11";
        unrolledAdds[2] = " add x13, x13, x11";
        unrolledAdds[3] = " add x12, x12, x11";

        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(
            sb,
            this.Counts,
            this.Prefix,
            unrolledAdds,
            unrolledAdds,
            false,
            postLoadInstrs1: postLoadInstr1,
            postLoadInstrs2: postLoadInstr2);
    }
}
class MixIntFp12RfTest : UarchTest 6 | { 7 | public MixIntFp12RfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixintfp12rf"; 11 | this.Description = "Mix of integer and FP register file, 1:2 ratio"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string initInstrs = " movss (%r8), %xmm1\n" + 20 | " movss 4(%r8), %xmm2\n" + 21 | " movss 8(%r8), %xmm3\n" + 22 | " movss 12(%r8), %xmm4\n" + 23 | " movss 16(%r8), %xmm5\n"; 24 | 25 | string[] instrs = new string[6]; 26 | instrs[0] = "add %r15, %r14"; 27 | instrs[1] = "addss %xmm1, %xmm2"; 28 | instrs[2] = "addss %xmm1, %xmm3"; 29 | instrs[3] = "add %r15, %r12"; 30 | instrs[4] = "addss %xmm1, %xmm4"; 31 | instrs[5] = "addss %xmm1, %xmm5"; 32 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true, initInstrs); 33 | } 34 | 35 | public override void GenerateX86NasmAsm(StringBuilder sb) 36 | { 37 | string initInstrs = " movss xmm1, [r8]\n" + 38 | " movss xmm2, [r8 + 4]\n" + 39 | " movss xmm3, [r8 + 8]\n" + 40 | " movss xmm4, [r8 + 12]\n" + 41 | " movss xmm5, [r8 + 16]\n"; 42 | 43 | string[] instrs = new string[6]; 44 | instrs[0] = "add r14, r15"; 45 | instrs[1] = "addss xmm2, xmm1"; 46 | instrs[2] = "addss xmm3, xmm1"; 47 | instrs[3] = "add r12, r15"; 48 | instrs[4] = "addss xmm4, xmm1"; 49 | instrs[5] = "addss xmm5, xmm1"; 50 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, instrs, instrs, true, initInstrs); 51 | } 52 | 53 | public override void GenerateArmAsm(StringBuilder sb) 54 | { 55 | string initInstrs = " ldr s17, [x2]\n" + 56 | " ldr s18, [x2, 4]\n" + 57 | " ldr s19, [x2, 8]\n" + 58 | " ldr s20, [x2, 12]\n" + 59 | " ldr 
/// <summary>
/// Scheduler capacity test whose filler instructions are 128-bit packed integer adds.
/// All fillers read xmm0 (v17 on ARM), which the post-load instructions fill from
/// rdi/rsi (w25/w26 on ARM).
/// </summary>
public class Add128SchedTest : UarchTest
{
    /// <summary>Sets up the test for filler counts produced by GenerateCountArray(low, high, step).</summary>
    public Add128SchedTest(int low, int high, int step)
    {
        this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        this.Prefix = "add128sched";
        this.Description = "128-bit Integer Add Scheduler Capacity Test";
        this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
        this.GetFunctionCallParameters = "structIterations, A, B";
        this.DivideTimeByCount = false;
    }

    /// <summary>Emits the AT&amp;T-syntax (GCC) variant.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // xmm0 is dependent on ptr chasing load
        string postLoadInstr1 = " movq %rdi, %xmm0";
        string postLoadInstr2 = " movq %rsi, %xmm0";

        string[] unrolledAdds =
        {
            " paddd %xmm0, %xmm1",
            " paddd %xmm0, %xmm2",
            " paddd %xmm0, %xmm3",
            " paddd %xmm0, %xmm4",
        };

        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb,
            this.Counts,
            this.Prefix,
            unrolledAdds,
            unrolledAdds,
            includePtrChasingLoads: false,
            postLoadInstrs1: postLoadInstr1,
            postLoadInstrs2: postLoadInstr2);
    }

    /// <summary>Emits the Intel-syntax (NASM) variant.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        // pshufd with immediate 0 copies the low dword of xmm0 into all four lanes
        string postLoadInstr1 = " movq xmm0, rdi\n pshufd xmm0, xmm0, 0\n";
        string postLoadInstr2 = " movq xmm0, rsi\n pshufd xmm0, xmm0, 0\n";

        string[] unrolledAdds =
        {
            " paddd xmm1, xmm0",
            " paddd xmm2, xmm0",
            " paddd xmm3, xmm0",
            " paddd xmm4, xmm0",
        };

        // Pass false explicitly for the pointer chasing load flag, as the GCC and ARM
        // variants of this test do; the call previously fell back to the helper's default.
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
    }

    /// <summary>Emits the AArch64 variant (NEON adds on v18-v21, all reading v17).</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string initInstrs = " ldr q18, [x1]\n ldr q18, [x1]\n ldr q19, [x2]\n ldr q20, [x2]\n ldr q21, [x2]\n";
        string postLoadInstr1 = " mov v17.s[0], w25\n";
        string postLoadInstr2 = " mov v17.s[0], w26\n";

        string[] unrolledAdds =
        {
            " add v18.4s, v18.4s, v17.4s",
            " add v19.4s, v19.4s, v17.4s",
            " add v20.4s, v20.4s, v17.4s",
            " add v21.4s, v21.4s, v17.4s",
        };

        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs, postLoadInstr1, postLoadInstr2);
    }
}
17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | // it's ok, the ptr chasing arr should be way bigger than this 20 | string initInstrs = " vmovups (%r8), %ymm1\n" + 21 | " vmovups 32(%r8), %ymm2\n" + 22 | " vmovups 64(%r8), %ymm3\n" + 23 | " vmovups 96(%r8), %ymm4\n" + 24 | " vmovups 128(%r8), %ymm5\n"; 25 | 26 | string[] unrolledAdds = new string[4]; 27 | unrolledAdds[0] = " vaddps %ymm1, %ymm2, %ymm2"; 28 | unrolledAdds[1] = " vaddps %ymm1, %ymm3, %ymm3"; 29 | unrolledAdds[2] = " vaddps %ymm1, %ymm4, %ymm4"; 30 | unrolledAdds[3] = " vaddps %ymm1, %ymm5, %ymm5"; 31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs); 32 | } 33 | 34 | public override void GenerateX86NasmAsm(StringBuilder sb) 35 | { 36 | string initInstrs = " vmovups ymm1, [r8]\n" + 37 | " vmovups ymm2, [r8 + 32]\n" + 38 | " vmovups ymm3, [r8 + 64]\n" + 39 | " vmovups ymm4, [r8 + 96]\n" + 40 | " vmovups ymm5, [r8 + 128]\n"; 41 | 42 | string[] unrolledAdds = new string[4]; 43 | unrolledAdds[0] = " vaddps ymm2, ymm2, ymm1"; 44 | unrolledAdds[1] = " vaddps ymm3, ymm3, ymm1"; 45 | unrolledAdds[2] = " vaddps ymm4, ymm4, ymm1"; 46 | unrolledAdds[3] = " vaddps ymm5, ymm5, ymm1"; 47 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs); 48 | } 49 | 50 | public override void GenerateArmAsm(StringBuilder sb) 51 | { 52 | string initInstrs = " ldr q0, [x1]\n" + 53 | " ldr q1, [x1, #0x10]\n" + 54 | " ldr q2, [x1, #0x20]\n" + 55 | " ldr q3, [x1, #0x30]\n" + 56 | " ldr q4, [x1, #0x40]\n"; 57 | 58 | string[] unrolledAdds = new string[4]; 59 | unrolledAdds[0] = " add v1.4s, v1.4s, v0.4s"; 60 | unrolledAdds[1] = " add v2.4s, v2.4s, v0.4s"; 61 | unrolledAdds[2] = " add v3.4s, v3.4s, v0.4s"; 62 | unrolledAdds[3] = " add v4.4s, v4.4s, v0.4s"; 63 | UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, 
unrolledAdds, false, initInstrs); 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /AsmGen/tests/Vec512RfTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace AsmGen 4 | { 5 | public class Vec512RfTest : UarchTest 6 | { 7 | public Vec512RfTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "vec512rf"; 11 | this.Description = "Vector (512-bit packed fp) RF Test - x86 only"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, float *floatArr"; 13 | this.GetFunctionCallParameters = "structIterations, A, fpArr"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | // it's ok, the ptr chasing arr should be way bigger than this 20 | string initInstrs = " vmovups (%r8), %zmm1\n" + 21 | " vmovups 64(%r8), %zmm2\n" + 22 | " vmovups 128(%r8), %zmm3\n" + 23 | " vmovups 192(%r8), %zmm4\n" + 24 | " vmovups 256(%r8), %zmm5\n"; 25 | 26 | string[] unrolledAdds = new string[4]; 27 | unrolledAdds[0] = " vaddps %zmm1, %zmm2, %zmm2"; 28 | unrolledAdds[1] = " vaddps %zmm1, %zmm3, %zmm3"; 29 | unrolledAdds[2] = " vaddps %zmm1, %zmm4, %zmm4"; 30 | unrolledAdds[3] = " vaddps %zmm1, %zmm5, %zmm5"; 31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs); 32 | } 33 | 34 | public override void GenerateX86NasmAsm(StringBuilder sb) 35 | { 36 | string initInstrs = " vmovups zmm1, [r8]\n" + 37 | " vmovups zmm2, [r8 + 64]\n" + 38 | " vmovups zmm3, [r8 + 128]\n" + 39 | " vmovups zmm4, [r8 + 192]\n" + 40 | " vmovups zmm5, [r8 + 256]\n"; 41 | 42 | string[] unrolledAdds = new string[4]; 43 | unrolledAdds[0] = " vaddps zmm2, zmm2, zmm1"; 44 | unrolledAdds[1] = " vaddps zmm3, zmm3, zmm1"; 45 | unrolledAdds[2] = " vaddps zmm4, 
/// <summary>
/// Scheduler capacity test that alternates 16-bit and 64-bit integer multiplies
/// (32-bit and 64-bit on ARM). Two filler sets are generated: the first reads
/// rdi (x25 on ARM), the second reads rsi (x26 on ARM).
/// </summary>
// only applicable to Zhaoxin Lujiazui
public class MixMulSchedTest : UarchTest
{
    /// <summary>Sets up the test for filler counts produced by GenerateCountArray(low, high, step).</summary>
    public MixMulSchedTest(int low, int high, int step)
    {
        this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
        this.Prefix = "mixmulschedtest";
        this.Description = "Mixed Integer (64/16-bit mul) Scheduler Capacity Test";
        this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
        this.GetFunctionCallParameters = "structIterations, A";
        this.DivideTimeByCount = false;
    }

    /// <summary>Emits the AT&amp;T-syntax (GCC) variant.</summary>
    public override void GenerateX86GccAsm(StringBuilder sb)
    {
        // r15 and r13 are the destinations of the 16-bit multiplies; set them back to 1 after each load
        string resetMulsInstr = "mov $1, %r15\n mov $1, %r13";
        string[] unrolledMuls = new string[4];
        unrolledMuls[0] = " imul %di, %r15w";
        unrolledMuls[1] = " imul %rdi, %r14";
        unrolledMuls[2] = " imul %di, %r13w";
        unrolledMuls[3] = " imul %rdi, %r12";

        string[] unrolledMuls1 = new string[4];
        unrolledMuls1[0] = " imul %si, %r15w";
        unrolledMuls1[1] = " imul %rsi, %r14";
        unrolledMuls1[2] = " imul %si, %r13w";
        unrolledMuls1[3] = " imul %rsi, %r12";
        UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false, postLoadInstrs1: resetMulsInstr, postLoadInstrs2: resetMulsInstr);
    }

    /// <summary>Emits the Intel-syntax (NASM) variant.</summary>
    public override void GenerateX86NasmAsm(StringBuilder sb)
    {
        string[] unrolledMuls = new string[4];
        unrolledMuls[0] = " imul r15w, di";
        unrolledMuls[1] = " imul r14, rdi";
        unrolledMuls[2] = " imul r13w, di";
        unrolledMuls[3] = " imul r12, rdi";

        string[] unrolledMuls1 = new string[4];
        unrolledMuls1[0] = " imul r15w, si";
        unrolledMuls1[1] = " imul r14, rsi";
        unrolledMuls1[2] = " imul r13w, si";
        unrolledMuls1[3] = " imul r12, rsi";

        // The second filler set must be the rsi-dependent one, as in the GCC and ARM
        // variants; the rdi-dependent set used to be passed for both.
        // NOTE(review): unlike the GCC variant this path does not reset r15/r13 after
        // the loads - confirm whether that difference is intentional.
        UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
    }

    /// <summary>Emits the AArch64 variant.</summary>
    public override void GenerateArmAsm(StringBuilder sb)
    {
        string[] unrolledMuls = new string[4];
        unrolledMuls[0] = " mul w15, w15, w25";
        unrolledMuls[1] = " mul x14, x14, x25";
        unrolledMuls[2] = " mul w13, w13, w25";
        unrolledMuls[3] = " mul x12, x12, x25";

        string[] unrolledMuls1 = new string[4];
        unrolledMuls1[0] = " mul w15, w15, w26";
        unrolledMuls1[1] = " mul x14, x14, x26";
        unrolledMuls1[2] = " mul w13, w13, w26";
        unrolledMuls1[3] = " mul x12, x12, x26";
        UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
    }
}
MixJmpMulSchedTest : UarchTest 6 | { 7 | public MixJmpMulSchedTest(int low, int high, int step) 8 | { 9 | this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step); 10 | this.Prefix = "mixmuljmpsched"; 11 | this.Description = "Mixed integer multiply and not-taken Jump Scheduler Capacity Test"; 12 | this.FunctionDefinitionParameters = "uint64_t iterations, int *arr"; 13 | this.GetFunctionCallParameters = "structIterations, A"; 14 | this.DivideTimeByCount = false; 15 | } 16 | 17 | public override void GenerateX86GccAsm(StringBuilder sb) 18 | { 19 | string[] unrolledJumps = new string[6]; 20 | unrolledJumps[0] = " cmp %rdi, %rsi\n je muljmpsched_reallybadthing"; 21 | unrolledJumps[1] = " imul %edi, %r12d"; 22 | unrolledJumps[2] = " cmp %rdi, %rsi\n je muljmpsched_reallybadthing"; 23 | unrolledJumps[3] = " imul %edi, %r13d"; 24 | unrolledJumps[4] = " cmp %rdi, %rsi\n je muljmpsched_reallybadthing"; 25 | unrolledJumps[5] = " imul %edi, %r14d"; 26 | 27 | string[] unrolledJumps1 = new string[2]; 28 | unrolledJumps1[0] = " cmp %rdi, %rsi\n je muljmpsched_reallybadthing"; 29 | unrolledJumps1[1] = " imul %esi, %r11d"; 30 | 31 | UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps1, false); 32 | 33 | sb.AppendLine("muljmpsched_reallybadthing:"); 34 | sb.AppendLine(" int3"); 35 | } 36 | 37 | public override void GenerateX86NasmAsm(StringBuilder sb) 38 | { 39 | string[] unrolledJumps = new string[2]; 40 | unrolledJumps[0] = " cmp rdi, rsi\n je muljmpsched_reallybadthing"; 41 | unrolledJumps[0] = " imul r12d, edi"; 42 | 43 | string[] unrolledJumps1 = new string[2]; 44 | unrolledJumps1[0] = " cmp rdi, rsi\n je muljmpsched_reallybadthing"; 45 | unrolledJumps1[0] = " imul r11d, esi"; 46 | UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledJumps, unrolledJumps1, false); 47 | 48 | sb.AppendLine("muljmpsched_reallybadthing:"); 49 | sb.AppendLine(" int3"); 50 | } 51 | 52 | public 
.text

.global latencytest
.global preplatencyarr
.global stlftest
.global stlftest32
.global matchedstlftest

/* x0 = ptr to arr
   x1 = arr len
   convert values in array from array indexes to pointers
   (each 64-bit element i becomes arr + 8 * arr[i]) */
preplatencyarr:
  sub sp, sp, #0x20
  stp x14, x15, [sp, #0x10]      /* save scratch regs */
  mov x15, 0                     /* x15 = element index */
preplatencyarr_loop:
  ldr x14, [x0, w15, uxtw #3]    /* x14 = arr[index] */
  lsl x14, x14, 3                /* scale index to a byte offset */
  add x14, x14, x0               /* make it an absolute address */
  str x14, [x0, w15, uxtw #3]
  add w15, w15, 1
  cmp x15, x1
  b.ne preplatencyarr_loop
  ldp x14, x15, [sp, #0x10]
  add sp, sp, #0x20
  ret

/* x0 = iteration count
   x1 = ptr to arr
   do pointer chasing for specified iteration count
   returns the sum of all loaded pointers in x0 */
latencytest:
  sub sp, sp, #0x20
  stp x14, x15, [sp, #0x10]
  mov x14, 0                     /* x14 = running sum */
  ldr x15, [x1]                  /* x15 = first pointer in the chain */
latencytest_loop:
  ldr x15, [x15]                 /* next load depends on the previous one */
  add x14, x14, x15
  sub x0, x0, 1
  cbnz x0, latencytest_loop
  mov x0, x14
  ldp x14, x15, [sp, #0x10]
  add sp, sp, #0x20
  ret

/* x0 = iteration count
   x1 = ptr to arr. first 32-bit int = store offset, second = load offset
   64-bit store followed by a 32-bit load, five pairs per loop pass */
stlftest:
  sub sp, sp, #0x40
  stp x14, x15, [sp, #0x10]
  stp x12, x13, [sp, #0x20]  /* x12 = store ptr, x13 = load ptr */
  ldr x15, [x1]
  ldr w12, [x1]
  ldr w13, [x1, 4]
  add x12, x12, x1
  add x13, x13, x1
stlftest_loop:
  str x15, [x12]
  ldr w15, [x13]
  str x15, [x12]
  ldr w15, [x13]
  str x15, [x12]
  ldr w15, [x13]
  str x15, [x12]
  ldr w15, [x13]
  str x15, [x12]
  ldr w15, [x13]
  sub x0, x0, 5
  cmp x0, 0
  b.gt stlftest_loop
  ldp x12, x13, [sp, #0x20]      /* restore from the slot they were saved to */
  ldp x14, x15, [sp, #0x10]
  add sp, sp, #0x40
  ret

/* same arguments as stlftest
   32-bit store followed by a 16-bit load, five pairs per loop pass */
stlftest32:
  sub sp, sp, #0x40
  stp x14, x15, [sp, #0x10]
  stp x12, x13, [sp, #0x20]  /* x12 = store ptr, x13 = load ptr */
  ldr x15, [x1]
  ldr w12, [x1]
  ldr w13, [x1, 4]
  add x12, x12, x1
  add x13, x13, x1
stlftest32_loop:
  str w15, [x12]
  ldrh w15, [x13]
  str w15, [x12]
  ldrh w15, [x13]
  str w15, [x12]
  ldrh w15, [x13]
  str w15, [x12]
  ldrh w15, [x13]
  str w15, [x12]
  ldrh w15, [x13]
  sub x0, x0, 5
  cmp x0, 0
  b.gt stlftest32_loop
  ldp x12, x13, [sp, #0x20]      /* restore from the slot they were saved to */
  ldp x14, x15, [sp, #0x10]
  add sp, sp, #0x40
  ret

/* same arguments as stlftest
   64-bit store followed by a 64-bit load (matched sizes), five pairs per loop pass */
matchedstlftest:
  sub sp, sp, #0x40
  stp x14, x15, [sp, #0x10]
  stp x12, x13, [sp, #0x20]  /* x12 = store ptr, x13 = load ptr */
  ldr x15, [x1]
  ldr w12, [x1]
  ldr w13, [x1, 4]
  add x12, x12, x1
  add x13, x13, x1
matchedstlftest_loop:
  str x15, [x12]
  ldr x15, [x13]
  str x15, [x12]
  ldr x15, [x13]
  str x15, [x12]
  ldr x15, [x13]
  str x15, [x12]
  ldr x15, [x13]
  str x15, [x12]
  ldr x15, [x13]
  sub x0, x0, 5
  cmp x0, 0
  b.gt matchedstlftest_loop
  ldp x12, x13, [sp, #0x20]      /* restore from the slot they were saved to */
  ldp x14, x15, [sp, #0x10]
  add sp, sp, #0x40
  ret
-------------------------------------------------------------------------------- /AsmGen/tests/VecStoreDataSchedTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    /// <summary>
    /// Generates the "Store 128-bit Data Scheduler Capacity Test": stores whose
    /// data register is written by the post-load instruction.
    /// </summary>
    public class VecStoreDataSchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata and the list of structure sizes to probe.
        /// </summary>
        /// <param name="low">Passed to GenerateCountArray as the low bound.</param>
        /// <param name="high">Passed to GenerateCountArray as the high bound.</param>
        /// <param name="step">Passed to GenerateCountArray as the step.</param>
        public VecStoreDataSchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "vecstoredatasched";
            this.Description = "Store 128-bit Data Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr1";
            this.GetFunctionCallParameters = "structIterations, A, B";
            this.DivideTimeByCount = false;
        }

        /// <summary>
        /// Emits the GCC (AT&amp;T syntax) x86 variant. xmm1 is loaded using rdi/rsi as
        /// an index, and every store writes xmm1.
        /// </summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            string postLoadInstr1 = " movups (%rdx, %rdi, 2), %xmm1";
            string postLoadInstr2 = " movups (%rdx, %rsi, 2), %xmm1";
            string[] dependentStores = new string[4];
            dependentStores[0] = " movups %xmm1, (%r8)";
            dependentStores[1] = " movups %xmm1, (%r8, %r14, 8)";
            dependentStores[2] = " movups %xmm1, (%r8, %r13, 8)";
            dependentStores[3] = " movups %xmm1, (%r8, %r12, 8)";

            // The same store sequence is used for both halves because the stores
            // themselves do not name rdi/rsi; only the post-load instructions differ.
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, dependentStores, dependentStores, false, postLoadInstrs1: postLoadInstr1, postLoadInstrs2: postLoadInstr2);
        }

        /// <summary>
        /// Emits the NASM (Intel syntax) x86 variant. xmm0 is written from rdi/rsi by
        /// cvtsi2ss, and every store writes xmm0.
        /// </summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string initInstrs = " vpcmpeqd xmm1, xmm1, xmm1\n vpxor xmm0, xmm0, xmm1";
            string postLoadInstr1 = " cvtsi2ss xmm0, rdi";
            string postLoadInstr2 = " cvtsi2ss xmm0, rsi";
            string[] dependentStores = new string[4];
            dependentStores[0] = " movups [r8], xmm0";
            dependentStores[1] = " movups [r8 + r14 * 8], xmm0";
            dependentStores[2] = " movups [r8 + r13 * 8], xmm0";
            dependentStores[3] = " movups [r8 + r12 * 8], xmm0";

            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, dependentStores, dependentStores, false, initInstrs, postLoadInstr1, postLoadInstr2);
        }

        /// <summary>
        /// Emits the ARM variant. Still marked todo upstream: it uses 32-bit scalar
        /// stores (str w15) indexed by w25/w26 rather than 128-bit vector stores.
        /// </summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // todo
            string[] dependentStores = new string[4];
            dependentStores[0] = " str w15, [x2, w25, uxtw #2]";
            dependentStores[1] = " str w15, [x2, w25, uxtw #2]";
            dependentStores[2] = " str w15, [x2, w25, uxtw #2]";
            dependentStores[3] = " str w15, [x2, w25, uxtw #2]";

            string[] dependentStores1 = new string[4];
            dependentStores1[0] = " str w15, [x2, w26, uxtw #2]";
            dependentStores1[1] = " str w15, [x2, w26, uxtw #2]";
            dependentStores1[2] = " str w15, [x2, w26, uxtw #2]";
            dependentStores1[3] = " str w15, [x2, w26, uxtw #2]";
            UarchTestHelpers.GenerateArmAsmDivStructureTestFuncs(sb, this.Counts, this.Prefix, dependentStores, dependentStores1, false);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/Mul16SchedTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    /// <summary>
    /// Generates the "Integer (16-bit mul) Scheduler Capacity Test": multiplies
    /// that take di/si (x86) or w25/w26 (ARM) as a source operand.
    /// </summary>
    public class Mul16SchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata and the list of structure sizes to probe.
        /// </summary>
        /// <param name="low">Passed to GenerateCountArray as the low bound.</param>
        /// <param name="high">Passed to GenerateCountArray as the high bound.</param>
        /// <param name="step">Passed to GenerateCountArray as the step.</param>
        public Mul16SchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mul16sched";
            this.Description = "Integer (16-bit mul) Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>
        /// Emits the GCC (AT&amp;T syntax) x86 variant: 16-bit imul with %di in the first
        /// sequence and %si in the second, targeting r15w..r12w.
        /// </summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // trying to unsuccessfully counter some weird behavior on zhaoxin
            string resetMulsInstr = "mov $11, %r15\n mov $13, %r14\n mov $15, %r13\n mov $17, %r12\n";
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " imul %di, %r15w";
            unrolledMuls[1] = " imul %di, %r14w";
            unrolledMuls[2] = " imul %di, %r13w";
            unrolledMuls[3] = " imul %di, %r12w";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " imul %si, %r15w";
            unrolledMuls1[1] = " imul %si, %r14w";
            unrolledMuls1[2] = " imul %si, %r13w";
            unrolledMuls1[3] = " imul %si, %r12w";
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false, postLoadInstrs1: resetMulsInstr, postLoadInstrs2: resetMulsInstr);
        }

        /// <summary>
        /// Emits the NASM (Intel syntax) x86 variant: 16-bit imul with di in the first
        /// sequence and si in the second, targeting r15w..r12w.
        /// </summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " imul r15w, di";
            unrolledMuls[1] = " imul r14w, di";
            unrolledMuls[2] = " imul r13w, di";
            unrolledMuls[3] = " imul r12w, di";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " imul r15w, si";
            unrolledMuls1[1] = " imul r14w, si";
            unrolledMuls1[2] = " imul r13w, si";
            unrolledMuls1[3] = " imul r12w, si";

            // Pass the si-based sequence as the second instruction set, as the GCC and
            // ARM variants do; previously the di-based sequence was passed twice and
            // unrolledMuls1 was never used.
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
        }

        /// <summary>
        /// Emits the ARM variant. It uses 32-bit W-register multiplies, with w25 in the
        /// first sequence and w26 in the second.
        /// </summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " mul w15, w15, w25";
            unrolledMuls[1] = " mul w14, w14, w25";
            unrolledMuls[2] = " mul w13, w13, w25";
            unrolledMuls[3] = " mul w12, w12, w25";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " mul w15, w15, w26";
            unrolledMuls1[1] = " mul w14, w14, w26";
            unrolledMuls1[2] = " mul w13, w13, w26";
            unrolledMuls1[3] = " mul w12, w12, w26";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
        }
    }
}
--------------------------------------------------------------------------------
/AsmGen/tests/Add128SNsqTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    /// <summary>
    /// Generates the "128-bit Integer Add Scheduler Capacity Test, excluding NSQ":
    /// a set of paddd instructions that read xmm0 (written by the post-load
    /// instruction) and a set that does not.
    /// </summary>
    public class Add128NsqTest : UarchTest
    {
        // Upper bound given to the constructor; forwarded to the NSQ helper functions.
        private int high;

        /// <summary>
        /// Sets up the test metadata and the list of structure sizes to probe.
        /// </summary>
        /// <param name="low">Passed to GenerateCountArray as the low bound.</param>
        /// <param name="high">Passed to GenerateCountArray as the high bound; also kept for the NSQ helpers.</param>
        /// <param name="step">Passed to GenerateCountArray as the step.</param>
        public Add128NsqTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "add128nsq";
            this.Description = "128-bit Integer Add Scheduler Capacity Test, excluding NSQ";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr, int *arr2";
            this.GetFunctionCallParameters = "structIterations, A, B";
            this.DivideTimeByCount = false;
            this.high = high;
        }

        /// <summary>
        /// Emits the GCC (AT&amp;T syntax) x86 variant.
        /// </summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // xmm3/xmm4 are zeroed and xmm5 is seeded from r15 for the independent adds.
            string initInstrs = " pxor %xmm3, %xmm3\n pxor %xmm4, %xmm4\n movq %r15, %xmm5\n";
            string postLoadInstr = " movq %rdi, %xmm0\n";
            //string postLoadInstr2 = " movq %rsi, %xmm0\n pshufd $0, %xmm0, %xmm0\n";
            // xmm0 is dependent on ptr chasing load
            string[] depAdds = new string[2];
            depAdds[0] = " paddd %xmm0, %xmm1";
            depAdds[1] = " paddd %xmm0, %xmm2";

            string[] indepAdds = new string[2];
            indepAdds[0] = " paddd %xmm3, %xmm5";
            indepAdds[1] = " paddd %xmm4, %xmm5";

            UarchTestHelpers.GenerateX86AsmNsqTestFuncs(sb, this.high, this.Counts, this.Prefix, depAdds, indepAdds, false, initInstrs, postLoadInstr);
        }

        /// <summary>
        /// Emits the NASM (Intel syntax) x86 variant; instruction-for-instruction the
        /// same sequence as the GCC variant with operands in destination-first order.
        /// </summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            // Intel syntax is destination-first: seed xmm5 from r15, as the GCC variant
            // does. The previous "movq r15, xmm5" wrote r15 from xmm5 instead.
            string initInstrs = " pxor xmm3, xmm3\n pxor xmm4, xmm4\n movq xmm5, r15\n";
            string postLoadInstr = " movq xmm0, rdi";
            //string postLoadInstr2 = " movq xmm0, rsi";

            string[] depAdds = new string[2];
            depAdds[0] = " paddd xmm1, xmm0";
            depAdds[1] = " paddd xmm2, xmm0";

            // Both independent adds accumulate into xmm5, matching the GCC variant.
            // The previous xmm6 destination is never set by initInstrs.
            // NOTE(review): xmm6 is also callee-saved under the Windows x64 ABI, which
            // is presumably what the NASM build targets - confirm against the helper.
            string[] indepAdds = new string[2];
            indepAdds[0] = " paddd xmm5, xmm3";
            indepAdds[1] = " paddd xmm5, xmm4";
            UarchTestHelpers.GenerateX86NasmNsqTestFuncs(sb, this.high, this.Counts, this.Prefix, depAdds, indepAdds, false, initInstrs, postLoadInstr);
        }

        /// <summary>
        /// Emits the ARM variant. Still marked todo upstream: it calls the generic
        /// structure-test helper rather than an NSQ-specific one, and initInstrs loads
        /// q18 twice.
        /// </summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            // todo
            string initInstrs = " ldr q18, [x1]\n ldr q18, [x1]\n ldr q19, [x2]\n ldr q20, [x2]\n ldr q21, [x2]\n";
            string postLoadInstr1 = " mov v17.s[0], w25\n";
            string postLoadInstr2 = " mov v17.s[0], w26\n";
            string[] unrolledAdds = new string[4];
            unrolledAdds[0] = " add v18.4s, v18.4s, v17.4s";
            unrolledAdds[1] = " add v19.4s, v19.4s, v17.4s";
            unrolledAdds[2] = " add v20.4s, v20.4s, v17.4s";
            unrolledAdds[3] = " add v21.4s, v21.4s, v17.4s";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledAdds, unrolledAdds, false, initInstrs, postLoadInstr1, postLoadInstr2);
        }
    }
}
-------------------------------------------------------------------------------- /AsmGen/tests/Mul32SchedTest.cs: --------------------------------------------------------------------------------
using System.Text;

namespace AsmGen
{
    /// <summary>
    /// Generates the "Integer (32-bit mul) Scheduler Capacity Test": multiplies
    /// that take edi/esi (x86) or w25/w26 (ARM) as a source operand.
    /// </summary>
    public class Mul32SchedTest : UarchTest
    {
        /// <summary>
        /// Sets up the test metadata and the list of structure sizes to probe.
        /// </summary>
        /// <param name="low">Passed to GenerateCountArray as the low bound.</param>
        /// <param name="high">Passed to GenerateCountArray as the high bound.</param>
        /// <param name="step">Passed to GenerateCountArray as the step.</param>
        public Mul32SchedTest(int low, int high, int step)
        {
            this.Counts = UarchTestHelpers.GenerateCountArray(low, high, step);
            this.Prefix = "mul32sched";
            this.Description = "Integer (32-bit mul) Scheduler Capacity Test";
            this.FunctionDefinitionParameters = "uint64_t iterations, int *arr";
            this.GetFunctionCallParameters = "structIterations, A";
            this.DivideTimeByCount = false;
        }

        /// <summary>
        /// Emits the GCC (AT&amp;T syntax) x86 variant: 32-bit imul with %edi in the first
        /// sequence and %esi in the second, targeting r15d..r12d.
        /// </summary>
        public override void GenerateX86GccAsm(StringBuilder sb)
        {
            // trying to unsuccessfully counter some weird behavior on zhaoxin
            string resetMulsInstr = "mov $11, %r15\n mov $13, %r14\n mov $15, %r13\n mov $17, %r12\n";
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " imul %edi, %r15d";
            unrolledMuls[1] = " imul %edi, %r14d";
            unrolledMuls[2] = " imul %edi, %r13d";
            unrolledMuls[3] = " imul %edi, %r12d";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " imul %esi, %r15d";
            unrolledMuls1[1] = " imul %esi, %r14d";
            unrolledMuls1[2] = " imul %esi, %r13d";
            unrolledMuls1[3] = " imul %esi, %r12d";
            UarchTestHelpers.GenerateX86AsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false, postLoadInstrs1: resetMulsInstr, postLoadInstrs2: resetMulsInstr);
        }

        /// <summary>
        /// Emits the NASM (Intel syntax) x86 variant: 32-bit imul with edi in the first
        /// sequence and esi in the second, targeting r15d..r12d.
        /// </summary>
        public override void GenerateX86NasmAsm(StringBuilder sb)
        {
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " imul r15d, edi";
            unrolledMuls[1] = " imul r14d, edi";
            unrolledMuls[2] = " imul r13d, edi";
            unrolledMuls[3] = " imul r12d, edi";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " imul r15d, esi";
            unrolledMuls1[1] = " imul r14d, esi";
            unrolledMuls1[2] = " imul r13d, esi";
            unrolledMuls1[3] = " imul r12d, esi";

            // Pass the esi-based sequence as the second instruction set, as the GCC and
            // ARM variants do; previously the edi-based sequence was passed twice and
            // unrolledMuls1 was never used.
            UarchTestHelpers.GenerateX86NasmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
        }

        /// <summary>
        /// Emits the ARM variant: 32-bit W-register multiplies with w25 in the first
        /// sequence and w26 in the second.
        /// </summary>
        public override void GenerateArmAsm(StringBuilder sb)
        {
            string[] unrolledMuls = new string[4];
            unrolledMuls[0] = " mul w15, w15, w25";
            unrolledMuls[1] = " mul w14, w14, w25";
            unrolledMuls[2] = " mul w13, w13, w25";
            unrolledMuls[3] = " mul w12, w12, w25";

            string[] unrolledMuls1 = new string[4];
            unrolledMuls1[0] = " mul w15, w15, w26";
            unrolledMuls1[1] = " mul w14, w14, w26";
            unrolledMuls1[2] = " mul w13, w13, w26";
            unrolledMuls1[3] = " mul w12, w12, w26";
            UarchTestHelpers.GenerateArmAsmStructureTestFuncs(sb, this.Counts, this.Prefix, unrolledMuls, unrolledMuls1, false);
        }
    }
}
--------------------------------------------------------------------------------