├── .gitattributes ├── .gitignore ├── .vs └── PmcReader │ ├── config │ └── applicationhost.config │ ├── v15 │ └── .suo │ └── v16 │ └── TestStore │ └── 0 │ ├── 000.testlog │ └── testlog.manifest ├── AMD ├── Amd10hCpu.cs ├── Amd15hCpu.cs ├── Amd16hCpu.cs ├── Amd17hCpu.cs ├── Amd19hCpu.cs ├── Bulldozer.cs ├── Jaguar.cs ├── K10.cs ├── Piledriver.cs ├── PiledriverNorthbridge.cs ├── Zen.cs ├── Zen1.cs ├── Zen2.cs ├── Zen2DataFabric.cs ├── Zen2L3Cache.cs ├── Zen3.cs ├── Zen3L3Cache.cs ├── Zen4.cs ├── Zen4DataFabric.cs ├── Zen4L3Cache.cs ├── Zen5.cs ├── Zen5DataFabric.cs ├── Zen5L3Cache.cs └── ZenL3Cache.cs ├── App.config ├── Cpu.cs ├── GenericMonitoringArea.cs ├── HaswellForm.Designer.cs ├── HaswellForm.cs ├── HaswellForm.resx ├── Intel ├── AlderLake.cs ├── AlderLakeL3.cs ├── AlderLakeUncore.cs ├── GoldmontPlus.cs ├── Haswell.cs ├── HaswellClientArb.cs ├── HaswellClientL3.cs ├── HaswellClientUncore.cs ├── HaswellEL3.cs ├── MeteorLake.cs ├── MeteorLakeArb.cs ├── MeteorLakeL3.cs ├── MeteorLakeUncore.cs ├── ModernIntelCpu.cs ├── SandyBridge.cs ├── SandyBridgeEL3.cs ├── SandyBridgeUncore.cs ├── Skylake.cs ├── SkylakeClientArb.cs ├── SkylakeClientL3.cs └── SkylakeClientUncore.cs ├── Interop ├── AdvApi32.cs ├── Kernel32.cs ├── KernelDriver.cs ├── OpCode.cs ├── Ring0.cs ├── ThreadAffinity.cs ├── WinRing0.sys ├── WinRing0x64.sys └── winpmem_64.sys ├── LICENSE ├── PmcReader.csproj ├── PmcReader.sln ├── Program.cs ├── Properties ├── AssemblyInfo.cs ├── Resources.Designer.cs ├── Resources.resx ├── Settings.Designer.cs └── Settings.settings ├── README.md └── app.manifest /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | obj/ 3 | .vs/ -------------------------------------------------------------------------------- /.vs/PmcReader/v15/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/.vs/PmcReader/v15/.suo -------------------------------------------------------------------------------- /.vs/PmcReader/v16/TestStore/0/000.testlog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/.vs/PmcReader/v16/TestStore/0/000.testlog -------------------------------------------------------------------------------- /.vs/PmcReader/v16/TestStore/0/testlog.manifest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/.vs/PmcReader/v16/TestStore/0/testlog.manifest -------------------------------------------------------------------------------- /AMD/Amd10hCpu.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | using System.Windows.Forms; 4 | using System.Drawing; 5 | 6 | namespace PmcReader.AMD 7 | { 8 | public class Amd10hCpu : GenericMonitoringArea 9 | { 10 | public const uint MSR_TSC = 0x00000010; 11 | public const uint MSR_PERF_CTR_0 = 0xC0010004; 12 | public const uint MSR_PERF_CTR_1 = 0xC0010005; 13 | public const uint MSR_PERF_CTR_2 = 0xC0010006; 14 | public const uint MSR_PERF_CTR_3 = 0xC0010007; 15 | public const uint MSR_PERF_CTL_0 = 0xC0010000; 16 | public const uint MSR_PERF_CTL_1 = 0xC0010001; 17 | public const uint MSR_PERF_CTL_2 = 0xC0010002; 18 | public const uint MSR_PERF_CTL_3 = 0xC0010003; 19 | 20 | public const uint HWCR = 0xC0010015; 21 | 22 | public NormalizedCoreCounterData[] NormalizedThreadCounts; 23 | public NormalizedCoreCounterData NormalizedTotalCounts; 24 | private ulong[] lastThreadTsc; 25 | 26 | public Amd10hCpu() 27 | { 28 | architectureName = "AMD 10h Family"; 29 | lastThreadTsc = new ulong[GetThreadCount()]; 30 | } 31 | 32 | /// 33 | /// Program core perf counters 34 | /// 35 | /// Counter 0 event select 36 | /// Counter 1 event select 37 | /// Counter 2 event select 38 | /// Counter 3 event select 39 | public void ProgramPerfCounters(ulong ctr0, ulong ctr1, ulong ctr2, ulong ctr3) 40 | { 41 | for (int threadIdx = 0; threadIdx < this.GetThreadCount(); threadIdx++) 42 | { 43 | ThreadAffinity.Set(1UL << threadIdx); 44 | Ring0.WriteMsr(MSR_PERF_CTL_0, ctr0); 45 | Ring0.WriteMsr(MSR_PERF_CTL_1, ctr1); 46 | Ring0.WriteMsr(MSR_PERF_CTL_2, ctr2); 47 | Ring0.WriteMsr(MSR_PERF_CTL_3, ctr3); 48 | } 49 | } 50 | 51 | /// 52 | /// Update fixed counters for thread, affinity must be set going in 53 | /// 54 | /// thread to update fixed counters for 55 | public void ReadFixedCounters(int threadIdx, out ulong elapsedTsc) 56 | { 57 | ulong tsc; 58 | Ring0.ReadMsr(MSR_TSC, out tsc); 59 | 60 | elapsedTsc = tsc; 61 | if (tsc > lastThreadTsc[threadIdx]) 62 | elapsedTsc = tsc - lastThreadTsc[threadIdx]; 63 | else if (lastThreadTsc[threadIdx] > 0) 64 | elapsedTsc = tsc + (0xFFFFFFFFFFFFFFFF - lastThreadTsc[threadIdx]); 65 | 66 | lastThreadTsc[threadIdx] = tsc; 67 | } 68 | 69 | /// 70 | /// initialize/reset accumulated totals for core counter data 71 | /// 72 | public void InitializeCoreTotals() 73 | { 74 | if (NormalizedTotalCounts == null) 75 | { 76 | NormalizedTotalCounts = new NormalizedCoreCounterData(); 77 | } 78 | 79 | NormalizedTotalCounts.tsc = 0; 80 | NormalizedTotalCounts.ctr0 = 0; 81 | NormalizedTotalCounts.ctr1 = 0; 82 | NormalizedTotalCounts.ctr2 = 0; 83 | NormalizedTotalCounts.ctr3 = 0; 84 | } 85 | 86 | /// 87 | /// Read and update counter data for thread 88 | /// 89 | /// Thread to set affinity to 90 | public void UpdateThreadCoreCounterData(int threadIdx) 91 | { 92 | ThreadAffinity.Set(1UL << threadIdx); 93 | float normalizationFactor = GetNormalizationFactor(threadIdx); 94 | ulong tsc; 95 | ulong ctr0, ctr1, ctr2, ctr3; 96 | ReadFixedCounters(threadIdx, out tsc); 97 | ctr0 = ReadAndClearMsr(MSR_PERF_CTR_0); 98 | ctr1 = ReadAndClearMsr(MSR_PERF_CTR_1); 99 | ctr2 = ReadAndClearMsr(MSR_PERF_CTR_2); 100 | ctr3 = ReadAndClearMsr(MSR_PERF_CTR_3); 101 | 102 | if (NormalizedThreadCounts == null) NormalizedThreadCounts = new NormalizedCoreCounterData[threadCount]; 103 | if (NormalizedThreadCounts[threadIdx] == null) NormalizedThreadCounts[threadIdx] = new NormalizedCoreCounterData(); 104 | 105 | if (NormalizedThreadCounts[threadIdx].NormalizationFactor != 0.0f) 106 | { 107 | NormalizedThreadCounts[threadIdx].totalctr0 += ctr0; 108 | NormalizedThreadCounts[threadIdx].totalctr1 += ctr1; 109 | NormalizedThreadCounts[threadIdx].totalctr2 += ctr2; 110 | NormalizedThreadCounts[threadIdx].totalctr3 += ctr3; 111 | NormalizedTotalCounts.totalctr0 += ctr0; 112 | NormalizedTotalCounts.totalctr1 += ctr1; 113 | NormalizedTotalCounts.totalctr2 += ctr2; 114 | NormalizedTotalCounts.totalctr3 += ctr3; 115 | } 116 | 117 | NormalizedThreadCounts[threadIdx].tsc = tsc * normalizationFactor; 118 | NormalizedThreadCounts[threadIdx].ctr0 = ctr0 * normalizationFactor; 119 | NormalizedThreadCounts[threadIdx].ctr1 = ctr1 * normalizationFactor; 120 | NormalizedThreadCounts[threadIdx].ctr2 = ctr2 * normalizationFactor; 121 | NormalizedThreadCounts[threadIdx].ctr3 = ctr3 * normalizationFactor; 122 | NormalizedThreadCounts[threadIdx].NormalizationFactor = normalizationFactor; 123 | NormalizedTotalCounts.tsc += NormalizedThreadCounts[threadIdx].tsc; 124 | NormalizedTotalCounts.ctr0 += NormalizedThreadCounts[threadIdx].ctr0; 125 | NormalizedTotalCounts.ctr1 += NormalizedThreadCounts[threadIdx].ctr1; 126 | NormalizedTotalCounts.ctr2 += NormalizedThreadCounts[threadIdx].ctr2; 127 | NormalizedTotalCounts.ctr3 += NormalizedThreadCounts[threadIdx].ctr3; 128 | } 129 | 130 | /// 131 | /// Assemble overall counter values into a Tuple of string, float array. 132 | /// 133 | /// Description for counter 0 value 134 | /// Description for counter 1 value 135 | /// Description for counter 2 value 136 | /// Description for counter 3 value 137 | /// Array to put in results object 138 | public Tuple[] GetOverallCounterValues(string ctr0, string ctr1, string ctr2, string ctr3) 139 | { 140 | NormalizedCoreCounterData dataToLog = this.NormalizedTotalCounts; 141 | if (this.targetLogCoreIndex >= 0) 142 | { 143 | dataToLog = NormalizedThreadCounts[this.targetLogCoreIndex]; 144 | } 145 | 146 | Tuple[] retval = new Tuple[5]; 147 | retval[0] = new Tuple("TSC", dataToLog.tsc); 148 | retval[1] = new Tuple(ctr0, dataToLog.ctr0); 149 | retval[2] = new Tuple(ctr1, dataToLog.ctr1); 150 | retval[3] = new Tuple(ctr2, dataToLog.ctr2); 151 | retval[4] = new Tuple(ctr3, dataToLog.ctr3); 152 | return retval; 153 | } 154 | 155 | /// 156 | /// Get perf ctl value assuming default values for stupid stuff 157 | /// 158 | /// Perf event, low 16 bits 159 | /// Unit mask 160 | /// only increment on transition 161 | /// count mask 162 | /// Perf event, high 8 bits 163 | /// 164 | public static ulong GetPerfCtlValue(byte perfEvent, byte umask, bool edge, byte cmask, byte perfEventHi) 165 | { 166 | return GetPerfCtlValue(perfEvent, 167 | umask, 168 | OsUsrMode.All, 169 | edge, 170 | interrupt: false, 171 | enable: true, 172 | invert: false, 173 | cmask, 174 | perfEventHi, 175 | HostGuestOnly.All); 176 | } 177 | 178 | /// 179 | /// Get core perf ctl value 180 | /// 181 | /// Low 16 bits of performance event 182 | /// perf event umask 183 | /// Count in os or user mode 184 | /// only increment on transition 185 | /// generate apic interrupt on overflow 186 | /// enable perf ctr 187 | /// invert cmask 188 | /// 0 = increment by event count. >0 = increment by 1 if event count in clock cycle >= cmask 189 | /// high 4 bits of performance event 190 | /// Count host or guest events 191 | /// value for perf ctl msr 192 | public static ulong GetPerfCtlValue(byte perfEvent, byte umask, OsUsrMode osUsrMode, bool edge, bool interrupt, bool enable, bool invert, byte cmask, byte perfEventHi, HostGuestOnly hostGuestOnly) 193 | { 194 | return perfEvent | 195 | (ulong)umask << 8 | 196 | ((ulong)osUsrMode) << 16 | 197 | (edge ? 1UL : 0UL) << 18 | 198 | (interrupt ? 1UL : 0UL) << 20 | 199 | (enable ? 1UL : 0UL) << 22 | 200 | (invert ? 1UL : 0UL) << 23 | 201 | (ulong)cmask << 24 | 202 | (ulong)perfEventHi << 32 | 203 | ((ulong)hostGuestOnly) << 40; 204 | } 205 | 206 | /// 207 | /// Selects what ring(s) events are counted for 208 | /// 209 | public enum OsUsrMode 210 | { 211 | None = 0b00, 212 | Usr = 0b01, 213 | OS = 0b10, 214 | All = 0b11 215 | } 216 | 217 | /// 218 | /// Whether to count events for guest (VM) or host 219 | /// 220 | public enum HostGuestOnly 221 | { 222 | All = 0b00, 223 | Guest = 0b01, 224 | Host = 0b10, 225 | AllSvme = 0b11 226 | } 227 | 228 | public class NormalizedCoreCounterData 229 | { 230 | /// 231 | /// Time stamp counter 232 | /// Increments at P0 frequency 233 | /// 234 | public float tsc; 235 | 236 | /// 237 | /// Programmable performance counter values 238 | /// 239 | public float ctr0; 240 | public float ctr1; 241 | public float ctr2; 242 | public float ctr3; 243 | 244 | public float NormalizationFactor; 245 | 246 | public ulong totalctr0; 247 | public ulong totalctr1; 248 | public ulong totalctr2; 249 | public ulong totalctr3; 250 | } 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /AMD/Jaguar.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using PmcReader.Interop; 3 | 4 | namespace PmcReader.AMD 5 | { 6 | public class Jaguar : Amd16hCpu 7 | { 8 | public Jaguar() 9 | { 10 | List configs = new List(); 11 | configs.Add(new BpuMonitoringConfig(this)); 12 | monitoringConfigs = configs.ToArray(); 13 | architectureName = "Jaguar"; 14 | } 15 | 16 | public class BpuMonitoringConfig : MonitoringConfig 17 | { 18 | private Jaguar cpu; 19 | public string GetConfigName() { return "Branch Prediction"; } 20 | 21 | public BpuMonitoringConfig(Jaguar amdCpu) 22 | { 23 | cpu = amdCpu; 24 | } 25 | 26 | public string[] GetColumns() { return columns; } 27 | 28 | public void Initialize() 29 | { 30 | cpu.ProgramCorePerfCounters( 31 | GetPerfCtlValue(0xC0, 0, false, 0, 0), // ret instr 32 | GetPerfCtlValue(0x76, 0, false, 0, 0), // cycles 33 | GetPerfCtlValue(0xC2, 0, false, 0, 0), // ret branch 34 | GetPerfCtlValue(0xC3, 0, false, 0, 0)); // ret misp branch 35 | } 36 | 37 | public MonitoringUpdateResults Update() 38 | { 39 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 40 | results.unitMetrics = new string[cpu.GetThreadCount()][]; 41 | cpu.InitializeCoreTotals(); 42 | for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++) 43 | { 44 | cpu.UpdateThreadCoreCounterData(threadIdx); 45 | results.unitMetrics[threadIdx] = computeMetrics("Thread " + threadIdx, cpu.NormalizedThreadCounts[threadIdx]); 46 | } 47 | 48 | results.overallMetrics = computeMetrics("Overall", cpu.NormalizedTotalCounts); 49 | results.overallCounterValues = cpu.GetOverallCounterValues("Instructions", "Cycles", "Retired Branches", "Retired Mispredicted Branches"); 50 | return results; 51 | } 52 | 53 | public string[] columns = new string[] { "Item", "Active Cycles", "Instructions", "IPC", "BPU Acc", "Branch MPKI", "% Branches" }; 54 | 55 | public string GetHelpText() 56 | { 57 | return "aaaaaa"; 58 | } 59 | 60 | private string[] computeMetrics(string label, NormalizedCoreCounterData counterData) 61 | { 62 | float instr = counterData.ctr0; 63 | float cycles = counterData.ctr1; 64 | return new string[] { label, 65 | FormatLargeNumber(cycles), 66 | FormatLargeNumber(instr), 67 | string.Format("{0:F2}", instr / cycles), 68 | FormatPercentage(counterData.ctr2 - counterData.ctr3, counterData.ctr2), 69 | string.Format("{0:F2}", 1000 * counterData.ctr3 / instr), 70 | FormatPercentage(counterData.ctr3, instr) 71 | }; 72 | } 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /AMD/Zen.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Runtime.InteropServices.WindowsRuntime; 3 | using PmcReader.Interop; 4 | 5 | namespace PmcReader.AMD 6 | { 7 | public class Zen : Amd17hCpu 8 | { 9 | public Zen() 10 | { 11 | monitoringConfigs = new MonitoringConfig[1]; 12 | monitoringConfigs[0] = new BpuMonitoringConfig(this); 13 | architectureName = "Zen 1"; 14 | } 15 | 16 | public class BpuMonitoringConfig : MonitoringConfig 17 | { 18 | private Zen cpu; 19 | public string GetConfigName() { return "Branch Prediction and Fusion"; } 20 | 21 | public BpuMonitoringConfig(Zen amdCpu) 22 | { 23 | cpu = amdCpu; 24 | } 25 | 26 | public string[] GetColumns() 27 | { 28 | return columns; 29 | } 30 | 31 | public void Initialize() 32 | { 33 | cpu.EnablePerformanceCounters(); 34 | for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++) 35 | { 36 | ThreadAffinity.Set(1UL << threadIdx); 37 | // Set PERF_CTR0 to count retired branches 38 | Ring0.WriteMsr(MSR_PERF_CTL_0, GetPerfCtlValue(0xC2, 0, true, true, false, false, true, false, 0, 0, false, false)); 39 | 40 | // PERF_CTR1 = mispredicted retired branches 41 | Ring0.WriteMsr(MSR_PERF_CTL_1, GetPerfCtlValue(0xC3, 0, true, true, false, false, true, false, 0, 0, false, false)); 42 | 43 | // PERF_CTR2 = retired instrs 44 | Ring0.WriteMsr(MSR_PERF_CTL_2, GetPerfCtlValue(0xC0, 0, true, true, false, false, true, false, 0, 0, false, false)); 45 | 46 | // PERF_CTR3 = cycles not in halt 47 | Ring0.WriteMsr(MSR_PERF_CTL_3, GetPerfCtlValue(0x76, 0, true, true, false, false, true, false, 0, 0, false, false)); 48 | 49 | // PERF_CTR4 = decoder overrides existing prediction 50 | Ring0.WriteMsr(MSR_PERF_CTL_4, GetPerfCtlValue(0x91, 0, true, true, false, false, true, false, 0, 0, false, false)); 51 | 52 | // PERF_CTR5 = retired fused branch instructions 53 | Ring0.WriteMsr(MSR_PERF_CTL_5, GetPerfCtlValue(0xD0, 0, true, true, false, false, true, false, 0, 1, false, false)); 54 | } 55 | } 56 | 57 | public MonitoringUpdateResults Update() 58 | { 59 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 60 | results.unitMetrics = new string[cpu.GetThreadCount()][]; 61 | cpu.InitializeCoreTotals(); 62 | for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++) 63 | { 64 | cpu.UpdateThreadCoreCounterData(threadIdx); 65 | results.unitMetrics[threadIdx] = computeMetrics("Thread " + threadIdx, cpu.NormalizedThreadCounts[threadIdx]); 66 | } 67 | 68 | results.overallMetrics = computeMetrics("Overall", cpu.NormalizedTotalCounts); 69 | results.overallCounterValues = cpu.GetOverallCounterValues("Retired Branches", "Retired Misp Branches", "L1 BTB Override", "L2 BTB Override", "Decoder Override", "Fused Branches"); 70 | return results; 71 | } 72 | 73 | public string[] columns = new string[] { "Item", "Active Cycles", "Instructions", "IPC", "BPU Accuracy", "Branch MPKI", "Branches", "Mispredicted Branches", "Fused Branches" }; 74 | 75 | public string GetHelpText() 76 | { 77 | return ""; 78 | } 79 | 80 | private string[] computeMetrics(string label, NormalizedCoreCounterData counterData) 81 | { 82 | 83 | return new string[] { label, 84 | FormatLargeNumber(counterData.ctr3), 85 | FormatLargeNumber(counterData.ctr2), 86 | string.Format("{0:F2}", counterData.ctr2 / counterData.ctr3), 87 | string.Format("{0:F2}%", 100 * (1 - counterData.ctr1 / counterData.ctr0)), 88 | string.Format("{0:F2}", counterData.ctr1 / counterData.ctr3 * 1000), 89 | FormatLargeNumber(counterData.ctr0), 90 | FormatLargeNumber(counterData.ctr1), 91 | FormatLargeNumber(counterData.ctr5)}; 92 | } 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /AMD/Zen2DataFabric.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using PmcReader.Interop; 4 | 5 | namespace PmcReader.AMD 6 | { 7 | public class Zen2DataFabric : Amd17hCpu 8 | { 9 | public enum DfType 10 | { 11 | Client = 0, 12 | DestkopThreadripper = 1, 13 | Server = 2 14 | } 15 | public Zen2DataFabric(DfType dfType) 16 | { 17 | architectureName = "Zen 2 Data Fabric"; 18 | List monitoringConfigList = new List(); 19 | if (dfType == DfType.Client) monitoringConfigList.Add(new ClientBwConfig(this)); 20 | else if (dfType == DfType.DestkopThreadripper) monitoringConfigList.Add(new TrDramBwConfig(this)); 21 | monitoringConfigs = monitoringConfigList.ToArray(); 22 | } 23 | 24 | public class TrDramBwConfig : MonitoringConfig 25 | { 26 | private Zen2DataFabric dataFabric; 27 | private long lastUpdateTime; 28 | private const int monitoringThread = 1; 29 | 30 | public string[] columns = new string[] { "Item", "DRAM BW" }; 31 | public string GetHelpText() { return ""; } 32 | public TrDramBwConfig(Zen2DataFabric dataFabric) 33 | { 34 | this.dataFabric = dataFabric; 35 | } 36 | 37 | public string GetConfigName() { return "TR DRAM Bandwidth?"; } 38 | public string[] GetColumns() { return columns; } 39 | public void Initialize() 40 | { 41 | // Undocumented data fabric mentioned in prelimary PPR, but removed in the latest one 42 | // prelimary PPR suggests calculating DRAM bandwidth by adding up all these events and 43 | // multiplying by 64 44 | // These four are always zero on the 3950X. Possibly for quad channel? 45 | /*ulong mysteryDramBytes7 = 0x00000001004038C7; 46 | ulong mysteryDramBytes6 = 0x0000000100403887; */ 47 | // ulong mysteryDramBytes1 = 0x0000000000403847; 48 | //ulong mysteryDramBytes0 = 0x0000000000403807; 49 | 50 | // Nemes says these four return counts on her TR 51 | ulong mysteryDramBytes5 = 0x0000000100403847; 52 | ulong mysteryDramBytes4 = 0x0000000100403807; 53 | ulong mysteryDramBytes3 = 0x00000000004038C7; 54 | ulong mysteryDramBytes2 = 0x0000000000403887; 55 | 56 | ThreadAffinity.Set(1UL << monitoringThread); 57 | Ring0.WriteMsr(MSR_DF_PERF_CTL_0, mysteryDramBytes4); 58 | Ring0.WriteMsr(MSR_DF_PERF_CTL_1, mysteryDramBytes5); 59 | Ring0.WriteMsr(MSR_DF_PERF_CTL_2, mysteryDramBytes2); 60 | Ring0.WriteMsr(MSR_DF_PERF_CTL_3, mysteryDramBytes3); 61 | 62 | lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds(); 63 | } 64 | 65 | public MonitoringUpdateResults Update() 66 | { 67 | float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime); 68 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 69 | results.unitMetrics = new string[4][]; 70 | ThreadAffinity.Set(1UL << monitoringThread); 71 | ulong mysteryDramBytes4 = ReadAndClearMsr(MSR_DF_PERF_CTR_0) * 64; 72 | ulong mysteryDramBytes5 = ReadAndClearMsr(MSR_DF_PERF_CTR_1) * 64; 73 | ulong mysteryDramBytes2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2) * 64; 74 | ulong mysteryDramBytes3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3) * 64; 75 | 76 | results.unitMetrics[0] = new string[] { "DF Evt 0x87 Umask 0x38", FormatLargeNumber(mysteryDramBytes2 * normalizationFactor) + "B/s" }; 77 | results.unitMetrics[1] = new string[] { "DF Evt 0xC7 Umask 0x38", FormatLargeNumber(mysteryDramBytes3 * normalizationFactor) + "B/s" }; 78 | results.unitMetrics[2] = new string[] { "DF Evt 0x107 Umask 0x38", FormatLargeNumber(mysteryDramBytes4 * normalizationFactor) + "B/s" }; 79 | results.unitMetrics[3] = new string[] { "DF Evt 0x147 Umask 0x38", FormatLargeNumber(mysteryDramBytes5 * normalizationFactor) + "B/s" }; 80 | 81 | results.overallMetrics = new string[] { "Overall", 82 | FormatLargeNumber((mysteryDramBytes4 + mysteryDramBytes5 + mysteryDramBytes2 + mysteryDramBytes3) * normalizationFactor) + "B/s" }; 83 | return results; 84 | } 85 | } 86 | 87 | public class ClientBwConfig : MonitoringConfig 88 | { 89 | private Zen2DataFabric dataFabric; 90 | private long lastUpdateTime; 91 | private const int monitoringThread = 1; 92 | 93 | public string[] columns = new string[] { "Item", "Count * 64B", "Count", "Pkg Pwr" }; 94 | public string GetHelpText() { return ""; } 95 | public ClientBwConfig(Zen2DataFabric dataFabric) 96 | { 97 | this.dataFabric = dataFabric; 98 | } 99 | 100 | public string GetConfigName() { return "MTS/RNR DRAM Bandwidth??"; } 101 | public string[] GetColumns() { return columns; } 102 | public void Initialize() 103 | { 104 | ThreadAffinity.Set(1UL << monitoringThread); 105 | /* From experimentation, the umask seems to be laid out as: 106 | * bit 0: include NT writes, but requires bit 3 to be set??? 107 | * bit 1: unknown (very low counts) 108 | * bit 2: unknown (very low counts) 109 | * bit 3: writes 110 | * bit 4: unknown (very low counts for normal reads/writes, zero counts for NT write) 111 | * bit 5: reads 112 | * bit 6: unknown (zero) 113 | * bit 7: unknown (zero) 114 | * Unit masks tested on a 3950X and 4800H 115 | * These work for events 0x7 and 0x47, which seem to correspond to the two memory channels 116 | * based on one of them reading zero if the DIMMs for one channel are pulled 117 | */ 118 | Ring0.WriteMsr(MSR_DF_PERF_CTL_0, GetDFPerfCtlValue(0x7, 0x20, true, 0, 0)); // ch0 read? 119 | Ring0.WriteMsr(MSR_DF_PERF_CTL_1, GetDFPerfCtlValue(0x7, 0x8, true, 0, 0)); // ch0 write? 120 | Ring0.WriteMsr(MSR_DF_PERF_CTL_2, GetDFPerfCtlValue(0x47, 0x20, true, 0, 0));// ch1 read? 121 | Ring0.WriteMsr(MSR_DF_PERF_CTL_3, GetDFPerfCtlValue(0x47, 0x8, true, 0, 0)); // ch1 write? 122 | 123 | dataFabric.InitializeCoreTotals(); 124 | lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds(); 125 | } 126 | 127 | public MonitoringUpdateResults Update() 128 | { 129 | float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime); 130 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 131 | ThreadAffinity.Set(1UL << monitoringThread); 132 | ulong ctr0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0); 133 | ulong ctr1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1); 134 | ulong ctr2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2); 135 | ulong ctr3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3); 136 | 137 | dataFabric.ReadPackagePowerCounter(); 138 | results.unitMetrics = new string[4][]; 139 | results.unitMetrics[0] = new string[] { "Ch 0 Read?", FormatLargeNumber(ctr0 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr0 * normalizationFactor), "N/A" }; 140 | results.unitMetrics[1] = new string[] { "Ch 0 Write?", FormatLargeNumber(ctr1 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" }; 141 | results.unitMetrics[2] = new string[] { "Ch 1 Read?", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" }; 142 | results.unitMetrics[3] = new string[] { "Ch 1 Write?", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" }; 143 | 144 | ulong total = ctr0 + ctr1 + ctr2 + ctr3; 145 | results.overallMetrics = new string[] { "Total", 146 | FormatLargeNumber(total * normalizationFactor * 64) + "B/s", 147 | FormatLargeNumber(total * normalizationFactor), 148 | string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts) 149 | }; 150 | 151 | results.overallCounterValues = new Tuple[5]; 152 | results.overallCounterValues[0] = new Tuple("Package Power", dataFabric.NormalizedTotalCounts.watts); 153 | results.overallCounterValues[1] = new Tuple("Ch 0 Read?", ctr0); 154 | results.overallCounterValues[2] = new Tuple("Ch 0 Write?", ctr1); 155 | results.overallCounterValues[3] = new Tuple("Ch 1 Read?", ctr2); 156 | results.overallCounterValues[4] = new Tuple("Ch 1 Write?", ctr3); 157 | return results; 158 | } 159 | } 160 | 161 | public class OutboundDataConfig : MonitoringConfig 162 | { 163 | private Zen2DataFabric dataFabric; 164 | private long lastUpdateTime; 165 | private const int monitoringThread = 1; 166 | 167 | public string[] columns = new string[] { "Item", "Outbound Data BW" }; 168 | public string GetHelpText() { return ""; } 169 | public OutboundDataConfig(Zen2DataFabric dataFabric) 170 | { 171 | this.dataFabric = dataFabric; 172 | } 173 | 174 | public string GetConfigName() { return "Remote Outbound Data???"; } 175 | public string[] GetColumns() { return columns; } 176 | public void Initialize() 177 | { 178 | /* from preliminary PPR */ 179 | ulong mysteryOutboundBytes3 = 0x800400247; 180 | ulong mysteryOutboundBytes2 = 0x800400247; // yes the same event is mentioned twice 181 | ulong mysteryOutboundBytes1 = 0x800400207; 182 | ulong mysteryOutboundBytes0 = 0x7004002C7; 183 | 184 | ThreadAffinity.Set(1UL << monitoringThread); 185 | Ring0.WriteMsr(MSR_DF_PERF_CTL_0, mysteryOutboundBytes0); 186 | Ring0.WriteMsr(MSR_DF_PERF_CTL_1, mysteryOutboundBytes1); 187 | Ring0.WriteMsr(MSR_DF_PERF_CTL_2, mysteryOutboundBytes2); 188 | Ring0.WriteMsr(MSR_DF_PERF_CTL_3, mysteryOutboundBytes3); 189 | 190 | lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds(); 191 | } 192 | 193 | public MonitoringUpdateResults Update() 194 | { 195 | float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime); 196 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 197 | results.unitMetrics = new string[4][]; 198 | ThreadAffinity.Set(1UL << monitoringThread); 199 | ulong mysteryOutboundBytes0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0) * 32; 200 | ulong mysteryOutboundBytes1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1) * 32; 201 | ulong mysteryOutboundBytes2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2) * 32; 202 | ulong mysteryOutboundBytes3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3) * 32; 203 | 204 | results.unitMetrics[0] = new string[] { "DF Evt 0x7C7 Umask 0x2", FormatLargeNumber(mysteryOutboundBytes0 * normalizationFactor) + "B/s" }; 205 | results.unitMetrics[1] = new string[] { "DF Evt 0x807 Umask 0x2", FormatLargeNumber(mysteryOutboundBytes1 * normalizationFactor) + "B/s" }; 206 | results.unitMetrics[2] = new string[] { "DF Evt 0x847 Umask 0x2", FormatLargeNumber(mysteryOutboundBytes2 * normalizationFactor) + "B/s" }; 207 | results.unitMetrics[3] = new string[] { "DF Evt 0x847 Umask 0x2", FormatLargeNumber(mysteryOutboundBytes3 * normalizationFactor) + "B/s" }; 208 | 209 | results.overallMetrics = new string[] { "Overall", 210 | FormatLargeNumber((mysteryOutboundBytes0 + mysteryOutboundBytes1 + mysteryOutboundBytes2 + mysteryOutboundBytes3) * normalizationFactor) + "B/s" }; 211 | return results; 212 | } 213 | } 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /AMD/Zen3L3Cache.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using PmcReader.Interop; 5 | 6 | namespace PmcReader.AMD 7 | { 8 | public class Zen3L3Cache : Amd17hCpu 9 | { 10 | // ccx -> thread id mapping. Just need one thread per ccx - we'll always sample using that thread 11 | protected Dictionary ccxSampleThreads; 12 | // ccx -> list of thread ids mapping 13 | protected Dictionary> allCcxThreads; 14 | public L3CounterData[] ccxCounterData; 15 | public L3CounterData ccxTotals; 16 | 17 | public Zen3L3Cache() 18 | { 19 | architectureName = "Zen 3 L3"; 20 | ccxSampleThreads = new Dictionary(); 21 | allCcxThreads = new Dictionary>(); 22 | for (int threadIdx = 0; threadIdx < GetThreadCount(); threadIdx++) 23 | { 24 | int ccxIdx = Get19hCcxId(threadIdx); 25 | ccxSampleThreads[ccxIdx] = threadIdx; 26 | List ccxThreads; 27 | if (! allCcxThreads.TryGetValue(ccxIdx, out ccxThreads)) 28 | { 29 | ccxThreads = new List(); 30 | allCcxThreads.Add(ccxIdx, ccxThreads); 31 | } 32 | 33 | ccxThreads.Add(threadIdx); 34 | } 35 | 36 | monitoringConfigs = new MonitoringConfig[1]; 37 | monitoringConfigs[0] = new HitRateLatencyConfig(this); 38 | 39 | ccxCounterData = new L3CounterData[ccxSampleThreads.Count()]; 40 | ccxTotals = new L3CounterData(); 41 | } 42 | 43 | public class L3CounterData 44 | { 45 | public float ctr0; 46 | public float ctr1; 47 | public float ctr2; 48 | public float ctr3; 49 | public float ctr4; 50 | public float ctr5; 51 | } 52 | 53 | public void ClearTotals() 54 | { 55 | ccxTotals.ctr0 = 0; 56 | ccxTotals.ctr1 = 0; 57 | ccxTotals.ctr2 = 0; 58 | ccxTotals.ctr3 = 0; 59 | ccxTotals.ctr4 = 0; 60 | ccxTotals.ctr5 = 0; 61 | } 62 | 63 | public void UpdateCcxL3CounterData(int ccxIdx, int threadIdx) 64 | { 65 | ThreadAffinity.Set(1UL << threadIdx); 66 | float normalizationFactor = GetNormalizationFactor(threadIdx); 67 | ulong ctr0 = ReadAndClearMsr(MSR_L3_PERF_CTR_0); 68 | ulong ctr1 = ReadAndClearMsr(MSR_L3_PERF_CTR_1); 69 | ulong ctr2 = ReadAndClearMsr(MSR_L3_PERF_CTR_2); 70 | ulong ctr3 = ReadAndClearMsr(MSR_L3_PERF_CTR_3); 71 | ulong ctr4 = ReadAndClearMsr(MSR_L3_PERF_CTR_4); 72 | ulong ctr5 = ReadAndClearMsr(MSR_L3_PERF_CTR_5); 73 | 74 | if (ccxCounterData[ccxIdx] == null) ccxCounterData[ccxIdx] = new L3CounterData(); 75 | ccxCounterData[ccxIdx].ctr0 = ctr0 * normalizationFactor; 76 | ccxCounterData[ccxIdx].ctr1 = ctr1 * normalizationFactor; 77 | ccxCounterData[ccxIdx].ctr2 = ctr2 * normalizationFactor; 78 | ccxCounterData[ccxIdx].ctr3 = ctr3 * normalizationFactor; 79 | ccxCounterData[ccxIdx].ctr4 = ctr4 * normalizationFactor; 80 | ccxCounterData[ccxIdx].ctr5 = ctr5 * normalizationFactor; 81 | ccxTotals.ctr0 += ccxCounterData[ccxIdx].ctr0; 82 | ccxTotals.ctr1 += ccxCounterData[ccxIdx].ctr1; 83 | ccxTotals.ctr2 += ccxCounterData[ccxIdx].ctr2; 84 | ccxTotals.ctr3 += ccxCounterData[ccxIdx].ctr3; 85 | ccxTotals.ctr4 += ccxCounterData[ccxIdx].ctr4; 86 | ccxTotals.ctr5 += ccxCounterData[ccxIdx].ctr5; 87 | } 88 | 89 | public Tuple[] GetOverallL3CounterValues(ulong aperf, ulong mperf, ulong irperfcount, ulong tsc, 90 | string ctr0, string ctr1, string ctr2, string ctr3, string ctr4, string ctr5) 91 | { 92 | Tuple[] retval = new Tuple[10]; 93 | retval[0] = new Tuple("APERF", aperf); 94 | retval[1] = new Tuple("MPERF", mperf); 95 | retval[2] = new Tuple("TSC", tsc); 96 | retval[3] = new Tuple("IRPerfCount", irperfcount); 97 | retval[4] = new Tuple(ctr0, ccxTotals.ctr0); 98 | retval[5] = new Tuple(ctr1, ccxTotals.ctr1); 99 | retval[6] = new Tuple(ctr2, ccxTotals.ctr2); 100 | retval[7] = new Tuple(ctr3, ccxTotals.ctr3); 101 | retval[8] = new Tuple(ctr4, ccxTotals.ctr4); 102 | retval[9] = new Tuple(ctr5, ccxTotals.ctr5); 103 | return retval; 104 | } 105 | 106 | public class HitRateLatencyConfig : MonitoringConfig 107 | { 108 | private Zen3L3Cache l3Cache; 109 | 110 | public HitRateLatencyConfig(Zen3L3Cache l3Cache) 111 | { 112 | this.l3Cache = l3Cache; 113 | } 114 | 115 | public string GetConfigName() { return "Hitrate and Miss Latency"; } 116 | public string[] GetColumns() { return columns; } 117 | public void Initialize() 118 | { 119 | ulong L3AccessPerfCtl = Get19hL3PerfCtlValue(0x4, 0xFF, true, 0, true, true, 0, 0b11); 120 | ulong L3MissLatencyCtl = Get19hL3PerfCtlValue(0x90, 0, true, 0, true, true, 0, 0); 121 | ulong L3MissSdpRequestPerfCtl = Get19hL3PerfCtlValue(0x9A, 0xFF, true, 0, true, true, 0, 0); 122 | ulong L3MissesForLatencyCalculation = 0x0300C00000401F9a; 123 | ulong L3Miss = 0x0300C00000400104; 124 | 125 | foreach (KeyValuePair ccxThread in l3Cache.ccxSampleThreads) 126 | { 127 | ThreadAffinity.Set(1UL << ccxThread.Value); 128 | Ring0.WriteMsr(MSR_L3_PERF_CTL_0, L3AccessPerfCtl); 129 | Ring0.WriteMsr(MSR_L3_PERF_CTL_1, L3MissLatencyCtl); 130 | Ring0.WriteMsr(MSR_L3_PERF_CTL_2, L3MissSdpRequestPerfCtl); 131 | Ring0.WriteMsr(MSR_L3_PERF_CTL_3, L3MissesForLatencyCalculation); 132 | Ring0.WriteMsr(MSR_L3_PERF_CTL_4, L3Miss); 133 | } 134 | } 135 | 136 | public MonitoringUpdateResults Update() 137 | { 138 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 139 | results.unitMetrics = new string[l3Cache.ccxSampleThreads.Count()][]; 140 | float[] ccxClocks = new float[l3Cache.allCcxThreads.Count()]; 141 | l3Cache.ClearTotals(); 142 | ulong totalAperf = 0, totalMperf = 0, totalTsc = 0, totalIrPerfCount = 0; 143 | foreach (KeyValuePair ccxThread in l3Cache.ccxSampleThreads) 144 | { 145 | // Try to determine frequency, by getting max frequency of cores in ccx 146 | foreach (int ccxThreadIdx in l3Cache.allCcxThreads[ccxThread.Key]) 147 | { 148 | ThreadAffinity.Set(1UL << ccxThreadIdx); 149 | float normalizationFactor = l3Cache.GetNormalizationFactor(l3Cache.GetThreadCount() + ccxThreadIdx); 150 | ulong aperf, mperf, tsc, irperfcount; 151 | l3Cache.ReadFixedCounters(ccxThreadIdx, out aperf, out irperfcount, out tsc, out mperf); 152 | totalAperf += aperf; 153 | totalIrPerfCount += irperfcount; 154 | totalTsc += tsc; 155 | totalMperf += mperf; 156 | float clk = tsc * ((float)aperf / mperf) * normalizationFactor; 157 | if (clk > ccxClocks[ccxThread.Key]) ccxClocks[ccxThread.Key] = clk; 158 | if (ccxThreadIdx == ccxThread.Value) 159 | { 160 | l3Cache.UpdateCcxL3CounterData(ccxThread.Key, ccxThread.Value); 161 | results.unitMetrics[ccxThread.Key] = computeMetrics("CCX " + ccxThread.Key, l3Cache.ccxCounterData[ccxThread.Key], ccxClocks[ccxThread.Key]); 162 | } 163 | } 164 | } 165 | 166 | float avgClk = 0; 167 | foreach (float ccxClock in ccxClocks) avgClk += ccxClock; 168 | avgClk /= l3Cache.allCcxThreads.Count(); 169 | results.overallMetrics = computeMetrics("Overall", l3Cache.ccxTotals, avgClk); 170 | results.overallCounterValues = l3Cache.GetOverallL3CounterValues(totalAperf, totalMperf, totalIrPerfCount, totalTsc, 171 | "L3Access", "L3MissLat/16", "L3MissSdpReq", "L3MissesForLatencyCalculation", "L3Miss", "Unused"); 172 | return results; 173 | } 174 | 175 | public string[] columns = new string[] { "Item", "Clk", "Hitrate", "Hit BW", "Mem Latency", "Mem Latency?", "Pend. Miss/C", "SDP Requests", "SDP Requests * 64B" }; 176 | 177 | public string GetHelpText() { return ""; } 178 | 179 | private string[] computeMetrics(string label, L3CounterData counterData, float clk) 180 | { 181 | // event 0x90 counts "total cycles for all transactions divided by 16" 182 | float ccxL3MissLatency = (float)counterData.ctr1 * 16 / counterData.ctr3; 183 | float ccxL3Hitrate = (1 - (float)counterData.ctr4 / counterData.ctr0) * 100; 184 | float ccxL3HitBw = ((float)counterData.ctr0 - counterData.ctr4) * 64; 185 | return new string[] { label, 186 | FormatLargeNumber(clk), 187 | string.Format("{0:F2}%", ccxL3Hitrate), 188 | FormatLargeNumber(ccxL3HitBw) + "B/s", 189 | string.Format("{0:F1} clks", ccxL3MissLatency), 190 | string.Format("{0:F1} ns", (1000000000 / clk) * ccxL3MissLatency), 191 | string.Format("{0:F2}", counterData.ctr1 * 16 / clk), 192 | FormatLargeNumber(counterData.ctr2), 193 | FormatLargeNumber(counterData.ctr2 * 64) + "B/s"}; 194 | } 195 | } 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /AMD/Zen4DataFabric.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using PmcReader.Interop; 4 | 5 | namespace PmcReader.AMD 6 | { 7 | public class Zen4DataFabric : Amd19hCpu 8 | { 9 | public enum DfType 10 | { 11 | Client = 0 12 | } 13 | 14 | public Zen4DataFabric(DfType dfType) 15 | { 16 | architectureName = "Zen 4 Data Fabric"; 17 | List monitoringConfigList = new List(); 18 | if (dfType == DfType.Client) monitoringConfigList.Add(new ClientBwConfig(this)); 19 | monitoringConfigList.Add(new CSConfig(this)); 20 | monitoringConfigList.Add(new UMCConfig(this)); 21 | monitoringConfigs = monitoringConfigList.ToArray(); 22 | } 23 | 24 | public class ClientBwConfig : MonitoringConfig 25 | { 26 | private Zen4DataFabric dataFabric; 27 | private long lastUpdateTime; 28 | private const int monitoringThread = 1; 29 | 30 | public string[] columns = new string[] { "Item", "Count * 64B", "Count", "Pkg Pwr" }; 31 | public string GetHelpText() { return ""; } 32 | public ClientBwConfig(Zen4DataFabric dataFabric) 33 | { 34 | this.dataFabric = dataFabric; 35 | // does not work 36 | // dataFabric.GetUmcPerfmonInfo(out uint umcCount, out uint umcPerfcounterCount); 37 | // Console.WriteLine("Have {0} UMCs, {1} perf counters", umcCount, umcPerfcounterCount); 38 | } 39 | 40 | public string GetConfigName() { return "DRAM Bandwidth??"; } 41 | public string[] GetColumns() { return columns; } 42 | public void Initialize() 43 | { 44 | ThreadAffinity.Set(1UL << monitoringThread); 45 | ulong evt0 = GetDramPerfEvent(true, 0); 46 | ulong evt1 = GetDramPerfEvent(true, 0) + 0x20; 47 | ulong evt2 = GetDramPerfEvent(false, 11); 48 | ulong evt3 = GetDramPerfEvent(false, 0); 49 | Ring0.WriteMsr(MSR_DF_PERF_CTL_0, evt0); // ch0 read? 50 | Ring0.WriteMsr(MSR_DF_PERF_CTL_1, evt1); // ch0 write? 51 | Ring0.WriteMsr(MSR_DF_PERF_CTL_2, evt2);// ch1 read? 52 | Ring0.WriteMsr(MSR_DF_PERF_CTL_3, evt3); // ch1 write? 53 | 54 | dataFabric.InitializeCoreTotals(); 55 | lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds(); 56 | } 57 | 58 | private ulong GetDramPerfEvent(bool read, uint index) 59 | { 60 | ulong dramEventBase = 0x740F00F; 61 | if (read) dramEventBase |= 0xE00; 62 | else dramEventBase |= 0xF00; 63 | 64 | index = index * 4 + 1; 65 | dramEventBase |= (index & 0xF) << 4; 66 | dramEventBase |= (index & 0xF0) << 28; 67 | return dramEventBase; 68 | } 69 | 70 | public MonitoringUpdateResults Update() 71 | { 72 | float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime); 73 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 74 | ThreadAffinity.Set(1UL << monitoringThread); 75 | ulong ctr0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0); 76 | ulong ctr1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1); 77 | ulong ctr2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2); 78 | ulong ctr3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3); 79 | 80 | dataFabric.ReadPackagePowerCounter(); 81 | results.unitMetrics = new string[4][]; 82 | results.unitMetrics[0] = new string[] { "DRAM Read?", FormatLargeNumber(ctr0 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr0 * normalizationFactor), "N/A" }; 83 | results.unitMetrics[1] = new string[] { "Write 0?", FormatLargeNumber(ctr1 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" }; 84 | results.unitMetrics[2] = new string[] { "iGPU Related?", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" }; 85 | results.unitMetrics[3] = new string[] { "Write 2?", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" }; 86 | 87 | ulong total = ctr0 + ctr1 + ctr2 + ctr3; 88 | results.overallMetrics = new string[] { "Total", 89 | FormatLargeNumber(total * normalizationFactor * 64) + "B/s", 90 | FormatLargeNumber(total * normalizationFactor), 91 | string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts) 92 | }; 93 | 94 | results.overallCounterValues = new Tuple[5]; 95 | results.overallCounterValues[0] = new Tuple("Package Power", dataFabric.NormalizedTotalCounts.watts); 96 | results.overallCounterValues[1] = new Tuple("Ch 0 Read?", ctr0); 97 | results.overallCounterValues[2] = new Tuple("Ch 0 Write?", ctr1); 98 | results.overallCounterValues[3] = new Tuple("Ch 1 Read?", ctr2); 99 | results.overallCounterValues[4] = new Tuple("Ch 1 Write?", ctr3); 100 | return results; 101 | } 102 | } 103 | 104 | public class CSConfig : MonitoringConfig 105 | { 106 | private Zen4DataFabric dataFabric; 107 | private long lastUpdateTime; 108 | private const int monitoringThread = 1; 109 | 110 | public string[] columns = new string[] { "Item", "Count * 64B", "Count", "Pkg Pwr" }; 111 | public string GetHelpText() { return ""; } 112 | public CSConfig(Zen4DataFabric dataFabric) 113 | { 114 | this.dataFabric = dataFabric; 115 | } 116 | 117 | public string GetConfigName() { return "Coherent Station?"; } 118 | public string[] GetColumns() { return columns; } 119 | public void Initialize() 120 | { 121 | ThreadAffinity.Set(1UL << monitoringThread); 122 | ulong evt0 = GetDFPerfCtlValue(0x1f, 0, 0xfe, 0x7, true); // cs0 read 123 | ulong evt1 = GetDFPerfCtlValue(0x5f, 0, 0xfe, 0x7, true); // cs1 read 124 | ulong evt2 = GetDFPerfCtlValue(0x1f, 0, 0xff, 0x7, true); // cs0 write 125 | ulong evt3 = GetDFPerfCtlValue(0x5f, 0, 0xff, 0x7, true); // cs1 write 126 | Ring0.WriteMsr(MSR_DF_PERF_CTL_0, evt0); 127 | Ring0.WriteMsr(MSR_DF_PERF_CTL_1, evt1); 128 | Ring0.WriteMsr(MSR_DF_PERF_CTL_2, evt2); 129 | Ring0.WriteMsr(MSR_DF_PERF_CTL_3, evt3); 130 | 131 | dataFabric.InitializeCoreTotals(); 132 | lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds(); 133 | } 134 | 135 | public MonitoringUpdateResults Update() 136 | { 137 | float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime); 138 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 139 | ThreadAffinity.Set(1UL << monitoringThread); 140 | ulong ctr0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0); 141 | ulong ctr1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1); 142 | ulong ctr2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2); 143 | ulong ctr3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3); 144 | 145 | dataFabric.ReadPackagePowerCounter(); 146 | results.unitMetrics = new string[4][]; 147 | results.unitMetrics[0] = new string[] { "CS0 Read", FormatLargeNumber(ctr0 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr0 * normalizationFactor), "N/A" }; 148 | results.unitMetrics[1] = new string[] { "CS1 Read", FormatLargeNumber(ctr1 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" }; 149 | results.unitMetrics[2] = new string[] { "CS0 Write", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" }; 150 | results.unitMetrics[3] = new string[] { "CS1 Write", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" }; 151 | 152 | ulong total = ctr0 + ctr1 + ctr2 + ctr3; 153 | results.overallMetrics = new string[] { "Total", 154 | FormatLargeNumber(total * normalizationFactor * 64) + "B/s", 155 | FormatLargeNumber(total * normalizationFactor), 156 | string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts) 157 | }; 158 | 159 | results.overallCounterValues = new Tuple[5]; 160 | results.overallCounterValues[0] = new Tuple("Package Power", dataFabric.NormalizedTotalCounts.watts); 161 | results.overallCounterValues[1] = new Tuple("Ch 0 Read?", ctr0); 162 | results.overallCounterValues[2] = new Tuple("Ch 0 Write?", ctr1); 163 | results.overallCounterValues[3] = new Tuple("Ch 1 Read?", ctr2); 164 | results.overallCounterValues[4] = new Tuple("Ch 1 Write?", ctr3); 165 | return results; 166 | } 167 | } 168 | 169 | public class UMCConfig : MonitoringConfig 170 | { 171 | private Zen4DataFabric dataFabric; 172 | private long lastUpdateTime; 173 | private const int monitoringThread = 1; 174 | 175 | public string[] columns = new string[] { "Item", "Count * 64B", "Count", "Pkg Pwr" }; 176 | public string GetHelpText() { return ""; } 177 | public UMCConfig(Zen4DataFabric dataFabric) 178 | { 179 | this.dataFabric = dataFabric; 180 | } 181 | 182 | public string GetConfigName() { return "UMC?"; } 183 | public string[] GetColumns() { return columns; } 184 | public void Initialize() 185 | { 186 | ThreadAffinity.Set(1UL << monitoringThread); 187 | 188 | ulong hwcrValue; 189 | Ring0.ReadMsr(HWCR, out hwcrValue); 190 | hwcrValue |= 1UL << 30; // instructions retired counter 191 | hwcrValue |= 1UL << 31; // enable UMC counters 192 | Ring0.WriteMsr(HWCR, hwcrValue); 193 | Ring0.ReadMsr(HWCR, out hwcrValue); 194 | 195 | ulong evt0 = GetUmcPerfCtlValue(0, false, false); // clk 196 | ulong evt1 = GetUmcPerfCtlValue(0xa, false, false); // cas 197 | ulong evt2 = GetUmcPerfCtlValue(0x5, false, false); // activate 198 | ulong evt3 = GetUmcPerfCtlValue(0x6, false, false); // precharge 199 | Ring0.WriteMsr(MSR_UMC_PERF_CTL_base, evt0); 200 | Ring0.WriteMsr(MSR_UMC_PERF_CTL_base + MSR_UMC_PERF_increment, evt1); 201 | Ring0.WriteMsr(MSR_UMC_PERF_CTL_base + MSR_UMC_PERF_increment * 2, evt2); 202 | Ring0.WriteMsr(MSR_UMC_PERF_CTL_base + MSR_UMC_PERF_increment * 3, evt3); 203 | 204 | dataFabric.InitializeCoreTotals(); 205 | lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds(); 206 | } 207 | 208 | public MonitoringUpdateResults Update() 209 | { 210 | float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime); 211 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 212 | ThreadAffinity.Set(1UL << monitoringThread); 213 | ulong ctr0 = ReadAndClearMsr(MSR_UMC_PERF_CTR_base); 214 | ulong ctr1 = ReadAndClearMsr(MSR_UMC_PERF_CTR_base + MSR_UMC_PERF_increment); 215 | ulong ctr2 = ReadAndClearMsr(MSR_UMC_PERF_CTR_base + MSR_UMC_PERF_increment * 2); 216 | ulong ctr3 = ReadAndClearMsr(MSR_UMC_PERF_CTR_base + MSR_UMC_PERF_increment * 3); 217 | 218 | dataFabric.ReadPackagePowerCounter(); 219 | results.unitMetrics = new string[4][]; 220 | results.unitMetrics[0] = new string[] { "Clk?", FormatLargeNumber(ctr0 * normalizationFactor) + "Hz", "N/A", "N/A" }; 221 | results.unitMetrics[1] = new string[] { "CS1 Read", FormatLargeNumber(ctr1 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" }; 222 | results.unitMetrics[2] = new string[] { "CS0 Write", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" }; 223 | results.unitMetrics[3] = new string[] { "CS1 Write", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" }; 224 | 225 | ulong total = ctr0 + ctr1 + ctr2 + ctr3; 226 | results.overallMetrics = new string[] { "Total", 227 | FormatLargeNumber(total * normalizationFactor * 64) + "B/s", 228 | FormatLargeNumber(total * normalizationFactor), 229 | string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts) 230 | }; 231 | 232 | results.overallCounterValues = new Tuple[5]; 233 | results.overallCounterValues[0] = new Tuple("Package Power", dataFabric.NormalizedTotalCounts.watts); 234 | results.overallCounterValues[1] = new Tuple("Ch 0 Read?", ctr0); 235 | results.overallCounterValues[2] = new Tuple("Ch 0 Write?", ctr1); 236 | results.overallCounterValues[3] = new Tuple("Ch 1 Read?", ctr2); 237 | results.overallCounterValues[4] = new Tuple("Ch 1 Write?", ctr3); 238 | return results; 239 | } 240 | } 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /AMD/Zen5L3Cache.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using PmcReader.Interop; 5 | 6 | namespace PmcReader.AMD 7 | { 8 | public class Zen5L3Cache : Amd19hCpu 9 | { 10 | // ccx -> thread id mapping. Just need one thread per ccx - we'll always sample using that thread 11 | protected Dictionary ccxSampleThreads; 12 | protected Dictionary> allCcxThreads; 13 | public L3CounterData[] ccxCounterData; 14 | public L3CounterData ccxTotals; 15 | 16 | public Zen5L3Cache() 17 | { 18 | architectureName = "Zen 5 L3"; 19 | ccxSampleThreads = new Dictionary(); 20 | allCcxThreads = new Dictionary>(); 21 | ccxSampleThreads[0] = 0; 22 | ccxSampleThreads[1] = 12; 23 | allCcxThreads[0] = new List() { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; 24 | allCcxThreads[1] = new List() { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }; 25 | 26 | monitoringConfigs = new MonitoringConfig[1]; 27 | monitoringConfigs[0] = new HitRateLatencyConfig(this); 28 | 29 | ccxCounterData = new L3CounterData[ccxSampleThreads.Count()]; 30 | ccxTotals = new L3CounterData(); 31 | } 32 | 33 | public class L3CounterData 34 | { 35 | public float ctr0; 36 | public float ctr1; 37 | public float ctr2; 38 | public float ctr3; 39 | public float ctr4; 40 | public float ctr5; 41 | } 42 | 43 | public void ClearTotals() 44 | { 45 | ccxTotals.ctr0 = 0; 46 | ccxTotals.ctr1 = 0; 47 | ccxTotals.ctr2 = 0; 48 | ccxTotals.ctr3 = 0; 49 | ccxTotals.ctr4 = 0; 50 | ccxTotals.ctr5 = 0; 51 | } 52 | 53 | public void UpdateCcxL3CounterData(int ccxIdx, int threadIdx) 54 | { 55 | ThreadAffinity.Set(1UL << threadIdx); 56 | float normalizationFactor = GetNormalizationFactor(threadIdx); 57 | ulong ctr0 = ReadAndClearMsr(MSR_L3_PERF_CTR_0); 58 | ulong ctr1 = ReadAndClearMsr(MSR_L3_PERF_CTR_1); 59 | ulong ctr2 = ReadAndClearMsr(MSR_L3_PERF_CTR_2); 60 | ulong ctr3 = ReadAndClearMsr(MSR_L3_PERF_CTR_3); 61 | ulong ctr4 = ReadAndClearMsr(MSR_L3_PERF_CTR_4); 62 | ulong ctr5 = ReadAndClearMsr(MSR_L3_PERF_CTR_5); 63 | 64 | if (ccxCounterData[ccxIdx] == null) ccxCounterData[ccxIdx] = new L3CounterData(); 65 | ccxCounterData[ccxIdx].ctr0 = ctr0 * normalizationFactor; 66 | ccxCounterData[ccxIdx].ctr1 = ctr1 * normalizationFactor; 67 | ccxCounterData[ccxIdx].ctr2 = ctr2 * normalizationFactor; 68 | ccxCounterData[ccxIdx].ctr3 = ctr3 * normalizationFactor; 69 | ccxCounterData[ccxIdx].ctr4 = ctr4 * normalizationFactor; 70 | ccxCounterData[ccxIdx].ctr5 = ctr5 * normalizationFactor; 71 | ccxTotals.ctr0 += ccxCounterData[ccxIdx].ctr0; 72 | ccxTotals.ctr1 += ccxCounterData[ccxIdx].ctr1; 73 | ccxTotals.ctr2 += ccxCounterData[ccxIdx].ctr2; 74 | ccxTotals.ctr3 += ccxCounterData[ccxIdx].ctr3; 75 | ccxTotals.ctr4 += ccxCounterData[ccxIdx].ctr4; 76 | ccxTotals.ctr5 += ccxCounterData[ccxIdx].ctr5; 77 | } 78 | 79 | public Tuple[] GetOverallL3CounterValues(ulong aperf, ulong mperf, ulong irperfcount, ulong tsc, 80 | string ctr0, string ctr1, string ctr2, string ctr3, string ctr4, string ctr5) 81 | { 82 | Tuple[] retval = new Tuple[10]; 83 | retval[0] = new Tuple("APERF", aperf); 84 | retval[1] = new Tuple("MPERF", mperf); 85 | retval[2] = new Tuple("TSC", tsc); 86 | retval[3] = new Tuple("IRPerfCount", irperfcount); 87 | retval[4] = new Tuple(ctr0, ccxTotals.ctr0); 88 | retval[5] = new Tuple(ctr1, ccxTotals.ctr1); 89 | retval[6] = new Tuple(ctr2, ccxTotals.ctr2); 90 | retval[7] = new Tuple(ctr3, ccxTotals.ctr3); 91 | retval[8] = new Tuple(ctr4, ccxTotals.ctr4); 92 | retval[9] = new Tuple(ctr5, ccxTotals.ctr5); 93 | return retval; 94 | } 95 | 96 | public class HitRateLatencyConfig : MonitoringConfig 97 | { 98 | private Zen5L3Cache l3Cache; 99 | 100 | public HitRateLatencyConfig(Zen5L3Cache l3Cache) 101 | { 102 | this.l3Cache = l3Cache; 103 | } 104 | 105 | public string GetConfigName() { return "Hitrate and Latency"; } 106 | public string[] GetColumns() { return columns; } 107 | public void Initialize() 108 | { 109 | 110 | foreach (KeyValuePair ccxThread in l3Cache.ccxSampleThreads) 111 | { 112 | ThreadAffinity.Set(1UL << ccxThread.Value); 113 | InitializeThread(); 114 | } 115 | } 116 | 117 | private void InitializeThread() 118 | { 119 | // L3 tag lookup state, all coherent accesses to L3 120 | ulong L3AccessPerfCtl = Get1AhL3PerfCtlValue(0x4, 0xFF, true, 0, true, true, 0, threadMask: 3); 121 | ulong L3MissPerfCtl = Get1AhL3PerfCtlValue(0x4, 1, true, 0, true, true, 0, threadMask: 3); 122 | 123 | // bit 2,3 of unit mask = near,far ccx's cache 124 | ulong L3MissLatencyOtherCacheReqs = Get19hL3PerfCtlValue(0xAD, 0b1100, true, 0, true, enableAllSlices: true, sliceId: 0x3, 0b11); 125 | ulong L3MissLatencyOtherCache = Get19hL3PerfCtlValue(0xAC, 0b1100, true, 0, true, enableAllSlices: true, sliceId: 0x3, 0b11); 126 | 127 | // bits 0,1 of unit mask = near,far dram 128 | ulong L3MissLatencyDramReqs = Get19hL3PerfCtlValue(0xAD, 0b11, true, 0, true, enableAllSlices: true, sliceId: 0x3, 0b11); 129 | ulong L3MissLatencyDram = Get19hL3PerfCtlValue(0xAC, 0b11, true, 0, true, enableAllSlices: true, sliceId: 0x3, 0b11); 130 | 131 | Ring0.WriteMsr(MSR_L3_PERF_CTL_0, L3AccessPerfCtl); 132 | Ring0.WriteMsr(MSR_L3_PERF_CTL_1, L3MissPerfCtl); 133 | Ring0.WriteMsr(MSR_L3_PERF_CTL_2, L3MissLatencyOtherCacheReqs); 134 | Ring0.WriteMsr(MSR_L3_PERF_CTL_3, L3MissLatencyOtherCache); 135 | Ring0.WriteMsr(MSR_L3_PERF_CTL_4, L3MissLatencyDramReqs); 136 | Ring0.WriteMsr(MSR_L3_PERF_CTL_5, L3MissLatencyDram); 137 | 138 | } 139 | 140 | public MonitoringUpdateResults Update() 141 | { 142 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 143 | results.unitMetrics = new string[l3Cache.ccxSampleThreads.Count()][]; 144 | float[] ccxClocks = new float[l3Cache.allCcxThreads.Count()]; 145 | l3Cache.ClearTotals(); 146 | ulong totalAperf = 0, totalMperf = 0, totalTsc = 0, totalIrPerfCount = 0; 147 | List> overallCounterValues = new List>(); 148 | 149 | foreach (KeyValuePair ccxThread in l3Cache.ccxSampleThreads) 150 | { 151 | // Try to determine frequency, by getting max frequency of cores in ccx 152 | foreach (int ccxThreadIdx in l3Cache.allCcxThreads[ccxThread.Key]) 153 | { 154 | ThreadAffinity.Set(1UL << ccxThreadIdx); 155 | float normalizationFactor = l3Cache.GetNormalizationFactor(l3Cache.GetThreadCount() + ccxThreadIdx); 156 | ulong aperf, mperf, tsc, irperfcount; 157 | l3Cache.ReadFixedCounters(ccxThreadIdx, out aperf, out irperfcount, out tsc, out mperf); 158 | totalAperf += aperf; 159 | totalIrPerfCount += irperfcount; 160 | totalTsc += tsc; 161 | totalMperf += mperf; 162 | float clk = tsc * ((float)aperf / mperf) * normalizationFactor; 163 | if (clk > ccxClocks[ccxThread.Key]) ccxClocks[ccxThread.Key] = clk; 164 | if (ccxThreadIdx == ccxThread.Value) 165 | { 166 | l3Cache.UpdateCcxL3CounterData(ccxThread.Key, ccxThread.Value); 167 | InitializeThread(); // somehow these get cleared every once in a while? 168 | results.unitMetrics[ccxThread.Key] = computeMetrics("CCX " + ccxThread.Key, l3Cache.ccxCounterData[ccxThread.Key], ccxClocks[ccxThread.Key]); 169 | overallCounterValues.Add(new Tuple("CCX" + ccxThread.Key + " L3 Access", l3Cache.ccxCounterData[ccxThread.Key].ctr0)); 170 | overallCounterValues.Add(new Tuple("CCX" + ccxThread.Key + " L3 Miss", l3Cache.ccxCounterData[ccxThread.Key].ctr1)); 171 | overallCounterValues.Add(new Tuple("CCX" + ccxThread.Key + " Other CCX Sampled Reqs", l3Cache.ccxCounterData[ccxThread.Key].ctr2)); 172 | overallCounterValues.Add(new Tuple("CCX" + ccxThread.Key + " Other CCX Sampled Latency", l3Cache.ccxCounterData[ccxThread.Key].ctr3)); 173 | overallCounterValues.Add(new Tuple("CCX" + ccxThread.Key + " DRAM Sampled Reqs", l3Cache.ccxCounterData[ccxThread.Key].ctr4)); 174 | overallCounterValues.Add(new Tuple("CCX" + ccxThread.Key + " DRAM Sampled Latency", l3Cache.ccxCounterData[ccxThread.Key].ctr5)); 175 | } 176 | } 177 | } 178 | 179 | overallCounterValues.Add(new Tuple("APERF", totalAperf)); 180 | overallCounterValues.Add(new Tuple("MPERF", totalMperf)); 181 | overallCounterValues.Add(new Tuple("REF_TSC", totalTsc)); 182 | overallCounterValues.Add(new Tuple("IrPerfCount", totalIrPerfCount)); 183 | 184 | float avgClk = 0; 185 | foreach (float ccxClock in ccxClocks) avgClk += ccxClock; 186 | avgClk /= l3Cache.allCcxThreads.Count(); 187 | results.overallMetrics = computeMetrics("Overall", l3Cache.ccxTotals, avgClk); 188 | /*results.overallCounterValues = l3Cache.GetOverallL3CounterValues(totalAperf, totalMperf, totalIrPerfCount, totalTsc, 189 | "Coherent L3 Access", "L3 Miss", "Other CCX Reqs", "Other CCX Pending Reqs Per Cycle", "DRAM Reqs", "DRAM Pending Reqs Per Cycle");*/ 190 | results.overallCounterValues = overallCounterValues.ToArray(); 191 | return results; 192 | } 193 | 194 | public string[] columns = new string[] { "Item", "Clk", "Hitrate", "Hit BW", "Miss BW", "Latency, Other CCX", "Latency, DRAM" }; 195 | 196 | public string GetHelpText() { return ""; } 197 | 198 | private string[] computeMetrics(string label, L3CounterData counterData, float clk) 199 | { 200 | // average sampled latency is XiSampledLatency / XiSampledLatencyRequests * 10 ns 201 | float ccxL3MissLatencyNs = (float)10 * counterData.ctr3 / counterData.ctr2; 202 | float dramL3MissLatencyNs = (float)10 * counterData.ctr5 / counterData.ctr4; 203 | float ccxL3Hitrate = (1 - (float)counterData.ctr1 / counterData.ctr0) * 100; 204 | float ccxL3HitBw = ((float)counterData.ctr0 - counterData.ctr1) * 64; 205 | return new string[] { label, 206 | FormatLargeNumber(clk), 207 | string.Format("{0:F2}%", ccxL3Hitrate), 208 | FormatLargeNumber(ccxL3HitBw) + "B/s", 209 | FormatLargeNumber(counterData.ctr1 * 64) + "B/s", 210 | string.Format("{0:F1} ns", ccxL3MissLatencyNs), 211 | string.Format("{0:F1} ns", dramL3MissLatencyNs)}; 212 | } 213 | } 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /AMD/ZenL3Cache.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using PmcReader.Interop; 5 | 6 | namespace PmcReader.AMD 7 | { 8 | public class ZenL3Cache : Amd17hCpu 9 | { 10 | // ccx -> thread id mapping. Just need one thread per ccx - we'll always sample using that thread 11 | protected Dictionary ccxSampleThreads; 12 | protected Dictionary> allCcxThreads; 13 | public L3CounterData[] ccxCounterData; 14 | public L3CounterData ccxTotals; 15 | 16 | public ZenL3Cache() 17 | { 18 | architectureName = "Zen L3"; 19 | ccxSampleThreads = new Dictionary(); 20 | allCcxThreads = new Dictionary>(); 21 | for (int threadIdx = 0; threadIdx < GetThreadCount(); threadIdx++) 22 | { 23 | int ccxIdx = GetCcxId(threadIdx); 24 | ccxSampleThreads[ccxIdx] = threadIdx; 25 | List ccxThreads; 26 | if (! allCcxThreads.TryGetValue(ccxIdx, out ccxThreads)) 27 | { 28 | ccxThreads = new List(); 29 | allCcxThreads.Add(ccxIdx, ccxThreads); 30 | } 31 | 32 | ccxThreads.Add(threadIdx); 33 | } 34 | 35 | monitoringConfigs = new MonitoringConfig[1]; 36 | monitoringConfigs[0] = new HitRateLatencyConfig(this); 37 | 38 | ccxCounterData = new L3CounterData[ccxSampleThreads.Count()]; 39 | ccxTotals = new L3CounterData(); 40 | } 41 | 42 | public class L3CounterData 43 | { 44 | public float ctr0; 45 | public float ctr1; 46 | public float ctr2; 47 | public float ctr3; 48 | public float ctr4; 49 | public float ctr5; 50 | } 51 | 52 | public void ClearTotals() 53 | { 54 | ccxTotals.ctr0 = 0; 55 | ccxTotals.ctr1 = 0; 56 | ccxTotals.ctr2 = 0; 57 | ccxTotals.ctr3 = 0; 58 | ccxTotals.ctr4 = 0; 59 | ccxTotals.ctr5 = 0; 60 | } 61 | 62 | public void UpdateCcxL3CounterData(int ccxIdx, int threadIdx) 63 | { 64 | ThreadAffinity.Set(1UL << threadIdx); 65 | float normalizationFactor = GetNormalizationFactor(threadIdx); 66 | ulong ctr0 = ReadAndClearMsr(MSR_L3_PERF_CTR_0); 67 | ulong ctr1 = ReadAndClearMsr(MSR_L3_PERF_CTR_1); 68 | ulong ctr2 = ReadAndClearMsr(MSR_L3_PERF_CTR_2); 69 | ulong ctr3 = ReadAndClearMsr(MSR_L3_PERF_CTR_3); 70 | ulong ctr4 = ReadAndClearMsr(MSR_L3_PERF_CTR_4); 71 | ulong ctr5 = ReadAndClearMsr(MSR_L3_PERF_CTR_5); 72 | 73 | if (ccxCounterData[ccxIdx] == null) ccxCounterData[ccxIdx] = new L3CounterData(); 74 | ccxCounterData[ccxIdx].ctr0 = ctr0 * normalizationFactor; 75 | ccxCounterData[ccxIdx].ctr1 = ctr1 * normalizationFactor; 76 | ccxCounterData[ccxIdx].ctr2 = ctr2 * normalizationFactor; 77 | ccxCounterData[ccxIdx].ctr3 = ctr3 * normalizationFactor; 78 | ccxCounterData[ccxIdx].ctr4 = ctr4 * normalizationFactor; 79 | ccxCounterData[ccxIdx].ctr5 = ctr5 * normalizationFactor; 80 | ccxTotals.ctr0 += ccxCounterData[ccxIdx].ctr0; 81 | ccxTotals.ctr1 += ccxCounterData[ccxIdx].ctr1; 82 | ccxTotals.ctr2 += ccxCounterData[ccxIdx].ctr2; 83 | ccxTotals.ctr3 += ccxCounterData[ccxIdx].ctr3; 84 | ccxTotals.ctr4 += ccxCounterData[ccxIdx].ctr4; 85 | ccxTotals.ctr5 += ccxCounterData[ccxIdx].ctr5; 86 | } 87 | 88 | public Tuple[] GetOverallL3CounterValues(ulong aperf, ulong mperf, ulong irperfcount, ulong tsc, 89 | string ctr0, string ctr1, string ctr2, string ctr3, string ctr4, string ctr5) 90 | { 91 | Tuple[] retval = new Tuple[10]; 92 | retval[0] = new Tuple("APERF", aperf); 93 | retval[1] = new Tuple("MPERF", mperf); 94 | retval[2] = new Tuple("TSC", tsc); 95 | retval[3] = new Tuple("IRPerfCount", irperfcount); 96 | retval[4] = new Tuple(ctr0, ccxTotals.ctr0); 97 | retval[5] = new Tuple(ctr1, ccxTotals.ctr1); 98 | retval[6] = new Tuple(ctr2, ccxTotals.ctr2); 99 | retval[7] = new Tuple(ctr3, ccxTotals.ctr3); 100 | retval[8] = new Tuple(ctr4, ccxTotals.ctr4); 101 | retval[9] = new Tuple(ctr5, ccxTotals.ctr5); 102 | return retval; 103 | } 104 | 105 | public class HitRateLatencyConfig : MonitoringConfig 106 | { 107 | private ZenL3Cache l3Cache; 108 | 109 | public HitRateLatencyConfig(ZenL3Cache l3Cache) 110 | { 111 | this.l3Cache = l3Cache; 112 | } 113 | 114 | public string GetConfigName() { return "Hitrate and Miss Latency"; } 115 | public string[] GetColumns() { return columns; } 116 | public void Initialize() 117 | { 118 | ulong L3AccessPerfCtl = GetL3PerfCtlValue(0x01, 1 << 7, true, 0xF, 0xFF); // bit 7 = caching: L3 cache access 119 | ulong L3MissPerfCtl = GetL3PerfCtlValue(0x06, 0x01, true, 0xF, 0xFF); // bit 0 = requestmiss 120 | ulong L3MissLatencyCtl = GetL3PerfCtlValue(0x90, 0, true, 0xF, 0xFF); 121 | ulong L3MissSdpRequestPerfCtl = GetL3PerfCtlValue(0x9A, 0x1F, true, 0xF, 0xFF); 122 | ulong L3ReqLookupState = GetL3PerfCtlValue(0x04, 0xFF, true, 0xF, 0xFF); 123 | ulong L3ReqMiss = GetL3PerfCtlValue(0x04, 0x1, true, 0xF, 0xFF); 124 | 125 | foreach(KeyValuePair ccxThread in l3Cache.ccxSampleThreads) 126 | { 127 | ThreadAffinity.Set(1UL << ccxThread.Value); 128 | Ring0.WriteMsr(MSR_L3_PERF_CTL_0, L3AccessPerfCtl); 129 | Ring0.WriteMsr(MSR_L3_PERF_CTL_1, L3MissPerfCtl); 130 | Ring0.WriteMsr(MSR_L3_PERF_CTL_2, L3MissLatencyCtl); 131 | Ring0.WriteMsr(MSR_L3_PERF_CTL_3, L3MissSdpRequestPerfCtl); 132 | Ring0.WriteMsr(MSR_L3_PERF_CTL_4, L3ReqLookupState); 133 | Ring0.WriteMsr(MSR_L3_PERF_CTL_5, L3ReqMiss); 134 | } 135 | } 136 | 137 | public MonitoringUpdateResults Update() 138 | { 139 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 140 | results.unitMetrics = new string[l3Cache.ccxSampleThreads.Count()][]; 141 | float[] ccxClocks = new float[l3Cache.allCcxThreads.Count()]; 142 | l3Cache.ClearTotals(); 143 | ulong totalAperf = 0, totalMperf = 0, totalTsc = 0, totalIrPerfCount = 0; 144 | foreach (KeyValuePair ccxThread in l3Cache.ccxSampleThreads) 145 | { 146 | // Try to determine frequency, by getting max frequency of cores in ccx 147 | foreach (int ccxThreadIdx in l3Cache.allCcxThreads[ccxThread.Key]) 148 | { 149 | ThreadAffinity.Set(1UL << ccxThreadIdx); 150 | float normalizationFactor = l3Cache.GetNormalizationFactor(l3Cache.GetThreadCount() + ccxThreadIdx); 151 | ulong aperf, mperf, tsc, irperfcount; 152 | l3Cache.ReadFixedCounters(ccxThreadIdx, out aperf, out irperfcount, out tsc, out mperf); 153 | totalAperf += aperf; 154 | totalIrPerfCount += irperfcount; 155 | totalTsc += tsc; 156 | totalMperf += mperf; 157 | float clk = tsc * ((float)aperf / mperf) * normalizationFactor; 158 | if (clk > ccxClocks[ccxThread.Key]) ccxClocks[ccxThread.Key] = clk; 159 | if (ccxThreadIdx == ccxThread.Value) 160 | { 161 | l3Cache.UpdateCcxL3CounterData(ccxThread.Key, ccxThread.Value); 162 | results.unitMetrics[ccxThread.Key] = computeMetrics("CCX " + ccxThread.Key, l3Cache.ccxCounterData[ccxThread.Key], ccxClocks[ccxThread.Key]); 163 | } 164 | } 165 | } 166 | 167 | float avgClk = 0; 168 | foreach (float ccxClock in ccxClocks) avgClk += ccxClock; 169 | avgClk /= l3Cache.allCcxThreads.Count(); 170 | results.overallMetrics = computeMetrics("Overall", l3Cache.ccxTotals, avgClk); 171 | results.overallCounterValues = l3Cache.GetOverallL3CounterValues(totalAperf, totalMperf, totalIrPerfCount, totalTsc, 172 | "L3Access", "L3Miss", "L3MissLat/16", "L3MissSdpReq", "Unused", "Unused"); 173 | return results; 174 | } 175 | 176 | public string[] columns = new string[] { "Item", "Clk", "Hitrate", "Hit BW", "Mem Latency", "Mem Latency?", "Pend. Miss/C", "SDP Requests", "SDP Requests * 64B" }; 177 | 178 | public string GetHelpText() { return ""; } 179 | 180 | private string[] computeMetrics(string label, L3CounterData counterData, float clk) 181 | { 182 | // event 0x90 counts "total cycles for all transactions divided by 16" 183 | float ccxL3MissLatency = (float)counterData.ctr2 * 16 / counterData.ctr3; 184 | float ccxL3Hitrate = (1 - (float)counterData.ctr1 / counterData.ctr0) * 100; 185 | float ccxL3HitBw = ((float)counterData.ctr0 - counterData.ctr1) * 64; 186 | return new string[] { label, 187 | FormatLargeNumber(clk), 188 | string.Format("{0:F2}%", ccxL3Hitrate), 189 | FormatLargeNumber(ccxL3HitBw) + "B/s", 190 | string.Format("{0:F1} clks", ccxL3MissLatency), 191 | string.Format("{0:F1} ns", (1000000000 / clk) * ccxL3MissLatency), 192 | string.Format("{0:F2}", counterData.ctr2 * 16 / clk), 193 | FormatLargeNumber(counterData.ctr3), 194 | FormatLargeNumber(counterData.ctr3 * 64) + "B/s"}; 195 | } 196 | } 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /App.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /Cpu.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Threading; 3 | using System.Windows.Forms; 4 | 5 | namespace PmcReader 6 | { 7 | public interface MonitoringArea 8 | { 9 | MonitoringConfig[] GetMonitoringConfigs(); 10 | 11 | string GetArchitectureName(); 12 | 13 | /// 14 | /// Monitoring thread function, periodically populates listView with results 15 | /// 16 | void MonitoringThread(int configId, ListView listView, CancellationToken cancelToken); 17 | 18 | /// 19 | /// Get number of threads in CPU 20 | /// 21 | /// Number of threads 22 | int GetThreadCount(); 23 | 24 | /// 25 | /// Start logging to file 26 | /// 27 | /// File path to log to 28 | /// If >= 0, only log for a specific core 29 | /// 30 | string StartLogToFile(string filePath, int targetCore); 31 | void StopLoggingToFile(); 32 | } 33 | 34 | public interface MonitoringConfig 35 | { 36 | /// 37 | /// Display name for configuration 38 | /// 39 | /// 40 | string GetConfigName(); 41 | 42 | /// 43 | /// Description of this config 44 | /// 45 | /// 46 | string GetHelpText(); 47 | 48 | /// 49 | /// Get columns to display in listview 50 | /// 51 | /// 52 | string[] GetColumns(); 53 | 54 | /// 55 | /// Program the appropriate counters 56 | /// 57 | void Initialize(); 58 | 59 | /// 60 | /// Read counters, return metrics 61 | /// 62 | MonitoringUpdateResults Update(); 63 | } 64 | 65 | /// 66 | /// Result metrics, collected after each update 67 | /// 68 | public class MonitoringUpdateResults 69 | { 70 | /// 71 | /// Aggregated metrics 72 | /// 73 | public string[] overallMetrics; 74 | 75 | /// 76 | /// List of per-unit metrics 77 | /// 78 | public string[][] unitMetrics; 79 | 80 | /// 81 | /// Counter values, for logging 82 | /// 83 | public Tuple[] overallCounterValues; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /HaswellForm.resx: -------------------------------------------------------------------------------- 1 |  2 | 3 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | text/microsoft-resx 110 | 111 | 112 | 2.0 113 | 114 | 115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 116 | 117 | 118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 119 | 120 | 121 | 17, 17 122 | 123 | -------------------------------------------------------------------------------- /Intel/AlderLakeL3.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Runtime.InteropServices.WindowsRuntime; 5 | 6 | namespace PmcReader.Intel 7 | { 8 | public class AlderLakeL3 : AlderLakeUncore 9 | { 10 | /// 11 | /// Number of L3 cache coherency boxes 12 | /// 13 | public int CboCount; 14 | public NormalizedCboCounterData[] cboData; 15 | public NormalizedCboCounterData cboTotals; 16 | 17 | public AlderLakeL3() 18 | { 19 | ulong cboConfig; 20 | architectureName = "Alder Lake Client L3"; 21 | 22 | // intel developer manual table 2-30 says bits 0-3 encode number of C-Box 23 | // ADL no longer requires subtracting one from the reported C-Box count, unlike Haswell and Skylake 24 | Ring0.ReadMsr(MSR_UNC_CBO_CONFIG, out cboConfig); 25 | CboCount = (int)(cboConfig & 0xF); 26 | cboData = new NormalizedCboCounterData[CboCount]; 27 | 28 | List monitoringConfigList = new List(); 29 | monitoringConfigList.Add(new HitrateConfig(this)); 30 | monitoringConfigs = monitoringConfigList.ToArray(); 31 | } 32 | 33 | public class NormalizedCboCounterData 34 | { 35 | public float ctr0; 36 | public float ctr1; 37 | } 38 | 39 | public void InitializeCboTotals() 40 | { 41 | if (cboTotals == null) 42 | { 43 | cboTotals = new NormalizedCboCounterData(); 44 | } 45 | 46 | cboTotals.ctr0 = 0; 47 | cboTotals.ctr1 = 0; 48 | } 49 | 50 | public void UpdateCboCounterData(uint cboIdx) 51 | { 52 | float normalizationFactor = GetNormalizationFactor((int)cboIdx); 53 | ulong ctr0 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR0_base + MSR_UNC_CBO_increment * cboIdx); 54 | ulong ctr1 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR1_base + MSR_UNC_CBO_increment * cboIdx); 55 | 56 | if (cboData[cboIdx] == null) 57 | { 58 | cboData[cboIdx] = new NormalizedCboCounterData(); 59 | } 60 | 61 | cboData[cboIdx].ctr0 = ctr0 * normalizationFactor; 62 | cboData[cboIdx].ctr1 = ctr1 * normalizationFactor; 63 | cboTotals.ctr0 += cboData[cboIdx].ctr0; 64 | cboTotals.ctr1 += cboData[cboIdx].ctr1; 65 | } 66 | 67 | public Tuple[] GetOverallCounterValues(string ctr0, string ctr1) 68 | { 69 | Tuple[] retval = new Tuple[2]; 70 | retval[0] = new Tuple(ctr0, cboTotals.ctr0); 71 | retval[1] = new Tuple(ctr1, cboTotals.ctr1); 72 | return retval; 73 | } 74 | 75 | public class HitrateConfig : MonitoringConfig 76 | { 77 | private AlderLakeL3 cpu; 78 | public string GetConfigName() { return "L3 Hitrate"; } 79 | 80 | public HitrateConfig(AlderLakeL3 intelCpu) 81 | { 82 | cpu = intelCpu; 83 | } 84 | 85 | public string[] GetColumns() 86 | { 87 | return columns; 88 | } 89 | 90 | public void Initialize() 91 | { 92 | cpu.EnableUncoreCounters(); 93 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 94 | { 95 | // Reusing Skylake events since Intel has not documented uncore events for arches after that 96 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx, 97 | GetUncorePerfEvtSelRegisterValue(0x34, 0x8F, false, false, true, false, 0)); 98 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx, 99 | GetUncorePerfEvtSelRegisterValue(0x34, 0x88, false, false, true, false, 0)); 100 | } 101 | } 102 | 103 | public MonitoringUpdateResults Update() 104 | { 105 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 106 | results.unitMetrics = new string[cpu.CboCount][]; 107 | cpu.InitializeCboTotals(); 108 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 109 | { 110 | cpu.UpdateCboCounterData(cboIdx); 111 | results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]); 112 | } 113 | 114 | results.overallMetrics = computeMetrics("Overall", cpu.cboTotals); 115 | results.overallCounterValues = cpu.GetOverallCounterValues("L3 Lookups", "L3 Misses"); 116 | return results; 117 | } 118 | 119 | public string[] columns = new string[] { "Item", "Hitrate", "Hit BW", "All Lookups", "I state" }; 120 | 121 | public string GetHelpText() { return ""; } 122 | 123 | private string[] computeMetrics(string label, NormalizedCboCounterData counterData) 124 | { 125 | return new string[] { label, 126 | string.Format("{0:F2}%", 100 * (1 - counterData.ctr1 / counterData.ctr0)), 127 | FormatLargeNumber((counterData.ctr0 - counterData.ctr1) * 64) + "B/s", 128 | FormatLargeNumber(counterData.ctr0), 129 | FormatLargeNumber(counterData.ctr1)}; 130 | } 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /Intel/AlderLakeUncore.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | 4 | namespace PmcReader.Intel 5 | { 6 | public class AlderLakeUncore : ModernIntelCpu 7 | { 8 | public const uint MSR_UNC_PERF_GLOBAL_CTRL = 0x2FF0; 9 | public const uint MSR_UNC_PERF_FIXED_CTRL = 0x2FDE; 10 | public const uint MSR_UNC_PERF_FIXED_CTR = 0x2FDF; 11 | public const uint MSR_UNC_CBO_CONFIG = 0x396; 12 | public const uint MSR_UNC_CBO_PERFEVTSEL0_base = 0x2000; 13 | public const uint MSR_UNC_CBO_PERFEVTSEL1_base = 0x2001; 14 | public const uint MSR_UNC_CBO_PERFCTR0_base = 0x2002; 15 | public const uint MSR_UNC_CBO_PERFCTR1_base = 0x2003; 16 | public const uint MSR_UNC_ARB_PERFCTR0 = 0x2FD2; 17 | public const uint MSR_UNC_ARB_PERFCTR1 = 0x2FD3; 18 | public const uint MSR_UNC_ARB_PERFEVTSEL0 = 0x2FD0; 19 | public const uint MSR_UNC_ARB_PERFEVTSEL1 = 0x2FD1; 20 | public const uint MSR_UNC_CBO_increment = 0x8; 21 | 22 | public AlderLakeUncore() 23 | { 24 | architectureName = "Alder Lake Client Uncore"; 25 | } 26 | 27 | /// 28 | /// Enable uncore counters, wtih overflow propagation/freezing disabled 29 | /// 30 | public void EnableUncoreCounters() 31 | { 32 | // Bit 29 - globally enable all PMU counters. 33 | // local counters still have to be individually enabled 34 | // other bits have to do with PMI or are reserved 35 | ulong enableUncoreCountersValue = 1UL << 29; 36 | Ring0.WriteMsr(MSR_UNC_PERF_GLOBAL_CTRL, enableUncoreCountersValue); 37 | 38 | // Bit 22 - locally enable fixed counter 39 | ulong enableUncoreFixedCtrValue = 1UL << 22; 40 | Ring0.WriteMsr(MSR_UNC_PERF_FIXED_CTRL, enableUncoreFixedCtrValue); 41 | } 42 | 43 | /// 44 | /// Get value to put in PERFEVTSEL register, for uncore counters 45 | /// 46 | /// Perf event 47 | /// Perf event qualification (umask) 48 | /// Edge detect 49 | /// Enable overflow forwarding 50 | /// Enable counter 51 | /// Invert cmask 52 | /// Count mask 53 | /// value to put in perfevtsel register 54 | public static ulong GetUncorePerfEvtSelRegisterValue(byte perfEvent, 55 | byte umask, 56 | bool edge, 57 | bool ovf_en, 58 | bool enable, 59 | bool invert, 60 | byte cmask) 61 | { 62 | return perfEvent | 63 | (ulong)umask << 8 | 64 | (edge ? 1UL : 0UL) << 18 | 65 | (ovf_en ? 1UL : 0UL) << 20 | 66 | (enable ? 1UL : 0UL) << 22 | 67 | (invert ? 1UL : 0UL) << 23 | 68 | (ulong)(cmask & 0xF) << 24; 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /Intel/HaswellClientArb.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | 4 | namespace PmcReader.Intel 5 | { 6 | public class HaswellClientArb : HaswellClientUncore 7 | { 8 | private ulong lastUncoreClockCount; 9 | 10 | public HaswellClientArb() 11 | { 12 | architectureName = "Haswell Client System Agent"; 13 | lastUncoreClockCount = 0; 14 | monitoringConfigs = new MonitoringConfig[2]; 15 | monitoringConfigs[0] = new MCRequests(this); 16 | monitoringConfigs[1] = new CoherencyRequests(this); 17 | } 18 | 19 | public class NormalizedArbCounterData 20 | { 21 | public float uncoreClock; 22 | public float ctr0; 23 | public float ctr1; 24 | } 25 | 26 | public NormalizedArbCounterData UpdateArbCounterData(out ulong ctr0, out ulong ctr1) 27 | { 28 | NormalizedArbCounterData rc = new NormalizedArbCounterData(); 29 | float normalizationFactor = GetNormalizationFactor(0); 30 | ulong uncoreClock, elapsedUncoreClocks; 31 | ctr0 = ReadAndClearMsr(MSR_UNC_ARB_PERFCTR0); 32 | ctr1 = ReadAndClearMsr(MSR_UNC_ARB_PERFCTR1); 33 | Ring0.ReadMsr(MSR_UNC_PERF_FIXED_CTR, out uncoreClock); 34 | 35 | // MSR_UNC_PERF_FIXED_CTR is 48 bits wide, upper bits are reserved 36 | uncoreClock &= 0xFFFFFFFFFFFF; 37 | elapsedUncoreClocks = uncoreClock; 38 | if (uncoreClock > lastUncoreClockCount) 39 | elapsedUncoreClocks = uncoreClock - lastUncoreClockCount; 40 | lastUncoreClockCount = uncoreClock; 41 | 42 | rc.ctr0 = ctr0 * normalizationFactor; 43 | rc.ctr1 = ctr1 * normalizationFactor; 44 | rc.uncoreClock = elapsedUncoreClocks * normalizationFactor; 45 | return rc; 46 | } 47 | 48 | public Tuple[] GetOverallCounterValues(NormalizedArbCounterData data, string ctr0, string ctr1) 49 | { 50 | Tuple[] retval = new Tuple[3]; 51 | retval[0] = new Tuple("Uncore Clk", data.uncoreClock); 52 | retval[1] = new Tuple(ctr0, data.ctr0); 53 | retval[2] = new Tuple(ctr1, data.ctr1); 54 | return retval; 55 | } 56 | 57 | public class MCRequests : MonitoringConfig 58 | { 59 | private HaswellClientArb cpu; 60 | private ulong totalReqs; 61 | public string GetConfigName() { return "All MC Requests"; } 62 | 63 | public MCRequests(HaswellClientArb intelCpu) 64 | { 65 | cpu = intelCpu; 66 | this.totalReqs = 0; 67 | } 68 | 69 | public string[] GetColumns() 70 | { 71 | return columns; 72 | } 73 | 74 | public void Initialize() 75 | { 76 | cpu.EnableUncoreCounters(); 77 | // 0x80 = increments by number of outstanding requests every cycle 78 | // counts for coherent and non-coherent requests initiated by cores, igpu, or L3 79 | // only works in counter 0 80 | Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL0, 81 | GetUncorePerfEvtSelRegisterValue(0x80, 1, false, false, true, false, 0)); 82 | 83 | // 0x81 = number of requests 84 | Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL1, 85 | GetUncorePerfEvtSelRegisterValue(0x81, 1, false, false, true, false, 0)); 86 | 87 | Ring0.WriteMsr(MSR_UNC_ARB_PERFCTR0, 0); 88 | Ring0.WriteMsr(MSR_UNC_ARB_PERFCTR1, 0); 89 | } 90 | 91 | public MonitoringUpdateResults Update() 92 | { 93 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 94 | results.unitMetrics = null; 95 | NormalizedArbCounterData counterData = cpu.UpdateArbCounterData(out ulong _, out ulong reqs); 96 | this.totalReqs += reqs; 97 | 98 | results.overallMetrics = new string[] { FormatLargeNumber(counterData.uncoreClock), 99 | FormatLargeNumber(counterData.ctr1), 100 | FormatLargeNumber(counterData.ctr1 * 64) + "B/s", 101 | string.Format("{0:F2}", counterData.ctr0 / counterData.uncoreClock), 102 | string.Format("{0:F2} clk", counterData.ctr0 / counterData.ctr1), 103 | string.Format("{0:F2} ns", (1000000000 / counterData.uncoreClock) * (counterData.ctr0 / counterData.ctr1)), 104 | FormatLargeNumber(totalReqs * 64) + "B" 105 | }; 106 | results.overallCounterValues = cpu.GetOverallCounterValues(counterData, "Pending Requests Per Cycle", "Requests"); 107 | return results; 108 | } 109 | 110 | public string GetHelpText() { return ""; } 111 | public string[] columns = new string[] { "Clk", "Requests", "Req*64B", "Q Occupancy", "Req Latency", "Req Latency", "Total Req Data" }; 112 | } 113 | 114 | public class CoherencyRequests : MonitoringConfig 115 | { 116 | private HaswellClientArb cpu; 117 | public string GetConfigName() { return "Coherency Tracker Requests"; } 118 | 119 | public CoherencyRequests(HaswellClientArb intelCpu) 120 | { 121 | cpu = intelCpu; 122 | } 123 | 124 | public string[] GetColumns() 125 | { 126 | return columns; 127 | } 128 | 129 | public void Initialize() 130 | { 131 | cpu.EnableUncoreCounters(); 132 | // 0x83 = increments by number of outstanding requests every cycle in coherency tracker 133 | Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL0, 134 | GetUncorePerfEvtSelRegisterValue(0x83, 1, false, false, true, false, 0)); 135 | 136 | // 0x84 = number of requests allocated in coherency tracker 137 | Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL1, 138 | GetUncorePerfEvtSelRegisterValue(0x84, 1, false, false, true, false, 0)); 139 | } 140 | 141 | public MonitoringUpdateResults Update() 142 | { 143 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 144 | results.unitMetrics = null; 145 | NormalizedArbCounterData counterData = cpu.UpdateArbCounterData(out _, out _); 146 | 147 | results.overallMetrics = new string[] { FormatLargeNumber(counterData.uncoreClock), 148 | FormatLargeNumber(counterData.ctr1), 149 | FormatLargeNumber(counterData.ctr1 * 64) + "B/s", 150 | string.Format("{0:F2}", counterData.ctr0 / counterData.uncoreClock), 151 | string.Format("{0:F2} clk", counterData.ctr0 / counterData.ctr1), 152 | string.Format("{0:F2} ns", (1000000000 / counterData.uncoreClock) * (counterData.ctr0 / counterData.ctr1)) 153 | }; 154 | return results; 155 | } 156 | 157 | public string GetHelpText() { return ""; } 158 | public string[] columns = new string[] { "Clk", "Requests", "Req*64B", "Q Occupancy", "Req Latency", "Req Latency" }; 159 | } 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /Intel/HaswellClientL3.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | 4 | namespace PmcReader.Intel 5 | { 6 | public class HaswellClientL3 : HaswellClientUncore 7 | { 8 | /// 9 | /// Number of L3 cache coherency boxes 10 | /// 11 | public int CboCount; 12 | public NormalizedCboCounterData[] cboData; 13 | public NormalizedCboCounterData cboTotals; 14 | 15 | public HaswellClientL3() 16 | { 17 | ulong cboConfig; 18 | architectureName = "Haswell Client L3"; 19 | 20 | // intel developer manual table 2-30 syas bits 0-3 encode number of C-Box 21 | // "derive value by -1" 22 | Ring0.ReadMsr(MSR_UNC_CBO_CONFIG, out cboConfig); 23 | CboCount = (int)((cboConfig & 0x7) - 1); 24 | cboData = new NormalizedCboCounterData[CboCount]; 25 | 26 | monitoringConfigs = new MonitoringConfig[3]; 27 | monitoringConfigs[0] = new HitrateConfig(this); 28 | monitoringConfigs[1] = new SnoopHitConfig(this); 29 | monitoringConfigs[2] = new SnoopInvalidateConfig(this); 30 | } 31 | 32 | public class NormalizedCboCounterData 33 | { 34 | public float ctr0; 35 | public float ctr1; 36 | } 37 | 38 | public void InitializeCboTotals() 39 | { 40 | if (cboTotals == null) 41 | { 42 | cboTotals = new NormalizedCboCounterData(); 43 | } 44 | 45 | cboTotals.ctr0 = 0; 46 | cboTotals.ctr1 = 0; 47 | } 48 | 49 | public void UpdateCboCounterData(uint cboIdx) 50 | { 51 | float normalizationFactor = GetNormalizationFactor((int)cboIdx); 52 | ulong ctr0 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR0_base + MSR_UNC_CBO_increment * cboIdx); 53 | ulong ctr1 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR1_base + MSR_UNC_CBO_increment * cboIdx); 54 | 55 | if (cboData[cboIdx] == null) 56 | { 57 | cboData[cboIdx] = new NormalizedCboCounterData(); 58 | } 59 | 60 | cboData[cboIdx].ctr0 = ctr0 * normalizationFactor; 61 | cboData[cboIdx].ctr1 = ctr1 * normalizationFactor; 62 | cboTotals.ctr0 += cboData[cboIdx].ctr0; 63 | cboTotals.ctr1 += cboData[cboIdx].ctr1; 64 | } 65 | 66 | public Tuple[] GetCboOverallCounterValues(string ctr0, string ctr1) 67 | { 68 | Tuple[] retval = new Tuple[2]; 69 | retval[0] = new Tuple(ctr0, cboTotals.ctr0); 70 | retval[1] = new Tuple(ctr1, cboTotals.ctr1); 71 | return retval; 72 | } 73 | 74 | public class HitrateConfig : MonitoringConfig 75 | { 76 | private HaswellClientL3 cpu; 77 | public string GetConfigName() { return "L3 Hitrate"; } 78 | 79 | public HitrateConfig(HaswellClientL3 intelCpu) 80 | { 81 | cpu = intelCpu; 82 | } 83 | 84 | public string[] GetColumns() 85 | { 86 | return columns; 87 | } 88 | 89 | public void Initialize() 90 | { 91 | cpu.EnableUncoreCounters(); 92 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 93 | { 94 | // 0x34 = L3 lookups, 0xFF = all lookups 95 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx, 96 | GetUncorePerfEvtSelRegisterValue(0x34, 0xFF, false, false, true, false, 0)); 97 | 98 | // 0x34 = L3 lookups, high 4 bits = cacheable read | cacheable write | external snoop | irq/ipq 99 | // low 4 bits = M | ES | I, so select I to count misses 100 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx, 101 | GetUncorePerfEvtSelRegisterValue(0x34, 0xF8, false, false, true, false, 0)); 102 | } 103 | } 104 | 105 | public MonitoringUpdateResults Update() 106 | { 107 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 108 | results.unitMetrics = new string[cpu.CboCount][]; 109 | cpu.InitializeCboTotals(); 110 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 111 | { 112 | cpu.UpdateCboCounterData(cboIdx); 113 | results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]); 114 | } 115 | 116 | results.overallMetrics = computeMetrics("Overall", cpu.cboTotals); 117 | results.overallCounterValues = cpu.GetCboOverallCounterValues("L3 Lookups", "L3 Miss"); 118 | return results; 119 | } 120 | 121 | public string[] columns = new string[] { "Item", "Hitrate", "Hit BW", "All Lookups", "L3 Miss" }; 122 | 123 | public string GetHelpText() { return ""; } 124 | 125 | private string[] computeMetrics(string label, NormalizedCboCounterData counterData) 126 | { 127 | return new string[] { label, 128 | string.Format("{0:F2}%", 100 * (1 - counterData.ctr1 / counterData.ctr0)), 129 | FormatLargeNumber((counterData.ctr0 - counterData.ctr1) * 64) + "B/s", 130 | FormatLargeNumber(counterData.ctr0), 131 | FormatLargeNumber(counterData.ctr1)}; 132 | } 133 | } 134 | 135 | public class SnoopInvalidateConfig : MonitoringConfig 136 | { 137 | private HaswellClientL3 cpu; 138 | public string GetConfigName() { return "Snoop Invalidations"; } 139 | 140 | public SnoopInvalidateConfig(HaswellClientL3 intelCpu) 141 | { 142 | cpu = intelCpu; 143 | } 144 | 145 | public string[] GetColumns() 146 | { 147 | return columns; 148 | } 149 | 150 | public void Initialize() 151 | { 152 | ThreadAffinity.Set(0x1); 153 | cpu.EnableUncoreCounters(); 154 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 155 | { 156 | // 0x22 = Snoop response, 0xFF = all responses 157 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx, 158 | GetUncorePerfEvtSelRegisterValue(0x22, 0xFF, false, false, true, false, 0)); 159 | 160 | // 0x22 = Snoop response, umask 0x2 = non-modified line invalidated, umask 0x10 = modified line invalidated 161 | // high 3 bits of umask = filter. 0x20 = external snoop, 0x40 = core memory request, 0x80 = L3 eviction 162 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx, 163 | GetUncorePerfEvtSelRegisterValue(0x22, 0x12 | 0x20 | 0x40 | 0x80, false, false, true, false, 0)); 164 | } 165 | } 166 | 167 | public MonitoringUpdateResults Update() 168 | { 169 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 170 | results.unitMetrics = new string[cpu.CboCount][]; 171 | cpu.InitializeCboTotals(); 172 | ThreadAffinity.Set(0x1); 173 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 174 | { 175 | cpu.UpdateCboCounterData(cboIdx); 176 | results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]); 177 | } 178 | 179 | results.overallMetrics = computeMetrics("Overall", cpu.cboTotals); 180 | return results; 181 | } 182 | 183 | public string[] columns = new string[] { "Item", "Invalidate Resp %", "Invalidate BW", "All Snoop Responses", "Core Cache Lines Invalidated" }; 184 | 185 | public string GetHelpText() { return ""; } 186 | 187 | private string[] computeMetrics(string label, NormalizedCboCounterData counterData) 188 | { 189 | return new string[] { label, 190 | string.Format("{0:F2}%", 100 * (counterData.ctr1 / counterData.ctr0)), 191 | FormatLargeNumber(counterData.ctr1 * 64) + "B/s", 192 | FormatLargeNumber(counterData.ctr0), 193 | FormatLargeNumber(counterData.ctr1)}; 194 | } 195 | } 196 | 197 | public class SnoopHitConfig : MonitoringConfig 198 | { 199 | private HaswellClientL3 cpu; 200 | public string GetConfigName() { return "Snoop Hits"; } 201 | 202 | public SnoopHitConfig(HaswellClientL3 intelCpu) 203 | { 204 | cpu = intelCpu; 205 | } 206 | 207 | public string[] GetColumns() 208 | { 209 | return columns; 210 | } 211 | 212 | public void Initialize() 213 | { 214 | ThreadAffinity.Set(0x1); 215 | cpu.EnableUncoreCounters(); 216 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 217 | { 218 | // 0x22 = Snoop response, 0xFF = all responses 219 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx, 220 | GetUncorePerfEvtSelRegisterValue(0x22, 0xFF, false, false, true, false, 0)); 221 | 222 | // 0x22 = Snoop response, umask 0x4 = non-modified line hit, umask 0x8 = modified line hit 223 | // high 3 bits of umask = filter. 0x20 = external snoop, 0x40 = core memory request, 0x80 = L3 eviction 224 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx, 225 | GetUncorePerfEvtSelRegisterValue(0x22, 0x4 | 0x8 | 0x20 | 0x40 | 0x80, false, false, true, false, 0)); 226 | } 227 | } 228 | 229 | public MonitoringUpdateResults Update() 230 | { 231 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 232 | results.unitMetrics = new string[cpu.CboCount][]; 233 | cpu.InitializeCboTotals(); 234 | ThreadAffinity.Set(0x1); 235 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 236 | { 237 | cpu.UpdateCboCounterData(cboIdx); 238 | results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]); 239 | } 240 | 241 | results.overallMetrics = computeMetrics("Overall", cpu.cboTotals); 242 | return results; 243 | } 244 | 245 | public string[] columns = new string[] { "Item", "Snoop Hitrate", "Snoop Hit BW", "All Snoop Responses", "Snoop Hits" }; 246 | 247 | public string GetHelpText() { return ""; } 248 | 249 | private string[] computeMetrics(string label, NormalizedCboCounterData counterData) 250 | { 251 | return new string[] { label, 252 | string.Format("{0:F2}%", 100 * (counterData.ctr1 / counterData.ctr0)), 253 | FormatLargeNumber(counterData.ctr1 * 64) + "B/s", 254 | FormatLargeNumber(counterData.ctr0), 255 | FormatLargeNumber(counterData.ctr1)}; 256 | } 257 | } 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /Intel/HaswellClientUncore.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | 4 | namespace PmcReader.Intel 5 | { 6 | public class HaswellClientUncore : ModernIntelCpu 7 | { 8 | public const uint MSR_UNC_PERF_GLOBAL_CTRL = 0x391; 9 | public const uint MSR_UNC_PERF_FIXED_CTRL = 0x394; 10 | public const uint MSR_UNC_PERF_FIXED_CTR = 0x395; 11 | public const uint MSR_UNC_CBO_CONFIG = 0x396; 12 | public const uint MSR_UNC_CBO_PERFEVTSEL0_base = 0x700; 13 | public const uint MSR_UNC_CBO_PERFEVTSEL1_base = 0x701; 14 | public const uint MSR_UNC_CBO_PERFCTR0_base = 0x706; 15 | public const uint MSR_UNC_CBO_PERFCTR1_base = 0x707; 16 | public const uint MSR_UNC_ARB_PERFCTR0 = 0x3B0; 17 | public const uint MSR_UNC_ARB_PERFCTR1 = 0x3B1; 18 | public const uint MSR_UNC_ARB_PERFEVTSEL0 = 0x3B2; 19 | public const uint MSR_UNC_ARB_PERFEVTSEL1 = 0x3B3; 20 | public const uint MSR_UNC_CBO_increment = 0x10; 21 | 22 | public HaswellClientUncore() 23 | { 24 | architectureName = "Haswell Client Uncore"; 25 | } 26 | 27 | /// 28 | /// Enable haswell uncore counters, wtih overflow propagation/freezing disabled 29 | /// 30 | public void EnableUncoreCounters() 31 | { 32 | ulong enableUncoreCountersValue = 1UL << 29; 33 | Ring0.WriteMsr(MSR_UNC_PERF_GLOBAL_CTRL, enableUncoreCountersValue); 34 | ulong enableUncoreFixedCtrValue = 1UL << 22; 35 | Ring0.WriteMsr(MSR_UNC_PERF_FIXED_CTRL, enableUncoreFixedCtrValue); 36 | } 37 | 38 | /// 39 | /// Get value to put in PERFEVTSEL register, for uncore counters 40 | /// 41 | /// Perf event 42 | /// Perf event qualification (umask) 43 | /// Edge detect 44 | /// Enable overflow forwarding 45 | /// Enable counter 46 | /// Invert cmask 47 | /// Count mask 48 | /// value to put in perfevtsel register 49 | public static ulong GetUncorePerfEvtSelRegisterValue(byte perfEvent, 50 | byte umask, 51 | bool edge, 52 | bool ovf_en, 53 | bool enable, 54 | bool invert, 55 | byte cmask) 56 | { 57 | return perfEvent | 58 | (ulong)umask << 8 | 59 | (edge ? 1UL : 0UL) << 18 | 60 | (ovf_en ? 1UL : 0UL) << 20 | 61 | (enable ? 1UL : 0UL) << 22 | 62 | (invert ? 1UL : 0UL) << 23 | 63 | (ulong)(cmask & 0xF) << 24; 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Intel/MeteorLakeArb.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | using System.Collections.Generic; 4 | 5 | namespace PmcReader.Intel 6 | { 7 | public class MeteorLakeArb : MeteorLakeUncore 8 | { 9 | private ulong lastSncuClk, lastCncuClk; 10 | 11 | public MeteorLakeArb() 12 | { 13 | architectureName = "Meteor Lake ARB"; 14 | lastSncuClk = 0; 15 | List arbMonitoringConfigs = new List(); 16 | arbMonitoringConfigs.Add(new FixedCounters(this)); 17 | arbMonitoringConfigs.Add(new ArbCounters(this)); 18 | monitoringConfigs = arbMonitoringConfigs.ToArray(); 19 | } 20 | 21 | public class NormalizedArbCounterData 22 | { 23 | public float sncuUncoreClk; 24 | 25 | /// 26 | /// Documented as UCLK (UNC_CLOCK.SOCKET) cycles 27 | /// 28 | public float cncuUncoreClk; 29 | public float arbCtr0; 30 | public float arbCtr1; 31 | public float hacArbCtr0; 32 | public float hacArbCtr1; 33 | public float hacCboCtr0; 34 | public float hacCboCtr1; 35 | } 36 | 37 | public void InitializeFixedCounters() 38 | { 39 | ulong boxEnable = 1UL << 29; 40 | Ring0.WriteMsr(MTL_UNC_SNCU_BOX_CTRL, boxEnable); 41 | Ring0.WriteMsr(MTL_UNC_CNCU_BOX_CTRL, boxEnable); 42 | 43 | // 0xFF = clockticks, bit 22 = enable 44 | // cNCU = socket uncore clocks from Intel's description 45 | // reaches 3.3 GHz and likely corresponds to uncore clk on the CPU tile 46 | // sNCU could be socket uncore clock for the IO die. 47 | // reaches 2.4 GHz 48 | Ring0.WriteMsr(MTL_UNC_SNCU_FIXED_CTRL, 0xFF | (1UL << 22)); 49 | Ring0.WriteMsr(MTL_UNC_CNCU_FIXED_CTRL, 0xFF | (1UL << 22)); 50 | Ring0.WriteMsr(MTL_UNC_SNCU_FIXED_CTR, 0); 51 | Ring0.WriteMsr(MTL_UNC_CNCU_FIXED_CTR, 0); 52 | } 53 | 54 | public NormalizedArbCounterData UpdateArbCounterData() 55 | { 56 | NormalizedArbCounterData rc = new NormalizedArbCounterData(); 57 | float normalizationFactor = GetNormalizationFactor(0); 58 | ulong sncuClk, cncuClk, elapsedSncuClk, elapsedCncuClk; 59 | ulong arbCtr0 = ReadAndClearMsr(MTL_UNC_ARB_CTR); 60 | ulong arbCtr1 = ReadAndClearMsr(MTL_UNC_ARB_CTR + 1); 61 | ulong hacArbCtr0 = ReadAndClearMsr(MTL_UNC_HAC_ARB_CTR); 62 | ulong hacArbCtr1 = ReadAndClearMsr(MTL_UNC_HAC_ARB_CTR + 1); 63 | ulong hacCboCtr0 = ReadAndClearMsr(MTL_UNC_HAC_CBO_CTR); 64 | ulong hacCboCtr1 = ReadAndClearMsr(MTL_UNC_HAC_CBO_CTR + 1); 65 | 66 | // Fixed counters 67 | Ring0.ReadMsr(MTL_UNC_SNCU_FIXED_CTR, out sncuClk); 68 | Ring0.ReadMsr(MTL_UNC_CNCU_FIXED_CTR, out cncuClk); 69 | 70 | // MSR_UNC_PERF_FIXED_CTR is 48 bits wide, upper bits are reserved 71 | sncuClk &= 0xFFFFFFFFFFFF; 72 | elapsedSncuClk = sncuClk; 73 | if (sncuClk > lastSncuClk) 74 | elapsedSncuClk = sncuClk - lastSncuClk; 75 | lastSncuClk = sncuClk; 76 | 77 | cncuClk &= 0xFFFFFFFFFFFF; 78 | elapsedCncuClk = cncuClk; 79 | if (cncuClk > lastCncuClk) 80 | elapsedCncuClk = cncuClk - lastCncuClk; 81 | lastCncuClk = cncuClk; 82 | 83 | rc.arbCtr0 = arbCtr0 * normalizationFactor; 84 | rc.arbCtr1 = arbCtr1 * normalizationFactor; 85 | rc.hacArbCtr0 = hacArbCtr0 * normalizationFactor; 86 | rc.hacArbCtr1 = hacArbCtr1 * normalizationFactor; 87 | rc.hacCboCtr0 = hacCboCtr0 * normalizationFactor; 88 | rc.hacCboCtr1 = hacCboCtr1 * normalizationFactor; 89 | rc.sncuUncoreClk = elapsedSncuClk * normalizationFactor; 90 | rc.cncuUncoreClk = elapsedCncuClk * normalizationFactor; 91 | return rc; 92 | } 93 | 94 | public Tuple[] GetOverallCounterValues(NormalizedArbCounterData data, string ctr0, string ctr1) 95 | { 96 | Tuple[] retval = new Tuple[3]; 97 | retval[0] = new Tuple("sNCU Clk", data.sncuUncoreClk); 98 | retval[1] = new Tuple("cNCU Clk", data.cncuUncoreClk); 99 | retval[2] = new Tuple(ctr0, data.arbCtr0); 100 | retval[3] = new Tuple(ctr1, data.arbCtr1); 101 | return retval; 102 | } 103 | 104 | public class FixedCounters : MonitoringConfig 105 | { 106 | private MeteorLakeArb arb; 107 | public FixedCounters(MeteorLakeArb arb) 108 | { 109 | this.arb = arb; 110 | } 111 | 112 | public string[] columns = new string[] { "Item", "GHz" }; 113 | public string[] GetColumns() { return columns; } 114 | public string GetConfigName() { return "Fixed Counters"; } 115 | public string GetHelpText() { return ""; } 116 | 117 | public void Initialize() 118 | { 119 | arb.InitializeFixedCounters(); 120 | 121 | // HAC CBo ToR allocation, all requests 122 | Ring0.WriteMsr(MTL_UNC_HAC_CBO_CTRL, GetUncorePerfEvtSelRegisterValue(0x35, 8, false, false, true, false, 0)); 123 | Ring0.WriteMsr(MTL_UNC_HAC_CBO_CTR, 0); 124 | } 125 | 126 | public MonitoringUpdateResults Update() 127 | { 128 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 129 | results.overallMetrics = new string[] { "N/A", "N/A" }; 130 | NormalizedArbCounterData normalizedArbCounterData = arb.UpdateArbCounterData(); 131 | results.unitMetrics = new string[2][]; 132 | results.unitMetrics[0] = new string[] { "sNCU", FormatLargeNumber(normalizedArbCounterData.sncuUncoreClk) + "Hz" }; 133 | results.unitMetrics[1] = new string[] { "cNCU", FormatLargeNumber(normalizedArbCounterData.cncuUncoreClk) + "Hz" }; 134 | return results; 135 | } 136 | } 137 | 138 | public class ArbCounters : MonitoringConfig 139 | { 140 | private MeteorLakeArb arb; 141 | public ArbCounters(MeteorLakeArb arb) 142 | { 143 | this.arb = arb; 144 | } 145 | 146 | public string[] columns = new string[] { "Item", "Metric", "Occupancy", "Latency" }; 147 | public string[] GetColumns() { return columns; } 148 | public string GetConfigName() { return "Arb"; } 149 | public string GetHelpText() { return ""; } 150 | 151 | public void Initialize() 152 | { 153 | arb.InitializeFixedCounters(); 154 | 155 | // HAC CBo ToR allocation, all requests 156 | Ring0.WriteMsr(MTL_UNC_HAC_CBO_CTRL, GetUncorePerfEvtSelRegisterValue(0x35, 8, false, false, true, false, 0)); 157 | Ring0.WriteMsr(MTL_UNC_HAC_CBO_CTR, 0); 158 | 159 | // HAC ARB, all requests 160 | Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTRL, GetUncorePerfEvtSelRegisterValue(0x81, 1, false, false, true, false, 0)); 161 | Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTR, 0); 162 | 163 | // HAC ARB, CMI transactions 164 | Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTRL + 1, GetUncorePerfEvtSelRegisterValue(0x8A, 1, false, false, true, false, 0)); 165 | Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTR, 0); 166 | Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTR + 1, 0); 167 | 168 | // ARB Occupancy. 2 = data read, 0 = all (in the past, not documented) 169 | // 0x85 = occupancy. Uses cNCU clock 170 | // ok 0x81 doesn't work, how about 0x8A 171 | // 0x86 is almost right? seems to count in 32B increments and doesn't count GPU BW 172 | //Ring0.WriteMsr(MTL_UNC_ARB_CTRL, GetUncorePerfEvtSelRegisterValue(0x85, 0, false, false, true, false, 0)); 173 | Ring0.WriteMsr(MTL_UNC_ARB_CTRL, GetUncorePerfEvtSelRegisterValue(0x85, 0, false, false, true, false, 20)); 174 | Ring0.WriteMsr(MTL_UNC_ARB_CTR, 0); 175 | //Ring0.WriteMsr(MTL_UNC_ARB_CTR + 1, 0); 176 | } 177 | 178 | public MonitoringUpdateResults Update() 179 | { 180 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 181 | NormalizedArbCounterData normalizedArbCounterData = arb.UpdateArbCounterData(); 182 | float arbReqs = normalizedArbCounterData.arbCtr0; 183 | // float arbOcc = normalizedArbCounterData.arbCtr0; 184 | results.unitMetrics = new string[][] { 185 | new string[] { "HAC CBo", FormatLargeNumber(normalizedArbCounterData.hacCboCtr0 * 64) + "B/s", "-", "-"}, 186 | new string[] { "HAC ARB (All Reqs)", FormatLargeNumber(normalizedArbCounterData.hacArbCtr0 * 64) + "B/s", "-", "-"}, 187 | new string[] { "HAC ARB (CMI Transactions)", FormatLargeNumber(normalizedArbCounterData.hacArbCtr1 * 64) + "B/s", "-", "-"}, 188 | 189 | // which clock? 190 | new string[] { "ARB", FormatLargeNumber(arbReqs) + ">20", "-", "-"}, 191 | new string[] { "sNCU", FormatLargeNumber(normalizedArbCounterData.sncuUncoreClk) + "Hz", "-", "-" }, 192 | new string[] { "cNCU", FormatLargeNumber(normalizedArbCounterData.cncuUncoreClk) + "Hz", "-", "-" }, 193 | }; 194 | 195 | results.overallMetrics = new string[] { "N/A", "N/A" }; 196 | return results; 197 | } 198 | } 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /Intel/MeteorLakeL3.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | using System.Collections.Generic; 4 | 5 | namespace PmcReader.Intel 6 | { 7 | public class MeteorLakeL3 : MeteorLakeUncore 8 | { 9 | /// 10 | /// Number of L3 cache coherency boxes 11 | /// 12 | public int CboCount; 13 | public NormalizedCboCounterData[] cboData; 14 | public NormalizedCboCounterData cboTotals; 15 | 16 | public MeteorLakeL3() 17 | { 18 | ulong cboConfig; 19 | architectureName = "Meteor Lake Client L3"; 20 | 21 | // Verbatim from Linux perf code 22 | Ring0.ReadMsr(MTL_UNC_CBO_CONFIG, out cboConfig); 23 | CboCount = (int)(cboConfig & MTL_UNC_NUM_CBO_MASK); 24 | cboData = new NormalizedCboCounterData[CboCount]; 25 | 26 | List monitoringConfigList = new List(); 27 | monitoringConfigList.Add(new HitrateConfig(this)); 28 | monitoringConfigs = monitoringConfigList.ToArray(); 29 | } 30 | 31 | public class NormalizedCboCounterData 32 | { 33 | public float ctr0; 34 | public float ctr1; 35 | } 36 | 37 | public void InitializeCboTotals() 38 | { 39 | if (cboTotals == null) 40 | { 41 | cboTotals = new NormalizedCboCounterData(); 42 | } 43 | 44 | cboTotals.ctr0 = 0; 45 | cboTotals.ctr1 = 0; 46 | } 47 | 48 | public void UpdateCboCounterData(uint cboIdx) 49 | { 50 | float normalizationFactor = GetNormalizationFactor((int)cboIdx); 51 | ulong ctr0 = ReadAndClearMsr(MTL_UNC_CBO_CTR + MTL_UNC_INCREMENT * cboIdx); 52 | ulong ctr1 = ReadAndClearMsr(MTL_UNC_CBO_CTR + MTL_UNC_INCREMENT * cboIdx + 1); 53 | 54 | if (cboData[cboIdx] == null) 55 | { 56 | cboData[cboIdx] = new NormalizedCboCounterData(); 57 | } 58 | 59 | cboData[cboIdx].ctr0 = ctr0 * normalizationFactor; 60 | cboData[cboIdx].ctr1 = ctr1 * normalizationFactor; 61 | cboTotals.ctr0 += cboData[cboIdx].ctr0; 62 | cboTotals.ctr1 += cboData[cboIdx].ctr1; 63 | } 64 | 65 | public Tuple[] GetOverallCounterValues(string ctr0, string ctr1) 66 | { 67 | Tuple[] retval = new Tuple[2]; 68 | retval[0] = new Tuple(ctr0, cboTotals.ctr0); 69 | retval[1] = new Tuple(ctr1, cboTotals.ctr1); 70 | return retval; 71 | } 72 | 73 | public class HitrateConfig : MonitoringConfig 74 | { 75 | private MeteorLakeL3 cpu; 76 | public string GetConfigName() { return "L3 Hitrate"; } 77 | 78 | public HitrateConfig(MeteorLakeL3 intelCpu) 79 | { 80 | cpu = intelCpu; 81 | } 82 | 83 | public string[] GetColumns() 84 | { 85 | return columns; 86 | } 87 | 88 | public void Initialize() 89 | { 90 | cpu.EnableUncoreCounters(); 91 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 92 | { 93 | // Reusing Skylake events since Intel has not documented uncore events for arches after that 94 | Ring0.WriteMsr(MTL_UNC_CBO_CTRL + MTL_UNC_INCREMENT * cboIdx, 95 | GetUncorePerfEvtSelRegisterValue(0x34, 0x8F, false, false, true, false, 0)); 96 | Ring0.WriteMsr(MTL_UNC_CBO_CTRL + MTL_UNC_INCREMENT * cboIdx + 1, 97 | GetUncorePerfEvtSelRegisterValue(0x34, 0x88, false, false, true, false, 0)); 98 | } 99 | } 100 | 101 | public MonitoringUpdateResults Update() 102 | { 103 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 104 | results.unitMetrics = new string[cpu.CboCount][]; 105 | cpu.InitializeCboTotals(); 106 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 107 | { 108 | cpu.UpdateCboCounterData(cboIdx); 109 | results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]); 110 | } 111 | 112 | results.overallMetrics = computeMetrics("Overall", cpu.cboTotals); 113 | results.overallCounterValues = cpu.GetOverallCounterValues("L3 Lookups", "L3 Misses"); 114 | return results; 115 | } 116 | 117 | public string[] columns = new string[] { "Item", "Hitrate", "Hit BW", "All Lookups", "I state" }; 118 | 119 | public string GetHelpText() { return ""; } 120 | 121 | private string[] computeMetrics(string label, NormalizedCboCounterData counterData) 122 | { 123 | return new string[] { label, 124 | string.Format("{0:F2}%", 100 * (1 - counterData.ctr1 / counterData.ctr0)), 125 | FormatLargeNumber((counterData.ctr0 - counterData.ctr1) * 64) + "B/s", 126 | FormatLargeNumber(counterData.ctr0), 127 | FormatLargeNumber(counterData.ctr1)}; 128 | } 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /Intel/MeteorLakeUncore.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | 4 | namespace PmcReader.Intel 5 | { 6 | // Really just a container for MSR numbers 7 | public class MeteorLakeUncore : ModernIntelCpu 8 | { // applies to cbox, arb, hac_arb, hac_cbox (hbo) 9 | public const uint MTL_UNC_INCREMENT = 0x10; 10 | 11 | // same as Ice Lake 12 | public const uint MTL_UNC_CBO_CONFIG = 0x396; 13 | public const ulong MTL_UNC_NUM_CBO_MASK = 0xF; 14 | 15 | // SNCU and CNCU provide fixed counters for clock ticks 16 | public const uint MTL_UNC_SNCU_FIXED_CTRL = 0x2002; 17 | public const uint MTL_UNC_SNCU_FIXED_CTR = 0x2008; 18 | public const uint MTL_UNC_SNCU_BOX_CTRL = 0x200e; 19 | public const uint MTL_UNC_CNCU_FIXED_CTRL = 0x2402; 20 | public const uint MTL_UNC_CNCU_FIXED_CTR = 0x2408; 21 | public const uint MTL_UNC_CNCU_BOX_CTRL = 0x240e; 22 | 23 | // System agent's arbitration queue? 24 | public const uint MTL_UNC_ARB_CTRL = 0x2412; 25 | public const uint MTL_UNC_ARB_CTR = 0x2418; 26 | 27 | // Home agent's arbitration queue? Compute tile -> SoC tile 28 | public const uint MTL_UNC_HAC_ARB_CTRL = 0x2012; 29 | public const uint MTL_UNC_HAC_ARB_CTR = 0x2018; 30 | 31 | // Home agent cbox? 2 counters 32 | public const uint MTL_UNC_HAC_CBO_CTRL = 0x2042; 33 | public const uint MTL_UNC_HAC_CBO_CTR = 0x2048; 34 | 35 | // L3 cboxes. 2x 48-bit ctrs per cbo 36 | public const uint MTL_UNC_CBO_CTRL = 0x2442; 37 | public const uint MTL_UNC_CBO_CTR = 0x2448; 38 | 39 | public MeteorLakeUncore() 40 | { 41 | architectureName = "Meteor Lake Uncore"; 42 | } 43 | 44 | /// 45 | /// Enable uncore counters, wtih overflow propagation/freezing disabled 46 | /// 47 | public void EnableUncoreCounters() 48 | { 49 | // MTL doesn't appear to have global uncore enable registers. 50 | // Setting enable bit on sNCU/cNCU fixed counter ctrl regs is enough to enable counting 51 | } 52 | 53 | /// 54 | /// Get value to put in PERFEVTSEL register, for uncore counters 55 | /// 56 | /// Perf event 57 | /// Perf event qualification (umask) 58 | /// Edge detect 59 | /// Enable overflow forwarding 60 | /// Enable counter 61 | /// Invert cmask 62 | /// Count mask 63 | /// value to put in perfevtsel register 64 | public static ulong GetUncorePerfEvtSelRegisterValue(byte perfEvent, 65 | byte umask, 66 | bool edge, 67 | bool ovf_en, 68 | bool enable, 69 | bool invert, 70 | byte cmask) 71 | { 72 | return perfEvent | 73 | (ulong)umask << 8 | 74 | (edge ? 1UL : 0UL) << 18 | 75 | (ovf_en ? 1UL : 0UL) << 20 | 76 | (enable ? 1UL : 0UL) << 22 | 77 | (invert ? 1UL : 0UL) << 23 | 78 | (ulong)(cmask & 0xF) << 24; 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /Intel/SkylakeClientArb.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | 4 | namespace PmcReader.Intel 5 | { 6 | public class SkylakeClientArb : SkylakeClientUncore 7 | { 8 | private ulong lastUncoreClockCount; 9 | 10 | public SkylakeClientArb() 11 | { 12 | architectureName = "Skylake Client System Agent"; 13 | lastUncoreClockCount = 0; 14 | monitoringConfigs = new MonitoringConfig[3]; 15 | monitoringConfigs[0] = new MCRequests(this, "All MC Requests", 0x1); 16 | monitoringConfigs[1] = new MCRequests(this, "MC: Core Data Read", 0x2); 17 | monitoringConfigs[2] = new MCRequests(this, "MC: Write", 0x20); 18 | } 19 | 20 | public class NormalizedArbCounterData 21 | { 22 | public float uncoreClock; 23 | public float ctr0; 24 | public float ctr1; 25 | } 26 | 27 | public NormalizedArbCounterData UpdateArbCounterData(out ulong ctr0, out ulong ctr1) 28 | { 29 | NormalizedArbCounterData rc = new NormalizedArbCounterData(); 30 | float normalizationFactor = GetNormalizationFactor(0); 31 | ulong uncoreClock, elapsedUncoreClocks; 32 | ctr0 = ReadAndClearMsr(MSR_UNC_ARB_PERFCTR0); 33 | ctr1 = ReadAndClearMsr(MSR_UNC_ARB_PERFCTR1); 34 | Ring0.ReadMsr(MSR_UNC_PERF_FIXED_CTR, out uncoreClock); 35 | 36 | // MSR_UNC_PERF_FIXED_CTR is 48 bits wide, upper bits are reserved 37 | uncoreClock &= 0xFFFFFFFFFFFF; 38 | elapsedUncoreClocks = uncoreClock; 39 | if (uncoreClock > lastUncoreClockCount) 40 | elapsedUncoreClocks = uncoreClock - lastUncoreClockCount; 41 | lastUncoreClockCount = uncoreClock; 42 | 43 | rc.ctr0 = ctr0 * normalizationFactor; 44 | rc.ctr1 = ctr1 * normalizationFactor; 45 | rc.uncoreClock = elapsedUncoreClocks * normalizationFactor; 46 | return rc; 47 | } 48 | 49 | public Tuple[] GetOverallCounterValuesFromArbData(NormalizedArbCounterData data, string ctr0, string ctr1) 50 | { 51 | Tuple[] retval = new Tuple[3]; 52 | retval[0] = new Tuple("Uncore Clk", data.uncoreClock); 53 | retval[1] = new Tuple(ctr0, data.ctr0); 54 | retval[2] = new Tuple(ctr1, data.ctr1); 55 | return retval; 56 | } 57 | 58 | public class MCRequests : MonitoringConfig 59 | { 60 | private SkylakeClientArb cpu; 61 | private byte umask; 62 | private string configName; 63 | private ulong totalReqs; 64 | public string GetConfigName() { return configName; } 65 | 66 | public MCRequests(SkylakeClientArb intelCpu, string configName, byte umask) 67 | { 68 | cpu = intelCpu; 69 | this.configName = configName; 70 | this.umask = umask; 71 | this.totalReqs = 0; 72 | } 73 | 74 | public string[] GetColumns() 75 | { 76 | return columns; 77 | } 78 | 79 | public void Initialize() 80 | { 81 | cpu.EnableUncoreCounters(); 82 | // 0x80 = increments by number of outstanding requests every cycle 83 | // counts for coherent and non-coherent requests initiated by cores, igpu, or L3 84 | // only works in counter 0 85 | Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL0, 86 | GetUncorePerfEvtSelRegisterValue(0x80, umask, false, false, true, false, 0)); 87 | 88 | // 0x81 = number of requests 89 | Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL1, 90 | GetUncorePerfEvtSelRegisterValue(0x81, umask, false, false, true, false, 0)); 91 | } 92 | 93 | public MonitoringUpdateResults Update() 94 | { 95 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 96 | results.unitMetrics = null; 97 | NormalizedArbCounterData counterData = cpu.UpdateArbCounterData(out _, out ulong reqs); 98 | this.totalReqs += reqs; 99 | 100 | results.overallCounterValues = cpu.GetOverallCounterValuesFromArbData(counterData, "Arb Queue Occupancy", "Reqs"); 101 | results.overallMetrics = new string[] { FormatLargeNumber(counterData.uncoreClock), 102 | FormatLargeNumber(counterData.ctr1), 103 | FormatLargeNumber(counterData.ctr1 * 64) + "B/s", 104 | string.Format("{0:F2}", counterData.ctr0 / counterData.uncoreClock), 105 | string.Format("{0:F2} clk", counterData.ctr0 / counterData.ctr1), 106 | string.Format("{0:F2} ns", (1000000000 / counterData.uncoreClock) * (counterData.ctr0 / counterData.ctr1)), 107 | FormatLargeNumber(totalReqs * 64) + "B" 108 | }; 109 | return results; 110 | } 111 | 112 | public string GetHelpText() { return ""; } 113 | public string[] columns = new string[] { "Clk", "Requests", "Req BW", "Q Len", "Req Latency", "Req Latency", "Total Req Data" }; 114 | } 115 | 116 | public ulong GetImcCounterDelta(ulong addressOffset, ref ulong lastValue) 117 | { 118 | uint value = 0; 119 | ulong rc = 0; 120 | if (!Ring0.ReadMemory(barAddress, ref value)) 121 | { 122 | return rc; 123 | } 124 | 125 | if (value < lastValue) 126 | { 127 | rc = value + (0xFFFFFFFF - lastValue); 128 | } 129 | else 130 | { 131 | rc = value - lastValue; 132 | } 133 | 134 | lastValue = value; 135 | return rc; 136 | } 137 | 138 | public class MemoryBandwidth : MonitoringConfig 139 | { 140 | private SkylakeClientArb cpu; 141 | public string GetConfigName() { return "Memory Bandwidth"; } 142 | 143 | private ulong lastDataReads; 144 | private ulong lastDataWrites; 145 | 146 | public MemoryBandwidth(SkylakeClientArb intelCpu) 147 | { 148 | cpu = intelCpu; 149 | } 150 | 151 | public string[] GetColumns() 152 | { 153 | return columns; 154 | } 155 | 156 | public void Initialize() 157 | { 158 | lastDataReads = 0; 159 | lastDataWrites = 0; 160 | } 161 | 162 | public MonitoringUpdateResults Update() 163 | { 164 | ulong reads = cpu.GetImcCounterDelta(0x5050, ref lastDataReads); 165 | ulong writes = cpu.GetImcCounterDelta(0x5054, ref lastDataWrites); 166 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 167 | results.unitMetrics = null; 168 | 169 | 170 | results.overallMetrics = new string[] { "Total", FormatLargeNumber(64*(reads + writes)) + "B/s" }; 171 | results.unitMetrics = new string[2][]; 172 | results.unitMetrics[0] = new string[] { "Read", FormatLargeNumber(64 * reads) + "B/s" }; 173 | results.unitMetrics[1] = new string[] { "Write", FormatLargeNumber(64 * writes) + "B/s" }; 174 | return results; 175 | } 176 | 177 | public string GetHelpText() { return ""; } 178 | public string[] columns = new string[] { "Clk", "Requests", "Requests * 64B", "Q Occupancy", "Req Latency", "Req Latency" }; 179 | } 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /Intel/SkylakeClientL3.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Runtime.InteropServices.WindowsRuntime; 5 | 6 | namespace PmcReader.Intel 7 | { 8 | public class SkylakeClientL3 : SkylakeClientUncore 9 | { 10 | /// 11 | /// Number of L3 cache coherency boxes 12 | /// 13 | public int CboCount; 14 | public NormalizedCboCounterData[] cboData; 15 | public NormalizedCboCounterData cboTotals; 16 | 17 | public SkylakeClientL3() 18 | { 19 | ulong cboConfig; 20 | architectureName = "Skylake Client L3"; 21 | 22 | // intel developer manual table 2-30 says bits 0-3 encode number of C-Box 23 | // "subtract one to determine number of CBo units" 24 | Ring0.ReadMsr(MSR_UNC_CBO_CONFIG, out cboConfig); 25 | if ((cboConfig & 0xF) == 10) CboCount = 10; 26 | else CboCount = (int)((cboConfig & 0xF) - 1); // but not for the 109000k? 27 | cboData = new NormalizedCboCounterData[CboCount]; 28 | 29 | List monitoringConfigList = new List(); 30 | monitoringConfigList.Add(new HitrateConfig(this)); 31 | monitoringConfigList.Add(new SnoopHitConfig(this)); 32 | monitoringConfigList.Add(new HitsCategoryConfig(this, "Data?", 0x80 | 0b10)); 33 | monitoringConfigList.Add(new HitsCategoryConfig(this, "Code?", 0x80 | 0b100)); 34 | monitoringConfigList.Add(new HitsCategoryConfig(this, "Modified", 0x80 | 0b1)); 35 | monitoringConfigs = monitoringConfigList.ToArray(); 36 | } 37 | 38 | public class NormalizedCboCounterData 39 | { 40 | public float ctr0; 41 | public float ctr1; 42 | public ulong ctr0Total; 43 | public ulong ctr1Total; 44 | } 45 | 46 | public void InitializeCboTotals() 47 | { 48 | if (cboTotals == null) 49 | { 50 | cboTotals = new NormalizedCboCounterData(); 51 | } 52 | 53 | cboTotals.ctr0 = 0; 54 | cboTotals.ctr1 = 0; 55 | } 56 | 57 | public void UpdateCboCounterData(uint cboIdx) 58 | { 59 | float normalizationFactor = GetNormalizationFactor((int)cboIdx); 60 | ulong ctr0 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR0_base + MSR_UNC_CBO_increment * cboIdx); 61 | ulong ctr1 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR1_base + MSR_UNC_CBO_increment * cboIdx); 62 | 63 | if (cboData[cboIdx] == null) 64 | { 65 | cboData[cboIdx] = new NormalizedCboCounterData(); 66 | } 67 | 68 | cboData[cboIdx].ctr0 = ctr0 * normalizationFactor; 69 | cboData[cboIdx].ctr1 = ctr1 * normalizationFactor; 70 | cboData[cboIdx].ctr0Total += ctr0; 71 | cboData[cboIdx].ctr1Total += ctr1; 72 | cboTotals.ctr0 += cboData[cboIdx].ctr0; 73 | cboTotals.ctr1 += cboData[cboIdx].ctr1; 74 | cboTotals.ctr0Total += ctr0; 75 | cboTotals.ctr1Total += ctr1; 76 | } 77 | 78 | public Tuple[] GetOverallCounterValues(string ctr0, string ctr1) 79 | { 80 | Tuple[] retval = new Tuple[2]; 81 | retval[0] = new Tuple(ctr0, cboTotals.ctr0); 82 | retval[1] = new Tuple(ctr1, cboTotals.ctr1); 83 | return retval; 84 | } 85 | 86 | public class HitrateConfig : MonitoringConfig 87 | { 88 | private SkylakeClientL3 cpu; 89 | public string GetConfigName() { return "L3 Hitrate"; } 90 | 91 | public HitrateConfig(SkylakeClientL3 intelCpu) 92 | { 93 | cpu = intelCpu; 94 | } 95 | 96 | public string[] GetColumns() 97 | { 98 | return columns; 99 | } 100 | 101 | public void Initialize() 102 | { 103 | cpu.EnableUncoreCounters(); 104 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 105 | { 106 | // Event 0x34 = uncore cbo cache lookup 107 | // Bit 0 = Modified state 108 | // Bit 1, 2 = Exclusive, Shared states 109 | // Bit 3 = Invalid state (miss) 110 | // Bit 4 = Read 111 | // Bit 5 = Write 112 | // Bit 6 = ??? 113 | // Bit 7 = Any 114 | // 0x34 = L3 lookups, 0xFF = all lookups 115 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx, 116 | GetUncorePerfEvtSelRegisterValue(0x34, 0x8F, false, false, true, false, 0)); 117 | 118 | // 0x34 = L3 lookups, high 4 bits = cacheable read | cacheable write | external snoop | irq/ipq 119 | // low 4 bits = M | ES | I, so select I to count misses 120 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx, 121 | GetUncorePerfEvtSelRegisterValue(0x34, 0x88, false, false, true, false, 0)); 122 | } 123 | } 124 | 125 | public MonitoringUpdateResults Update() 126 | { 127 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 128 | results.unitMetrics = new string[cpu.CboCount][]; 129 | cpu.InitializeCboTotals(); 130 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 131 | { 132 | cpu.UpdateCboCounterData(cboIdx); 133 | results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]); 134 | } 135 | 136 | results.overallMetrics = computeMetrics("Overall", cpu.cboTotals); 137 | results.overallCounterValues = cpu.GetOverallCounterValues("L3 Lookups", "L3 Misses"); 138 | return results; 139 | } 140 | 141 | public string[] columns = new string[] { "Item", "Hitrate", "Hit BW", "All Lookups", "I state", "Total Hit Data" }; 142 | 143 | public string GetHelpText() { return ""; } 144 | 145 | private string[] computeMetrics(string label, NormalizedCboCounterData counterData) 146 | { 147 | return new string[] { label, 148 | string.Format("{0:F2}%", 100 * (1 - counterData.ctr1 / counterData.ctr0)), 149 | FormatLargeNumber((counterData.ctr0 - counterData.ctr1) * 64) + "B/s", 150 | FormatLargeNumber(counterData.ctr0), 151 | FormatLargeNumber(counterData.ctr1), 152 | FormatLargeNumber((counterData.ctr0Total - counterData.ctr1Total) * 64) + "B"}; 153 | } 154 | } 155 | 156 | public class HitsCategoryConfig : MonitoringConfig 157 | { 158 | private SkylakeClientL3 cpu; 159 | private string category; 160 | private byte umask; 161 | public string GetConfigName() { return "L3 Hits, " + category; } 162 | 163 | public HitsCategoryConfig(SkylakeClientL3 intelCpu, string category, byte umask) 164 | { 165 | this.cpu = intelCpu; 166 | this.category = category; 167 | this.umask = umask; 168 | this.columns = new string[] { "Item", "Hit BW", category + " Hit BW", "% " + category + " hits" }; 169 | } 170 | 171 | public string[] GetColumns() 172 | { 173 | return columns; 174 | } 175 | 176 | public void Initialize() 177 | { 178 | cpu.EnableUncoreCounters(); 179 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 180 | { 181 | // Event 0x34 = uncore cbo cache lookup 182 | // Bit 0 = Modified state 183 | // Bit 1, 2 = Exclusive, Shared states 184 | // Bit 3 = Invalid state (miss) 185 | // Bit 4 = Read 186 | // Bit 5 = Write 187 | // Bit 6 = ??? 188 | // Bit 7 = Any 189 | // 0x34 = L3 lookups, 0xFF = all lookups 190 | 191 | // L3 hits 192 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx, 193 | GetUncorePerfEvtSelRegisterValue(0x34, 0x80 | 0b111, false, false, true, false, 0)); 194 | 195 | // Bit one (E or S?) 196 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx, 197 | GetUncorePerfEvtSelRegisterValue(0x34, umask, false, false, true, false, 0)); 198 | } 199 | } 200 | 201 | public MonitoringUpdateResults Update() 202 | { 203 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 204 | results.unitMetrics = new string[cpu.CboCount][]; 205 | cpu.InitializeCboTotals(); 206 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 207 | { 208 | cpu.UpdateCboCounterData(cboIdx); 209 | results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]); 210 | } 211 | 212 | results.overallMetrics = computeMetrics("Overall", cpu.cboTotals); 213 | results.overallCounterValues = cpu.GetOverallCounterValues("L3 Hits", this.category + " L3 Hits"); 214 | return results; 215 | } 216 | 217 | public string[] columns; 218 | 219 | public string GetHelpText() { return ""; } 220 | 221 | private string[] computeMetrics(string label, NormalizedCboCounterData counterData) 222 | { 223 | return new string[] { label, 224 | FormatLargeNumber(counterData.ctr0 * 64) + "B/s", 225 | FormatLargeNumber(counterData.ctr1 * 64) + "B/s", 226 | FormatPercentage(counterData.ctr1, counterData.ctr0)}; 227 | } 228 | } 229 | 230 | public class SnoopHitConfig : MonitoringConfig 231 | { 232 | private SkylakeClientL3 cpu; 233 | public string GetConfigName() { return "Snoop Hits"; } 234 | 235 | public SnoopHitConfig(SkylakeClientL3 intelCpu) 236 | { 237 | cpu = intelCpu; 238 | } 239 | 240 | public string[] GetColumns() 241 | { 242 | return columns; 243 | } 244 | 245 | public void Initialize() 246 | { 247 | ThreadAffinity.Set(0x1); 248 | cpu.EnableUncoreCounters(); 249 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 250 | { 251 | // CBo sent a snoop that hit a non-modified line 252 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx, 253 | GetUncorePerfEvtSelRegisterValue(0x22, 0x44, false, false, true, false, 0)); 254 | 255 | // CBo sent a snoop that hit a modified line 256 | Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx, 257 | GetUncorePerfEvtSelRegisterValue(0x22, 0x48, false, false, true, false, 0)); 258 | } 259 | } 260 | 261 | public MonitoringUpdateResults Update() 262 | { 263 | MonitoringUpdateResults results = new MonitoringUpdateResults(); 264 | results.unitMetrics = new string[cpu.CboCount][]; 265 | cpu.InitializeCboTotals(); 266 | ThreadAffinity.Set(0x1); 267 | for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++) 268 | { 269 | cpu.UpdateCboCounterData(cboIdx); 270 | results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]); 271 | } 272 | 273 | results.overallMetrics = computeMetrics("Overall", cpu.cboTotals); 274 | return results; 275 | } 276 | 277 | public string[] columns = new string[] { "Item", "Snoop Hit BW", "Snoop Hit(M) BW", "Snoop Hit(non-M) BW", "Snoop Hits" }; 278 | 279 | public string GetHelpText() { return ""; } 280 | 281 | private string[] computeMetrics(string label, NormalizedCboCounterData counterData) 282 | { 283 | return new string[] { label, 284 | FormatLargeNumber((counterData.ctr0 + counterData.ctr1) * 64) + "B/s", 285 | FormatLargeNumber(counterData.ctr0 * 64) + "B/s", 286 | FormatLargeNumber(counterData.ctr1 * 64) + "B/s", 287 | FormatLargeNumber(counterData.ctr0 + counterData.ctr1)}; 288 | } 289 | } 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /Intel/SkylakeClientUncore.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | 4 | namespace PmcReader.Intel 5 | { 6 | public class SkylakeClientUncore : ModernIntelCpu 7 | { 8 | public const uint MSR_UNC_PERF_GLOBAL_CTRL = 0xE01; 9 | public const uint MSR_UNC_PERF_FIXED_CTRL = 0x394; 10 | public const uint MSR_UNC_PERF_FIXED_CTR = 0x395; 11 | public const uint MSR_UNC_CBO_CONFIG = 0x396; 12 | public const uint MSR_UNC_CBO_PERFEVTSEL0_base = 0x700; 13 | public const uint MSR_UNC_CBO_PERFEVTSEL1_base = 0x701; 14 | public const uint MSR_UNC_CBO_PERFCTR0_base = 0x706; 15 | public const uint MSR_UNC_CBO_PERFCTR1_base = 0x707; 16 | public const uint MSR_UNC_ARB_PERFCTR0 = 0x3B0; 17 | public const uint MSR_UNC_ARB_PERFCTR1 = 0x3B1; 18 | public const uint MSR_UNC_ARB_PERFEVTSEL0 = 0x3B2; 19 | public const uint MSR_UNC_ARB_PERFEVTSEL1 = 0x3B3; 20 | public const uint MSR_UNC_CBO_increment = 0x10; 21 | 22 | public const ulong BAR_MASK = 0x0007FFFFF8000; 23 | public const uint DRAM_GT_REQUESTS_OFFSET = 0x5040; 24 | public const uint DRAM_IA_REQUESTS_OFFSET = 0x5044; 25 | public const uint DRAM_IO_REQUESTS_OFFSET = 0x5048; 26 | public const uint DRAM_DATA_READS_OFFSET = 0x5050; 27 | public const uint DRAM_DATA_WRITES_OFFSET = 0x5054; 28 | 29 | public ulong barAddress; 30 | 31 | public SkylakeClientUncore() 32 | { 33 | architectureName = "Skylake Client Uncore"; 34 | Ring0.ReadPciConfig(Ring0.GetPciAddress(0, 0, 0), 0x48, out uint barLo); 35 | barAddress = (ulong)barLo & BAR_MASK; 36 | } 37 | 38 | /// 39 | /// Enable skylake uncore counters, wtih overflow propagation/freezing disabled 40 | /// 41 | public void EnableUncoreCounters() 42 | { 43 | // Bit 29 - globally enable all PMU counters. 44 | // local counters still have to be individually enabled 45 | // other bits have to do with PMI or are reserved 46 | ulong enableUncoreCountersValue = 1UL << 29; 47 | Ring0.WriteMsr(MSR_UNC_PERF_GLOBAL_CTRL, enableUncoreCountersValue); 48 | 49 | // Bit 22 - locally enable fixed counter 50 | ulong enableUncoreFixedCtrValue = 1UL << 22; 51 | Ring0.WriteMsr(MSR_UNC_PERF_FIXED_CTRL, enableUncoreFixedCtrValue); 52 | } 53 | 54 | /// 55 | /// Get value to put in PERFEVTSEL register, for uncore counters 56 | /// 57 | /// Perf event 58 | /// Perf event qualification (umask) 59 | /// Edge detect 60 | /// Enable overflow forwarding 61 | /// Enable counter 62 | /// Invert cmask 63 | /// Count mask 64 | /// value to put in perfevtsel register 65 | public static ulong GetUncorePerfEvtSelRegisterValue(byte perfEvent, 66 | byte umask, 67 | bool edge, 68 | bool ovf_en, 69 | bool enable, 70 | bool invert, 71 | byte cmask) 72 | { 73 | return perfEvent | 74 | (ulong)umask << 8 | 75 | (edge ? 1UL : 0UL) << 18 | 76 | (ovf_en ? 1UL : 0UL) << 20 | 77 | (enable ? 1UL : 0UL) << 22 | 78 | (invert ? 1UL : 0UL) << 23 | 79 | (ulong)(cmask & 0xF) << 24; 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /Interop/AdvApi32.cs: -------------------------------------------------------------------------------- 1 | // From LibreHardwareMonitor 2 | // Mozilla Public License 2.0 3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | // Copyright (C) LibreHardwareMonitor and Contributors 5 | // All Rights Reserved 6 | 7 | using System; 8 | using System.Runtime.InteropServices; 9 | 10 | // ReSharper disable InconsistentNaming 11 | 12 | namespace PmcReader.Interop 13 | { 14 | internal class AdvApi32 15 | { 16 | private const string DllName = "advapi32.dll"; 17 | 18 | [DllImport(DllName, CallingConvention = CallingConvention.Winapi)] 19 | internal static extern IntPtr OpenSCManager(string lpMachineName, string lpDatabaseName, SC_MANAGER_ACCESS_MASK dwDesiredAccess); 20 | 21 | [DllImport(DllName, CallingConvention = CallingConvention.Winapi)] 22 | [return: MarshalAs(UnmanagedType.Bool)] 23 | internal static extern bool CloseServiceHandle(IntPtr hSCObject); 24 | 25 | [DllImport(DllName, CallingConvention = CallingConvention.Winapi, SetLastError = true)] 26 | internal static extern IntPtr CreateService 27 | ( 28 | IntPtr hSCManager, 29 | string lpServiceName, 30 | string lpDisplayName, 31 | SERVICE_ACCESS_MASK dwDesiredAccess, 32 | SERVICE_TYPE dwServiceType, 33 | SERVICE_START dwStartType, 34 | SERVICE_ERROR dwErrorControl, 35 | string lpBinaryPathName, 36 | string lpLoadOrderGroup, 37 | string lpdwTagId, 38 | string lpDependencies, 39 | string lpServiceStartName, 40 | string lpPassword); 41 | 42 | [DllImport(DllName, CallingConvention = CallingConvention.Winapi, SetLastError = true)] 43 | internal static extern IntPtr OpenService(IntPtr hSCManager, string lpServiceName, SERVICE_ACCESS_MASK dwDesiredAccess); 44 | 45 | [DllImport(DllName, CallingConvention = CallingConvention.Winapi, SetLastError = true)] 46 | [return: MarshalAs(UnmanagedType.Bool)] 47 | internal static extern bool DeleteService(IntPtr hService); 48 | 49 | [DllImport(DllName, CallingConvention = CallingConvention.Winapi, SetLastError = true)] 50 | [return: MarshalAs(UnmanagedType.Bool)] 51 | internal static extern bool StartService(IntPtr hService, uint dwNumServiceArgs, string[] lpServiceArgVectors); 52 | 53 | [DllImport(DllName, CallingConvention = CallingConvention.Winapi, SetLastError = true)] 54 | [return: MarshalAs(UnmanagedType.Bool)] 55 | internal static extern bool ControlService(IntPtr hService, SERVICE_CONTROL dwControl, ref SERVICE_STATUS lpServiceStatus); 56 | 57 | [Flags] 58 | internal enum SC_MANAGER_ACCESS_MASK : uint 59 | { 60 | SC_MANAGER_CONNECT = 0x00001, 61 | SC_MANAGER_CREATE_SERVICE = 0x00002, 62 | SC_MANAGER_ENUMERATE_SERVICE = 0x00004, 63 | SC_MANAGER_LOCK = 0x00008, 64 | SC_MANAGER_QUERY_LOCK_STATUS = 0x00010, 65 | SC_MANAGER_MODIFY_BOOT_CONFIG = 0x00020, 66 | SC_MANAGER_ALL_ACCESS = 0xF003F 67 | } 68 | 69 | internal enum SERVICE_ACCESS_MASK : uint 70 | { 71 | SERVICE_QUERY_CONFIG = 0x00001, 72 | SERVICE_CHANGE_CONFIG = 0x00002, 73 | SERVICE_QUERY_STATUS = 0x00004, 74 | SERVICE_ENUMERATE_DEPENDENTS = 0x00008, 75 | SERVICE_START = 0x00010, 76 | SERVICE_STOP = 0x00020, 77 | SERVICE_PAUSE_CONTINUE = 0x00040, 78 | SERVICE_INTERROGATE = 0x00080, 79 | SERVICE_USER_DEFINED_CONTROL = 0x00100, 80 | SERVICE_ALL_ACCESS = 0xF01FF 81 | } 82 | 83 | internal enum SERVICE_TYPE : uint 84 | { 85 | SERVICE_DRIVER = 0x0000000B, 86 | SERVICE_WIN32 = 0x00000030, 87 | SERVICE_ADAPTER = 0x00000004, 88 | SERVICE_FILE_SYSTEM_DRIVER = 0x00000002, 89 | SERVICE_KERNEL_DRIVER = 0x00000001, 90 | SERVICE_RECOGNIZER_DRIVER = 0x00000008, 91 | SERVICE_WIN32_OWN_PROCESS = 0x00000010, 92 | SERVICE_WIN32_SHARE_PROCESS = 0x00000020, 93 | SERVICE_USER_OWN_PROCESS = 0x00000050, 94 | SERVICE_USER_SHARE_PROCESS = 0x00000060, 95 | SERVICE_INTERACTIVE_PROCESS = 0x00000100 96 | } 97 | 98 | internal enum SERVICE_START : uint 99 | { 100 | SERVICE_BOOT_START = 0, 101 | SERVICE_SYSTEM_START = 1, 102 | SERVICE_AUTO_START = 2, 103 | SERVICE_DEMAND_START = 3, 104 | SERVICE_DISABLED = 4 105 | } 106 | 107 | internal enum SERVICE_ERROR : uint 108 | { 109 | SERVICE_ERROR_IGNORE = 0, 110 | SERVICE_ERROR_NORMAL = 1, 111 | SERVICE_ERROR_SEVERE = 2, 112 | SERVICE_ERROR_CRITICAL = 3 113 | } 114 | 115 | internal enum SERVICE_CONTROL : uint 116 | { 117 | SERVICE_CONTROL_STOP = 1, 118 | SERVICE_CONTROL_PAUSE = 2, 119 | SERVICE_CONTROL_CONTINUE = 3, 120 | SERVICE_CONTROL_INTERROGATE = 4, 121 | SERVICE_CONTROL_SHUTDOWN = 5, 122 | SERVICE_CONTROL_PARAMCHANGE = 6, 123 | SERVICE_CONTROL_NETBINDADD = 7, 124 | SERVICE_CONTROL_NETBINDREMOVE = 8, 125 | SERVICE_CONTROL_NETBINDENABLE = 9, 126 | SERVICE_CONTROL_NETBINDDISABLE = 10, 127 | SERVICE_CONTROL_DEVICEEVENT = 11, 128 | SERVICE_CONTROL_HARDWAREPROFILECHANGE = 12, 129 | SERVICE_CONTROL_POWEREVENT = 13, 130 | SERVICE_CONTROL_SESSIONCHANGE = 14 131 | } 132 | 133 | [StructLayout(LayoutKind.Sequential, Pack = 1)] 134 | internal struct SERVICE_STATUS 135 | { 136 | public uint dwServiceType; 137 | public uint dwCurrentState; 138 | public uint dwControlsAccepted; 139 | public uint dwWin32ExitCode; 140 | public uint dwServiceSpecificExitCode; 141 | public uint dwCheckPoint; 142 | public uint dwWaitHint; 143 | } 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /Interop/KernelDriver.cs: -------------------------------------------------------------------------------- 1 | // From LibreHardwareMonitor 2 | // Mozilla Public License 2.0 3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | // Copyright (C) LibreHardwareMonitor and Contributors 5 | // All Rights Reserved 6 | 7 | using System; 8 | using System.IO; 9 | using System.Runtime.InteropServices; 10 | using System.Security.AccessControl; 11 | using Microsoft.Win32.SafeHandles; 12 | 13 | namespace PmcReader.Interop 14 | { 15 | internal class KernelDriver 16 | { 17 | private readonly string _id; 18 | private SafeFileHandle _device; 19 | 20 | public int lastError; 21 | 22 | public KernelDriver(string id) 23 | { 24 | _id = id; 25 | } 26 | 27 | public bool IsOpen 28 | { 29 | get { return _device != null; } 30 | } 31 | 32 | public bool Install(string path, out string errorMessage) 33 | { 34 | IntPtr manager = AdvApi32.OpenSCManager(null, null, AdvApi32.SC_MANAGER_ACCESS_MASK.SC_MANAGER_ALL_ACCESS); 35 | if (manager == IntPtr.Zero) 36 | { 37 | errorMessage = "OpenSCManager returned zero."; 38 | return false; 39 | } 40 | 41 | IntPtr service = AdvApi32.CreateService(manager, 42 | _id, 43 | _id, 44 | AdvApi32.SERVICE_ACCESS_MASK.SERVICE_ALL_ACCESS, 45 | AdvApi32.SERVICE_TYPE.SERVICE_KERNEL_DRIVER, 46 | AdvApi32.SERVICE_START.SERVICE_DEMAND_START, 47 | AdvApi32.SERVICE_ERROR.SERVICE_ERROR_NORMAL, 48 | path, 49 | null, 50 | null, 51 | null, 52 | null, 53 | null); 54 | 55 | if (service == IntPtr.Zero) 56 | { 57 | if (Marshal.GetHRForLastWin32Error() == Kernel32.ERROR_SERVICE_EXISTS) 58 | { 59 | errorMessage = "Service already exists"; 60 | return false; 61 | } 62 | 63 | errorMessage = "CreateService returned the error: " + Marshal.GetExceptionForHR(Marshal.GetHRForLastWin32Error()).Message; 64 | AdvApi32.CloseServiceHandle(manager); 65 | return false; 66 | } 67 | 68 | if (!AdvApi32.StartService(service, 0, null)) 69 | { 70 | if (Marshal.GetHRForLastWin32Error() != Kernel32.ERROR_SERVICE_ALREADY_RUNNING) 71 | { 72 | errorMessage = "StartService returned the error: " + Marshal.GetExceptionForHR(Marshal.GetHRForLastWin32Error()).Message; 73 | AdvApi32.CloseServiceHandle(service); 74 | AdvApi32.CloseServiceHandle(manager); 75 | return false; 76 | } 77 | } 78 | 79 | AdvApi32.CloseServiceHandle(service); 80 | AdvApi32.CloseServiceHandle(manager); 81 | 82 | #if !NETSTANDARD2_0 83 | try 84 | { 85 | // restrict the driver access to system (SY) and builtin admins (BA) 86 | // TODO: replace with a call to IoCreateDeviceSecure in the driver 87 | FileSecurity fileSecurity = File.GetAccessControl(@"\\.\" + _id); 88 | fileSecurity.SetSecurityDescriptorSddlForm("O:BAG:SYD:(A;;FA;;;SY)(A;;FA;;;BA)"); 89 | File.SetAccessControl(@"\\.\" + _id, fileSecurity); 90 | } 91 | catch 92 | { } 93 | #endif 94 | errorMessage = null; 95 | return true; 96 | } 97 | 98 | public bool Open() 99 | { 100 | _device = new SafeFileHandle(Kernel32.CreateFile(@"\\.\" + _id, 0xC0000000, FileShare.ReadWrite, IntPtr.Zero, FileMode.Open, FileAttributes.Normal, IntPtr.Zero), true); 101 | if (_device.IsInvalid) 102 | { 103 | _device.Close(); 104 | _device.Dispose(); 105 | _device = null; 106 | } 107 | 108 | return _device != null; 109 | } 110 | 111 | public bool DeviceIOControl(Kernel32.IOControlCode ioControlCode, object inBuffer) 112 | { 113 | if (_device == null) 114 | return false; 115 | 116 | 117 | bool b = Kernel32.DeviceIoControl(_device, ioControlCode, inBuffer, inBuffer == null ? 0 : (uint)Marshal.SizeOf(inBuffer), null, 0, out uint _, IntPtr.Zero); 118 | return b; 119 | } 120 | 121 | public bool DeviceIOControl(Kernel32.IOControlCode ioControlCode, object inBuffer, ref T outBuffer) 122 | { 123 | if (_device == null) 124 | return false; 125 | 126 | 127 | object boxedOutBuffer = outBuffer; 128 | bool b = Kernel32.DeviceIoControl(_device, 129 | ioControlCode, 130 | inBuffer, 131 | inBuffer == null ? 0 : (uint)Marshal.SizeOf(inBuffer), 132 | boxedOutBuffer, 133 | (uint)Marshal.SizeOf(boxedOutBuffer), 134 | out uint _, 135 | IntPtr.Zero); 136 | 137 | if (!b) 138 | { 139 | int error = Marshal.GetLastWin32Error(); 140 | lastError = error; 141 | } 142 | 143 | outBuffer = (T)boxedOutBuffer; 144 | return b; 145 | } 146 | 147 | public void Close() 148 | { 149 | if (_device != null) 150 | { 151 | _device.Close(); 152 | _device.Dispose(); 153 | _device = null; 154 | } 155 | } 156 | 157 | public bool Delete() 158 | { 159 | IntPtr manager = AdvApi32.OpenSCManager(null, null, AdvApi32.SC_MANAGER_ACCESS_MASK.SC_MANAGER_ALL_ACCESS); 160 | if (manager == IntPtr.Zero) 161 | return false; 162 | 163 | 164 | IntPtr service = AdvApi32.OpenService(manager, _id, AdvApi32.SERVICE_ACCESS_MASK.SERVICE_ALL_ACCESS); 165 | if (service == IntPtr.Zero) 166 | return true; 167 | 168 | 169 | AdvApi32.SERVICE_STATUS status = new AdvApi32.SERVICE_STATUS(); 170 | AdvApi32.ControlService(service, AdvApi32.SERVICE_CONTROL.SERVICE_CONTROL_STOP, ref status); 171 | AdvApi32.DeleteService(service); 172 | AdvApi32.CloseServiceHandle(service); 173 | AdvApi32.CloseServiceHandle(manager); 174 | 175 | return true; 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /Interop/OpCode.cs: -------------------------------------------------------------------------------- 1 | // From LibreHardwareMonitor 2 | // Mozilla Public License 2.0 3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | // Copyright (C) LibreHardwareMonitor and Contributors 5 | // All Rights Reserved 6 | 7 | using System; 8 | using System.Reflection; 9 | using System.Runtime.InteropServices; 10 | 11 | namespace PmcReader.Interop 12 | { 13 | internal static class OpCode 14 | { 15 | public static CpuidDelegate Cpuid; 16 | public static RdtscDelegate Rdtsc; 17 | 18 | private static IntPtr _codeBuffer; 19 | private static ulong _size; 20 | 21 | // void __stdcall cpuidex(unsigned int index, unsigned int ecxValue, 22 | // unsigned int* eax, unsigned int* ebx, unsigned int* ecx, 23 | // unsigned int* edx) 24 | // { 25 | // int info[4]; 26 | // __cpuidex(info, index, ecxValue); 27 | // *eax = info[0]; 28 | // *ebx = info[1]; 29 | // *ecx = info[2]; 30 | // *edx = info[3]; 31 | // } 32 | 33 | private static readonly byte[] CpuId32 = 34 | { 35 | 0x55, // push ebp 36 | 0x8B, 37 | 0xEC, // mov ebp, esp 38 | 0x83, 39 | 0xEC, 40 | 0x10, // sub esp, 10h 41 | 0x8B, 42 | 0x45, 43 | 0x08, // mov eax, dword ptr [ebp+8] 44 | 0x8B, 45 | 0x4D, 46 | 0x0C, // mov ecx, dword ptr [ebp+0Ch] 47 | 0x53, // push ebx 48 | 0x0F, 49 | 0xA2, // cpuid 50 | 0x56, // push esi 51 | 0x8D, 52 | 0x75, 53 | 0xF0, // lea esi, [info] 54 | 0x89, 55 | 0x06, // mov dword ptr [esi],eax 56 | 0x8B, 57 | 0x45, 58 | 0x10, // mov eax, dword ptr [eax] 59 | 0x89, 60 | 0x5E, 61 | 0x04, // mov dword ptr [esi+4], ebx 62 | 0x89, 63 | 0x4E, 64 | 0x08, // mov dword ptr [esi+8], ecx 65 | 0x89, 66 | 0x56, 67 | 0x0C, // mov dword ptr [esi+0Ch], edx 68 | 0x8B, 69 | 0x4D, 70 | 0xF0, // mov ecx, dword ptr [info] 71 | 0x89, 72 | 0x08, // mov dword ptr [eax], ecx 73 | 0x8B, 74 | 0x45, 75 | 0x14, // mov eax, dword ptr [ebx] 76 | 0x8B, 77 | 0x4D, 78 | 0xF4, // mov ecx, dword ptr [ebp-0Ch] 79 | 0x89, 80 | 0x08, // mov dword ptr [eax], ecx 81 | 0x8B, 82 | 0x45, 83 | 0x18, // mov eax, dword ptr [ecx] 84 | 0x8B, 85 | 0x4D, 86 | 0xF8, // mov ecx, dword ptr [ebp-8] 87 | 0x89, 88 | 0x08, // mov dword ptr [eax], ecx 89 | 0x8B, 90 | 0x45, 91 | 0x1C, // mov eax, dword ptr [edx] 92 | 0x8B, 93 | 0x4D, 94 | 0xFC, // mov ecx, dword ptr [ebp-4] 95 | 0x5E, // pop esi 96 | 0x89, 97 | 0x08, // mov dword ptr [eax], ecx 98 | 0x5B, // pop ebx 99 | 0xC9, // leave 100 | 0xC2, 101 | 0x18, 102 | 0x00 // ret 18h 103 | }; 104 | 105 | private static readonly byte[] CpuId64Linux = 106 | { 107 | 0x49, 108 | 0x89, 109 | 0xD2, // mov r10, rdx 110 | 0x49, 111 | 0x89, 112 | 0xCB, // mov r11, rcx 113 | 0x53, // push rbx 114 | 0x89, 115 | 0xF8, // mov eax, edi 116 | 0x89, 117 | 0xF1, // mov ecx, esi 118 | 0x0F, 119 | 0xA2, // cpuid 120 | 0x41, 121 | 0x89, 122 | 0x02, // mov dword ptr [r10], eax 123 | 0x41, 124 | 0x89, 125 | 0x1B, // mov dword ptr [r11], ebx 126 | 0x41, 127 | 0x89, 128 | 0x08, // mov dword ptr [r8], ecx 129 | 0x41, 130 | 0x89, 131 | 0x11, // mov dword ptr [r9], edx 132 | 0x5B, // pop rbx 133 | 0xC3 // ret 134 | }; 135 | 136 | private static readonly byte[] CpuId64Windows = 137 | { 138 | 0x48, 139 | 0x89, 140 | 0x5C, 141 | 0x24, 142 | 0x08, // mov qword ptr [rsp+8], rbx 143 | 0x8B, 144 | 0xC1, // mov eax, ecx 145 | 0x8B, 146 | 0xCA, // mov ecx, edx 147 | 0x0F, 148 | 0xA2, // cpuid 149 | 0x41, 150 | 0x89, 151 | 0x00, // mov dword ptr [r8], eax 152 | 0x48, 153 | 0x8B, 154 | 0x44, 155 | 0x24, 156 | 0x28, // mov rax, qword ptr [rsp+28h] 157 | 0x41, 158 | 0x89, 159 | 0x19, // mov dword ptr [r9], ebx 160 | 0x48, 161 | 0x8B, 162 | 0x5C, 163 | 0x24, 164 | 0x08, // mov rbx, qword ptr [rsp+8] 165 | 0x89, 166 | 0x08, // mov dword ptr [rax], ecx 167 | 0x48, 168 | 0x8B, 169 | 0x44, 170 | 0x24, 171 | 0x30, // mov rax, qword ptr [rsp+30h] 172 | 0x89, 173 | 0x10, // mov dword ptr [rax], edx 174 | 0xC3 // ret 175 | }; 176 | 177 | // unsigned __int64 __stdcall rdtsc() { 178 | // return __rdtsc(); 179 | // } 180 | 181 | private static readonly byte[] Rdtsc32 = 182 | { 183 | 0x0F, 184 | 0x31, // rdtsc 185 | 0xC3 // ret 186 | }; 187 | 188 | private static readonly byte[] Rdtsc64 = 189 | { 190 | 0x0F, 191 | 0x31, // rdtsc 192 | 0x48, 193 | 0xC1, 194 | 0xE2, 195 | 0x20, // shl rdx, 20h 196 | 0x48, 197 | 0x0B, 198 | 0xC2, // or rax, rdx 199 | 0xC3 // ret 200 | }; 201 | 202 | [UnmanagedFunctionPointer(CallingConvention.StdCall)] 203 | public delegate bool CpuidDelegate(uint index, uint ecxValue, out uint eax, out uint ebx, out uint ecx, out uint edx); 204 | 205 | [UnmanagedFunctionPointer(CallingConvention.StdCall)] 206 | public delegate ulong RdtscDelegate(); 207 | 208 | public static void Open() 209 | { 210 | byte[] rdTscCode; 211 | byte[] cpuidCode; 212 | if (IntPtr.Size == 4) 213 | { 214 | rdTscCode = Rdtsc32; 215 | cpuidCode = CpuId32; 216 | } 217 | else 218 | { 219 | rdTscCode = Rdtsc64; 220 | 221 | cpuidCode = CpuId64Windows; 222 | } 223 | 224 | _size = (ulong)(rdTscCode.Length + cpuidCode.Length); 225 | 226 | _codeBuffer = Kernel32.VirtualAlloc(IntPtr.Zero, 227 | (UIntPtr)_size, 228 | Kernel32.MEM.MEM_COMMIT | Kernel32.MEM.MEM_RESERVE, 229 | Kernel32.PAGE.PAGE_EXECUTE_READWRITE); 230 | 231 | Marshal.Copy(rdTscCode, 0, _codeBuffer, rdTscCode.Length); 232 | Rdtsc = Marshal.GetDelegateForFunctionPointer(_codeBuffer, typeof(RdtscDelegate)) as RdtscDelegate; 233 | IntPtr cpuidAddress = (IntPtr)((long)_codeBuffer + rdTscCode.Length); 234 | Marshal.Copy(cpuidCode, 0, cpuidAddress, cpuidCode.Length); 235 | Cpuid = Marshal.GetDelegateForFunctionPointer(cpuidAddress, typeof(CpuidDelegate)) as CpuidDelegate; 236 | } 237 | 238 | public static void Close() 239 | { 240 | Rdtsc = null; 241 | Cpuid = null; 242 | 243 | Kernel32.VirtualFree(_codeBuffer, UIntPtr.Zero, Kernel32.MEM.MEM_RELEASE); 244 | } 245 | 246 | public static bool CpuidTx(uint index, uint ecxValue, out uint eax, out uint ebx, out uint ecx, out uint edx, ulong threadAffinityMask) 247 | { 248 | ulong mask = ThreadAffinity.Set(threadAffinityMask); 249 | if (mask == 0) 250 | { 251 | eax = ebx = ecx = edx = 0; 252 | return false; 253 | } 254 | 255 | Cpuid(index, ecxValue, out eax, out ebx, out ecx, out edx); 256 | ThreadAffinity.Set(mask); 257 | return true; 258 | } 259 | 260 | /// 261 | /// Gets the CPU manufacturer ID string, from cpuid with eax = 0 262 | /// 263 | /// Manufacturer ID string 264 | public static string GetManufacturerId() 265 | { 266 | uint eax, ecx, edx, ebx; 267 | byte[] cpuManufacturerBytes = new byte[12]; 268 | Cpuid(0, 0, out eax, out ebx, out ecx, out edx); 269 | 270 | // when you use a managed language and can't play with types 271 | cpuManufacturerBytes[0] = (byte)ebx; 272 | cpuManufacturerBytes[1] = (byte)(ebx >> 8); 273 | cpuManufacturerBytes[2] = (byte)(ebx >> 16); 274 | cpuManufacturerBytes[3] = (byte)(ebx >> 24); 275 | cpuManufacturerBytes[4] = (byte)edx; 276 | cpuManufacturerBytes[5] = (byte)(edx >> 8); 277 | cpuManufacturerBytes[6] = (byte)(edx >> 16); 278 | cpuManufacturerBytes[7] = (byte)(edx >> 24); 279 | cpuManufacturerBytes[8] = (byte)ecx; 280 | cpuManufacturerBytes[9] = (byte)(ecx >> 8); 281 | cpuManufacturerBytes[10] = (byte)(ecx >> 16); 282 | cpuManufacturerBytes[11] = (byte)(ecx >> 24); 283 | return System.Text.Encoding.ASCII.GetString(cpuManufacturerBytes); 284 | } 285 | 286 | public static void GetProcessorVersion(out byte family, out byte model, out byte stepping) 287 | { 288 | uint eax, ecx, edx, ebx; 289 | Cpuid(1, 0, out eax, out ebx, out ecx, out edx); 290 | 291 | stepping = (byte)(eax & 0xF); 292 | family = (byte)((eax >> 8) & 0xF); 293 | model = (byte)((eax >> 4) & 0xF); 294 | 295 | // wikipedia says if family id is 6 or 15, model = model + extended model id shifted left by 4 bits 296 | // extended model id starts on bit 16 297 | if (family == 6 || family == 15) 298 | { 299 | model += (byte)((eax >> 12) & 0xF0); 300 | } 301 | 302 | // if family is 15, family = family + extended family 303 | if (family == 15) 304 | { 305 | family += (byte)(eax >> 20); 306 | } 307 | } 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /Interop/ThreadAffinity.cs: -------------------------------------------------------------------------------- 1 | // From LibreHardwareMonitor 2 | // Mozilla Public License 2.0 3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | // Copyright (C) LibreHardwareMonitor and Contributors 5 | // All Rights Reserved 6 | 7 | using System; 8 | 9 | namespace PmcReader.Interop 10 | { 11 | internal static class ThreadAffinity 12 | { 13 | public static ulong Set(ulong mask) 14 | { 15 | if (mask == 0) 16 | return 0; 17 | 18 | UIntPtr uIntPtrMask; 19 | try 20 | { 21 | uIntPtrMask = (UIntPtr)mask; 22 | } 23 | catch (OverflowException) 24 | { 25 | throw new ArgumentOutOfRangeException(nameof(mask)); 26 | } 27 | return (ulong)Kernel32.SetThreadAffinityMask(Kernel32.GetCurrentThread(), uIntPtrMask); 28 | } 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /Interop/WinRing0.sys: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/Interop/WinRing0.sys -------------------------------------------------------------------------------- /Interop/WinRing0x64.sys: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/Interop/WinRing0x64.sys -------------------------------------------------------------------------------- /Interop/winpmem_64.sys: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/Interop/winpmem_64.sys -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /PmcReader.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {E43329E2-1CCB-4276-ADBC-D292EED22170} 8 | WinExe 9 | PmcReader 10 | PmcReader 11 | v4.7.2 12 | 512 13 | true 14 | true 15 | 16 | 17 | x64 18 | true 19 | full 20 | false 21 | bin\Debug\ 22 | DEBUG;TRACE 23 | prompt 24 | 4 25 | 26 | 27 | x64 28 | pdbonly 29 | true 30 | bin\Release\ 31 | TRACE 32 | prompt 33 | 4 34 | 35 | 36 | app.manifest 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | Form 81 | 82 | 83 | HaswellForm.cs 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | HaswellForm.cs 116 | 117 | 118 | ResXFileCodeGenerator 119 | Resources.Designer.cs 120 | Designer 121 | 122 | 123 | True 124 | Resources.resx 125 | True 126 | 127 | 128 | 129 | 130 | 131 | 132 | SettingsSingleFileGenerator 133 | Settings.Designer.cs 134 | 135 | 136 | True 137 | Settings.settings 138 | True 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /PmcReader.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30011.22 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PmcReader", "PmcReader.csproj", "{E43329E2-1CCB-4276-ADBC-D292EED22170}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {E43329E2-1CCB-4276-ADBC-D292EED22170}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {E43329E2-1CCB-4276-ADBC-D292EED22170}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {E43329E2-1CCB-4276-ADBC-D292EED22170}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {E43329E2-1CCB-4276-ADBC-D292EED22170}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {C6109254-7E89-4EA3-865F-09E37919EA09} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /Program.cs: -------------------------------------------------------------------------------- 1 | using PmcReader.Interop; 2 | using System; 3 | using System.Windows.Forms; 4 | 5 | namespace PmcReader 6 | { 7 | static class Program 8 | { 9 | /// 10 | /// The main entry point for the application. 11 | /// 12 | [STAThread] 13 | static void Main() 14 | { 15 | Ring0.Open(); 16 | OpCode.Open(); 17 | Application.EnableVisualStyles(); 18 | Application.SetCompatibleTextRenderingDefault(false); 19 | Application.Run(new HaswellForm()); 20 | OpCode.Close(); 21 | Ring0.Close(); 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("PmcReader")] 9 | [assembly: AssemblyDescription("Reads CPU Performance Monitoring Events")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("PmcReader")] 13 | [assembly: AssemblyCopyright("Copyright © 2020")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("e43329e2-1ccb-4276-adbc-d292eed22170")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /Properties/Resources.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.42000 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace PmcReader.Properties { 12 | using System; 13 | 14 | 15 | /// 16 | /// A strongly-typed resource class, for looking up localized strings, etc. 17 | /// 18 | // This class was auto-generated by the StronglyTypedResourceBuilder 19 | // class via a tool like ResGen or Visual Studio. 20 | // To add or remove a member, edit your .ResX file then rerun ResGen 21 | // with the /str option, or rebuild your VS project. 22 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "16.0.0.0")] 23 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] 24 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 25 | internal class Resources { 26 | 27 | private static global::System.Resources.ResourceManager resourceMan; 28 | 29 | private static global::System.Globalization.CultureInfo resourceCulture; 30 | 31 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] 32 | internal Resources() { 33 | } 34 | 35 | /// 36 | /// Returns the cached ResourceManager instance used by this class. 37 | /// 38 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 39 | internal static global::System.Resources.ResourceManager ResourceManager { 40 | get { 41 | if (object.ReferenceEquals(resourceMan, null)) { 42 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("PmcReader.Properties.Resources", typeof(Resources).Assembly); 43 | resourceMan = temp; 44 | } 45 | return resourceMan; 46 | } 47 | } 48 | 49 | /// 50 | /// Overrides the current thread's CurrentUICulture property for all 51 | /// resource lookups using this strongly typed resource class. 52 | /// 53 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 54 | internal static global::System.Globalization.CultureInfo Culture { 55 | get { 56 | return resourceCulture; 57 | } 58 | set { 59 | resourceCulture = value; 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /Properties/Resources.resx: -------------------------------------------------------------------------------- 1 |  2 | 3 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | text/microsoft-resx 107 | 108 | 109 | 2.0 110 | 111 | 112 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 113 | 114 | 115 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 116 | 117 | -------------------------------------------------------------------------------- /Properties/Settings.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.42000 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace PmcReader.Properties { 12 | 13 | 14 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 15 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "16.5.0.0")] 16 | internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase { 17 | 18 | private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); 19 | 20 | public static Settings Default { 21 | get { 22 | return defaultInstance; 23 | } 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /Properties/Settings.settings: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MsrUtil 2 | Performance Counter Reader 3 | 4 | # THIS SOFTWARE IS CONSIDERED EXPERIMENTAL. OUTPUT FROM THE APPLICATION MAY BE INACCURATE. NOT ALL CPU ARCHITECTURES ARE SUPPORTED. 5 | 6 | A messy attempt at reading performance counters for various CPUs and displaying derived metrics in real time. Probably due for a rewrite/rethink of how I approach this pretty soon, whenever I have time. The current structure is a bit messy. Winring0 interface code adapted from LibreHardwareMontor at https://github.com/LibreHardwareMonitor/LibreHardwareMonitor 7 | 8 | ## Building 9 | Open the sln in Visual Studio, hit build. 10 | 11 | ## Running 12 | Right click, run as admin. It needs admin privileges to use the winring0 driver. 13 | 14 | ## Supported Platforms 15 | Every CPU has tons of performance monitoring events, and in most cases it's not practical to cover them all. CPUs have been largely supported on an ad-hoc basis whenever I (Clam) wanted to investigate performance characteristics on that platform. 16 | 17 | ### AMD, Core Events 18 | Zen 2 has the most thorough coverage. Piledriver events are also covered, though in a more limited way because of counter restrictions. 19 | Code is present for Zen 1 and 3, but testing has been minimal on those CPUs because I don't have examples of them. 20 | 21 | ### AMD, Non-Core Events 22 | Basic L3 counter support is implemented for all Zen generations, but data fabric (Infinity Fabric) support is mostly not present because those counters are largely undocumented, especially on client platforms. 23 | 24 | Piledriver's northbridge is decently well covered. 25 | ### Intel, Core Events 26 | Sandy Bridge and Haswell have the best core event coverage. Skylake and Goldmont Plus are a work in progress, with most basic events covered. On other Intel cores, I have code that can read "architectural" events (instructions, cycles, branch mispredicts, last level cache misses), but other events won't be supported. 27 | 28 | There might be some code for Alder Lake, but we don't talk about that. Because it has never been tested. 29 | 30 | ### Intel, Non-Core Events 31 | The program can read basic counters on Haswell client/HEDT and Skylake client uncores for L3 hitrate and system agent arbitration queue events. 32 | 33 | There's pretty extensive support for Sandy Bridge HEDT L3 performance counters. Sandy Bridge's Power Control Unit (PCU) can be monitored as well. 34 | 35 | ## Use of Undocumented Events 36 | In some places, I use events and unit mask combinations not explcitly documented by AMD or Intel. In some cases, I use a combination of unit mask bits that isn't directly in Intel's docs (since they provide umask values, and don't document what's selected by individual bits). Or, I set combinations of edge/count mask fields that aren't directly documented. I expect those cases to work fine. 37 | 38 | In others, I might use a completely undocumented event/umask bit, with basic testing to ensure it does count what I think it counts. I think I've marked most of these cases with a '?', but I may have missed some. 39 | 40 | Anyway, it's best to do your own verification before taking the results as truth. For example, you can verify L3 hitrate is reported correctly by reading from an array that fits within L3, and seeing that the hitrate is indeed high. 41 | 42 | ## General Disclaimer 43 | Even documented performance monitoring events may be inaccurate. There's *plenty* of errata around performance monitoring events, and they're often never fixed by the manufacturers because an incorrectly counting perf event won't cause crashes or break user programs. And inaccuracies are usually small enough to not seriously affect code optimization efforts. 44 | 45 | Also, it's good to read about the events in use in Intel/AMD's docs before interpreting them. I don't expect everyone to do this because documentation can be really hard to parse, so there are the major things to be aware of: 46 | - Cache requests and misses are generally tracked per cache line. For example, if three instructions miss L1D but requested data from the same 64B cache line, that'll count as one L1D miss/fill request in the cache hierarchy. 47 | - Many events are "speculative" meaning that counts could be triggered by instructions that are never retired (committed, or have their results made final). For example, instructions could be fetched, pass through rename/execute and cause event count increments there, but then be thrown away before retirement because they came down a mispredicted path. In some cases, similar events on AMD and Intel cannot be directly compared because one is speculative and the other is not. 48 | - Non-core events should always be considered speculative. 49 | 50 | ## Other 51 | 52 | There's testing controls under the 'Do not push these buttons' section. They may or may not work and I generally recommend avoiding them unless you really know what you're doing. They'll most likely decrease performance, and could cause weird behavior. 53 | 54 | ### Intel, Testing Controls 55 | Prefetchers can be turned on and off, using MSRs documented by Intel. Specifically: 56 | - L2 HW PF: L2 hardware prefetcher 57 | - L2 Adj PF: L2 adjacent cache line prefetcher. On a L2 miss, this prefetcher fetches an adjacent cache line as well, taking advantage of spatial locality. 58 | - L1D Adj PF: Adjacent line prefetcher for L1D misses 59 | - L1D IP PF: Instruction pointer based prefetcher that tracks the address of previous load instructions and uses that to prefetch extra cache lines. 60 | 61 | ### AMD, Testing Controls 62 | For 17h and newer CPUs (Zen stuff): 63 | - Op Cache: Can be used to disable the micro-op cache. Not documented by AMD, generally drops performance by a few percent. Use at your own risk. 64 | - Core Performance Boost: Can be used to disable Core Performance Boost, which will prevent the CPU from raising frequencies beyond base clock. Potentially useful for ensuring clock consistency when microbenchmarking, or just making your CPU more power efficient. 65 | - L1D Stream Prefetcher, L2 Stream Prefetcher: Toggles MSR bits that should request the respective prefetchers to be disabled, but I'm not sure if it works. 66 | - Set CPU Name String: Can be used to set the CPU name reported by the CPUID instruction. This can be funny, but can also cause strange behavior. Benchmark apps and CPU-Z may misidentify your CPU. Ryzen Master may think you're on a different CPU and not show your saved profiles. -------------------------------------------------------------------------------- /app.manifest: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 25 | 26 | 27 | true 28 | true 29 | 30 | 31 | 32 | 33 | 34 | 35 | 43 | 44 | 45 | 46 | 47 | --------------------------------------------------------------------------------