├── .gitattributes
├── .gitignore
├── .vs
└── PmcReader
│ ├── config
│ └── applicationhost.config
│ ├── v15
│ └── .suo
│ └── v16
│ └── TestStore
│ └── 0
│ ├── 000.testlog
│ └── testlog.manifest
├── AMD
├── Amd10hCpu.cs
├── Amd15hCpu.cs
├── Amd16hCpu.cs
├── Amd17hCpu.cs
├── Amd19hCpu.cs
├── Bulldozer.cs
├── Jaguar.cs
├── K10.cs
├── Piledriver.cs
├── PiledriverNorthbridge.cs
├── Zen.cs
├── Zen1.cs
├── Zen2.cs
├── Zen2DataFabric.cs
├── Zen2L3Cache.cs
├── Zen3.cs
├── Zen3L3Cache.cs
├── Zen4.cs
├── Zen4DataFabric.cs
├── Zen4L3Cache.cs
├── Zen5.cs
├── Zen5DataFabric.cs
├── Zen5L3Cache.cs
└── ZenL3Cache.cs
├── App.config
├── Cpu.cs
├── GenericMonitoringArea.cs
├── HaswellForm.Designer.cs
├── HaswellForm.cs
├── HaswellForm.resx
├── Intel
├── AlderLake.cs
├── AlderLakeL3.cs
├── AlderLakeUncore.cs
├── GoldmontPlus.cs
├── Haswell.cs
├── HaswellClientArb.cs
├── HaswellClientL3.cs
├── HaswellClientUncore.cs
├── HaswellEL3.cs
├── MeteorLake.cs
├── MeteorLakeArb.cs
├── MeteorLakeL3.cs
├── MeteorLakeUncore.cs
├── ModernIntelCpu.cs
├── SandyBridge.cs
├── SandyBridgeEL3.cs
├── SandyBridgeUncore.cs
├── Skylake.cs
├── SkylakeClientArb.cs
├── SkylakeClientL3.cs
└── SkylakeClientUncore.cs
├── Interop
├── AdvApi32.cs
├── Kernel32.cs
├── KernelDriver.cs
├── OpCode.cs
├── Ring0.cs
├── ThreadAffinity.cs
├── WinRing0.sys
├── WinRing0x64.sys
└── winpmem_64.sys
├── LICENSE
├── PmcReader.csproj
├── PmcReader.sln
├── Program.cs
├── Properties
├── AssemblyInfo.cs
├── Resources.Designer.cs
├── Resources.resx
├── Settings.Designer.cs
└── Settings.settings
├── README.md
└── app.manifest
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/
2 | obj/
3 | .vs/
--------------------------------------------------------------------------------
/.vs/PmcReader/v15/.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/.vs/PmcReader/v15/.suo
--------------------------------------------------------------------------------
/.vs/PmcReader/v16/TestStore/0/000.testlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/.vs/PmcReader/v16/TestStore/0/000.testlog
--------------------------------------------------------------------------------
/.vs/PmcReader/v16/TestStore/0/testlog.manifest:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/.vs/PmcReader/v16/TestStore/0/testlog.manifest
--------------------------------------------------------------------------------
/AMD/Amd10hCpu.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 | using System.Windows.Forms;
4 | using System.Drawing;
5 |
6 | namespace PmcReader.AMD
7 | {
8 |     public class Amd10hCpu : GenericMonitoringArea
9 |     {
10 |         public const uint MSR_TSC = 0x00000010;
11 |         public const uint MSR_PERF_CTR_0 = 0xC0010004;
12 |         public const uint MSR_PERF_CTR_1 = 0xC0010005;
13 |         public const uint MSR_PERF_CTR_2 = 0xC0010006;
14 |         public const uint MSR_PERF_CTR_3 = 0xC0010007;
15 |         public const uint MSR_PERF_CTL_0 = 0xC0010000;
16 |         public const uint MSR_PERF_CTL_1 = 0xC0010001;
17 |         public const uint MSR_PERF_CTL_2 = 0xC0010002;
18 |         public const uint MSR_PERF_CTL_3 = 0xC0010003;
19 |
20 |         public const uint HWCR = 0xC0010015;
21 |         // Normalized counter values per thread and summed over threads; refreshed by UpdateThreadCoreCounterData
22 |         public NormalizedCoreCounterData[] NormalizedThreadCounts;
23 |         public NormalizedCoreCounterData NormalizedTotalCounts;
24 |         private ulong[] lastThreadTsc; // TSC value seen by each thread's previous ReadFixedCounters call
25 |
26 |         public Amd10hCpu()
27 |         {
28 |             architectureName = "AMD 10h Family";
29 |             lastThreadTsc = new ulong[GetThreadCount()];
30 |         }
31 |
32 |         /// <summary>
33 |         /// Program the four core perf counter control MSRs on every thread
34 |         /// </summary>
35 |         /// <param name="ctr0">Counter 0 event select</param>
36 |         /// <param name="ctr1">Counter 1 event select</param>
37 |         /// <param name="ctr2">Counter 2 event select</param>
38 |         /// <param name="ctr3">Counter 3 event select</param>
39 |         public void ProgramPerfCounters(ulong ctr0, ulong ctr1, ulong ctr2, ulong ctr3)
40 |         {
41 |             for (int threadIdx = 0; threadIdx < this.GetThreadCount(); threadIdx++)
42 |             {
43 |                 ThreadAffinity.Set(1UL << threadIdx);
44 |                 Ring0.WriteMsr(MSR_PERF_CTL_0, ctr0);
45 |                 Ring0.WriteMsr(MSR_PERF_CTL_1, ctr1);
46 |                 Ring0.WriteMsr(MSR_PERF_CTL_2, ctr2);
47 |                 Ring0.WriteMsr(MSR_PERF_CTL_3, ctr3);
48 |             }
49 |         }
50 |
51 |         /// <summary>
52 |         /// Read the TSC and report in elapsedTsc the ticks since this thread's previous read; affinity must be set going in
53 |         /// </summary>
54 |         /// <param name="threadIdx">thread to update fixed counters for</param>
55 |         public void ReadFixedCounters(int threadIdx, out ulong elapsedTsc)
56 |         {
57 |             ulong tsc;
58 |             Ring0.ReadMsr(MSR_TSC, out tsc);
59 |
60 |             // Unsigned subtraction is modular, so this one expression covers the first
61 |             // sample (previous value 0 yields the raw TSC), the normal case and a counter
62 |             // wrap. The earlier tsc + (ulong.MaxValue - last) form was one tick short on a
63 |             // wrap and reported ulong.MaxValue when two reads returned the same value.
64 |             elapsedTsc = unchecked(tsc - lastThreadTsc[threadIdx]);
65 |
66 |             lastThreadTsc[threadIdx] = tsc;
67 |         }
68 |
69 |         /// <summary>
70 |         /// initialize/reset accumulated totals for core counter data
71 |         /// </summary>
72 |         public void InitializeCoreTotals()
73 |         {
74 |             if (NormalizedTotalCounts == null)
75 |             {
76 |                 NormalizedTotalCounts = new NormalizedCoreCounterData();
77 |             }
78 |
79 |             NormalizedTotalCounts.tsc = 0;
80 |             NormalizedTotalCounts.ctr0 = 0;
81 |             NormalizedTotalCounts.ctr1 = 0;
82 |             NormalizedTotalCounts.ctr2 = 0;
83 |             NormalizedTotalCounts.ctr3 = 0;
84 |         }
85 |
86 |         /// <summary>
87 |         /// Read and update counter data for thread
88 |         /// </summary>
89 |         /// <param name="threadIdx">Thread to set affinity to</param>
90 |         public void UpdateThreadCoreCounterData(int threadIdx)
91 |         {
92 |             ThreadAffinity.Set(1UL << threadIdx);
93 |             float normalizationFactor = GetNormalizationFactor(threadIdx);
94 |             ulong tsc;
95 |             ulong ctr0, ctr1, ctr2, ctr3;
96 |             ReadFixedCounters(threadIdx, out tsc);
97 |             ctr0 = ReadAndClearMsr(MSR_PERF_CTR_0);
98 |             ctr1 = ReadAndClearMsr(MSR_PERF_CTR_1);
99 |             ctr2 = ReadAndClearMsr(MSR_PERF_CTR_2);
100 |             ctr3 = ReadAndClearMsr(MSR_PERF_CTR_3);
101 |
102 |             if (NormalizedThreadCounts == null) NormalizedThreadCounts = new NormalizedCoreCounterData[threadCount];
103 |             if (NormalizedThreadCounts[threadIdx] == null) NormalizedThreadCounts[threadIdx] = new NormalizedCoreCounterData();
104 |             if (NormalizedTotalCounts == null) NormalizedTotalCounts = new NormalizedCoreCounterData(); // tolerate a missing InitializeCoreTotals() call
105 |             if (NormalizedThreadCounts[threadIdx].NormalizationFactor != 0.0f) // stays 0 until an earlier update stored a factor for this thread
106 |             {
107 |                 NormalizedThreadCounts[threadIdx].totalctr0 += ctr0;
108 |                 NormalizedThreadCounts[threadIdx].totalctr1 += ctr1;
109 |                 NormalizedThreadCounts[threadIdx].totalctr2 += ctr2;
110 |                 NormalizedThreadCounts[threadIdx].totalctr3 += ctr3;
111 |                 NormalizedTotalCounts.totalctr0 += ctr0;
112 |                 NormalizedTotalCounts.totalctr1 += ctr1;
113 |                 NormalizedTotalCounts.totalctr2 += ctr2;
114 |                 NormalizedTotalCounts.totalctr3 += ctr3;
115 |             }
116 |             // Store this sample's normalized values, then add them to the cross-thread sums
117 |             NormalizedThreadCounts[threadIdx].tsc = tsc * normalizationFactor;
118 |             NormalizedThreadCounts[threadIdx].ctr0 = ctr0 * normalizationFactor;
119 |             NormalizedThreadCounts[threadIdx].ctr1 = ctr1 * normalizationFactor;
120 |             NormalizedThreadCounts[threadIdx].ctr2 = ctr2 * normalizationFactor;
121 |             NormalizedThreadCounts[threadIdx].ctr3 = ctr3 * normalizationFactor;
122 |             NormalizedThreadCounts[threadIdx].NormalizationFactor = normalizationFactor;
123 |             NormalizedTotalCounts.tsc += NormalizedThreadCounts[threadIdx].tsc;
124 |             NormalizedTotalCounts.ctr0 += NormalizedThreadCounts[threadIdx].ctr0;
125 |             NormalizedTotalCounts.ctr1 += NormalizedThreadCounts[threadIdx].ctr1;
126 |             NormalizedTotalCounts.ctr2 += NormalizedThreadCounts[threadIdx].ctr2;
127 |             NormalizedTotalCounts.ctr3 += NormalizedThreadCounts[threadIdx].ctr3;
128 |         }
129 |
130 |         /// <summary>
131 |         /// Assemble overall counter values into a Tuple of string, float array.
132 |         /// </summary>
133 |         /// <param name="ctr0">Description for counter 0 value</param>
134 |         /// <param name="ctr1">Description for counter 1 value</param>
135 |         /// <param name="ctr2">Description for counter 2 value</param>
136 |         /// <param name="ctr3">Description for counter 3 value</param>
137 |         /// <returns>Array to put in results object</returns>
138 |         public Tuple<string, float>[] GetOverallCounterValues(string ctr0, string ctr1, string ctr2, string ctr3)
139 |         {
140 |             NormalizedCoreCounterData dataToLog = this.NormalizedTotalCounts;
141 |             if (this.targetLogCoreIndex >= 0)
142 |             {
143 |                 dataToLog = NormalizedThreadCounts[this.targetLogCoreIndex];
144 |             }
145 |
146 |             Tuple<string, float>[] retval = new Tuple<string, float>[5];
147 |             retval[0] = new Tuple<string, float>("TSC", dataToLog.tsc);
148 |             retval[1] = new Tuple<string, float>(ctr0, dataToLog.ctr0);
149 |             retval[2] = new Tuple<string, float>(ctr1, dataToLog.ctr1);
150 |             retval[3] = new Tuple<string, float>(ctr2, dataToLog.ctr2);
151 |             retval[4] = new Tuple<string, float>(ctr3, dataToLog.ctr3);
152 |             return retval;
153 |         }
154 |
155 |         /// <summary>
156 |         /// Get perf ctl value with defaults for the other fields: OS and user mode, no interrupt, enabled, cmask not inverted, host and guest
157 |         /// </summary>
158 |         /// <param name="perfEvent">Perf event, low 8 bits</param>
159 |         /// <param name="umask">Unit mask</param>
160 |         /// <param name="edge">only increment on transition</param>
161 |         /// <param name="cmask">count mask</param>
162 |         /// <param name="perfEventHi">Perf event, high bits (written starting at bit 32)</param>
163 |         /// <returns>value for perf ctl msr</returns>
164 |         public static ulong GetPerfCtlValue(byte perfEvent, byte umask, bool edge, byte cmask, byte perfEventHi)
165 |         {
166 |             return GetPerfCtlValue(perfEvent,
167 |                 umask,
168 |                 OsUsrMode.All,
169 |                 edge,
170 |                 interrupt: false,
171 |                 enable: true,
172 |                 invert: false,
173 |                 cmask,
174 |                 perfEventHi,
175 |                 HostGuestOnly.All);
176 |         }
177 |
178 |         /// <summary>
179 |         /// Get core perf ctl value
180 |         /// </summary>
181 |         /// <param name="perfEvent">Low 8 bits of performance event (bits 0-7)</param>
182 |         /// <param name="umask">perf event umask (from bit 8)</param>
183 |         /// <param name="osUsrMode">Count in os or user mode (from bit 16)</param>
184 |         /// <param name="edge">only increment on transition (bit 18)</param>
185 |         /// <param name="interrupt">generate apic interrupt on overflow (bit 20)</param>
186 |         /// <param name="enable">enable perf ctr (bit 22)</param>
187 |         /// <param name="invert">invert cmask (bit 23)</param>
188 |         /// <param name="cmask">0 = increment by event count. >0 = increment by 1 if event count in clock cycle >= cmask (from bit 24)</param>
189 |         /// <param name="perfEventHi">high bits of performance event (from bit 32)</param>
190 |         /// <param name="hostGuestOnly">Count host or guest events (from bit 40)</param>
191 |         /// <returns>value for perf ctl msr</returns>
192 |         public static ulong GetPerfCtlValue(byte perfEvent, byte umask, OsUsrMode osUsrMode, bool edge, bool interrupt, bool enable, bool invert, byte cmask, byte perfEventHi, HostGuestOnly hostGuestOnly)
193 |         {
194 |             return perfEvent |
195 |                 ((ulong)umask << 8) |
196 |                 ((ulong)osUsrMode << 16) |
197 |                 ((edge ? 1UL : 0UL) << 18) |
198 |                 ((interrupt ? 1UL : 0UL) << 20) |
199 |                 ((enable ? 1UL : 0UL) << 22) |
200 |                 ((invert ? 1UL : 0UL) << 23) |
201 |                 ((ulong)cmask << 24) |
202 |                 ((ulong)perfEventHi << 32) |
203 |                 ((ulong)hostGuestOnly << 40);
204 |         }
205 |
206 |         /// <summary>
207 |         /// Selects what ring(s) events are counted for
208 |         /// </summary>
209 |         public enum OsUsrMode
210 |         {
211 |             None = 0b00,
212 |             Usr = 0b01,
213 |             OS = 0b10,
214 |             All = 0b11
215 |         }
216 |
217 |         /// <summary>
218 |         /// Whether to count events for guest (VM) or host
219 |         /// </summary>
220 |         public enum HostGuestOnly
221 |         {
222 |             All = 0b00,
223 |             Guest = 0b01,
224 |             Host = 0b10,
225 |             AllSvme = 0b11
226 |         }
227 |
228 |         public class NormalizedCoreCounterData
229 |         {
230 |             /// <summary>
231 |             /// Time stamp counter
232 |             /// Increments at P0 frequency
233 |             /// </summary>
234 |             public float tsc;
235 |
236 |             /// <summary>
237 |             /// Programmable performance counter values
238 |             /// </summary>
239 |             public float ctr0;
240 |             public float ctr1;
241 |             public float ctr2;
242 |             public float ctr3;
243 |             // Factor the raw counts were multiplied by in the most recent update
244 |             public float NormalizationFactor;
245 |             // Raw (unnormalized) counts accumulated across updates
246 |             public ulong totalctr0;
247 |             public ulong totalctr1;
248 |             public ulong totalctr2;
249 |             public ulong totalctr3;
250 |         }
251 |     }
252 | }
253 |
--------------------------------------------------------------------------------
/AMD/Jaguar.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using PmcReader.Interop;
3 |
4 | namespace PmcReader.AMD
5 | {
6 |     public class Jaguar : Amd16hCpu
7 |     {
8 |         public Jaguar()
9 |         {
10 |             List<MonitoringConfig> configs = new List<MonitoringConfig>();
11 |             configs.Add(new BpuMonitoringConfig(this));
12 |             monitoringConfigs = configs.ToArray();
13 |             architectureName = "Jaguar";
14 |         }
15 |         /// <summary>Reports IPC, branch prediction accuracy, mispredicts per 1000 instructions and branch share of instructions</summary>
16 |         public class BpuMonitoringConfig : MonitoringConfig
17 |         {
18 |             private Jaguar cpu;
19 |             public string GetConfigName() { return "Branch Prediction"; }
20 |
21 |             public BpuMonitoringConfig(Jaguar amdCpu)
22 |             {
23 |                 cpu = amdCpu;
24 |             }
25 |
26 |             public string[] GetColumns() { return columns; }
27 |             // Counter assignment below is what computeMetrics and the labels in Update rely on
28 |             public void Initialize()
29 |             {
30 |                 cpu.ProgramCorePerfCounters(
31 |                     GetPerfCtlValue(0xC0, 0, false, 0, 0), // ret instr
32 |                     GetPerfCtlValue(0x76, 0, false, 0, 0), // cycles
33 |                     GetPerfCtlValue(0xC2, 0, false, 0, 0), // ret branch
34 |                     GetPerfCtlValue(0xC3, 0, false, 0, 0)); // ret misp branch
35 |             }
36 |             // Samples every thread, building one metrics row per thread plus an overall row
37 |             public MonitoringUpdateResults Update()
38 |             {
39 |                 MonitoringUpdateResults results = new MonitoringUpdateResults();
40 |                 results.unitMetrics = new string[cpu.GetThreadCount()][];
41 |                 cpu.InitializeCoreTotals();
42 |                 for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
43 |                 {
44 |                     cpu.UpdateThreadCoreCounterData(threadIdx);
45 |                     results.unitMetrics[threadIdx] = computeMetrics("Thread " + threadIdx, cpu.NormalizedThreadCounts[threadIdx]);
46 |                 }
47 |
48 |                 results.overallMetrics = computeMetrics("Overall", cpu.NormalizedTotalCounts);
49 |                 results.overallCounterValues = cpu.GetOverallCounterValues("Instructions", "Cycles", "Retired Branches", "Retired Mispredicted Branches");
50 |                 return results;
51 |             }
52 |
53 |             public string[] columns = new string[] { "Item", "Active Cycles", "Instructions", "IPC", "BPU Acc", "Branch MPKI", "% Branches" };
54 |
55 |             public string GetHelpText()
56 |             {
57 |                 return "aaaaaa"; // placeholder text; no real help has been written for this config
58 |             }
59 |             // ctr0 = instructions, ctr1 = cycles, ctr2 = retired branches, ctr3 = retired mispredicted branches (see Initialize)
60 |             private string[] computeMetrics(string label, NormalizedCoreCounterData counterData)
61 |             {
62 |                 float instr = counterData.ctr0;
63 |                 float cycles = counterData.ctr1;
64 |                 return new string[] { label,
65 |                     FormatLargeNumber(cycles),
66 |                     FormatLargeNumber(instr),
67 |                     string.Format("{0:F2}", instr / cycles),
68 |                     FormatPercentage(counterData.ctr2 - counterData.ctr3, counterData.ctr2),
69 |                     string.Format("{0:F2}", 1000 * counterData.ctr3 / instr),
70 |                     FormatPercentage(counterData.ctr2, instr) // "% Branches": retired branches over instructions (was using mispredicts)
71 |                 };
72 |             }
73 |         }
74 |     }
75 | }
76 |
--------------------------------------------------------------------------------
/AMD/Zen.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.InteropServices.WindowsRuntime;
3 | using PmcReader.Interop;
4 |
5 | namespace PmcReader.AMD
6 | {
7 |     public class Zen : Amd17hCpu
8 |     {
9 |         public Zen()
10 |         {
11 |             monitoringConfigs = new MonitoringConfig[1];
12 |             monitoringConfigs[0] = new BpuMonitoringConfig(this);
13 |             architectureName = "Zen 1";
14 |         }
15 |         /// <summary>Reports IPC, branch prediction accuracy, branch MPKI, and branch, mispredict and fused-branch counts</summary>
16 |         public class BpuMonitoringConfig : MonitoringConfig
17 |         {
18 |             private Zen cpu;
19 |             public string GetConfigName() { return "Branch Prediction and Fusion"; }
20 |
21 |             public BpuMonitoringConfig(Zen amdCpu)
22 |             {
23 |                 cpu = amdCpu;
24 |             }
25 |
26 |             public string[] GetColumns()
27 |             {
28 |                 return columns;
29 |             }
30 |             // Programs all six core counters on every thread; the assignment is what Update and computeMetrics rely on
31 |             public void Initialize()
32 |             {
33 |                 cpu.EnablePerformanceCounters();
34 |                 for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
35 |                 {
36 |                     ThreadAffinity.Set(1UL << threadIdx);
37 |                     // Set PERF_CTR0 to count retired branches
38 |                     Ring0.WriteMsr(MSR_PERF_CTL_0, GetPerfCtlValue(0xC2, 0, true, true, false, false, true, false, 0, 0, false, false));
39 |
40 |                     // PERF_CTR1 = mispredicted retired branches
41 |                     Ring0.WriteMsr(MSR_PERF_CTL_1, GetPerfCtlValue(0xC3, 0, true, true, false, false, true, false, 0, 0, false, false));
42 |
43 |                     // PERF_CTR2 = retired instrs
44 |                     Ring0.WriteMsr(MSR_PERF_CTL_2, GetPerfCtlValue(0xC0, 0, true, true, false, false, true, false, 0, 0, false, false));
45 |
46 |                     // PERF_CTR3 = cycles not in halt
47 |                     Ring0.WriteMsr(MSR_PERF_CTL_3, GetPerfCtlValue(0x76, 0, true, true, false, false, true, false, 0, 0, false, false));
48 |
49 |                     // PERF_CTR4 = decoder overrides existing prediction
50 |                     Ring0.WriteMsr(MSR_PERF_CTL_4, GetPerfCtlValue(0x91, 0, true, true, false, false, true, false, 0, 0, false, false));
51 |
52 |                     // PERF_CTR5 = retired fused branch instructions
53 |                     Ring0.WriteMsr(MSR_PERF_CTL_5, GetPerfCtlValue(0xD0, 0, true, true, false, false, true, false, 0, 1, false, false));
54 |                 }
55 |             }
56 |             // Samples every thread, building one metrics row per thread plus an overall row
57 |             public MonitoringUpdateResults Update()
58 |             {
59 |                 MonitoringUpdateResults results = new MonitoringUpdateResults();
60 |                 results.unitMetrics = new string[cpu.GetThreadCount()][];
61 |                 cpu.InitializeCoreTotals();
62 |                 for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
63 |                 {
64 |                     cpu.UpdateThreadCoreCounterData(threadIdx);
65 |                     results.unitMetrics[threadIdx] = computeMetrics("Thread " + threadIdx, cpu.NormalizedThreadCounts[threadIdx]);
66 |                 }
67 |                 // Labels follow the counter assignment in Initialize; counters 2 and 3 were mislabelled as BTB overrides
68 |                 results.overallMetrics = computeMetrics("Overall", cpu.NormalizedTotalCounts);
69 |                 results.overallCounterValues = cpu.GetOverallCounterValues("Retired Branches", "Retired Misp Branches", "Instructions", "Cycles", "Decoder Override", "Fused Branches");
70 |                 return results;
71 |             }
72 |
73 |             public string[] columns = new string[] { "Item", "Active Cycles", "Instructions", "IPC", "BPU Accuracy", "Branch MPKI", "Branches", "Mispredicted Branches", "Fused Branches" };
74 |
75 |             public string GetHelpText()
76 |             {
77 |                 return "";
78 |             }
79 |
80 |             private string[] computeMetrics(string label, NormalizedCoreCounterData counterData)
81 |             {
82 |                 // ctr0 = retired branches, ctr1 = mispredicted branches, ctr2 = instructions, ctr3 = unhalted cycles, ctr5 = fused branches
83 |                 return new string[] { label,
84 |                     FormatLargeNumber(counterData.ctr3),
85 |                     FormatLargeNumber(counterData.ctr2),
86 |                     string.Format("{0:F2}", counterData.ctr2 / counterData.ctr3),
87 |                     string.Format("{0:F2}%", 100 * (1 - counterData.ctr1 / counterData.ctr0)),
88 |                     string.Format("{0:F2}", counterData.ctr1 / counterData.ctr2 * 1000), // MPKI is per 1000 instructions, not per 1000 cycles
89 |                     FormatLargeNumber(counterData.ctr0),
90 |                     FormatLargeNumber(counterData.ctr1),
91 |                     FormatLargeNumber(counterData.ctr5)};
92 |             }
93 |         }
94 |     }
95 | }
96 |
--------------------------------------------------------------------------------
/AMD/Zen2DataFabric.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using PmcReader.Interop;
4 |
5 | namespace PmcReader.AMD
6 | {
7 |     public class Zen2DataFabric : Amd17hCpu
8 |     {
9 |         public enum DfType
10 |         {
11 |             Client = 0,
12 |             DestkopThreadripper = 1, // (sic) misspelled member name kept so existing callers still compile
13 |             Server = 2
14 |         }
15 |         public Zen2DataFabric(DfType dfType)
16 |         {
17 |             architectureName = "Zen 2 Data Fabric";
18 |             List<MonitoringConfig> monitoringConfigList = new List<MonitoringConfig>();
19 |             if (dfType == DfType.Client) monitoringConfigList.Add(new ClientBwConfig(this));
20 |             else if (dfType == DfType.DestkopThreadripper) monitoringConfigList.Add(new TrDramBwConfig(this)); // DfType.Server gets no config here
21 |             monitoringConfigs = monitoringConfigList.ToArray();
22 |         }
23 |         /// <summary>Counts four data fabric events and reports each count times 64 as a bytes-per-second figure</summary>
24 |         public class TrDramBwConfig : MonitoringConfig
25 |         {
26 |             private Zen2DataFabric dataFabric;
27 |             private long lastUpdateTime;
28 |             private const int monitoringThread = 1;
29 |
30 |             public string[] columns = new string[] { "Item", "DRAM BW" };
31 |             public string GetHelpText() { return ""; }
32 |             public TrDramBwConfig(Zen2DataFabric dataFabric)
33 |             {
34 |                 this.dataFabric = dataFabric;
35 |             }
36 |
37 |             public string GetConfigName() { return "TR DRAM Bandwidth?"; }
38 |             public string[] GetColumns() { return columns; }
39 |             public void Initialize()
40 |             {
41 |                 // Undocumented data fabric events mentioned in the preliminary PPR, but removed in the latest one
42 |                 // The preliminary PPR suggests calculating DRAM bandwidth by adding up all these events and
43 |                 // multiplying by 64
44 |                 // These four are always zero on the 3950X. Possibly for quad channel?
45 |                 /*ulong mysteryDramBytes7 = 0x00000001004038C7;
46 |                 ulong mysteryDramBytes6 = 0x0000000100403887; */
47 |                 // ulong mysteryDramBytes1 = 0x0000000000403847;
48 |                 //ulong mysteryDramBytes0 = 0x0000000000403807;
49 |
50 |                 // Nemes says these four return counts on her TR
51 |                 ulong mysteryDramBytes5 = 0x0000000100403847;
52 |                 ulong mysteryDramBytes4 = 0x0000000100403807;
53 |                 ulong mysteryDramBytes3 = 0x00000000004038C7;
54 |                 ulong mysteryDramBytes2 = 0x0000000000403887;
55 |
56 |                 ThreadAffinity.Set(1UL << monitoringThread);
57 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_0, mysteryDramBytes4);
58 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_1, mysteryDramBytes5);
59 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_2, mysteryDramBytes2);
60 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_3, mysteryDramBytes3);
61 |
62 |                 lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
63 |             }
64 |
65 |             public MonitoringUpdateResults Update()
66 |             {
67 |                 float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime);
68 |                 MonitoringUpdateResults results = new MonitoringUpdateResults();
69 |                 results.unitMetrics = new string[4][];
70 |                 ThreadAffinity.Set(1UL << monitoringThread);
71 |                 ulong mysteryDramBytes4 = ReadAndClearMsr(MSR_DF_PERF_CTR_0) * 64;
72 |                 ulong mysteryDramBytes5 = ReadAndClearMsr(MSR_DF_PERF_CTR_1) * 64;
73 |                 ulong mysteryDramBytes2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2) * 64;
74 |                 ulong mysteryDramBytes3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3) * 64;
75 |                 // Rows are listed in event-number order, which differs from the counter order programmed in Initialize
76 |                 results.unitMetrics[0] = new string[] { "DF Evt 0x87 Umask 0x38", FormatLargeNumber(mysteryDramBytes2 * normalizationFactor) + "B/s" };
77 |                 results.unitMetrics[1] = new string[] { "DF Evt 0xC7 Umask 0x38", FormatLargeNumber(mysteryDramBytes3 * normalizationFactor) + "B/s" };
78 |                 results.unitMetrics[2] = new string[] { "DF Evt 0x107 Umask 0x38", FormatLargeNumber(mysteryDramBytes4 * normalizationFactor) + "B/s" };
79 |                 results.unitMetrics[3] = new string[] { "DF Evt 0x147 Umask 0x38", FormatLargeNumber(mysteryDramBytes5 * normalizationFactor) + "B/s" };
80 |
81 |                 results.overallMetrics = new string[] { "Overall",
82 |                     FormatLargeNumber((mysteryDramBytes4 + mysteryDramBytes5 + mysteryDramBytes2 + mysteryDramBytes3) * normalizationFactor) + "B/s" };
83 |                 return results;
84 |             }
85 |         }
86 |         /// <summary>Counts events 0x7 and 0x47 with read and write unit masks and reports them alongside package power</summary>
87 |         public class ClientBwConfig : MonitoringConfig
88 |         {
89 |             private Zen2DataFabric dataFabric;
90 |             private long lastUpdateTime;
91 |             private const int monitoringThread = 1;
92 |
93 |             public string[] columns = new string[] { "Item", "Count * 64B", "Count", "Pkg Pwr" };
94 |             public string GetHelpText() { return ""; }
95 |             public ClientBwConfig(Zen2DataFabric dataFabric)
96 |             {
97 |                 this.dataFabric = dataFabric;
98 |             }
99 |
100 |             public string GetConfigName() { return "MTS/RNR DRAM Bandwidth??"; }
101 |             public string[] GetColumns() { return columns; }
102 |             public void Initialize()
103 |             {
104 |                 ThreadAffinity.Set(1UL << monitoringThread);
105 |                 /* From experimentation, the umask seems to be laid out as:
106 |                  * bit 0: include NT writes, but requires bit 3 to be set???
107 |                  * bit 1: unknown (very low counts)
108 |                  * bit 2: unknown (very low counts)
109 |                  * bit 3: writes
110 |                  * bit 4: unknown (very low counts for normal reads/writes, zero counts for NT write)
111 |                  * bit 5: reads
112 |                  * bit 6: unknown (zero)
113 |                  * bit 7: unknown (zero)
114 |                  * Unit masks tested on a 3950X and 4800H
115 |                  * These work for events 0x7 and 0x47, which seem to correspond to the two memory channels
116 |                  * based on one of them reading zero if the DIMMs for one channel are pulled
117 |                  */
118 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_0, GetDFPerfCtlValue(0x7, 0x20, true, 0, 0)); // ch0 read?
119 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_1, GetDFPerfCtlValue(0x7, 0x8, true, 0, 0)); // ch0 write?
120 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_2, GetDFPerfCtlValue(0x47, 0x20, true, 0, 0));// ch1 read?
121 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_3, GetDFPerfCtlValue(0x47, 0x8, true, 0, 0)); // ch1 write?
122 |
123 |                 dataFabric.InitializeCoreTotals();
124 |                 lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
125 |             }
126 |
127 |             public MonitoringUpdateResults Update()
128 |             {
129 |                 float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime);
130 |                 MonitoringUpdateResults results = new MonitoringUpdateResults();
131 |                 ThreadAffinity.Set(1UL << monitoringThread);
132 |                 ulong ctr0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0);
133 |                 ulong ctr1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1);
134 |                 ulong ctr2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2);
135 |                 ulong ctr3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3);
136 |
137 |                 dataFabric.ReadPackagePowerCounter();
138 |                 results.unitMetrics = new string[4][];
139 |                 results.unitMetrics[0] = new string[] { "Ch 0 Read?", FormatLargeNumber(ctr0 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr0 * normalizationFactor), "N/A" };
140 |                 results.unitMetrics[1] = new string[] { "Ch 0 Write?", FormatLargeNumber(ctr1 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" };
141 |                 results.unitMetrics[2] = new string[] { "Ch 1 Read?", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" };
142 |                 results.unitMetrics[3] = new string[] { "Ch 1 Write?", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" };
143 |
144 |                 ulong total = ctr0 + ctr1 + ctr2 + ctr3;
145 |                 results.overallMetrics = new string[] { "Total",
146 |                     FormatLargeNumber(total * normalizationFactor * 64) + "B/s",
147 |                     FormatLargeNumber(total * normalizationFactor),
148 |                     string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts)
149 |                 };
150 |                 // The logged counter values below are the raw counts, without the normalization factor applied
151 |                 results.overallCounterValues = new Tuple<string, float>[5];
152 |                 results.overallCounterValues[0] = new Tuple<string, float>("Package Power", dataFabric.NormalizedTotalCounts.watts);
153 |                 results.overallCounterValues[1] = new Tuple<string, float>("Ch 0 Read?", ctr0);
154 |                 results.overallCounterValues[2] = new Tuple<string, float>("Ch 0 Write?", ctr1);
155 |                 results.overallCounterValues[3] = new Tuple<string, float>("Ch 1 Read?", ctr2);
156 |                 results.overallCounterValues[4] = new Tuple<string, float>("Ch 1 Write?", ctr3);
157 |                 return results;
158 |             }
159 |         }
160 |         /// <summary>Counts four outbound-data events (count times 32); the Zen2DataFabric constructor never registers this config</summary>
161 |         public class OutboundDataConfig : MonitoringConfig
162 |         {
163 |             private Zen2DataFabric dataFabric;
164 |             private long lastUpdateTime;
165 |             private const int monitoringThread = 1;
166 |
167 |             public string[] columns = new string[] { "Item", "Outbound Data BW" };
168 |             public string GetHelpText() { return ""; }
169 |             public OutboundDataConfig(Zen2DataFabric dataFabric)
170 |             {
171 |                 this.dataFabric = dataFabric;
172 |             }
173 |
174 |             public string GetConfigName() { return "Remote Outbound Data???"; }
175 |             public string[] GetColumns() { return columns; }
176 |             public void Initialize()
177 |             {
178 |                 /* from preliminary PPR */
179 |                 ulong mysteryOutboundBytes3 = 0x800400247;
180 |                 ulong mysteryOutboundBytes2 = 0x800400247; // yes the same event is mentioned twice
181 |                 ulong mysteryOutboundBytes1 = 0x800400207;
182 |                 ulong mysteryOutboundBytes0 = 0x7004002C7;
183 |
184 |                 ThreadAffinity.Set(1UL << monitoringThread);
185 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_0, mysteryOutboundBytes0);
186 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_1, mysteryOutboundBytes1);
187 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_2, mysteryOutboundBytes2);
188 |                 Ring0.WriteMsr(MSR_DF_PERF_CTL_3, mysteryOutboundBytes3);
189 |
190 |                 lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
191 |             }
192 |
193 |             public MonitoringUpdateResults Update()
194 |             {
195 |                 float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime);
196 |                 MonitoringUpdateResults results = new MonitoringUpdateResults();
197 |                 results.unitMetrics = new string[4][];
198 |                 ThreadAffinity.Set(1UL << monitoringThread);
199 |                 ulong mysteryOutboundBytes0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0) * 32;
200 |                 ulong mysteryOutboundBytes1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1) * 32;
201 |                 ulong mysteryOutboundBytes2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2) * 32;
202 |                 ulong mysteryOutboundBytes3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3) * 32;
203 |
204 |                 results.unitMetrics[0] = new string[] { "DF Evt 0x7C7 Umask 0x2", FormatLargeNumber(mysteryOutboundBytes0 * normalizationFactor) + "B/s" };
205 |                 results.unitMetrics[1] = new string[] { "DF Evt 0x807 Umask 0x2", FormatLargeNumber(mysteryOutboundBytes1 * normalizationFactor) + "B/s" };
206 |                 results.unitMetrics[2] = new string[] { "DF Evt 0x847 Umask 0x2", FormatLargeNumber(mysteryOutboundBytes2 * normalizationFactor) + "B/s" };
207 |                 results.unitMetrics[3] = new string[] { "DF Evt 0x847 Umask 0x2", FormatLargeNumber(mysteryOutboundBytes3 * normalizationFactor) + "B/s" };
208 |
209 |                 results.overallMetrics = new string[] { "Overall",
210 |                     FormatLargeNumber((mysteryOutboundBytes0 + mysteryOutboundBytes1 + mysteryOutboundBytes2 + mysteryOutboundBytes3) * normalizationFactor) + "B/s" };
211 |                 return results;
212 |             }
213 |         }
214 |     }
215 | }
216 |
--------------------------------------------------------------------------------
/AMD/Zen3L3Cache.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using PmcReader.Interop;
5 |
6 | namespace PmcReader.AMD
7 | {
8 | public class Zen3L3Cache : Amd17hCpu
9 | {
10 |         // ccx -> thread id mapping. Just need one thread per ccx - we'll always sample using that thread
11 |         protected Dictionary<int, int> ccxSampleThreads;
12 |         // ccx -> list of thread ids mapping
13 |         protected Dictionary<int, List<int>> allCcxThreads;
14 |         public L3CounterData[] ccxCounterData; // normalized counter values, indexed by ccx id
15 |         public L3CounterData ccxTotals; // sum of the normalized values over all sampled ccxs
16 |
17 |         public Zen3L3Cache()
18 |         {
19 |             architectureName = "Zen 3 L3";
20 |             ccxSampleThreads = new Dictionary<int, int>();
21 |             allCcxThreads = new Dictionary<int, List<int>>();
22 |             for (int threadIdx = 0; threadIdx < GetThreadCount(); threadIdx++)
23 |             {
24 |                 int ccxIdx = Get19hCcxId(threadIdx);
25 |                 ccxSampleThreads[ccxIdx] = threadIdx; // overwritten each time, so the last thread seen in a ccx becomes its sample thread
26 |                 List<int> ccxThreads;
27 |                 if (!allCcxThreads.TryGetValue(ccxIdx, out ccxThreads))
28 |                 {
29 |                     ccxThreads = new List<int>();
30 |                     allCcxThreads.Add(ccxIdx, ccxThreads);
31 |                 }
32 |
33 |                 ccxThreads.Add(threadIdx);
34 |             }
35 |
36 |             monitoringConfigs = new MonitoringConfig[1];
37 |             monitoringConfigs[0] = new HitRateLatencyConfig(this);
38 |             // NOTE(review): sized by the number of ccxs but indexed by ccx id; assumes ids are contiguous from 0 - confirm
39 |             ccxCounterData = new L3CounterData[ccxSampleThreads.Count];
40 |             ccxTotals = new L3CounterData();
41 |         }
42 |
43 | public class L3CounterData // Normalized values of the six L3 perf counters, for one ccx or summed over ccxs
44 | {
45 | public float ctr0; // MSR_L3_PERF_CTR_0 count multiplied by the normalization factor
46 | public float ctr1; // same for MSR_L3_PERF_CTR_1
47 | public float ctr2; // same for MSR_L3_PERF_CTR_2
48 | public float ctr3; // same for MSR_L3_PERF_CTR_3
49 | public float ctr4; // same for MSR_L3_PERF_CTR_4
50 | public float ctr5; // same for MSR_L3_PERF_CTR_5
51 | }
52 |
53 |         public void ClearTotals()
54 |         {
55 |             // Reset the summed counters in place (the ccxTotals object itself is
56 |             // kept), so the following UpdateCcxL3CounterData calls accumulate a
57 |             // fresh set of totals.
58 |             L3CounterData totals = ccxTotals;
59 |             totals.ctr0 = totals.ctr1 = totals.ctr2 = 0;
60 |             totals.ctr3 = totals.ctr4 = totals.ctr5 = 0;
61 |         }
62 |
63 |         public void UpdateCcxL3CounterData(int ccxIdx, int threadIdx)
64 |         {
65 |             ThreadAffinity.Set(1UL << threadIdx);
66 |             float scale = GetNormalizationFactor(threadIdx);
67 |             ulong raw0 = ReadAndClearMsr(MSR_L3_PERF_CTR_0);
68 |             ulong raw1 = ReadAndClearMsr(MSR_L3_PERF_CTR_1);
69 |             ulong raw2 = ReadAndClearMsr(MSR_L3_PERF_CTR_2);
70 |             ulong raw3 = ReadAndClearMsr(MSR_L3_PERF_CTR_3);
71 |             ulong raw4 = ReadAndClearMsr(MSR_L3_PERF_CTR_4);
72 |             ulong raw5 = ReadAndClearMsr(MSR_L3_PERF_CTR_5);
73 |             // Store the scaled counts for this ccx (creating its slot on first use), then add them to the totals
74 |             L3CounterData sample = ccxCounterData[ccxIdx] ?? (ccxCounterData[ccxIdx] = new L3CounterData());
75 |             sample.ctr0 = raw0 * scale;
76 |             sample.ctr1 = raw1 * scale;
77 |             sample.ctr2 = raw2 * scale;
78 |             sample.ctr3 = raw3 * scale;
79 |             sample.ctr4 = raw4 * scale;
80 |             sample.ctr5 = raw5 * scale;
81 |             ccxTotals.ctr0 += sample.ctr0;
82 |             ccxTotals.ctr1 += sample.ctr1;
83 |             ccxTotals.ctr2 += sample.ctr2;
84 |             ccxTotals.ctr3 += sample.ctr3;
85 |             ccxTotals.ctr4 += sample.ctr4;
86 |             ccxTotals.ctr5 += sample.ctr5;
87 |         }
88 |
89 |         public Tuple<string, float>[] GetOverallL3CounterValues(ulong aperf, ulong mperf, ulong irperfcount, ulong tsc,
90 |             string ctr0, string ctr1, string ctr2, string ctr3, string ctr4, string ctr5) // ctrN are the labels for the six L3 counter totals
91 |         {
92 |             return new Tuple<string, float>[] {
93 |                 new Tuple<string, float>("APERF", aperf),
94 |                 new Tuple<string, float>("MPERF", mperf),
95 |                 new Tuple<string, float>("TSC", tsc), // TSC is emitted before IRPerfCount although the parameters arrive in the other order
96 |                 new Tuple<string, float>("IRPerfCount", irperfcount),
97 |                 new Tuple<string, float>(ctr0, ccxTotals.ctr0),
98 |                 new Tuple<string, float>(ctr1, ccxTotals.ctr1),
99 |                 new Tuple<string, float>(ctr2, ccxTotals.ctr2),
100 |                 new Tuple<string, float>(ctr3, ccxTotals.ctr3),
101 |                 new Tuple<string, float>(ctr4, ccxTotals.ctr4),
102 |                 new Tuple<string, float>(ctr5, ccxTotals.ctr5)
103 |             };
104 |         }
105 |
// Monitoring configuration that programs L3 counters 0-4 for access, miss latency,
// SDP request and miss events, and reports hitrate / latency metrics per CCX and overall.
public class HitRateLatencyConfig : MonitoringConfig
{
    private Zen3L3Cache l3Cache;

    public HitRateLatencyConfig(Zen3L3Cache l3Cache)
    {
        this.l3Cache = l3Cache;
    }

    public string GetConfigName() { return "Hitrate and Miss Latency"; }
    public string[] GetColumns() { return columns; }

    // Writes the event selections to the L3 perf control MSRs on each CCX's sampling thread.
    public void Initialize()
    {
        ulong L3AccessPerfCtl = Get19hL3PerfCtlValue(0x4, 0xFF, true, 0, true, true, 0, 0b11);
        ulong L3MissLatencyCtl = Get19hL3PerfCtlValue(0x90, 0, true, 0, true, true, 0, 0);
        ulong L3MissSdpRequestPerfCtl = Get19hL3PerfCtlValue(0x9A, 0xFF, true, 0, true, true, 0, 0);
        ulong L3MissesForLatencyCalculation = 0x0300C00000401F9a;
        ulong L3Miss = 0x0300C00000400104;

        foreach (KeyValuePair<int, int> ccxThread in l3Cache.ccxSampleThreads)
        {
            ThreadAffinity.Set(1UL << ccxThread.Value);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_0, L3AccessPerfCtl);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_1, L3MissLatencyCtl);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_2, L3MissSdpRequestPerfCtl);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_3, L3MissesForLatencyCalculation);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_4, L3Miss);
        }
    }

    // Samples the fixed counters of every thread and the L3 counters of each CCX's sampling
    // thread, then returns per-CCX rows, an overall row and the raw overall counter values.
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.unitMetrics = new string[l3Cache.ccxSampleThreads.Count()][];
        float[] ccxClocks = new float[l3Cache.allCcxThreads.Count()];
        l3Cache.ClearTotals();
        ulong totalAperf = 0, totalMperf = 0, totalTsc = 0, totalIrPerfCount = 0;
        foreach (KeyValuePair<int, int> ccxThread in l3Cache.ccxSampleThreads)
        {
            // Try to determine frequency, by getting max frequency of cores in ccx
            foreach (int ccxThreadIdx in l3Cache.allCcxThreads[ccxThread.Key])
            {
                ThreadAffinity.Set(1UL << ccxThreadIdx);
                // Fixed counters use normalization slots offset by the thread count,
                // separate from the slots UpdateCcxL3CounterData uses for L3 counters
                float normalizationFactor = l3Cache.GetNormalizationFactor(l3Cache.GetThreadCount() + ccxThreadIdx);
                ulong aperf, mperf, tsc, irperfcount;
                l3Cache.ReadFixedCounters(ccxThreadIdx, out aperf, out irperfcount, out tsc, out mperf);
                totalAperf += aperf;
                totalIrPerfCount += irperfcount;
                totalTsc += tsc;
                totalMperf += mperf;
                float clk = tsc * ((float)aperf / mperf) * normalizationFactor;
                if (clk > ccxClocks[ccxThread.Key]) ccxClocks[ccxThread.Key] = clk;
                if (ccxThreadIdx == ccxThread.Value)
                {
                    l3Cache.UpdateCcxL3CounterData(ccxThread.Key, ccxThread.Value);
                    results.unitMetrics[ccxThread.Key] = computeMetrics("CCX " + ccxThread.Key, l3Cache.ccxCounterData[ccxThread.Key], ccxClocks[ccxThread.Key]);
                }
            }
        }

        // Overall clock is the mean of the per-CCX maximum clocks
        float avgClk = 0;
        foreach (float ccxClock in ccxClocks) avgClk += ccxClock;
        avgClk /= l3Cache.allCcxThreads.Count();
        results.overallMetrics = computeMetrics("Overall", l3Cache.ccxTotals, avgClk);
        results.overallCounterValues = l3Cache.GetOverallL3CounterValues(totalAperf, totalMperf, totalIrPerfCount, totalTsc,
            "L3Access", "L3MissLat/16", "L3MissSdpReq", "L3MissesForLatencyCalculation", "L3Miss", "Unused");
        return results;
    }

    public string[] columns = new string[] { "Item", "Clk", "Hitrate", "Hit BW", "Mem Latency", "Mem Latency?", "Pend. Miss/C", "SDP Requests", "SDP Requests * 64B" };

    public string GetHelpText() { return ""; }

    // Formats one row of metrics. Counter slots follow Initialize(): ctr0 = L3 access,
    // ctr1 = miss latency / 16, ctr2 = SDP requests, ctr3 = misses for latency calculation,
    // ctr4 = L3 miss. Zero denominators produce NaN/Infinity in the formatted output.
    private string[] computeMetrics(string label, L3CounterData counterData, float clk)
    {
        // event 0x90 counts "total cycles for all transactions divided by 16"
        float ccxL3MissLatency = (float)counterData.ctr1 * 16 / counterData.ctr3;
        float ccxL3Hitrate = (1 - (float)counterData.ctr4 / counterData.ctr0) * 100;
        float ccxL3HitBw = ((float)counterData.ctr0 - counterData.ctr4) * 64;
        return new string[] { label,
            FormatLargeNumber(clk),
            string.Format("{0:F2}%", ccxL3Hitrate),
            FormatLargeNumber(ccxL3HitBw) + "B/s",
            string.Format("{0:F1} clks", ccxL3MissLatency),
            string.Format("{0:F1} ns", (1000000000 / clk) * ccxL3MissLatency),
            string.Format("{0:F2}", counterData.ctr1 * 16 / clk),
            FormatLargeNumber(counterData.ctr2),
            FormatLargeNumber(counterData.ctr2 * 64) + "B/s"};
    }
}
196 | }
197 | }
198 |
--------------------------------------------------------------------------------
/AMD/Zen4DataFabric.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using PmcReader.Interop;
4 |
5 | namespace PmcReader.AMD
6 | {
7 | public class Zen4DataFabric : Amd19hCpu
8 | {
// Data fabric variants selectable when constructing Zen4DataFabric.
public enum DfType
{
    Client = 0,
}
13 |
// Sets the architecture name and registers the monitoring configs. The client bandwidth
// config is only added for DfType.Client; the CS and UMC configs are always added.
public Zen4DataFabric(DfType dfType)
{
    architectureName = "Zen 4 Data Fabric";
    List<MonitoringConfig> monitoringConfigList = new List<MonitoringConfig>();
    if (dfType == DfType.Client)
    {
        monitoringConfigList.Add(new ClientBwConfig(this));
    }

    monitoringConfigList.Add(new CSConfig(this));
    monitoringConfigList.Add(new UMCConfig(this));
    monitoringConfigs = monitoringConfigList.ToArray();
}
23 |
// Experimental DRAM bandwidth config: programs four data fabric counters with DRAM
// read/write events and reports each count, the count * 64 bytes, and package power.
public class ClientBwConfig : MonitoringConfig
{
    private Zen4DataFabric dataFabric;
    private long lastUpdateTime;
    private const int monitoringThread = 1;

    public string[] columns = new string[] { "Item", "Count * 64B", "Count", "Pkg Pwr" };
    public string GetHelpText() { return ""; }
    public ClientBwConfig(Zen4DataFabric dataFabric)
    {
        this.dataFabric = dataFabric;
        // does not work
        // dataFabric.GetUmcPerfmonInfo(out uint umcCount, out uint umcPerfcounterCount);
        // Console.WriteLine("Have {0} UMCs, {1} perf counters", umcCount, umcPerfcounterCount);
    }

    public string GetConfigName() { return "DRAM Bandwidth??"; }
    public string[] GetColumns() { return columns; }

    // Programs DF perf control MSRs 0-3 from the monitoring thread and resets the
    // bookkeeping used for normalization.
    public void Initialize()
    {
        ThreadAffinity.Set(1UL << monitoringThread);
        ulong evt0 = GetDramPerfEvent(true, 0);
        ulong evt1 = GetDramPerfEvent(true, 0) + 0x20;
        ulong evt2 = GetDramPerfEvent(false, 11);
        ulong evt3 = GetDramPerfEvent(false, 0);
        Ring0.WriteMsr(MSR_DF_PERF_CTL_0, evt0); // ch0 read?
        Ring0.WriteMsr(MSR_DF_PERF_CTL_1, evt1); // ch0 write?
        Ring0.WriteMsr(MSR_DF_PERF_CTL_2, evt2);// ch1 read?
        Ring0.WriteMsr(MSR_DF_PERF_CTL_3, evt3); // ch1 write?

        dataFabric.InitializeCoreTotals();
        lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
    }

    // Builds a DF perf control value for a DRAM event.
    //   read:  true ORs 0xE00 into the base value, false ORs 0xF00
    //   index: selector; (index * 4 + 1) is split into a low nibble placed at bits 7:4
    //          and a high nibble placed at bits 35:32
    private ulong GetDramPerfEvent(bool read, uint index)
    {
        ulong dramEventBase = 0x740F00F;
        dramEventBase |= read ? 0xE00UL : 0xF00UL;

        // Widen to 64 bits before shifting. Shifting the high nibble by 28 in uint
        // arithmetic discards it, so any index >= 4 lost its upper selector bits.
        ulong selector = (ulong)index * 4 + 1;
        dramEventBase |= (selector & 0xF) << 4;
        dramEventBase |= (selector & 0xF0) << 28;
        return dramEventBase;
    }

    // Reads and clears the four DF counters and returns per-counter rows, a total row
    // with package power, and the raw counter values.
    public MonitoringUpdateResults Update()
    {
        float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime);
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        ThreadAffinity.Set(1UL << monitoringThread);
        ulong ctr0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0);
        ulong ctr1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1);
        ulong ctr2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2);
        ulong ctr3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3);

        dataFabric.ReadPackagePowerCounter();
        results.unitMetrics = new string[4][];
        results.unitMetrics[0] = new string[] { "DRAM Read?", FormatLargeNumber(ctr0 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr0 * normalizationFactor), "N/A" };
        results.unitMetrics[1] = new string[] { "Write 0?", FormatLargeNumber(ctr1 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" };
        // NOTE(review): this label predates the selector fix in GetDramPerfEvent, which changes
        // the event programmed for index 11 - confirm the label still describes the counter
        results.unitMetrics[2] = new string[] { "iGPU Related?", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" };
        results.unitMetrics[3] = new string[] { "Write 2?", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" };

        ulong total = ctr0 + ctr1 + ctr2 + ctr3;
        results.overallMetrics = new string[] { "Total",
            FormatLargeNumber(total * normalizationFactor * 64) + "B/s",
            FormatLargeNumber(total * normalizationFactor),
            string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts)
        };

        results.overallCounterValues = new Tuple<string, float>[5];
        results.overallCounterValues[0] = new Tuple<string, float>("Package Power", dataFabric.NormalizedTotalCounts.watts);
        results.overallCounterValues[1] = new Tuple<string, float>("Ch 0 Read?", ctr0);
        results.overallCounterValues[2] = new Tuple<string, float>("Ch 0 Write?", ctr1);
        results.overallCounterValues[3] = new Tuple<string, float>("Ch 1 Read?", ctr2);
        results.overallCounterValues[4] = new Tuple<string, float>("Ch 1 Write?", ctr3);
        return results;
    }
}
103 |
// Experimental config that programs four data fabric counters with CS0/CS1 read and
// write events and reports each count, the count * 64 bytes, and package power.
public class CSConfig : MonitoringConfig
{
    private Zen4DataFabric dataFabric;
    private long lastUpdateTime;
    private const int monitoringThread = 1;

    public string[] columns = new string[] { "Item", "Count * 64B", "Count", "Pkg Pwr" };
    public string GetHelpText() { return ""; }
    public CSConfig(Zen4DataFabric dataFabric)
    {
        this.dataFabric = dataFabric;
    }

    public string GetConfigName() { return "Coherent Station?"; }
    public string[] GetColumns() { return columns; }

    // Programs DF perf control MSRs 0-3 from the monitoring thread and resets the
    // bookkeeping used for normalization.
    public void Initialize()
    {
        ThreadAffinity.Set(1UL << monitoringThread);
        ulong evt0 = GetDFPerfCtlValue(0x1f, 0, 0xfe, 0x7, true); // cs0 read
        ulong evt1 = GetDFPerfCtlValue(0x5f, 0, 0xfe, 0x7, true); // cs1 read
        ulong evt2 = GetDFPerfCtlValue(0x1f, 0, 0xff, 0x7, true); // cs0 write
        ulong evt3 = GetDFPerfCtlValue(0x5f, 0, 0xff, 0x7, true); // cs1 write
        Ring0.WriteMsr(MSR_DF_PERF_CTL_0, evt0);
        Ring0.WriteMsr(MSR_DF_PERF_CTL_1, evt1);
        Ring0.WriteMsr(MSR_DF_PERF_CTL_2, evt2);
        Ring0.WriteMsr(MSR_DF_PERF_CTL_3, evt3);

        dataFabric.InitializeCoreTotals();
        lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
    }

    // Reads and clears the four DF counters and returns per-counter rows, a total row
    // with package power, and the raw counter values.
    public MonitoringUpdateResults Update()
    {
        float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime);
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        ThreadAffinity.Set(1UL << monitoringThread);
        ulong ctr0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0);
        ulong ctr1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1);
        ulong ctr2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2);
        ulong ctr3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3);

        dataFabric.ReadPackagePowerCounter();
        results.unitMetrics = new string[4][];
        results.unitMetrics[0] = new string[] { "CS0 Read", FormatLargeNumber(ctr0 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr0 * normalizationFactor), "N/A" };
        results.unitMetrics[1] = new string[] { "CS1 Read", FormatLargeNumber(ctr1 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" };
        results.unitMetrics[2] = new string[] { "CS0 Write", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" };
        results.unitMetrics[3] = new string[] { "CS1 Write", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" };

        ulong total = ctr0 + ctr1 + ctr2 + ctr3;
        results.overallMetrics = new string[] { "Total",
            FormatLargeNumber(total * normalizationFactor * 64) + "B/s",
            FormatLargeNumber(total * normalizationFactor),
            string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts)
        };

        // Counter names follow the events programmed in Initialize() and the row labels above
        results.overallCounterValues = new Tuple<string, float>[5];
        results.overallCounterValues[0] = new Tuple<string, float>("Package Power", dataFabric.NormalizedTotalCounts.watts);
        results.overallCounterValues[1] = new Tuple<string, float>("CS0 Read", ctr0);
        results.overallCounterValues[2] = new Tuple<string, float>("CS1 Read", ctr1);
        results.overallCounterValues[3] = new Tuple<string, float>("CS0 Write", ctr2);
        results.overallCounterValues[4] = new Tuple<string, float>("CS1 Write", ctr3);
        return results;
    }
}
168 |
// Experimental config that enables the UMC counters through HWCR, programs four of them
// with clk / cas / activate / precharge events, and reports the counts with package power.
public class UMCConfig : MonitoringConfig
{
    private Zen4DataFabric dataFabric;
    private long lastUpdateTime;
    private const int monitoringThread = 1;

    public string[] columns = new string[] { "Item", "Count * 64B", "Count", "Pkg Pwr" };
    public string GetHelpText() { return ""; }
    public UMCConfig(Zen4DataFabric dataFabric)
    {
        this.dataFabric = dataFabric;
    }

    public string GetConfigName() { return "UMC?"; }
    public string[] GetColumns() { return columns; }

    // Sets HWCR bits 30 and 31, programs the first four UMC perf control MSRs from the
    // monitoring thread, and resets the bookkeeping used for normalization.
    public void Initialize()
    {
        ThreadAffinity.Set(1UL << monitoringThread);

        ulong hwcrValue;
        Ring0.ReadMsr(HWCR, out hwcrValue);
        hwcrValue |= 1UL << 30; // instructions retired counter
        hwcrValue |= 1UL << 31; // enable UMC counters
        Ring0.WriteMsr(HWCR, hwcrValue);
        Ring0.ReadMsr(HWCR, out hwcrValue);

        ulong evt0 = GetUmcPerfCtlValue(0, false, false); // clk
        ulong evt1 = GetUmcPerfCtlValue(0xa, false, false); // cas
        ulong evt2 = GetUmcPerfCtlValue(0x5, false, false); // activate
        ulong evt3 = GetUmcPerfCtlValue(0x6, false, false); // precharge
        Ring0.WriteMsr(MSR_UMC_PERF_CTL_base, evt0);
        Ring0.WriteMsr(MSR_UMC_PERF_CTL_base + MSR_UMC_PERF_increment, evt1);
        Ring0.WriteMsr(MSR_UMC_PERF_CTL_base + MSR_UMC_PERF_increment * 2, evt2);
        Ring0.WriteMsr(MSR_UMC_PERF_CTL_base + MSR_UMC_PERF_increment * 3, evt3);

        dataFabric.InitializeCoreTotals();
        lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
    }

    // Reads and clears the four UMC counters and returns per-counter rows, a total row
    // with package power, and the raw counter values.
    public MonitoringUpdateResults Update()
    {
        float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime);
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        ThreadAffinity.Set(1UL << monitoringThread);
        ulong ctr0 = ReadAndClearMsr(MSR_UMC_PERF_CTR_base);
        ulong ctr1 = ReadAndClearMsr(MSR_UMC_PERF_CTR_base + MSR_UMC_PERF_increment);
        ulong ctr2 = ReadAndClearMsr(MSR_UMC_PERF_CTR_base + MSR_UMC_PERF_increment * 2);
        ulong ctr3 = ReadAndClearMsr(MSR_UMC_PERF_CTR_base + MSR_UMC_PERF_increment * 3);

        dataFabric.ReadPackagePowerCounter();
        // Row labels follow the events programmed in Initialize(): clk, cas, activate, precharge
        results.unitMetrics = new string[4][];
        results.unitMetrics[0] = new string[] { "Clk?", FormatLargeNumber(ctr0 * normalizationFactor) + "Hz", "N/A", "N/A" };
        results.unitMetrics[1] = new string[] { "CAS", FormatLargeNumber(ctr1 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" };
        results.unitMetrics[2] = new string[] { "Activate", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" };
        results.unitMetrics[3] = new string[] { "Precharge", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" };

        // NOTE(review): the total sums clock ticks with cas/activate/precharge counts;
        // confirm whether the total row should only reflect CAS traffic
        ulong total = ctr0 + ctr1 + ctr2 + ctr3;
        results.overallMetrics = new string[] { "Total",
            FormatLargeNumber(total * normalizationFactor * 64) + "B/s",
            FormatLargeNumber(total * normalizationFactor),
            string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts)
        };

        results.overallCounterValues = new Tuple<string, float>[5];
        results.overallCounterValues[0] = new Tuple<string, float>("Package Power", dataFabric.NormalizedTotalCounts.watts);
        results.overallCounterValues[1] = new Tuple<string, float>("UMC Clk", ctr0);
        results.overallCounterValues[2] = new Tuple<string, float>("CAS", ctr1);
        results.overallCounterValues[3] = new Tuple<string, float>("Activate", ctr2);
        results.overallCounterValues[4] = new Tuple<string, float>("Precharge", ctr3);
        return results;
    }
}
241 | }
242 | }
243 |
--------------------------------------------------------------------------------
/AMD/Zen5L3Cache.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using PmcReader.Interop;
5 |
6 | namespace PmcReader.AMD
7 | {
8 | public class Zen5L3Cache : Amd19hCpu
9 | {
// ccx -> thread id mapping. Just need one thread per ccx - we'll always sample using that thread
protected Dictionary<int, int> ccxSampleThreads;
// ccx -> ids of all threads in that ccx
protected Dictionary<int, List<int>> allCcxThreads;
// latest normalized L3 counter sample per ccx, indexed by ccx id
public L3CounterData[] ccxCounterData;
// sum of the per-ccx samples taken since the last ClearTotals()
public L3CounterData ccxTotals;

// Sets up a fixed two-CCX layout (threads 0-11 and 12-23, sampled via threads 0 and 12)
// and registers the hitrate/latency monitoring config.
public Zen5L3Cache()
{
    architectureName = "Zen 5 L3";
    ccxSampleThreads = new Dictionary<int, int>();
    allCcxThreads = new Dictionary<int, List<int>>();
    ccxSampleThreads[0] = 0;
    ccxSampleThreads[1] = 12;
    allCcxThreads[0] = new List<int>() { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
    allCcxThreads[1] = new List<int>() { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 };

    monitoringConfigs = new MonitoringConfig[1];
    monitoringConfigs[0] = new HitRateLatencyConfig(this);

    ccxCounterData = new L3CounterData[ccxSampleThreads.Count()];
    ccxTotals = new L3CounterData();
}
32 |
// Holds one set of values for the six L3 performance counters.
public class L3CounterData
{
    public float ctr0, ctr1, ctr2, ctr3, ctr4, ctr5;
}
42 |
// Zeroes all six accumulated counters in ccxTotals so a new round of
// per-CCX samples can be summed from scratch.
public void ClearTotals()
{
    ccxTotals.ctr0 = ccxTotals.ctr1 = ccxTotals.ctr2 = 0;
    ccxTotals.ctr3 = ccxTotals.ctr4 = ccxTotals.ctr5 = 0;
}
52 |
// Pins the caller to threadIdx, reads and clears the six L3 counters there, stores the
// normalized values as the current sample for ccxIdx and adds them to ccxTotals.
public void UpdateCcxL3CounterData(int ccxIdx, int threadIdx)
{
    ThreadAffinity.Set(1UL << threadIdx);
    float scale = GetNormalizationFactor(threadIdx);

    // Raw counts, read in counter order
    ulong raw0 = ReadAndClearMsr(MSR_L3_PERF_CTR_0);
    ulong raw1 = ReadAndClearMsr(MSR_L3_PERF_CTR_1);
    ulong raw2 = ReadAndClearMsr(MSR_L3_PERF_CTR_2);
    ulong raw3 = ReadAndClearMsr(MSR_L3_PERF_CTR_3);
    ulong raw4 = ReadAndClearMsr(MSR_L3_PERF_CTR_4);
    ulong raw5 = ReadAndClearMsr(MSR_L3_PERF_CTR_5);

    // Lazily create the per-CCX sample slot on first use
    L3CounterData sample = ccxCounterData[ccxIdx];
    if (sample == null)
    {
        sample = new L3CounterData();
        ccxCounterData[ccxIdx] = sample;
    }

    sample.ctr0 = raw0 * scale;
    sample.ctr1 = raw1 * scale;
    sample.ctr2 = raw2 * scale;
    sample.ctr3 = raw3 * scale;
    sample.ctr4 = raw4 * scale;
    sample.ctr5 = raw5 * scale;

    ccxTotals.ctr0 += sample.ctr0;
    ccxTotals.ctr1 += sample.ctr1;
    ccxTotals.ctr2 += sample.ctr2;
    ccxTotals.ctr3 += sample.ctr3;
    ccxTotals.ctr4 += sample.ctr4;
    ccxTotals.ctr5 += sample.ctr5;
}
78 |
// Builds the name/value list of overall counters: the four fixed counter totals passed in,
// followed by the six accumulated L3 counters from ccxTotals labeled with the given names.
// Values are stored as float, so the ulong inputs are converted implicitly.
public Tuple<string, float>[] GetOverallL3CounterValues(ulong aperf, ulong mperf, ulong irperfcount, ulong tsc,
    string ctr0, string ctr1, string ctr2, string ctr3, string ctr4, string ctr5)
{
    return new Tuple<string, float>[]
    {
        new Tuple<string, float>("APERF", aperf),
        new Tuple<string, float>("MPERF", mperf),
        new Tuple<string, float>("TSC", tsc),
        new Tuple<string, float>("IRPerfCount", irperfcount),
        new Tuple<string, float>(ctr0, ccxTotals.ctr0),
        new Tuple<string, float>(ctr1, ccxTotals.ctr1),
        new Tuple<string, float>(ctr2, ccxTotals.ctr2),
        new Tuple<string, float>(ctr3, ccxTotals.ctr3),
        new Tuple<string, float>(ctr4, ccxTotals.ctr4),
        new Tuple<string, float>(ctr5, ccxTotals.ctr5),
    };
}
95 |
// Monitoring configuration that programs all six L3 counters (access, miss, and sampled
// latency / request counts for other-CCX and DRAM fills) and reports hitrate, bandwidth
// and sampled latency per CCX and overall.
public class HitRateLatencyConfig : MonitoringConfig
{
    private Zen5L3Cache l3Cache;

    public HitRateLatencyConfig(Zen5L3Cache l3Cache)
    {
        this.l3Cache = l3Cache;
    }

    public string GetConfigName() { return "Hitrate and Latency"; }
    public string[] GetColumns() { return columns; }

    // Programs the L3 counters on the sampling thread of every CCX.
    public void Initialize()
    {
        foreach (KeyValuePair<int, int> ccxThread in l3Cache.ccxSampleThreads)
        {
            ThreadAffinity.Set(1UL << ccxThread.Value);
            InitializeThread();
        }
    }

    // Writes the six event selections to the L3 perf control MSRs of the thread the
    // caller is currently pinned to.
    private void InitializeThread()
    {
        // L3 tag lookup state, all coherent accesses to L3
        ulong L3AccessPerfCtl = Get1AhL3PerfCtlValue(0x4, 0xFF, true, 0, true, true, 0, threadMask: 3);
        ulong L3MissPerfCtl = Get1AhL3PerfCtlValue(0x4, 1, true, 0, true, true, 0, threadMask: 3);

        // bit 2,3 of unit mask = near,far ccx's cache
        ulong L3MissLatencyOtherCacheReqs = Get19hL3PerfCtlValue(0xAD, 0b1100, true, 0, true, enableAllSlices: true, sliceId: 0x3, 0b11);
        ulong L3MissLatencyOtherCache = Get19hL3PerfCtlValue(0xAC, 0b1100, true, 0, true, enableAllSlices: true, sliceId: 0x3, 0b11);

        // bits 0,1 of unit mask = near,far dram
        ulong L3MissLatencyDramReqs = Get19hL3PerfCtlValue(0xAD, 0b11, true, 0, true, enableAllSlices: true, sliceId: 0x3, 0b11);
        ulong L3MissLatencyDram = Get19hL3PerfCtlValue(0xAC, 0b11, true, 0, true, enableAllSlices: true, sliceId: 0x3, 0b11);

        Ring0.WriteMsr(MSR_L3_PERF_CTL_0, L3AccessPerfCtl);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_1, L3MissPerfCtl);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_2, L3MissLatencyOtherCacheReqs);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_3, L3MissLatencyOtherCache);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_4, L3MissLatencyDramReqs);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_5, L3MissLatencyDram);
    }

    // Samples the fixed counters of every thread and the L3 counters of each CCX's sampling
    // thread, then returns per-CCX rows, an overall row and the raw counter values.
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.unitMetrics = new string[l3Cache.ccxSampleThreads.Count()][];
        float[] ccxClocks = new float[l3Cache.allCcxThreads.Count()];
        l3Cache.ClearTotals();
        ulong totalAperf = 0, totalMperf = 0, totalTsc = 0, totalIrPerfCount = 0;
        List<Tuple<string, float>> overallCounterValues = new List<Tuple<string, float>>();

        foreach (KeyValuePair<int, int> ccxThread in l3Cache.ccxSampleThreads)
        {
            int ccxIdx = ccxThread.Key;

            // Try to determine frequency, by getting max frequency of cores in ccx
            foreach (int ccxThreadIdx in l3Cache.allCcxThreads[ccxIdx])
            {
                ThreadAffinity.Set(1UL << ccxThreadIdx);
                // Fixed counters use normalization slots offset by the thread count,
                // separate from the slots UpdateCcxL3CounterData uses for L3 counters
                float normalizationFactor = l3Cache.GetNormalizationFactor(l3Cache.GetThreadCount() + ccxThreadIdx);
                ulong aperf, mperf, tsc, irperfcount;
                l3Cache.ReadFixedCounters(ccxThreadIdx, out aperf, out irperfcount, out tsc, out mperf);
                totalAperf += aperf;
                totalIrPerfCount += irperfcount;
                totalTsc += tsc;
                totalMperf += mperf;
                float clk = tsc * ((float)aperf / mperf) * normalizationFactor;
                if (clk > ccxClocks[ccxIdx]) ccxClocks[ccxIdx] = clk;
            }

            // Read the L3 counters only after every thread of the CCX has been visited, so the
            // clock passed to computeMetrics is the CCX-wide maximum. The sampling thread is the
            // first thread of each CCX, so computing metrics inside the loop saw only its clock.
            // UpdateCcxL3CounterData pins to the sampling thread itself.
            l3Cache.UpdateCcxL3CounterData(ccxIdx, ccxThread.Value);
            InitializeThread(); // somehow these get cleared every once in a while?
            L3CounterData ccxData = l3Cache.ccxCounterData[ccxIdx];
            results.unitMetrics[ccxIdx] = computeMetrics("CCX " + ccxIdx, ccxData, ccxClocks[ccxIdx]);
            overallCounterValues.Add(new Tuple<string, float>("CCX" + ccxIdx + " L3 Access", ccxData.ctr0));
            overallCounterValues.Add(new Tuple<string, float>("CCX" + ccxIdx + " L3 Miss", ccxData.ctr1));
            overallCounterValues.Add(new Tuple<string, float>("CCX" + ccxIdx + " Other CCX Sampled Reqs", ccxData.ctr2));
            overallCounterValues.Add(new Tuple<string, float>("CCX" + ccxIdx + " Other CCX Sampled Latency", ccxData.ctr3));
            overallCounterValues.Add(new Tuple<string, float>("CCX" + ccxIdx + " DRAM Sampled Reqs", ccxData.ctr4));
            overallCounterValues.Add(new Tuple<string, float>("CCX" + ccxIdx + " DRAM Sampled Latency", ccxData.ctr5));
        }

        overallCounterValues.Add(new Tuple<string, float>("APERF", totalAperf));
        overallCounterValues.Add(new Tuple<string, float>("MPERF", totalMperf));
        overallCounterValues.Add(new Tuple<string, float>("REF_TSC", totalTsc));
        overallCounterValues.Add(new Tuple<string, float>("IrPerfCount", totalIrPerfCount));

        // Overall clock is the mean of the per-CCX maximum clocks
        float avgClk = 0;
        foreach (float ccxClock in ccxClocks) avgClk += ccxClock;
        avgClk /= l3Cache.allCcxThreads.Count();
        results.overallMetrics = computeMetrics("Overall", l3Cache.ccxTotals, avgClk);
        /*results.overallCounterValues = l3Cache.GetOverallL3CounterValues(totalAperf, totalMperf, totalIrPerfCount, totalTsc,
            "Coherent L3 Access", "L3 Miss", "Other CCX Reqs", "Other CCX Pending Reqs Per Cycle", "DRAM Reqs", "DRAM Pending Reqs Per Cycle");*/
        results.overallCounterValues = overallCounterValues.ToArray();
        return results;
    }

    public string[] columns = new string[] { "Item", "Clk", "Hitrate", "Hit BW", "Miss BW", "Latency, Other CCX", "Latency, DRAM" };

    public string GetHelpText() { return ""; }

    // Formats one row of metrics. Counter slots follow InitializeThread(): ctr0 = L3 access,
    // ctr1 = L3 miss, ctr2/ctr3 = other-CCX sampled requests/latency, ctr4/ctr5 = DRAM
    // sampled requests/latency. Zero denominators produce NaN/Infinity in the output.
    private string[] computeMetrics(string label, L3CounterData counterData, float clk)
    {
        // average sampled latency is XiSampledLatency / XiSampledLatencyRequests * 10 ns
        float ccxL3MissLatencyNs = (float)10 * counterData.ctr3 / counterData.ctr2;
        float dramL3MissLatencyNs = (float)10 * counterData.ctr5 / counterData.ctr4;
        float ccxL3Hitrate = (1 - (float)counterData.ctr1 / counterData.ctr0) * 100;
        float ccxL3HitBw = ((float)counterData.ctr0 - counterData.ctr1) * 64;
        return new string[] { label,
            FormatLargeNumber(clk),
            string.Format("{0:F2}%", ccxL3Hitrate),
            FormatLargeNumber(ccxL3HitBw) + "B/s",
            FormatLargeNumber(counterData.ctr1 * 64) + "B/s",
            string.Format("{0:F1} ns", ccxL3MissLatencyNs),
            string.Format("{0:F1} ns", dramL3MissLatencyNs)};
    }
}
214 | }
215 | }
216 |
--------------------------------------------------------------------------------
/AMD/ZenL3Cache.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using PmcReader.Interop;
5 |
6 | namespace PmcReader.AMD
7 | {
8 | public class ZenL3Cache : Amd17hCpu
9 | {
// ccx -> thread id mapping. Just need one thread per ccx - we'll always sample using that thread
protected Dictionary<int, int> ccxSampleThreads;
// ccx -> ids of all threads in that ccx
protected Dictionary<int, List<int>> allCcxThreads;
// latest normalized L3 counter sample per ccx, indexed by ccx id
public L3CounterData[] ccxCounterData;
// sum of the per-ccx samples taken since the last ClearTotals()
public L3CounterData ccxTotals;

// Groups every thread by the CCX id reported by GetCcxId and registers the
// hitrate/latency monitoring config.
public ZenL3Cache()
{
    architectureName = "Zen L3";
    ccxSampleThreads = new Dictionary<int, int>();
    allCcxThreads = new Dictionary<int, List<int>>();
    for (int threadIdx = 0; threadIdx < GetThreadCount(); threadIdx++)
    {
        int ccxIdx = GetCcxId(threadIdx);
        // Overwritten on every iteration, so the last thread seen for a CCX becomes its sampling thread
        ccxSampleThreads[ccxIdx] = threadIdx;
        List<int> ccxThreads;
        if (!allCcxThreads.TryGetValue(ccxIdx, out ccxThreads))
        {
            ccxThreads = new List<int>();
            allCcxThreads.Add(ccxIdx, ccxThreads);
        }

        ccxThreads.Add(threadIdx);
    }

    monitoringConfigs = new MonitoringConfig[1];
    monitoringConfigs[0] = new HitRateLatencyConfig(this);

    // NOTE(review): sized by CCX count but indexed by CCX id elsewhere - assumes ids are 0..count-1
    ccxCounterData = new L3CounterData[ccxSampleThreads.Count()];
    ccxTotals = new L3CounterData();
}
41 |
// Holds one set of values for the six L3 performance counters.
public class L3CounterData
{
    public float ctr0, ctr1, ctr2, ctr3, ctr4, ctr5;
}
51 |
// Zeroes all six accumulated counters in ccxTotals so a new round of
// per-CCX samples can be summed from scratch.
public void ClearTotals()
{
    ccxTotals.ctr0 = ccxTotals.ctr1 = ccxTotals.ctr2 = 0;
    ccxTotals.ctr3 = ccxTotals.ctr4 = ccxTotals.ctr5 = 0;
}
61 |
// Pins the caller to threadIdx, reads and clears the six L3 counters there, stores the
// normalized values as the current sample for ccxIdx and adds them to ccxTotals.
public void UpdateCcxL3CounterData(int ccxIdx, int threadIdx)
{
    ThreadAffinity.Set(1UL << threadIdx);
    float scale = GetNormalizationFactor(threadIdx);

    // Raw counts, read in counter order
    ulong raw0 = ReadAndClearMsr(MSR_L3_PERF_CTR_0);
    ulong raw1 = ReadAndClearMsr(MSR_L3_PERF_CTR_1);
    ulong raw2 = ReadAndClearMsr(MSR_L3_PERF_CTR_2);
    ulong raw3 = ReadAndClearMsr(MSR_L3_PERF_CTR_3);
    ulong raw4 = ReadAndClearMsr(MSR_L3_PERF_CTR_4);
    ulong raw5 = ReadAndClearMsr(MSR_L3_PERF_CTR_5);

    // Lazily create the per-CCX sample slot on first use
    L3CounterData sample = ccxCounterData[ccxIdx];
    if (sample == null)
    {
        sample = new L3CounterData();
        ccxCounterData[ccxIdx] = sample;
    }

    sample.ctr0 = raw0 * scale;
    sample.ctr1 = raw1 * scale;
    sample.ctr2 = raw2 * scale;
    sample.ctr3 = raw3 * scale;
    sample.ctr4 = raw4 * scale;
    sample.ctr5 = raw5 * scale;

    ccxTotals.ctr0 += sample.ctr0;
    ccxTotals.ctr1 += sample.ctr1;
    ccxTotals.ctr2 += sample.ctr2;
    ccxTotals.ctr3 += sample.ctr3;
    ccxTotals.ctr4 += sample.ctr4;
    ccxTotals.ctr5 += sample.ctr5;
}
87 |
// Builds the name/value list of overall counters: the four fixed counter totals passed in,
// followed by the six accumulated L3 counters from ccxTotals labeled with the given names.
// Values are stored as float, so the ulong inputs are converted implicitly.
public Tuple<string, float>[] GetOverallL3CounterValues(ulong aperf, ulong mperf, ulong irperfcount, ulong tsc,
    string ctr0, string ctr1, string ctr2, string ctr3, string ctr4, string ctr5)
{
    return new Tuple<string, float>[]
    {
        new Tuple<string, float>("APERF", aperf),
        new Tuple<string, float>("MPERF", mperf),
        new Tuple<string, float>("TSC", tsc),
        new Tuple<string, float>("IRPerfCount", irperfcount),
        new Tuple<string, float>(ctr0, ccxTotals.ctr0),
        new Tuple<string, float>(ctr1, ccxTotals.ctr1),
        new Tuple<string, float>(ctr2, ccxTotals.ctr2),
        new Tuple<string, float>(ctr3, ccxTotals.ctr3),
        new Tuple<string, float>(ctr4, ccxTotals.ctr4),
        new Tuple<string, float>(ctr5, ccxTotals.ctr5),
    };
}
104 |
/// <summary>
/// Monitoring configuration that reports L3 hitrate, hit bandwidth, miss latency
/// and SDP request counts, per CCX and aggregated over all CCXes.
/// </summary>
public class HitRateLatencyConfig : MonitoringConfig
{
    private ZenL3Cache l3Cache;

    public HitRateLatencyConfig(ZenL3Cache l3Cache)
    {
        this.l3Cache = l3Cache;
    }

    public string GetConfigName() { return "Hitrate and Miss Latency"; }
    public string[] GetColumns() { return columns; }

    /// <summary>
    /// Programs the six L3 performance counter control MSRs while pinned to each CCX's sample thread.
    /// </summary>
    public void Initialize()
    {
        ulong L3AccessPerfCtl = GetL3PerfCtlValue(0x01, 1 << 7, true, 0xF, 0xFF); // bit 7 = caching: L3 cache access
        ulong L3MissPerfCtl = GetL3PerfCtlValue(0x06, 0x01, true, 0xF, 0xFF); // bit 0 = requestmiss
        ulong L3MissLatencyCtl = GetL3PerfCtlValue(0x90, 0, true, 0xF, 0xFF);
        ulong L3MissSdpRequestPerfCtl = GetL3PerfCtlValue(0x9A, 0x1F, true, 0xF, 0xFF);
        ulong L3ReqLookupState = GetL3PerfCtlValue(0x04, 0xFF, true, 0xF, 0xFF);
        ulong L3ReqMiss = GetL3PerfCtlValue(0x04, 0x1, true, 0xF, 0xFF);

        // Key = CCX index, Value = thread index used to access that CCX's counters
        foreach (var ccxThread in l3Cache.ccxSampleThreads)
        {
            ThreadAffinity.Set(1UL << ccxThread.Value);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_0, L3AccessPerfCtl);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_1, L3MissPerfCtl);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_2, L3MissLatencyCtl);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_3, L3MissSdpRequestPerfCtl);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_4, L3ReqLookupState);
            Ring0.WriteMsr(MSR_L3_PERF_CTL_5, L3ReqMiss);
        }
    }

    /// <summary>
    /// Reads fixed counters on every thread of every CCX, refreshes the L3 counter data
    /// on each CCX's sample thread, and builds per-CCX and overall metrics.
    /// </summary>
    /// <returns>Per-CCX metrics, overall metrics and overall counter values for logging</returns>
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.unitMetrics = new string[l3Cache.ccxSampleThreads.Count()][];
        float[] ccxClocks = new float[l3Cache.allCcxThreads.Count()];
        l3Cache.ClearTotals();
        ulong totalAperf = 0, totalMperf = 0, totalTsc = 0, totalIrPerfCount = 0;
        foreach (var ccxThread in l3Cache.ccxSampleThreads)
        {
            bool sampleThreadVisited = false;

            // Try to determine frequency, by getting max frequency of cores in ccx
            foreach (int ccxThreadIdx in l3Cache.allCcxThreads[ccxThread.Key])
            {
                ThreadAffinity.Set(1UL << ccxThreadIdx);
                float normalizationFactor = l3Cache.GetNormalizationFactor(l3Cache.GetThreadCount() + ccxThreadIdx);
                ulong aperf, mperf, tsc, irperfcount;
                l3Cache.ReadFixedCounters(ccxThreadIdx, out aperf, out irperfcount, out tsc, out mperf);
                totalAperf += aperf;
                totalIrPerfCount += irperfcount;
                totalTsc += tsc;
                totalMperf += mperf;
                float clk = tsc * ((float)aperf / mperf) * normalizationFactor;
                if (clk > ccxClocks[ccxThread.Key]) ccxClocks[ccxThread.Key] = clk;
                if (ccxThreadIdx == ccxThread.Value)
                {
                    // Counter data is refreshed while still pinned to the sample thread,
                    // the same thread Initialize() programmed the counters from.
                    l3Cache.UpdateCcxL3CounterData(ccxThread.Key, ccxThread.Value);
                    sampleThreadVisited = true;
                }
            }

            // Metrics are built only after every thread in the CCX contributed to the max clock;
            // building them inside the loop used a clock that ignored threads iterated later.
            if (sampleThreadVisited)
            {
                results.unitMetrics[ccxThread.Key] = computeMetrics("CCX " + ccxThread.Key, l3Cache.ccxCounterData[ccxThread.Key], ccxClocks[ccxThread.Key]);
            }
        }

        float avgClk = 0;
        foreach (float ccxClock in ccxClocks) avgClk += ccxClock;
        avgClk /= l3Cache.allCcxThreads.Count();
        results.overallMetrics = computeMetrics("Overall", l3Cache.ccxTotals, avgClk);
        // NOTE(review): counters 4 and 5 are programmed in Initialize() but labelled "Unused" here - confirm intent
        results.overallCounterValues = l3Cache.GetOverallL3CounterValues(totalAperf, totalMperf, totalIrPerfCount, totalTsc,
            "L3Access", "L3Miss", "L3MissLat/16", "L3MissSdpReq", "Unused", "Unused");
        return results;
    }

    public string[] columns = new string[] { "Item", "Clk", "Hitrate", "Hit BW", "Mem Latency", "Mem Latency?", "Pend. Miss/C", "SDP Requests", "SDP Requests * 64B" };

    public string GetHelpText() { return ""; }

    /// <summary>
    /// Formats one row of metrics; the entries line up with <see cref="columns"/>.
    /// </summary>
    /// <param name="label">Row label</param>
    /// <param name="counterData">Counter values (ctr0 = accesses, ctr1 = misses, ctr2 = miss latency / 16, ctr3 = SDP requests)</param>
    /// <param name="clk">Clock estimate used to convert cycles to time</param>
    /// <returns>Formatted metric strings</returns>
    private string[] computeMetrics(string label, L3CounterData counterData, float clk)
    {
        // event 0x90 counts "total cycles for all transactions divided by 16"
        float ccxL3MissLatency = (float)counterData.ctr2 * 16 / counterData.ctr3;
        float ccxL3Hitrate = (1 - (float)counterData.ctr1 / counterData.ctr0) * 100;
        float ccxL3HitBw = ((float)counterData.ctr0 - counterData.ctr1) * 64;
        return new string[] { label,
            FormatLargeNumber(clk),
            string.Format("{0:F2}%", ccxL3Hitrate),
            FormatLargeNumber(ccxL3HitBw) + "B/s",
            string.Format("{0:F1} clks", ccxL3MissLatency),
            string.Format("{0:F1} ns", (1000000000 / clk) * ccxL3MissLatency),
            string.Format("{0:F2}", counterData.ctr2 * 16 / clk),
            FormatLargeNumber(counterData.ctr3),
            FormatLargeNumber(counterData.ctr3 * 64) + "B/s"};
    }
}
197 | }
198 | }
199 |
--------------------------------------------------------------------------------
/App.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/Cpu.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Threading;
3 | using System.Windows.Forms;
4 |
5 | namespace PmcReader
6 | {
7 | public interface MonitoringArea
8 | {
9 | MonitoringConfig[] GetMonitoringConfigs();
10 |
11 | string GetArchitectureName();
12 |
13 | /// <summary>
14 | /// Monitoring thread function, periodically populates listView with results
15 | /// </summary>
16 | void MonitoringThread(int configId, ListView listView, CancellationToken cancelToken);
17 |
18 | /// <summary>
19 | /// Get number of threads in CPU
20 | /// </summary>
21 | /// <returns>Number of threads</returns>
22 | int GetThreadCount();
23 |
24 | /// <summary>
25 | /// Start logging to file
26 | /// </summary>
27 | /// <param name="filePath">File path to log to</param>
28 | /// <param name="targetCore">If >= 0, only log for a specific core</param>
29 | /// <returns>A string whose meaning is not visible from this interface (TODO confirm: presumably a status or error message)</returns>
30 | string StartLogToFile(string filePath, int targetCore);
31 | void StopLoggingToFile();
32 | }
33 |
34 | public interface MonitoringConfig
35 | {
36 | /// <summary>
37 | /// Display name for configuration
38 | /// </summary>
39 | /// <returns>Name of this configuration</returns>
40 | string GetConfigName();
41 |
42 | /// <summary>
43 | /// Description of this config
44 | /// </summary>
45 | /// <returns>Help text; the implementations visible alongside this interface return an empty string</returns>
46 | string GetHelpText();
47 |
48 | /// <summary>
49 | /// Get columns to display in listview
50 | /// </summary>
51 | /// <returns>Column header strings</returns>
52 | string[] GetColumns();
53 |
54 | /// <summary>
55 | /// Program the appropriate counters
56 | /// </summary>
57 | void Initialize();
58 |
59 | /// <summary>
60 | /// Read counters, return metrics
61 | /// </summary>
62 | MonitoringUpdateResults Update();
63 | }
64 |
/// <summary>
/// Result metrics, collected after each update
/// </summary>
public class MonitoringUpdateResults
{
    /// <summary>
    /// Aggregated metrics
    /// </summary>
    public string[] overallMetrics;

    /// <summary>
    /// List of per-unit metrics
    /// </summary>
    public string[][] unitMetrics;

    /// <summary>
    /// Counter values, for logging. Each entry pairs a counter label with its value;
    /// the producers in this project fill it with (string, float) pairs.
    /// </summary>
    public Tuple<string, float>[] overallCounterValues;
}
85 | }
86 |
--------------------------------------------------------------------------------
/HaswellForm.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 | text/microsoft-resx
110 |
111 |
112 | 2.0
113 |
114 |
115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
119 |
120 |
121 | 17, 17
122 |
123 |
--------------------------------------------------------------------------------
/Intel/AlderLakeL3.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Runtime.InteropServices.WindowsRuntime;
5 |
6 | namespace PmcReader.Intel
7 | {
8 | public class AlderLakeL3 : AlderLakeUncore
9 | {
10 | ///
11 | /// Number of L3 cache coherency boxes
12 | ///
13 | public int CboCount;
14 | public NormalizedCboCounterData[] cboData;
15 | public NormalizedCboCounterData cboTotals;
16 |
/// <summary>
/// Reads the C-Box count from MSR_UNC_CBO_CONFIG, sizes the per-CBo data array
/// and registers the monitoring configurations for this area.
/// </summary>
public AlderLakeL3()
{
    architectureName = "Alder Lake Client L3";

    // intel developer manual table 2-30 says bits 0-3 encode number of C-Box
    // ADL no longer requires subtracting one from the reported C-Box count, unlike Haswell and Skylake
    Ring0.ReadMsr(MSR_UNC_CBO_CONFIG, out ulong cboConfig);
    CboCount = (int)(cboConfig & 0xF);
    cboData = new NormalizedCboCounterData[CboCount];

    // Built directly as an array: the intermediate list only ever held this single entry
    monitoringConfigs = new MonitoringConfig[] { new HitrateConfig(this) };
}
32 |
/// <summary>
/// Values of the two CBo counters after multiplication by the normalization factor.
/// </summary>
public class NormalizedCboCounterData
{
    public float ctr0, ctr1;
}
38 |
/// <summary>
/// Resets both running CBo totals to zero, creating the totals object on first use.
/// </summary>
public void InitializeCboTotals()
{
    NormalizedCboCounterData totals = cboTotals ?? new NormalizedCboCounterData();
    totals.ctr0 = 0;
    totals.ctr1 = 0;
    cboTotals = totals;
}
49 |
/// <summary>
/// Reads and clears both counters of one CBo, stores the normalized values
/// in <c>cboData</c> and adds them to <c>cboTotals</c>.
/// </summary>
/// <param name="cboIdx">Index of the CBo to read</param>
public void UpdateCboCounterData(uint cboIdx)
{
    uint msrOffset = MSR_UNC_CBO_increment * cboIdx;
    float scale = GetNormalizationFactor((int)cboIdx);
    ulong rawCtr0 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR0_base + msrOffset);
    ulong rawCtr1 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR1_base + msrOffset);

    // Entries are allocated lazily, the first time a CBo is read
    NormalizedCboCounterData entry = cboData[cboIdx];
    if (entry == null)
    {
        entry = new NormalizedCboCounterData();
        cboData[cboIdx] = entry;
    }

    entry.ctr0 = rawCtr0 * scale;
    entry.ctr1 = rawCtr1 * scale;
    cboTotals.ctr0 += entry.ctr0;
    cboTotals.ctr1 += entry.ctr1;
}
66 |
/// <summary>
/// Pairs the two CBo totals with caller-supplied labels, for logging.
/// </summary>
/// <param name="ctr0">Label for the counter 0 total</param>
/// <param name="ctr1">Label for the counter 1 total</param>
/// <returns>Two (label, value) pairs, counter 0 first</returns>
public Tuple<string, float>[] GetOverallCounterValues(string ctr0, string ctr1)
{
    Tuple<string, float>[] retval = new Tuple<string, float>[2];
    retval[0] = new Tuple<string, float>(ctr0, cboTotals.ctr0);
    retval[1] = new Tuple<string, float>(ctr1, cboTotals.ctr1);
    return retval;
}
74 |
/// <summary>
/// L3 hitrate configuration. On every CBo, counter 0 is programmed with event 0x34 / umask 0x8F
/// and counter 1 with event 0x34 / umask 0x88; the metrics treat them as lookups and misses.
/// </summary>
public class HitrateConfig : MonitoringConfig
{
    private AlderLakeL3 cpu;

    public string[] columns = new string[] { "Item", "Hitrate", "Hit BW", "All Lookups", "I state" };

    public HitrateConfig(AlderLakeL3 intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() => "L3 Hitrate";

    public string GetHelpText() => "";

    public string[] GetColumns() => columns;

    /// <summary>
    /// Enables the uncore counters and programs both event selects on each CBo.
    /// </summary>
    public void Initialize()
    {
        cpu.EnableUncoreCounters();

        // Reusing Skylake events since Intel has not documented uncore events for arches after that
        ulong lookupEvtSel = GetUncorePerfEvtSelRegisterValue(0x34, 0x8F, false, false, true, false, 0);
        ulong missEvtSel = GetUncorePerfEvtSelRegisterValue(0x34, 0x88, false, false, true, false, 0);

        for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
        {
            uint msrOffset = MSR_UNC_CBO_increment * cboIdx;
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + msrOffset, lookupEvtSel);
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + msrOffset, missEvtSel);
        }
    }

    /// <summary>
    /// Reads every CBo's counters and formats per-CBo and overall rows.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        string[][] perCboRows = new string[cpu.CboCount][];
        cpu.InitializeCboTotals();

        uint cboIdx = 0;
        while (cboIdx < cpu.CboCount)
        {
            cpu.UpdateCboCounterData(cboIdx);
            perCboRows[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]);
            cboIdx++;
        }

        return new MonitoringUpdateResults
        {
            unitMetrics = perCboRows,
            overallMetrics = computeMetrics("Overall", cpu.cboTotals),
            overallCounterValues = cpu.GetOverallCounterValues("L3 Lookups", "L3 Misses")
        };
    }

    /// <summary>
    /// Builds one row: label, hitrate percentage, hit bandwidth (64 bytes per hit), lookups, misses.
    /// </summary>
    private string[] computeMetrics(string label, NormalizedCboCounterData counterData)
    {
        float lookups = counterData.ctr0;
        float misses = counterData.ctr1;

        string hitrate = string.Format("{0:F2}%", 100 * (1 - misses / lookups));
        string hitBandwidth = FormatLargeNumber((lookups - misses) * 64) + "B/s";

        return new string[]
        {
            label,
            hitrate,
            hitBandwidth,
            FormatLargeNumber(lookups),
            FormatLargeNumber(misses)
        };
    }
}
132 | }
133 | }
134 |
--------------------------------------------------------------------------------
/Intel/AlderLakeUncore.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 |
4 | namespace PmcReader.Intel
5 | {
/// <summary>
/// MSR addresses and helper routines shared by the Alder Lake client uncore monitoring areas.
/// </summary>
public class AlderLakeUncore : ModernIntelCpu
{
    public const uint MSR_UNC_PERF_GLOBAL_CTRL = 0x2FF0;
    public const uint MSR_UNC_PERF_FIXED_CTRL = 0x2FDE;
    public const uint MSR_UNC_PERF_FIXED_CTR = 0x2FDF;
    public const uint MSR_UNC_CBO_CONFIG = 0x396;
    public const uint MSR_UNC_CBO_PERFEVTSEL0_base = 0x2000;
    public const uint MSR_UNC_CBO_PERFEVTSEL1_base = 0x2001;
    public const uint MSR_UNC_CBO_PERFCTR0_base = 0x2002;
    public const uint MSR_UNC_CBO_PERFCTR1_base = 0x2003;
    public const uint MSR_UNC_ARB_PERFCTR0 = 0x2FD2;
    public const uint MSR_UNC_ARB_PERFCTR1 = 0x2FD3;
    public const uint MSR_UNC_ARB_PERFEVTSEL0 = 0x2FD0;
    public const uint MSR_UNC_ARB_PERFEVTSEL1 = 0x2FD1;
    public const uint MSR_UNC_CBO_increment = 0x8;

    public AlderLakeUncore()
    {
        architectureName = "Alder Lake Client Uncore";
    }

    /// <summary>
    /// Enable uncore counters, with overflow propagation/freezing disabled
    /// </summary>
    public void EnableUncoreCounters()
    {
        // Bit 29 - globally enable all PMU counters.
        // local counters still have to be individually enabled
        // other bits have to do with PMI or are reserved
        const int globalEnableBit = 29;

        // Bit 22 - locally enable fixed counter
        const int fixedCounterEnableBit = 22;

        Ring0.WriteMsr(MSR_UNC_PERF_GLOBAL_CTRL, 1UL << globalEnableBit);
        Ring0.WriteMsr(MSR_UNC_PERF_FIXED_CTRL, 1UL << fixedCounterEnableBit);
    }

    /// <summary>
    /// Get value to put in PERFEVTSEL register, for uncore counters
    /// </summary>
    /// <param name="perfEvent">Perf event</param>
    /// <param name="umask">Perf event qualification (umask)</param>
    /// <param name="edge">Edge detect</param>
    /// <param name="ovf_en">Enable overflow forwarding</param>
    /// <param name="enable">Enable counter</param>
    /// <param name="invert">Invert cmask</param>
    /// <param name="cmask">Count mask (only the low four bits are used)</param>
    /// <returns>value to put in perfevtsel register</returns>
    public static ulong GetUncorePerfEvtSelRegisterValue(byte perfEvent,
        byte umask,
        bool edge,
        bool ovf_en,
        bool enable,
        bool invert,
        byte cmask)
    {
        ulong registerValue = perfEvent;
        registerValue |= (ulong)umask << 8;
        if (edge) registerValue |= 1UL << 18;
        if (ovf_en) registerValue |= 1UL << 20;
        if (enable) registerValue |= 1UL << 22;
        if (invert) registerValue |= 1UL << 23;
        registerValue |= (ulong)(cmask & 0xF) << 24;
        return registerValue;
    }
}
71 | }
72 |
--------------------------------------------------------------------------------
/Intel/HaswellClientArb.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 |
4 | namespace PmcReader.Intel
5 | {
6 | public class HaswellClientArb : HaswellClientUncore
7 | {
8 | private ulong lastUncoreClockCount;
9 |
/// <summary>
/// Sets the architecture name, clears the remembered uncore clock count
/// and registers the two monitoring configurations.
/// </summary>
public HaswellClientArb()
{
    architectureName = "Haswell Client System Agent";
    lastUncoreClockCount = 0;
    monitoringConfigs = new MonitoringConfig[]
    {
        new MCRequests(this),
        new CoherencyRequests(this)
    };
}
18 |
/// <summary>
/// Elapsed uncore clocks and the two ARB counter values, each already multiplied by the normalization factor.
/// </summary>
public class NormalizedArbCounterData
{
    public float uncoreClock, ctr0, ctr1;
}
25 |
/// <summary>
/// Reads and clears both ARB counters, samples the fixed uncore clock counter,
/// and returns all three values scaled by the normalization factor.
/// </summary>
/// <param name="ctr0">Raw (unscaled) value read from ARB counter 0</param>
/// <param name="ctr1">Raw (unscaled) value read from ARB counter 1</param>
/// <returns>Normalized counter values and elapsed uncore clocks</returns>
public NormalizedArbCounterData UpdateArbCounterData(out ulong ctr0, out ulong ctr1)
{
    // MSR_UNC_PERF_FIXED_CTR is 48 bits wide, upper bits are reserved
    const ulong fixedCounterMask = 0xFFFFFFFFFFFF;

    float scale = GetNormalizationFactor(0);
    ctr0 = ReadAndClearMsr(MSR_UNC_ARB_PERFCTR0);
    ctr1 = ReadAndClearMsr(MSR_UNC_ARB_PERFCTR1);
    Ring0.ReadMsr(MSR_UNC_PERF_FIXED_CTR, out ulong rawClock);

    // When the counter has not advanced past the previous reading, the current value itself is used as the delta
    ulong clockNow = rawClock & fixedCounterMask;
    ulong elapsedClocks = clockNow > lastUncoreClockCount
        ? clockNow - lastUncoreClockCount
        : clockNow;
    lastUncoreClockCount = clockNow;

    NormalizedArbCounterData normalized = new NormalizedArbCounterData();
    normalized.ctr0 = ctr0 * scale;
    normalized.ctr1 = ctr1 * scale;
    normalized.uncoreClock = elapsedClocks * scale;
    return normalized;
}
47 |
/// <summary>
/// Pairs the uncore clock and the two ARB counter values with labels, for logging.
/// </summary>
/// <param name="data">Normalized counter data to report</param>
/// <param name="ctr0">Label for counter 0</param>
/// <param name="ctr1">Label for counter 1</param>
/// <returns>Three (label, value) pairs: uncore clock, counter 0, counter 1</returns>
public Tuple<string, float>[] GetOverallCounterValues(NormalizedArbCounterData data, string ctr0, string ctr1)
{
    Tuple<string, float>[] retval = new Tuple<string, float>[3];
    retval[0] = new Tuple<string, float>("Uncore Clk", data.uncoreClock);
    retval[1] = new Tuple<string, float>(ctr0, data.ctr0);
    retval[2] = new Tuple<string, float>(ctr1, data.ctr1);
    return retval;
}
56 |
/// <summary>
/// ARB configuration that tracks request occupancy (counter 0, event 0x80) and
/// request count (counter 1, event 0x81), and keeps a running total of requests seen.
/// </summary>
public class MCRequests : MonitoringConfig
{
    private HaswellClientArb cpu;
    private ulong totalReqs;

    public string[] columns = new string[] { "Clk", "Requests", "Req*64B", "Q Occupancy", "Req Latency", "Req Latency", "Total Req Data" };

    public MCRequests(HaswellClientArb intelCpu)
    {
        cpu = intelCpu;
        totalReqs = 0;
    }

    public string GetConfigName() => "All MC Requests";

    public string GetHelpText() => "";

    public string[] GetColumns() => columns;

    /// <summary>
    /// Enables the uncore counters, programs both ARB event selects and zeroes both ARB counters.
    /// </summary>
    public void Initialize()
    {
        cpu.EnableUncoreCounters();

        // 0x80 = increments by number of outstanding requests every cycle
        // counts for coherent and non-coherent requests initiated by cores, igpu, or L3
        // only works in counter 0
        ulong occupancyEvtSel = GetUncorePerfEvtSelRegisterValue(0x80, 1, false, false, true, false, 0);
        Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL0, occupancyEvtSel);

        // 0x81 = number of requests
        ulong requestsEvtSel = GetUncorePerfEvtSelRegisterValue(0x81, 1, false, false, true, false, 0);
        Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL1, requestsEvtSel);

        Ring0.WriteMsr(MSR_UNC_ARB_PERFCTR0, 0);
        Ring0.WriteMsr(MSR_UNC_ARB_PERFCTR1, 0);
    }

    /// <summary>
    /// Reads the ARB counters and formats a single overall row; there are no per-unit rows.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        NormalizedArbCounterData counterData = cpu.UpdateArbCounterData(out ulong _, out ulong reqs);
        totalReqs += reqs;

        float clk = counterData.uncoreClock;
        float occupancy = counterData.ctr0;
        float requests = counterData.ctr1;

        string[] row = new string[]
        {
            FormatLargeNumber(clk),
            FormatLargeNumber(requests),
            FormatLargeNumber(requests * 64) + "B/s",
            string.Format("{0:F2}", occupancy / clk),
            string.Format("{0:F2} clk", occupancy / requests),
            string.Format("{0:F2} ns", (1000000000 / clk) * (occupancy / requests)),
            FormatLargeNumber(totalReqs * 64) + "B"
        };

        return new MonitoringUpdateResults
        {
            unitMetrics = null,
            overallMetrics = row,
            overallCounterValues = cpu.GetOverallCounterValues(counterData, "Pending Requests Per Cycle", "Requests")
        };
    }
}
113 |
/// <summary>
/// ARB configuration that tracks coherency tracker occupancy (counter 0, event 0x83)
/// and coherency tracker allocations (counter 1, event 0x84).
/// </summary>
public class CoherencyRequests : MonitoringConfig
{
    private HaswellClientArb cpu;

    public string[] columns = new string[] { "Clk", "Requests", "Req*64B", "Q Occupancy", "Req Latency", "Req Latency" };

    public CoherencyRequests(HaswellClientArb intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() => "Coherency Tracker Requests";

    public string GetHelpText() => "";

    public string[] GetColumns() => columns;

    /// <summary>
    /// Enables the uncore counters and programs both ARB event selects.
    /// </summary>
    public void Initialize()
    {
        cpu.EnableUncoreCounters();

        // 0x83 = increments by number of outstanding requests every cycle in coherency tracker
        ulong occupancyEvtSel = GetUncorePerfEvtSelRegisterValue(0x83, 1, false, false, true, false, 0);
        Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL0, occupancyEvtSel);

        // 0x84 = number of requests allocated in coherency tracker
        ulong allocationsEvtSel = GetUncorePerfEvtSelRegisterValue(0x84, 1, false, false, true, false, 0);
        Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL1, allocationsEvtSel);
    }

    /// <summary>
    /// Reads the ARB counters and formats a single overall row; per-unit rows and
    /// logging counter values are left unset.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        NormalizedArbCounterData counterData = cpu.UpdateArbCounterData(out _, out _);

        float clk = counterData.uncoreClock;
        float occupancy = counterData.ctr0;
        float requests = counterData.ctr1;

        return new MonitoringUpdateResults
        {
            unitMetrics = null,
            overallMetrics = new string[]
            {
                FormatLargeNumber(clk),
                FormatLargeNumber(requests),
                FormatLargeNumber(requests * 64) + "B/s",
                string.Format("{0:F2}", occupancy / clk),
                string.Format("{0:F2} clk", occupancy / requests),
                string.Format("{0:F2} ns", (1000000000 / clk) * (occupancy / requests))
            }
        };
    }
}
160 | }
161 | }
162 |
--------------------------------------------------------------------------------
/Intel/HaswellClientL3.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 |
4 | namespace PmcReader.Intel
5 | {
6 | public class HaswellClientL3 : HaswellClientUncore
7 | {
8 | ///
9 | /// Number of L3 cache coherency boxes
10 | ///
11 | public int CboCount;
12 | public NormalizedCboCounterData[] cboData;
13 | public NormalizedCboCounterData cboTotals;
14 |
/// <summary>
/// Reads the C-Box count from MSR_UNC_CBO_CONFIG, sizes the per-CBo data array
/// and registers the three monitoring configurations.
/// </summary>
public HaswellClientL3()
{
    architectureName = "Haswell Client L3";

    // intel developer manual table 2-30 says bits 0-3 encode number of C-Box
    // "derive value by -1"
    Ring0.ReadMsr(MSR_UNC_CBO_CONFIG, out ulong cboConfig);

    // Mask all four documented bits (0xF), matching the comment above.
    int reportedCount = (int)(cboConfig & 0xF);

    // A reported value of zero would otherwise underflow to -1 and make the array allocation throw.
    CboCount = reportedCount > 0 ? reportedCount - 1 : 0;
    cboData = new NormalizedCboCounterData[CboCount];

    monitoringConfigs = new MonitoringConfig[3];
    monitoringConfigs[0] = new HitrateConfig(this);
    monitoringConfigs[1] = new SnoopHitConfig(this);
    monitoringConfigs[2] = new SnoopInvalidateConfig(this);
}
31 |
/// <summary>
/// Values of the two CBo counters after multiplication by the normalization factor.
/// </summary>
public class NormalizedCboCounterData
{
    public float ctr0, ctr1;
}
37 |
/// <summary>
/// Resets both running CBo totals to zero, creating the totals object on first use.
/// </summary>
public void InitializeCboTotals()
{
    NormalizedCboCounterData totals = cboTotals ?? new NormalizedCboCounterData();
    totals.ctr0 = 0;
    totals.ctr1 = 0;
    cboTotals = totals;
}
48 |
/// <summary>
/// Reads and clears both counters of one CBo, stores the normalized values
/// in <c>cboData</c> and adds them to <c>cboTotals</c>.
/// </summary>
/// <param name="cboIdx">Index of the CBo to read</param>
public void UpdateCboCounterData(uint cboIdx)
{
    uint msrOffset = MSR_UNC_CBO_increment * cboIdx;
    float scale = GetNormalizationFactor((int)cboIdx);
    ulong rawCtr0 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR0_base + msrOffset);
    ulong rawCtr1 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR1_base + msrOffset);

    // Entries are allocated lazily, the first time a CBo is read
    NormalizedCboCounterData entry = cboData[cboIdx];
    if (entry == null)
    {
        entry = new NormalizedCboCounterData();
        cboData[cboIdx] = entry;
    }

    entry.ctr0 = rawCtr0 * scale;
    entry.ctr1 = rawCtr1 * scale;
    cboTotals.ctr0 += entry.ctr0;
    cboTotals.ctr1 += entry.ctr1;
}
65 |
/// <summary>
/// Pairs the two CBo totals with caller-supplied labels, for logging.
/// </summary>
/// <param name="ctr0">Label for the counter 0 total</param>
/// <param name="ctr1">Label for the counter 1 total</param>
/// <returns>Two (label, value) pairs, counter 0 first</returns>
public Tuple<string, float>[] GetCboOverallCounterValues(string ctr0, string ctr1)
{
    Tuple<string, float>[] retval = new Tuple<string, float>[2];
    retval[0] = new Tuple<string, float>(ctr0, cboTotals.ctr0);
    retval[1] = new Tuple<string, float>(ctr1, cboTotals.ctr1);
    return retval;
}
73 |
/// <summary>
/// L3 hitrate configuration: counter 0 counts all L3 lookups and counter 1 counts
/// lookups that found the line in the I state (misses), on every CBo.
/// </summary>
public class HitrateConfig : MonitoringConfig
{
    private HaswellClientL3 cpu;

    public string[] columns = new string[] { "Item", "Hitrate", "Hit BW", "All Lookups", "L3 Miss" };

    public HitrateConfig(HaswellClientL3 intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() => "L3 Hitrate";

    public string GetHelpText() => "";

    public string[] GetColumns() => columns;

    /// <summary>
    /// Enables the uncore counters and programs both event selects on each CBo.
    /// </summary>
    public void Initialize()
    {
        cpu.EnableUncoreCounters();

        // 0x34 = L3 lookups, 0xFF = all lookups
        ulong allLookupsEvtSel = GetUncorePerfEvtSelRegisterValue(0x34, 0xFF, false, false, true, false, 0);

        // 0x34 = L3 lookups, high 4 bits = cacheable read | cacheable write | external snoop | irq/ipq
        // low 4 bits = M | ES | I, so select I to count misses
        ulong missEvtSel = GetUncorePerfEvtSelRegisterValue(0x34, 0xF8, false, false, true, false, 0);

        for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
        {
            uint msrOffset = MSR_UNC_CBO_increment * cboIdx;
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + msrOffset, allLookupsEvtSel);
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + msrOffset, missEvtSel);
        }
    }

    /// <summary>
    /// Reads every CBo's counters and formats per-CBo and overall rows.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        string[][] perCboRows = new string[cpu.CboCount][];
        cpu.InitializeCboTotals();

        uint cboIdx = 0;
        while (cboIdx < cpu.CboCount)
        {
            cpu.UpdateCboCounterData(cboIdx);
            perCboRows[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]);
            cboIdx++;
        }

        return new MonitoringUpdateResults
        {
            unitMetrics = perCboRows,
            overallMetrics = computeMetrics("Overall", cpu.cboTotals),
            overallCounterValues = cpu.GetCboOverallCounterValues("L3 Lookups", "L3 Miss")
        };
    }

    /// <summary>
    /// Builds one row: label, hitrate percentage, hit bandwidth (64 bytes per hit), lookups, misses.
    /// </summary>
    private string[] computeMetrics(string label, NormalizedCboCounterData counterData)
    {
        float lookups = counterData.ctr0;
        float misses = counterData.ctr1;

        string hitrate = string.Format("{0:F2}%", 100 * (1 - misses / lookups));
        string hitBandwidth = FormatLargeNumber((lookups - misses) * 64) + "B/s";

        return new string[]
        {
            label,
            hitrate,
            hitBandwidth,
            FormatLargeNumber(lookups),
            FormatLargeNumber(misses)
        };
    }
}
134 |
/// <summary>
/// Snoop invalidation configuration: counter 0 counts all snoop responses and
/// counter 1 counts responses that invalidated a line, on every CBo.
/// </summary>
public class SnoopInvalidateConfig : MonitoringConfig
{
    private HaswellClientL3 cpu;

    public string[] columns = new string[] { "Item", "Invalidate Resp %", "Invalidate BW", "All Snoop Responses", "Core Cache Lines Invalidated" };

    public SnoopInvalidateConfig(HaswellClientL3 intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() => "Snoop Invalidations";

    public string GetHelpText() => "";

    public string[] GetColumns() => columns;

    /// <summary>
    /// Pins to thread 0, enables the uncore counters and programs both event selects on each CBo.
    /// </summary>
    public void Initialize()
    {
        // 0x22 = Snoop response, umask 0x2 = non-modified line invalidated, umask 0x10 = modified line invalidated
        // high 3 bits of umask = filter. 0x20 = external snoop, 0x40 = core memory request, 0x80 = L3 eviction
        const byte invalidateUmask = 0x12 | 0x20 | 0x40 | 0x80;

        ThreadAffinity.Set(0x1);
        cpu.EnableUncoreCounters();

        // 0x22 = Snoop response, 0xFF = all responses
        ulong allResponsesEvtSel = GetUncorePerfEvtSelRegisterValue(0x22, 0xFF, false, false, true, false, 0);
        ulong invalidateEvtSel = GetUncorePerfEvtSelRegisterValue(0x22, invalidateUmask, false, false, true, false, 0);

        for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
        {
            uint msrOffset = MSR_UNC_CBO_increment * cboIdx;
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + msrOffset, allResponsesEvtSel);
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + msrOffset, invalidateEvtSel);
        }
    }

    /// <summary>
    /// Reads every CBo's counters while pinned to thread 0 and formats per-CBo and overall rows.
    /// Logging counter values are left unset.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        string[][] perCboRows = new string[cpu.CboCount][];
        cpu.InitializeCboTotals();
        ThreadAffinity.Set(0x1);

        uint cboIdx = 0;
        while (cboIdx < cpu.CboCount)
        {
            cpu.UpdateCboCounterData(cboIdx);
            perCboRows[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]);
            cboIdx++;
        }

        return new MonitoringUpdateResults
        {
            unitMetrics = perCboRows,
            overallMetrics = computeMetrics("Overall", cpu.cboTotals)
        };
    }

    /// <summary>
    /// Builds one row: label, invalidation share of all responses, invalidation bandwidth
    /// (64 bytes per invalidation), all responses, invalidations.
    /// </summary>
    private string[] computeMetrics(string label, NormalizedCboCounterData counterData)
    {
        float allResponses = counterData.ctr0;
        float invalidations = counterData.ctr1;

        return new string[]
        {
            label,
            string.Format("{0:F2}%", 100 * (invalidations / allResponses)),
            FormatLargeNumber(invalidations * 64) + "B/s",
            FormatLargeNumber(allResponses),
            FormatLargeNumber(invalidations)
        };
    }
}
196 |
/// <summary>
/// Snoop hit configuration: counter 0 counts all snoop responses and
/// counter 1 counts responses that hit a line, on every CBo.
/// </summary>
public class SnoopHitConfig : MonitoringConfig
{
    private HaswellClientL3 cpu;

    public string[] columns = new string[] { "Item", "Snoop Hitrate", "Snoop Hit BW", "All Snoop Responses", "Snoop Hits" };

    public SnoopHitConfig(HaswellClientL3 intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() => "Snoop Hits";

    public string GetHelpText() => "";

    public string[] GetColumns() => columns;

    /// <summary>
    /// Pins to thread 0, enables the uncore counters and programs both event selects on each CBo.
    /// </summary>
    public void Initialize()
    {
        // 0x22 = Snoop response, umask 0x4 = non-modified line hit, umask 0x8 = modified line hit
        // high 3 bits of umask = filter. 0x20 = external snoop, 0x40 = core memory request, 0x80 = L3 eviction
        const byte hitUmask = 0x4 | 0x8 | 0x20 | 0x40 | 0x80;

        ThreadAffinity.Set(0x1);
        cpu.EnableUncoreCounters();

        // 0x22 = Snoop response, 0xFF = all responses
        ulong allResponsesEvtSel = GetUncorePerfEvtSelRegisterValue(0x22, 0xFF, false, false, true, false, 0);
        ulong hitEvtSel = GetUncorePerfEvtSelRegisterValue(0x22, hitUmask, false, false, true, false, 0);

        for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
        {
            uint msrOffset = MSR_UNC_CBO_increment * cboIdx;
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + msrOffset, allResponsesEvtSel);
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + msrOffset, hitEvtSel);
        }
    }

    /// <summary>
    /// Reads every CBo's counters while pinned to thread 0 and formats per-CBo and overall rows.
    /// Logging counter values are left unset.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        string[][] perCboRows = new string[cpu.CboCount][];
        cpu.InitializeCboTotals();
        ThreadAffinity.Set(0x1);

        uint cboIdx = 0;
        while (cboIdx < cpu.CboCount)
        {
            cpu.UpdateCboCounterData(cboIdx);
            perCboRows[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]);
            cboIdx++;
        }

        return new MonitoringUpdateResults
        {
            unitMetrics = perCboRows,
            overallMetrics = computeMetrics("Overall", cpu.cboTotals)
        };
    }

    /// <summary>
    /// Builds one row: label, hit share of all responses, hit bandwidth (64 bytes per hit),
    /// all responses, hits.
    /// </summary>
    private string[] computeMetrics(string label, NormalizedCboCounterData counterData)
    {
        float allResponses = counterData.ctr0;
        float hits = counterData.ctr1;

        return new string[]
        {
            label,
            string.Format("{0:F2}%", 100 * (hits / allResponses)),
            FormatLargeNumber(hits * 64) + "B/s",
            FormatLargeNumber(allResponses),
            FormatLargeNumber(hits)
        };
    }
}
258 | }
259 | }
260 |
--------------------------------------------------------------------------------
/Intel/HaswellClientUncore.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 |
4 | namespace PmcReader.Intel
5 | {
/// <summary>
/// MSR addresses and helper routines shared by the Haswell client uncore monitoring areas.
/// </summary>
public class HaswellClientUncore : ModernIntelCpu
{
    public const uint MSR_UNC_PERF_GLOBAL_CTRL = 0x391;
    public const uint MSR_UNC_PERF_FIXED_CTRL = 0x394;
    public const uint MSR_UNC_PERF_FIXED_CTR = 0x395;
    public const uint MSR_UNC_CBO_CONFIG = 0x396;
    public const uint MSR_UNC_CBO_PERFEVTSEL0_base = 0x700;
    public const uint MSR_UNC_CBO_PERFEVTSEL1_base = 0x701;
    public const uint MSR_UNC_CBO_PERFCTR0_base = 0x706;
    public const uint MSR_UNC_CBO_PERFCTR1_base = 0x707;
    public const uint MSR_UNC_ARB_PERFCTR0 = 0x3B0;
    public const uint MSR_UNC_ARB_PERFCTR1 = 0x3B1;
    public const uint MSR_UNC_ARB_PERFEVTSEL0 = 0x3B2;
    public const uint MSR_UNC_ARB_PERFEVTSEL1 = 0x3B3;
    public const uint MSR_UNC_CBO_increment = 0x10;

    public HaswellClientUncore()
    {
        architectureName = "Haswell Client Uncore";
    }

    /// <summary>
    /// Enable haswell uncore counters, with overflow propagation/freezing disabled
    /// </summary>
    public void EnableUncoreCounters()
    {
        // Bit 29 is written to the global control MSR, bit 22 to the fixed counter control MSR
        const int globalEnableBit = 29;
        const int fixedCounterEnableBit = 22;

        Ring0.WriteMsr(MSR_UNC_PERF_GLOBAL_CTRL, 1UL << globalEnableBit);
        Ring0.WriteMsr(MSR_UNC_PERF_FIXED_CTRL, 1UL << fixedCounterEnableBit);
    }

    /// <summary>
    /// Get value to put in PERFEVTSEL register, for uncore counters
    /// </summary>
    /// <param name="perfEvent">Perf event</param>
    /// <param name="umask">Perf event qualification (umask)</param>
    /// <param name="edge">Edge detect</param>
    /// <param name="ovf_en">Enable overflow forwarding</param>
    /// <param name="enable">Enable counter</param>
    /// <param name="invert">Invert cmask</param>
    /// <param name="cmask">Count mask (only the low four bits are used)</param>
    /// <returns>value to put in perfevtsel register</returns>
    public static ulong GetUncorePerfEvtSelRegisterValue(byte perfEvent,
        byte umask,
        bool edge,
        bool ovf_en,
        bool enable,
        bool invert,
        byte cmask)
    {
        ulong registerValue = perfEvent;
        registerValue |= (ulong)umask << 8;
        if (edge) registerValue |= 1UL << 18;
        if (ovf_en) registerValue |= 1UL << 20;
        if (enable) registerValue |= 1UL << 22;
        if (invert) registerValue |= 1UL << 23;
        registerValue |= (ulong)(cmask & 0xF) << 24;
        return registerValue;
    }
}
66 | }
67 |
--------------------------------------------------------------------------------
/Intel/MeteorLakeArb.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 | using System.Collections.Generic;
4 |
5 | namespace PmcReader.Intel
6 | {
7 | public class MeteorLakeArb : MeteorLakeUncore
8 | {
9 | private ulong lastSncuClk, lastCncuClk;
10 |
/// <summary>
/// Sets the architecture name, clears both remembered fixed-counter readings
/// and registers the monitoring configurations.
/// </summary>
public MeteorLakeArb()
{
    architectureName = "Meteor Lake ARB";
    lastSncuClk = 0;
    lastCncuClk = 0; // initialised explicitly, alongside its sNCU counterpart

    // Built directly as an array; the intermediate list had lost its generic type argument
    monitoringConfigs = new MonitoringConfig[]
    {
        new FixedCounters(this),
        new ArbCounters(this)
    };
}
20 |
/// <summary>
/// Normalized fixed-counter clocks plus the ARB, HAC ARB and HAC CBo counter pairs.
/// </summary>
public class NormalizedArbCounterData
{
    public float sncuUncoreClk;

    /// <summary>
    /// Documented as UCLK (UNC_CLOCK.SOCKET) cycles
    /// </summary>
    public float cncuUncoreClk;

    public float arbCtr0, arbCtr1;
    public float hacArbCtr0, hacArbCtr1;
    public float hacCboCtr0, hacCboCtr1;
}
36 |
/// <summary>
/// Enables the sNCU and cNCU boxes, programs their fixed counters to count clockticks
/// and zeroes both fixed counters.
/// </summary>
public void InitializeFixedCounters()
{
    const ulong boxEnable = 1UL << 29;

    // 0xFF = clockticks, bit 22 = enable
    // cNCU = socket uncore clocks from Intel's description
    // reaches 3.3 GHz and likely corresponds to uncore clk on the CPU tile
    // sNCU could be socket uncore clock for the IO die.
    // reaches 2.4 GHz
    const ulong clockticksEnabled = 0xFF | (1UL << 22);

    Ring0.WriteMsr(MTL_UNC_SNCU_BOX_CTRL, boxEnable);
    Ring0.WriteMsr(MTL_UNC_CNCU_BOX_CTRL, boxEnable);

    Ring0.WriteMsr(MTL_UNC_SNCU_FIXED_CTRL, clockticksEnabled);
    Ring0.WriteMsr(MTL_UNC_CNCU_FIXED_CTRL, clockticksEnabled);

    Ring0.WriteMsr(MTL_UNC_SNCU_FIXED_CTR, 0);
    Ring0.WriteMsr(MTL_UNC_CNCU_FIXED_CTR, 0);
}
53 |
/// <summary>
/// Reads and clears the ARB, HAC ARB and HAC CBo counter pairs, samples both fixed
/// clock counters, and returns everything scaled by the normalization factor.
/// </summary>
/// <returns>Normalized counter values and elapsed sNCU / cNCU clocks</returns>
public NormalizedArbCounterData UpdateArbCounterData()
{
    // Masks a raw fixed-counter reading to 48 bits, computes the delta against the
    // previous reading (or uses the reading itself when it has not advanced), and remembers it.
    ulong ElapsedTicks(ulong rawValue, ref ulong lastValue)
    {
        // MSR_UNC_PERF_FIXED_CTR is 48 bits wide, upper bits are reserved
        ulong current = rawValue & 0xFFFFFFFFFFFF;
        ulong elapsed = current > lastValue ? current - lastValue : current;
        lastValue = current;
        return elapsed;
    }

    float scale = GetNormalizationFactor(0);

    ulong rawArb0 = ReadAndClearMsr(MTL_UNC_ARB_CTR);
    ulong rawArb1 = ReadAndClearMsr(MTL_UNC_ARB_CTR + 1);
    ulong rawHacArb0 = ReadAndClearMsr(MTL_UNC_HAC_ARB_CTR);
    ulong rawHacArb1 = ReadAndClearMsr(MTL_UNC_HAC_ARB_CTR + 1);
    ulong rawHacCbo0 = ReadAndClearMsr(MTL_UNC_HAC_CBO_CTR);
    ulong rawHacCbo1 = ReadAndClearMsr(MTL_UNC_HAC_CBO_CTR + 1);

    // Fixed counters
    Ring0.ReadMsr(MTL_UNC_SNCU_FIXED_CTR, out ulong rawSncuClk);
    Ring0.ReadMsr(MTL_UNC_CNCU_FIXED_CTR, out ulong rawCncuClk);

    ulong sncuElapsed = ElapsedTicks(rawSncuClk, ref lastSncuClk);
    ulong cncuElapsed = ElapsedTicks(rawCncuClk, ref lastCncuClk);

    NormalizedArbCounterData normalized = new NormalizedArbCounterData();
    normalized.arbCtr0 = rawArb0 * scale;
    normalized.arbCtr1 = rawArb1 * scale;
    normalized.hacArbCtr0 = rawHacArb0 * scale;
    normalized.hacArbCtr1 = rawHacArb1 * scale;
    normalized.hacCboCtr0 = rawHacCbo0 * scale;
    normalized.hacCboCtr1 = rawHacCbo1 * scale;
    normalized.sncuUncoreClk = sncuElapsed * scale;
    normalized.cncuUncoreClk = cncuElapsed * scale;
    return normalized;
}
93 |
/// <summary>
/// Packs the fixed clock counts and the two ARB counters into labeled values.
/// </summary>
/// <param name="data">Normalized counter data to report</param>
/// <param name="ctr0">Label for ARB counter 0</param>
/// <param name="ctr1">Label for ARB counter 1</param>
/// <returns>Four (label, value) pairs: sNCU clk, cNCU clk, ARB counter 0, ARB counter 1</returns>
public Tuple<string, float>[] GetOverallCounterValues(NormalizedArbCounterData data, string ctr0, string ctr1)
{
    // Four entries are reported. Sizing the array from the initializer keeps the
    // length and the number of entries from drifting apart (it was allocated with
    // three slots while index 3 was written, which threw IndexOutOfRangeException).
    return new Tuple<string, float>[]
    {
        new Tuple<string, float>("sNCU Clk", data.sncuUncoreClk),
        new Tuple<string, float>("cNCU Clk", data.cncuUncoreClk),
        new Tuple<string, float>(ctr0, data.arbCtr0),
        new Tuple<string, float>(ctr1, data.arbCtr1),
    };
}
103 |
/// <summary>
/// Monitoring configuration that reports the sNCU and cNCU fixed clock counters.
/// </summary>
public class FixedCounters : MonitoringConfig
{
    private MeteorLakeArb arb;
    public string[] columns = new string[] { "Item", "GHz" };

    public FixedCounters(MeteorLakeArb arb)
    {
        this.arb = arb;
    }

    public string GetConfigName() => "Fixed Counters";
    public string GetHelpText() => "";
    public string[] GetColumns() => columns;

    /// <summary>
    /// Sets up the fixed counters and programs HAC CBo counter 0.
    /// </summary>
    public void Initialize()
    {
        arb.InitializeFixedCounters();

        // HAC CBo ToR allocation, all requests
        ulong torAllocation = GetUncorePerfEvtSelRegisterValue(0x35, 8, false, false, true, false, 0);
        Ring0.WriteMsr(MTL_UNC_HAC_CBO_CTRL, torAllocation);
        Ring0.WriteMsr(MTL_UNC_HAC_CBO_CTR, 0);
    }

    /// <summary>
    /// Reads the counters and reports one row per fixed clock counter.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.overallMetrics = new string[] { "N/A", "N/A" };

        NormalizedArbCounterData counters = arb.UpdateArbCounterData();
        results.unitMetrics = new string[][]
        {
            new string[] { "sNCU", FormatLargeNumber(counters.sncuUncoreClk) + "Hz" },
            new string[] { "cNCU", FormatLargeNumber(counters.cncuUncoreClk) + "Hz" },
        };
        return results;
    }
}
137 |
/// <summary>
/// Monitoring configuration that reports HAC CBo, HAC ARB and ARB counters alongside the fixed clocks.
/// </summary>
public class ArbCounters : MonitoringConfig
{
    private MeteorLakeArb arb;
    public string[] columns = new string[] { "Item", "Metric", "Occupancy", "Latency" };

    public ArbCounters(MeteorLakeArb arb)
    {
        this.arb = arb;
    }

    public string GetConfigName() => "Arb";
    public string GetHelpText() => "";
    public string[] GetColumns() => columns;

    /// <summary>
    /// Sets up the fixed counters, then programs and zeroes the HAC CBo, HAC ARB and ARB counters.
    /// </summary>
    public void Initialize()
    {
        arb.InitializeFixedCounters();

        // HAC CBo ToR allocation, all requests
        ulong hacCboAllRequests = GetUncorePerfEvtSelRegisterValue(0x35, 8, false, false, true, false, 0);
        Ring0.WriteMsr(MTL_UNC_HAC_CBO_CTRL, hacCboAllRequests);
        Ring0.WriteMsr(MTL_UNC_HAC_CBO_CTR, 0);

        // HAC ARB, all requests
        ulong hacArbAllRequests = GetUncorePerfEvtSelRegisterValue(0x81, 1, false, false, true, false, 0);
        Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTRL, hacArbAllRequests);
        Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTR, 0);

        // HAC ARB, CMI transactions. Both HAC ARB counters are zeroed once the second event is set
        ulong hacArbCmiTransactions = GetUncorePerfEvtSelRegisterValue(0x8A, 1, false, false, true, false, 0);
        Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTRL + 1, hacArbCmiTransactions);
        Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTR, 0);
        Ring0.WriteMsr(MTL_UNC_HAC_ARB_CTR + 1, 0);

        // ARB occupancy. Umask 2 = data read, 0 = all (in the past, not documented).
        // 0x85 = occupancy, uses cNCU clock. 0x81 doesn't work here. 0x86 is almost right,
        // but seems to count in 32B increments and doesn't count GPU BW.
        // Event 0x85 is programmed with a count mask of 20 (a count mask of 0 was also tried).
        // ARB counter 1 is left unprogrammed.
        ulong arbOccupancy = GetUncorePerfEvtSelRegisterValue(0x85, 0, false, false, true, false, 20);
        Ring0.WriteMsr(MTL_UNC_ARB_CTRL, arbOccupancy);
        Ring0.WriteMsr(MTL_UNC_ARB_CTR, 0);
    }

    /// <summary>
    /// Reads the counters and reports one row per counter. Occupancy and latency columns are not populated.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        NormalizedArbCounterData counters = arb.UpdateArbCounterData();
        float arbCount = counters.arbCtr0;

        string hacCboBw = FormatLargeNumber(counters.hacCboCtr0 * 64) + "B/s";
        string hacArbAllBw = FormatLargeNumber(counters.hacArbCtr0 * 64) + "B/s";
        string hacArbCmiBw = FormatLargeNumber(counters.hacArbCtr1 * 64) + "B/s";

        // It is not settled which clock the ARB count should be compared against
        string arbText = FormatLargeNumber(arbCount) + ">20";
        string sncuText = FormatLargeNumber(counters.sncuUncoreClk) + "Hz";
        string cncuText = FormatLargeNumber(counters.cncuUncoreClk) + "Hz";

        results.unitMetrics = new string[][] {
            new string[] { "HAC CBo", hacCboBw, "-", "-" },
            new string[] { "HAC ARB (All Reqs)", hacArbAllBw, "-", "-" },
            new string[] { "HAC ARB (CMI Transactions)", hacArbCmiBw, "-", "-" },
            new string[] { "ARB", arbText, "-", "-" },
            new string[] { "sNCU", sncuText, "-", "-" },
            new string[] { "cNCU", cncuText, "-", "-" },
        };

        results.overallMetrics = new string[] { "N/A", "N/A" };
        return results;
    }
}
199 | }
200 | }
201 |
--------------------------------------------------------------------------------
/Intel/MeteorLakeL3.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 | using System.Collections.Generic;
4 |
5 | namespace PmcReader.Intel
6 | {
7 | public class MeteorLakeL3 : MeteorLakeUncore
8 | {
9 | ///
10 | /// Number of L3 cache coherency boxes
11 | ///
12 | public int CboCount;
13 | public NormalizedCboCounterData[] cboData;
14 | public NormalizedCboCounterData cboTotals;
15 |
/// <summary>
/// Reads the CBo count from the CBo config MSR, sizes the per-CBo data array,
/// and registers the L3 hitrate monitoring configuration.
/// </summary>
public MeteorLakeL3()
{
    architectureName = "Meteor Lake Client L3";

    // Verbatim from Linux perf code: the low bits of the config register hold the CBo count
    Ring0.ReadMsr(MTL_UNC_CBO_CONFIG, out ulong cboConfig);
    CboCount = (int)(cboConfig & MTL_UNC_NUM_CBO_MASK);
    cboData = new NormalizedCboCounterData[CboCount];

    monitoringConfigs = new MonitoringConfig[] { new HitrateConfig(this) };
}
30 |
/// <summary>
/// Values of a CBo's two counters, multiplied by the normalization factor.
/// </summary>
public class NormalizedCboCounterData
{
    public float ctr0, ctr1;
}
36 |
/// <summary>
/// Creates the totals object on first use and zeroes both totals.
/// </summary>
public void InitializeCboTotals()
{
    NormalizedCboCounterData totals = cboTotals ?? (cboTotals = new NormalizedCboCounterData());
    totals.ctr0 = 0;
    totals.ctr1 = 0;
}
47 |
/// <summary>
/// Reads both counters of one CBo, stores the normalized values in cboData, and adds them to cboTotals.
/// InitializeCboTotals must have been called first, since cboTotals is dereferenced unconditionally.
/// </summary>
/// <param name="cboIdx">Index of the CBo to read</param>
public void UpdateCboCounterData(uint cboIdx)
{
    float scale = GetNormalizationFactor((int)cboIdx);

    // Each CBo's counter pair sits MTL_UNC_INCREMENT MSRs after the previous CBo's
    uint counterBase = MTL_UNC_CBO_CTR + MTL_UNC_INCREMENT * cboIdx;
    ulong raw0 = ReadAndClearMsr(counterBase);
    ulong raw1 = ReadAndClearMsr(counterBase + 1);

    NormalizedCboCounterData entry = cboData[cboIdx];
    if (entry == null)
    {
        entry = new NormalizedCboCounterData();
        cboData[cboIdx] = entry;
    }

    entry.ctr0 = raw0 * scale;
    entry.ctr1 = raw1 * scale;
    cboTotals.ctr0 += entry.ctr0;
    cboTotals.ctr1 += entry.ctr1;
}
64 |
/// <summary>
/// Pairs the two CBo totals with the supplied labels.
/// </summary>
/// <param name="ctr0">Label for the counter 0 total</param>
/// <param name="ctr1">Label for the counter 1 total</param>
/// <returns>Two (label, value) pairs</returns>
public Tuple<string, float>[] GetOverallCounterValues(string ctr0, string ctr1)
{
    return new Tuple<string, float>[]
    {
        new Tuple<string, float>(ctr0, cboTotals.ctr0),
        new Tuple<string, float>(ctr1, cboTotals.ctr1),
    };
}
72 |
/// <summary>
/// Monitoring configuration that counts L3 lookups and lookups in the I state on every CBo.
/// </summary>
public class HitrateConfig : MonitoringConfig
{
    private MeteorLakeL3 cpu;
    public string[] columns = new string[] { "Item", "Hitrate", "Hit BW", "All Lookups", "I state" };

    public HitrateConfig(MeteorLakeL3 intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() => "L3 Hitrate";
    public string GetHelpText() => "";
    public string[] GetColumns() => columns;

    /// <summary>
    /// Programs counter 0 and counter 1 of every CBo with event 0x34, umasks 0x8F and 0x88.
    /// </summary>
    public void Initialize()
    {
        cpu.EnableUncoreCounters();

        // Reusing Skylake events since Intel has not documented uncore events for arches after that
        ulong lookupsEvent = GetUncorePerfEvtSelRegisterValue(0x34, 0x8F, false, false, true, false, 0);
        ulong missesEvent = GetUncorePerfEvtSelRegisterValue(0x34, 0x88, false, false, true, false, 0);

        for (uint cbo = 0; cbo < cpu.CboCount; cbo++)
        {
            uint controlBase = MTL_UNC_CBO_CTRL + MTL_UNC_INCREMENT * cbo;
            Ring0.WriteMsr(controlBase, lookupsEvent);
            Ring0.WriteMsr(controlBase + 1, missesEvent);
        }
    }

    /// <summary>
    /// Reads every CBo and reports per-CBo and overall hitrate metrics.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.unitMetrics = new string[cpu.CboCount][];
        cpu.InitializeCboTotals();

        for (uint cbo = 0; cbo < cpu.CboCount; cbo++)
        {
            cpu.UpdateCboCounterData(cbo);
            results.unitMetrics[cbo] = computeMetrics("CBo " + cbo, cpu.cboData[cbo]);
        }

        results.overallMetrics = computeMetrics("Overall", cpu.cboTotals);
        results.overallCounterValues = cpu.GetOverallCounterValues("L3 Lookups", "L3 Misses");
        return results;
    }

    // Builds one row: ctr0 is treated as all lookups, ctr1 as misses
    private string[] computeMetrics(string label, NormalizedCboCounterData counterData)
    {
        string hitrate = string.Format("{0:F2}%", 100 * (1 - counterData.ctr1 / counterData.ctr0));
        string hitBw = FormatLargeNumber((counterData.ctr0 - counterData.ctr1) * 64) + "B/s";
        string lookups = FormatLargeNumber(counterData.ctr0);
        string misses = FormatLargeNumber(counterData.ctr1);
        return new string[] { label, hitrate, hitBw, lookups, misses };
    }
}
130 | }
131 | }
132 |
--------------------------------------------------------------------------------
/Intel/MeteorLakeUncore.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 |
4 | namespace PmcReader.Intel
5 | {
6 | // Really just a container for MSR numbers
7 | public class MeteorLakeUncore : ModernIntelCpu
8 | { // applies to cbox, arb, hac_arb, hac_cbox (hbo)
9 | public const uint MTL_UNC_INCREMENT = 0x10;
10 |
11 | // same as Ice Lake
12 | public const uint MTL_UNC_CBO_CONFIG = 0x396;
13 | public const ulong MTL_UNC_NUM_CBO_MASK = 0xF;
14 |
15 | // SNCU and CNCU provide fixed counters for clock ticks
16 | public const uint MTL_UNC_SNCU_FIXED_CTRL = 0x2002;
17 | public const uint MTL_UNC_SNCU_FIXED_CTR = 0x2008;
18 | public const uint MTL_UNC_SNCU_BOX_CTRL = 0x200e;
19 | public const uint MTL_UNC_CNCU_FIXED_CTRL = 0x2402;
20 | public const uint MTL_UNC_CNCU_FIXED_CTR = 0x2408;
21 | public const uint MTL_UNC_CNCU_BOX_CTRL = 0x240e;
22 |
23 | // System agent's arbitration queue?
24 | public const uint MTL_UNC_ARB_CTRL = 0x2412;
25 | public const uint MTL_UNC_ARB_CTR = 0x2418;
26 |
27 | // Home agent's arbitration queue? Compute tile -> SoC tile
28 | public const uint MTL_UNC_HAC_ARB_CTRL = 0x2012;
29 | public const uint MTL_UNC_HAC_ARB_CTR = 0x2018;
30 |
31 | // Home agent cbox? 2 counters
32 | public const uint MTL_UNC_HAC_CBO_CTRL = 0x2042;
33 | public const uint MTL_UNC_HAC_CBO_CTR = 0x2048;
34 |
35 | // L3 cboxes. 2x 48-bit ctrs per cbo
36 | public const uint MTL_UNC_CBO_CTRL = 0x2442;
37 | public const uint MTL_UNC_CBO_CTR = 0x2448;
38 |
/// <summary>
/// Sets the architecture name. Derived classes overwrite it with a more specific one.
/// </summary>
public MeteorLakeUncore()
{
    this.architectureName = "Meteor Lake Uncore";
}
43 |
/// <summary>
/// Enable uncore counters, with overflow propagation/freezing disabled.
/// Intentionally does nothing on Meteor Lake.
/// </summary>
public void EnableUncoreCounters()
{
    // MTL doesn't appear to have global uncore enable registers.
    // Setting the enable bit on the sNCU/cNCU fixed counter ctrl regs is enough to enable counting.
}
52 |
/// <summary>
/// Get value to put in PERFEVTSEL register, for uncore counters
/// </summary>
/// <param name="perfEvent">Perf event</param>
/// <param name="umask">Perf event qualification (umask)</param>
/// <param name="edge">Edge detect</param>
/// <param name="ovf_en">Enable overflow forwarding</param>
/// <param name="enable">Enable counter</param>
/// <param name="invert">Invert cmask</param>
/// <param name="cmask">Count mask. Only the low 6 bits are used</param>
/// <returns>value to put in perfevtsel register</returns>
public static ulong GetUncorePerfEvtSelRegisterValue(byte perfEvent,
    byte umask,
    bool edge,
    bool ovf_en,
    bool enable,
    bool invert,
    byte cmask)
{
    // The count mask (threshold) field spans bits 24-29 on Alder Lake / Meteor Lake uncore
    // counters. Masking it to 4 bits silently turned a requested threshold of 20 into 4.
    const ulong cmaskFieldMask = 0x3F;

    return perfEvent |
        (ulong)umask << 8 |
        (edge ? 1UL : 0UL) << 18 |
        (ovf_en ? 1UL : 0UL) << 20 |
        (enable ? 1UL : 0UL) << 22 |
        (invert ? 1UL : 0UL) << 23 |
        (cmask & cmaskFieldMask) << 24;
}
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/Intel/SkylakeClientArb.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 |
4 | namespace PmcReader.Intel
5 | {
6 | public class SkylakeClientArb : SkylakeClientUncore
7 | {
8 | private ulong lastUncoreClockCount;
9 |
/// <summary>
/// Names the architecture and registers three memory controller request configurations
/// that differ only in name and umask.
/// </summary>
public SkylakeClientArb()
{
    architectureName = "Skylake Client System Agent";
    lastUncoreClockCount = 0;
    monitoringConfigs = new MonitoringConfig[]
    {
        new MCRequests(this, "All MC Requests", 0x1),
        new MCRequests(this, "MC: Core Data Read", 0x2),
        new MCRequests(this, "MC: Write", 0x20),
    };
}
19 |
/// <summary>
/// Uncore clock ticks and the two ARB counters, each multiplied by the normalization factor.
/// </summary>
public class NormalizedArbCounterData
{
    public float uncoreClock;
    public float ctr0, ctr1;
}
26 |
/// <summary>
/// Reads both ARB counters and the uncore fixed counter, and scales them by the normalization factor.
/// </summary>
/// <param name="ctr0">Receives the raw value of ARB counter 0</param>
/// <param name="ctr1">Receives the raw value of ARB counter 1</param>
/// <returns>Normalized counter values for this update</returns>
public NormalizedArbCounterData UpdateArbCounterData(out ulong ctr0, out ulong ctr1)
{
    float scale = GetNormalizationFactor(0);

    ctr0 = ReadAndClearMsr(MSR_UNC_ARB_PERFCTR0);
    ctr1 = ReadAndClearMsr(MSR_UNC_ARB_PERFCTR1);
    Ring0.ReadMsr(MSR_UNC_PERF_FIXED_CTR, out ulong rawClock);

    // MSR_UNC_PERF_FIXED_CTR is 48 bits wide, upper bits are reserved
    ulong clockNow = rawClock & 0xFFFFFFFFFFFF;

    // If the counter did not advance past the previous reading, the current value is used as-is
    ulong elapsedClocks = clockNow > lastUncoreClockCount
        ? clockNow - lastUncoreClockCount
        : clockNow;
    lastUncoreClockCount = clockNow;

    NormalizedArbCounterData normalized = new NormalizedArbCounterData();
    normalized.uncoreClock = elapsedClocks * scale;
    normalized.ctr0 = ctr0 * scale;
    normalized.ctr1 = ctr1 * scale;
    return normalized;
}
48 |
/// <summary>
/// Packs the uncore clock and the two ARB counters into labeled values.
/// </summary>
/// <param name="data">Normalized counter data to report</param>
/// <param name="ctr0">Label for ARB counter 0</param>
/// <param name="ctr1">Label for ARB counter 1</param>
/// <returns>Three (label, value) pairs: uncore clk, counter 0, counter 1</returns>
public Tuple<string, float>[] GetOverallCounterValuesFromArbData(NormalizedArbCounterData data, string ctr0, string ctr1)
{
    return new Tuple<string, float>[]
    {
        new Tuple<string, float>("Uncore Clk", data.uncoreClock),
        new Tuple<string, float>(ctr0, data.ctr0),
        new Tuple<string, float>(ctr1, data.ctr1),
    };
}
57 |
/// <summary>
/// Monitoring configuration that tracks arbitration queue occupancy and request counts
/// for one umask, and derives queue length and request latency from them.
/// </summary>
public class MCRequests : MonitoringConfig
{
    private SkylakeClientArb cpu;
    private byte umask;
    private string configName;

    // Running sum of the raw request counts seen across updates
    private ulong totalReqs;

    public string[] columns = new string[] { "Clk", "Requests", "Req BW", "Q Len", "Req Latency", "Req Latency", "Total Req Data" };

    public MCRequests(SkylakeClientArb intelCpu, string configName, byte umask)
    {
        this.cpu = intelCpu;
        this.configName = configName;
        this.umask = umask;
        this.totalReqs = 0;
    }

    public string GetConfigName() => configName;
    public string GetHelpText() => "";
    public string[] GetColumns() => columns;

    /// <summary>
    /// Enables uncore counters and programs both ARB counters with this config's umask.
    /// </summary>
    public void Initialize()
    {
        cpu.EnableUncoreCounters();

        // 0x80 = increments by number of outstanding requests every cycle.
        // Counts for coherent and non-coherent requests initiated by cores, igpu, or L3.
        // Only works in counter 0.
        ulong occupancyEvent = GetUncorePerfEvtSelRegisterValue(0x80, umask, false, false, true, false, 0);
        Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL0, occupancyEvent);

        // 0x81 = number of requests
        ulong requestsEvent = GetUncorePerfEvtSelRegisterValue(0x81, umask, false, false, true, false, 0);
        Ring0.WriteMsr(MSR_UNC_ARB_PERFEVTSEL1, requestsEvent);
    }

    /// <summary>
    /// Reads the counters and reports a single overall row; no per-unit rows are produced.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.unitMetrics = null;

        // Only counter 1's raw value (requests) feeds the running total
        NormalizedArbCounterData counterData = cpu.UpdateArbCounterData(out _, out ulong rawRequests);
        totalReqs += rawRequests;

        results.overallCounterValues = cpu.GetOverallCounterValuesFromArbData(counterData, "Arb Queue Occupancy", "Reqs");

        string clk = FormatLargeNumber(counterData.uncoreClock);
        string requests = FormatLargeNumber(counterData.ctr1);
        string requestBw = FormatLargeNumber(counterData.ctr1 * 64) + "B/s";

        // Occupancy per clock gives the average queue length; occupancy per request gives latency in clocks
        string queueLength = string.Format("{0:F2}", counterData.ctr0 / counterData.uncoreClock);
        string latencyClk = string.Format("{0:F2} clk", counterData.ctr0 / counterData.ctr1);
        string latencyNs = string.Format("{0:F2} ns", (1000000000 / counterData.uncoreClock) * (counterData.ctr0 / counterData.ctr1));
        string totalData = FormatLargeNumber(totalReqs * 64) + "B";

        results.overallMetrics = new string[] { clk, requests, requestBw, queueLength, latencyClk, latencyNs, totalData };
        return results;
    }
}
115 |
/// <summary>
/// Reads a 32-bit memory-mapped counter at barAddress + addressOffset and returns how far it
/// advanced since the previous reading.
/// </summary>
/// <param name="addressOffset">Offset of the counter from barAddress</param>
/// <param name="lastValue">Previous reading of this counter; updated to the new reading on success</param>
/// <returns>Counter increase since the previous reading, or 0 if the read failed</returns>
public ulong GetImcCounterDelta(ulong addressOffset, ref ulong lastValue)
{
    uint value = 0;

    // The offset selects which counter is read. Without it every caller read the same
    // location regardless of the offset it passed.
    if (!Ring0.ReadMemory(barAddress + addressOffset, ref value))
    {
        // Leave lastValue untouched so the next successful read still yields a sane delta
        return 0;
    }

    // 32-bit modular subtraction handles a single wraparound exactly:
    // value + 2^32 - lastValue when the counter wrapped, value - lastValue otherwise
    uint delta = unchecked(value - (uint)lastValue);
    lastValue = value;
    return delta;
}
137 |
/// <summary>
/// Monitoring configuration that reports read, write and total memory traffic from the
/// memory-mapped DRAM data read/write counters.
/// </summary>
public class MemoryBandwidth : MonitoringConfig
{
    private SkylakeClientArb cpu;
    private ulong lastDataReads;
    private ulong lastDataWrites;

    // Every row produced by Update has two cells: a label and a bandwidth figure
    public string[] columns = new string[] { "Item", "Bandwidth" };

    public MemoryBandwidth(SkylakeClientArb intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() { return "Memory Bandwidth"; }
    public string GetHelpText() { return ""; }
    public string[] GetColumns() { return columns; }

    /// <summary>
    /// Resets the remembered counter readings.
    /// </summary>
    public void Initialize()
    {
        lastDataReads = 0;
        lastDataWrites = 0;
    }

    /// <summary>
    /// Reads both counters and reports read, write and total traffic at 64 bytes per count.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        // NOTE(review): the deltas are per update interval and are not scaled by a
        // normalization factor, so "B/s" is only accurate at a one second interval - confirm
        ulong reads = cpu.GetImcCounterDelta(DRAM_DATA_READS_OFFSET, ref lastDataReads);
        ulong writes = cpu.GetImcCounterDelta(DRAM_DATA_WRITES_OFFSET, ref lastDataWrites);

        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.overallMetrics = new string[] { "Total", FormatLargeNumber(64 * (reads + writes)) + "B/s" };
        results.unitMetrics = new string[][]
        {
            new string[] { "Read", FormatLargeNumber(64 * reads) + "B/s" },
            new string[] { "Write", FormatLargeNumber(64 * writes) + "B/s" },
        };
        return results;
    }
}
180 | }
181 | }
182 |
--------------------------------------------------------------------------------
/Intel/SkylakeClientL3.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Runtime.InteropServices.WindowsRuntime;
5 |
6 | namespace PmcReader.Intel
7 | {
8 | public class SkylakeClientL3 : SkylakeClientUncore
9 | {
10 | ///
11 | /// Number of L3 cache coherency boxes
12 | ///
13 | public int CboCount;
14 | public NormalizedCboCounterData[] cboData;
15 | public NormalizedCboCounterData cboTotals;
16 |
/// <summary>
/// Determines the CBo count from the CBo config MSR, sizes the per-CBo data array,
/// and registers the L3 monitoring configurations.
/// </summary>
public SkylakeClientL3()
{
    architectureName = "Skylake Client L3";

    // Intel developer manual table 2-30 says bits 0-3 encode number of C-Box:
    // "subtract one to determine number of CBo units"
    Ring0.ReadMsr(MSR_UNC_CBO_CONFIG, out ulong cboConfig);
    ulong encodedCount = cboConfig & 0xF;
    if (encodedCount == 10)
    {
        // ...but not for the 10900K? A reading of 10 is taken literally
        CboCount = 10;
    }
    else
    {
        CboCount = (int)(encodedCount - 1);
    }

    cboData = new NormalizedCboCounterData[CboCount];

    monitoringConfigs = new MonitoringConfig[]
    {
        new HitrateConfig(this),
        new SnoopHitConfig(this),
        new HitsCategoryConfig(this, "Data?", 0x80 | 0b10),
        new HitsCategoryConfig(this, "Code?", 0x80 | 0b100),
        new HitsCategoryConfig(this, "Modified", 0x80 | 0b1),
    };
}
37 |
/// <summary>
/// Per-CBo counter data: normalized values from the latest update plus running raw totals.
/// </summary>
public class NormalizedCboCounterData
{
    // Latest counter values multiplied by the normalization factor
    public float ctr0, ctr1;

    // Raw counts accumulated across updates
    public ulong ctr0Total, ctr1Total;
}
45 |
/// <summary>
/// Creates the totals object on first use and zeroes the normalized totals.
/// The running raw totals (ctr0Total/ctr1Total) are left untouched.
/// </summary>
public void InitializeCboTotals()
{
    NormalizedCboCounterData totals = cboTotals ?? (cboTotals = new NormalizedCboCounterData());
    totals.ctr0 = 0;
    totals.ctr1 = 0;
}
56 |
/// <summary>
/// Reads both counters of one CBo, stores normalized values and running raw totals in cboData,
/// and adds the same amounts to cboTotals.
/// InitializeCboTotals must have been called first, since cboTotals is dereferenced unconditionally.
/// </summary>
/// <param name="cboIdx">Index of the CBo to read</param>
public void UpdateCboCounterData(uint cboIdx)
{
    float scale = GetNormalizationFactor((int)cboIdx);

    // Each CBo's registers sit MSR_UNC_CBO_increment MSRs after the previous CBo's
    uint cboOffset = MSR_UNC_CBO_increment * cboIdx;
    ulong raw0 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR0_base + cboOffset);
    ulong raw1 = ReadAndClearMsr(MSR_UNC_CBO_PERFCTR1_base + cboOffset);

    NormalizedCboCounterData entry = cboData[cboIdx];
    if (entry == null)
    {
        entry = new NormalizedCboCounterData();
        cboData[cboIdx] = entry;
    }

    entry.ctr0 = raw0 * scale;
    entry.ctr1 = raw1 * scale;
    entry.ctr0Total += raw0;
    entry.ctr1Total += raw1;

    cboTotals.ctr0 += entry.ctr0;
    cboTotals.ctr1 += entry.ctr1;
    cboTotals.ctr0Total += raw0;
    cboTotals.ctr1Total += raw1;
}
77 |
/// <summary>
/// Pairs the two normalized CBo totals with the supplied labels.
/// </summary>
/// <param name="ctr0">Label for the counter 0 total</param>
/// <param name="ctr1">Label for the counter 1 total</param>
/// <returns>Two (label, value) pairs</returns>
public Tuple<string, float>[] GetOverallCounterValues(string ctr0, string ctr1)
{
    return new Tuple<string, float>[]
    {
        new Tuple<string, float>(ctr0, cboTotals.ctr0),
        new Tuple<string, float>(ctr1, cboTotals.ctr1),
    };
}
85 |
/// <summary>
/// Monitoring configuration that counts L3 lookups and lookups in the I state on every CBo.
/// </summary>
public class HitrateConfig : MonitoringConfig
{
    private SkylakeClientL3 cpu;
    public string[] columns = new string[] { "Item", "Hitrate", "Hit BW", "All Lookups", "I state", "Total Hit Data" };

    public HitrateConfig(SkylakeClientL3 intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() => "L3 Hitrate";
    public string GetHelpText() => "";
    public string[] GetColumns() => columns;

    /// <summary>
    /// Enables uncore counters and programs both counters of every CBo with event 0x34.
    /// </summary>
    public void Initialize()
    {
        cpu.EnableUncoreCounters();

        // Event 0x34 = uncore cbo cache lookup. Umask bits:
        //   Bit 0 = Modified state
        //   Bit 1, 2 = Exclusive, Shared states
        //   Bit 3 = Invalid state (miss)
        //   Bit 4 = Read
        //   Bit 5 = Write
        //   Bit 6 = ???
        //   Bit 7 = Any
        // Counter 0: lookups in any of the M, E, S or I states
        ulong lookupsEvent = GetUncorePerfEvtSelRegisterValue(0x34, 0x8F, false, false, true, false, 0);

        // Counter 1: high 4 bits = cacheable read | cacheable write | external snoop | irq/ipq,
        // low 4 bits = M | ES | I, so select I to count misses
        ulong missesEvent = GetUncorePerfEvtSelRegisterValue(0x34, 0x88, false, false, true, false, 0);

        for (uint cbo = 0; cbo < cpu.CboCount; cbo++)
        {
            uint cboOffset = MSR_UNC_CBO_increment * cbo;
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + cboOffset, lookupsEvent);
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + cboOffset, missesEvent);
        }
    }

    /// <summary>
    /// Reads every CBo and reports per-CBo and overall hitrate metrics.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.unitMetrics = new string[cpu.CboCount][];
        cpu.InitializeCboTotals();

        for (uint cbo = 0; cbo < cpu.CboCount; cbo++)
        {
            cpu.UpdateCboCounterData(cbo);
            results.unitMetrics[cbo] = computeMetrics("CBo " + cbo, cpu.cboData[cbo]);
        }

        results.overallMetrics = computeMetrics("Overall", cpu.cboTotals);
        results.overallCounterValues = cpu.GetOverallCounterValues("L3 Lookups", "L3 Misses");
        return results;
    }

    // Builds one row: ctr0 is treated as all lookups, ctr1 as misses
    private string[] computeMetrics(string label, NormalizedCboCounterData counterData)
    {
        string hitrate = string.Format("{0:F2}%", 100 * (1 - counterData.ctr1 / counterData.ctr0));
        string hitBw = FormatLargeNumber((counterData.ctr0 - counterData.ctr1) * 64) + "B/s";
        string lookups = FormatLargeNumber(counterData.ctr0);
        string misses = FormatLargeNumber(counterData.ctr1);
        string totalHitData = FormatLargeNumber((counterData.ctr0Total - counterData.ctr1Total) * 64) + "B";
        return new string[] { label, hitrate, hitBw, lookups, misses, totalHitData };
    }
}
155 |
/// <summary>
/// Monitoring configuration that compares all L3 hits against hits selected by a caller-supplied umask.
/// </summary>
public class HitsCategoryConfig : MonitoringConfig
{
    private SkylakeClientL3 cpu;
    private string category;
    private byte umask;
    public string[] columns;

    public HitsCategoryConfig(SkylakeClientL3 intelCpu, string category, byte umask)
    {
        this.cpu = intelCpu;
        this.category = category;
        this.umask = umask;
        this.columns = new string[] { "Item", "Hit BW", category + " Hit BW", "% " + category + " hits" };
    }

    public string GetConfigName() => "L3 Hits, " + category;
    public string GetHelpText() => "";
    public string[] GetColumns() => columns;

    /// <summary>
    /// Enables uncore counters and programs every CBo: counter 0 with the all-hits umask,
    /// counter 1 with this config's umask.
    /// </summary>
    public void Initialize()
    {
        cpu.EnableUncoreCounters();

        // Event 0x34 = uncore cbo cache lookup. Umask bits:
        //   Bit 0 = Modified state
        //   Bit 1, 2 = Exclusive, Shared states
        //   Bit 3 = Invalid state (miss)
        //   Bit 4 = Read
        //   Bit 5 = Write
        //   Bit 6 = ???
        //   Bit 7 = Any
        // L3 hits: any request that found the line in the M, E or S state
        ulong allHitsEvent = GetUncorePerfEvtSelRegisterValue(0x34, 0x80 | 0b111, false, false, true, false, 0);

        // The category under test, e.g. a single state bit (E or S?)
        ulong categoryEvent = GetUncorePerfEvtSelRegisterValue(0x34, umask, false, false, true, false, 0);

        for (uint cbo = 0; cbo < cpu.CboCount; cbo++)
        {
            uint cboOffset = MSR_UNC_CBO_increment * cbo;
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + cboOffset, allHitsEvent);
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + cboOffset, categoryEvent);
        }
    }

    /// <summary>
    /// Reads every CBo and reports per-CBo and overall hit metrics.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.unitMetrics = new string[cpu.CboCount][];
        cpu.InitializeCboTotals();

        for (uint cbo = 0; cbo < cpu.CboCount; cbo++)
        {
            cpu.UpdateCboCounterData(cbo);
            results.unitMetrics[cbo] = computeMetrics("CBo " + cbo, cpu.cboData[cbo]);
        }

        results.overallMetrics = computeMetrics("Overall", cpu.cboTotals);
        results.overallCounterValues = cpu.GetOverallCounterValues("L3 Hits", this.category + " L3 Hits");
        return results;
    }

    // Builds one row: ctr0 is all hits, ctr1 is hits in the selected category
    private string[] computeMetrics(string label, NormalizedCboCounterData counterData)
    {
        string hitBw = FormatLargeNumber(counterData.ctr0 * 64) + "B/s";
        string categoryHitBw = FormatLargeNumber(counterData.ctr1 * 64) + "B/s";
        string categoryShare = FormatPercentage(counterData.ctr1, counterData.ctr0);
        return new string[] { label, hitBw, categoryHitBw, categoryShare };
    }
}
229 |
/// <summary>
/// Monitoring configuration that counts snoops sent by each CBo that hit a non-modified line
/// (counter 0) or a modified line (counter 1).
/// </summary>
public class SnoopHitConfig : MonitoringConfig
{
    private SkylakeClientL3 cpu;
    public string[] columns = new string[] { "Item", "Snoop Hit BW", "Snoop Hit(M) BW", "Snoop Hit(non-M) BW", "Snoop Hits" };

    public SnoopHitConfig(SkylakeClientL3 intelCpu)
    {
        cpu = intelCpu;
    }

    public string GetConfigName() { return "Snoop Hits"; }
    public string GetHelpText() { return ""; }
    public string[] GetColumns() { return columns; }

    /// <summary>
    /// Pins the calling thread to the first logical processor, enables uncore counters,
    /// and programs both counters of every CBo with event 0x22.
    /// </summary>
    public void Initialize()
    {
        ThreadAffinity.Set(0x1);
        cpu.EnableUncoreCounters();
        for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
        {
            // Counter 0: CBo sent a snoop that hit a non-modified line
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx,
                GetUncorePerfEvtSelRegisterValue(0x22, 0x44, false, false, true, false, 0));

            // Counter 1: CBo sent a snoop that hit a modified line
            Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx,
                GetUncorePerfEvtSelRegisterValue(0x22, 0x48, false, false, true, false, 0));
        }
    }

    /// <summary>
    /// Reads every CBo and reports per-CBo and overall snoop hit metrics.
    /// </summary>
    public MonitoringUpdateResults Update()
    {
        MonitoringUpdateResults results = new MonitoringUpdateResults();
        results.unitMetrics = new string[cpu.CboCount][];
        cpu.InitializeCboTotals();
        ThreadAffinity.Set(0x1);
        for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
        {
            cpu.UpdateCboCounterData(cboIdx);
            results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]);
        }

        results.overallMetrics = computeMetrics("Overall", cpu.cboTotals);
        return results;
    }

    // Builds one row in column order: total BW, modified-hit BW, non-modified-hit BW, total hits.
    // ctr1 holds modified hits and ctr0 non-modified hits, so ctr1 goes under "Snoop Hit(M) BW";
    // emitting ctr0 first put each value under the other's header.
    private string[] computeMetrics(string label, NormalizedCboCounterData counterData)
    {
        float nonModifiedHits = counterData.ctr0;
        float modifiedHits = counterData.ctr1;
        return new string[] { label,
            FormatLargeNumber((nonModifiedHits + modifiedHits) * 64) + "B/s",
            FormatLargeNumber(modifiedHits * 64) + "B/s",
            FormatLargeNumber(nonModifiedHits * 64) + "B/s",
            FormatLargeNumber(nonModifiedHits + modifiedHits)};
    }
}
290 | }
291 | }
292 |
--------------------------------------------------------------------------------
/Intel/SkylakeClientUncore.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 |
4 | namespace PmcReader.Intel
5 | {
6 | public class SkylakeClientUncore : ModernIntelCpu
7 | {
8 | public const uint MSR_UNC_PERF_GLOBAL_CTRL = 0xE01;
9 | public const uint MSR_UNC_PERF_FIXED_CTRL = 0x394;
10 | public const uint MSR_UNC_PERF_FIXED_CTR = 0x395;
11 | public const uint MSR_UNC_CBO_CONFIG = 0x396;
12 | public const uint MSR_UNC_CBO_PERFEVTSEL0_base = 0x700;
13 | public const uint MSR_UNC_CBO_PERFEVTSEL1_base = 0x701;
14 | public const uint MSR_UNC_CBO_PERFCTR0_base = 0x706;
15 | public const uint MSR_UNC_CBO_PERFCTR1_base = 0x707;
16 | public const uint MSR_UNC_ARB_PERFCTR0 = 0x3B0;
17 | public const uint MSR_UNC_ARB_PERFCTR1 = 0x3B1;
18 | public const uint MSR_UNC_ARB_PERFEVTSEL0 = 0x3B2;
19 | public const uint MSR_UNC_ARB_PERFEVTSEL1 = 0x3B3;
20 | public const uint MSR_UNC_CBO_increment = 0x10;
21 |
22 | public const ulong BAR_MASK = 0x0007FFFFF8000;
23 | public const uint DRAM_GT_REQUESTS_OFFSET = 0x5040;
24 | public const uint DRAM_IA_REQUESTS_OFFSET = 0x5044;
25 | public const uint DRAM_IO_REQUESTS_OFFSET = 0x5048;
26 | public const uint DRAM_DATA_READS_OFFSET = 0x5050;
27 | public const uint DRAM_DATA_WRITES_OFFSET = 0x5054;
28 |
29 | public ulong barAddress;
30 |
/// <summary>
/// Sets the architecture name and derives barAddress from PCI config offset 0x48 of device 0:0.0.
/// </summary>
public SkylakeClientUncore()
{
    architectureName = "Skylake Client Uncore";

    // Only the low dword is read, then BAR_MASK is applied to get the base address.
    // NOTE(review): presumably this is the host bridge's MCHBAR register - confirm
    var pciAddress = Ring0.GetPciAddress(0, 0, 0);
    Ring0.ReadPciConfig(pciAddress, 0x48, out uint barLow);
    barAddress = (ulong)barLow & BAR_MASK;
}
37 |
/// <summary>
/// Enable skylake uncore counters, with overflow propagation/freezing disabled
/// </summary>
public void EnableUncoreCounters()
{
    // Bit 29 globally enables all PMU counters. Local counters still have to be
    // individually enabled. Other bits have to do with PMI or are reserved.
    const ulong globalEnable = 1UL << 29;
    Ring0.WriteMsr(MSR_UNC_PERF_GLOBAL_CTRL, globalEnable);

    // Bit 22 locally enables the fixed counter
    const ulong fixedCounterEnable = 1UL << 22;
    Ring0.WriteMsr(MSR_UNC_PERF_FIXED_CTRL, fixedCounterEnable);
}
53 |
54 | ///
55 | /// Get value to put in PERFEVTSEL register, for uncore counters
56 | ///
57 | /// Perf event
58 | /// Perf event qualification (umask)
59 | /// Edge detect
60 | /// Enable overflow forwarding
61 | /// Enable counter
62 | /// Invert cmask
63 | /// Count mask
64 | /// value to put in perfevtsel register
65 | public static ulong GetUncorePerfEvtSelRegisterValue(byte perfEvent,
66 | byte umask,
67 | bool edge,
68 | bool ovf_en,
69 | bool enable,
70 | bool invert,
71 | byte cmask)
72 | {
73 | return perfEvent |
74 | (ulong)umask << 8 |
75 | (edge ? 1UL : 0UL) << 18 |
76 | (ovf_en ? 1UL : 0UL) << 20 |
77 | (enable ? 1UL : 0UL) << 22 |
78 | (invert ? 1UL : 0UL) << 23 |
79 | (ulong)(cmask & 0xF) << 24;
80 | }
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/Interop/AdvApi32.cs:
--------------------------------------------------------------------------------
1 | // From LibreHardwareMonitor
2 | // Mozilla Public License 2.0
3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 | // Copyright (C) LibreHardwareMonitor and Contributors
5 | // All Rights Reserved
6 |
7 | using System;
8 | using System.Runtime.InteropServices;
9 |
10 | // ReSharper disable InconsistentNaming
11 |
12 | namespace PmcReader.Interop
13 | {
/// <summary>
/// P/Invoke declarations for the advapi32.dll service control manager functions,
/// together with the enums and status structure they use.
/// </summary>
internal class AdvApi32
{
    private const string DllName = "advapi32.dll";

    // Every import that takes a string specifies CharSet.Unicode so the wide (W) entry
    // point is used. With the default (ANSI) marshaling, a service name or driver path
    // containing characters outside the system code page would be mangled.

    /// <summary>Opens a service control manager database; returns IntPtr.Zero on failure.</summary>
    [DllImport(DllName, CallingConvention = CallingConvention.Winapi, CharSet = CharSet.Unicode)]
    internal static extern IntPtr OpenSCManager(string lpMachineName, string lpDatabaseName, SC_MANAGER_ACCESS_MASK dwDesiredAccess);

    /// <summary>Closes a handle obtained from OpenSCManager, CreateService or OpenService.</summary>
    [DllImport(DllName, CallingConvention = CallingConvention.Winapi)]
    [return: MarshalAs(UnmanagedType.Bool)]
    internal static extern bool CloseServiceHandle(IntPtr hSCObject);

    /// <summary>Creates a service object; returns IntPtr.Zero on failure (last error is set).</summary>
    [DllImport(DllName, CallingConvention = CallingConvention.Winapi, CharSet = CharSet.Unicode, SetLastError = true)]
    internal static extern IntPtr CreateService
    (
        IntPtr hSCManager,
        string lpServiceName,
        string lpDisplayName,
        SERVICE_ACCESS_MASK dwDesiredAccess,
        SERVICE_TYPE dwServiceType,
        SERVICE_START dwStartType,
        SERVICE_ERROR dwErrorControl,
        string lpBinaryPathName,
        string lpLoadOrderGroup,
        string lpdwTagId,
        string lpDependencies,
        string lpServiceStartName,
        string lpPassword);

    /// <summary>Opens an existing service; returns IntPtr.Zero on failure (last error is set).</summary>
    [DllImport(DllName, CallingConvention = CallingConvention.Winapi, CharSet = CharSet.Unicode, SetLastError = true)]
    internal static extern IntPtr OpenService(IntPtr hSCManager, string lpServiceName, SERVICE_ACCESS_MASK dwDesiredAccess);

    /// <summary>Marks the service for deletion from the service control manager database.</summary>
    [DllImport(DllName, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    internal static extern bool DeleteService(IntPtr hService);

    /// <summary>Starts the service, optionally passing argument strings.</summary>
    [DllImport(DllName, CallingConvention = CallingConvention.Winapi, CharSet = CharSet.Unicode, SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    internal static extern bool StartService(IntPtr hService, uint dwNumServiceArgs, string[] lpServiceArgVectors);

    /// <summary>Sends a control code to the service and receives its latest status.</summary>
    [DllImport(DllName, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    internal static extern bool ControlService(IntPtr hService, SERVICE_CONTROL dwControl, ref SERVICE_STATUS lpServiceStatus);

    /// <summary>Access rights for the service control manager.</summary>
    [Flags]
    internal enum SC_MANAGER_ACCESS_MASK : uint
    {
        SC_MANAGER_CONNECT = 0x00001,
        SC_MANAGER_CREATE_SERVICE = 0x00002,
        SC_MANAGER_ENUMERATE_SERVICE = 0x00004,
        SC_MANAGER_LOCK = 0x00008,
        SC_MANAGER_QUERY_LOCK_STATUS = 0x00010,
        SC_MANAGER_MODIFY_BOOT_CONFIG = 0x00020,
        SC_MANAGER_ALL_ACCESS = 0xF003F
    }

    /// <summary>Access rights for a service object (a bit mask, like SC_MANAGER_ACCESS_MASK).</summary>
    [Flags]
    internal enum SERVICE_ACCESS_MASK : uint
    {
        SERVICE_QUERY_CONFIG = 0x00001,
        SERVICE_CHANGE_CONFIG = 0x00002,
        SERVICE_QUERY_STATUS = 0x00004,
        SERVICE_ENUMERATE_DEPENDENTS = 0x00008,
        SERVICE_START = 0x00010,
        SERVICE_STOP = 0x00020,
        SERVICE_PAUSE_CONTINUE = 0x00040,
        SERVICE_INTERROGATE = 0x00080,
        SERVICE_USER_DEFINED_CONTROL = 0x00100,
        SERVICE_ALL_ACCESS = 0xF01FF
    }

    /// <summary>Service type values passed to CreateService.</summary>
    internal enum SERVICE_TYPE : uint
    {
        SERVICE_DRIVER = 0x0000000B,
        SERVICE_WIN32 = 0x00000030,
        SERVICE_ADAPTER = 0x00000004,
        SERVICE_FILE_SYSTEM_DRIVER = 0x00000002,
        SERVICE_KERNEL_DRIVER = 0x00000001,
        SERVICE_RECOGNIZER_DRIVER = 0x00000008,
        SERVICE_WIN32_OWN_PROCESS = 0x00000010,
        SERVICE_WIN32_SHARE_PROCESS = 0x00000020,
        SERVICE_USER_OWN_PROCESS = 0x00000050,
        SERVICE_USER_SHARE_PROCESS = 0x00000060,
        SERVICE_INTERACTIVE_PROCESS = 0x00000100
    }

    /// <summary>Service start options passed to CreateService.</summary>
    internal enum SERVICE_START : uint
    {
        SERVICE_BOOT_START = 0,
        SERVICE_SYSTEM_START = 1,
        SERVICE_AUTO_START = 2,
        SERVICE_DEMAND_START = 3,
        SERVICE_DISABLED = 4
    }

    /// <summary>Error control levels passed to CreateService.</summary>
    internal enum SERVICE_ERROR : uint
    {
        SERVICE_ERROR_IGNORE = 0,
        SERVICE_ERROR_NORMAL = 1,
        SERVICE_ERROR_SEVERE = 2,
        SERVICE_ERROR_CRITICAL = 3
    }

    /// <summary>Control codes passed to ControlService.</summary>
    internal enum SERVICE_CONTROL : uint
    {
        SERVICE_CONTROL_STOP = 1,
        SERVICE_CONTROL_PAUSE = 2,
        SERVICE_CONTROL_CONTINUE = 3,
        SERVICE_CONTROL_INTERROGATE = 4,
        SERVICE_CONTROL_SHUTDOWN = 5,
        SERVICE_CONTROL_PARAMCHANGE = 6,
        SERVICE_CONTROL_NETBINDADD = 7,
        SERVICE_CONTROL_NETBINDREMOVE = 8,
        SERVICE_CONTROL_NETBINDENABLE = 9,
        SERVICE_CONTROL_NETBINDDISABLE = 10,
        SERVICE_CONTROL_DEVICEEVENT = 11,
        SERVICE_CONTROL_HARDWAREPROFILECHANGE = 12,
        SERVICE_CONTROL_POWEREVENT = 13,
        SERVICE_CONTROL_SESSIONCHANGE = 14
    }

    /// <summary>Status block filled in by ControlService: seven consecutive 32-bit fields.</summary>
    [StructLayout(LayoutKind.Sequential, Pack = 1)]
    internal struct SERVICE_STATUS
    {
        public uint dwServiceType;
        public uint dwCurrentState;
        public uint dwControlsAccepted;
        public uint dwWin32ExitCode;
        public uint dwServiceSpecificExitCode;
        public uint dwCheckPoint;
        public uint dwWaitHint;
    }
}
145 | }
146 |
--------------------------------------------------------------------------------
/Interop/KernelDriver.cs:
--------------------------------------------------------------------------------
1 | // From LibreHardwareMonitor
2 | // Mozilla Public License 2.0
3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 | // Copyright (C) LibreHardwareMonitor and Contributors
5 | // All Rights Reserved
6 |
7 | using System;
8 | using System.IO;
9 | using System.Runtime.InteropServices;
10 | using System.Security.AccessControl;
11 | using Microsoft.Win32.SafeHandles;
12 |
13 | namespace PmcReader.Interop
14 | {
/// <summary>
/// Installs, opens, talks to and removes a kernel driver service identified by a
/// service/device id, using the service control manager and a device handle.
/// </summary>
internal class KernelDriver
{
    private readonly string _id;
    private SafeFileHandle _device;

    /// <summary>
    /// Win32 error code captured when the most recent generic DeviceIOControl call failed.
    /// </summary>
    public int lastError;

    /// <summary>Creates a wrapper for the driver with the given service/device id.</summary>
    /// <param name="id">Name used both as the service name and as the \\.\ device name</param>
    public KernelDriver(string id)
    {
        _id = id;
    }

    /// <summary>True while a device handle obtained by Open is held.</summary>
    public bool IsOpen
    {
        get { return _device != null; }
    }

    /// <summary>
    /// Registers the driver at <paramref name="path"/> as a demand-start kernel driver
    /// service and starts it.
    /// </summary>
    /// <param name="path">Path of the driver binary</param>
    /// <param name="errorMessage">Description of the failure, or null on success</param>
    /// <returns>True if the service was created and is running</returns>
    public bool Install(string path, out string errorMessage)
    {
        IntPtr manager = AdvApi32.OpenSCManager(null, null, AdvApi32.SC_MANAGER_ACCESS_MASK.SC_MANAGER_ALL_ACCESS);
        if (manager == IntPtr.Zero)
        {
            errorMessage = "OpenSCManager returned zero.";
            return false;
        }

        // try/finally guarantees the SCM handle is released on every exit path. The
        // "Service already exists" path used to return without closing it.
        try
        {
            IntPtr service = AdvApi32.CreateService(manager,
                _id,
                _id,
                AdvApi32.SERVICE_ACCESS_MASK.SERVICE_ALL_ACCESS,
                AdvApi32.SERVICE_TYPE.SERVICE_KERNEL_DRIVER,
                AdvApi32.SERVICE_START.SERVICE_DEMAND_START,
                AdvApi32.SERVICE_ERROR.SERVICE_ERROR_NORMAL,
                path,
                null,
                null,
                null,
                null,
                null);

            if (service == IntPtr.Zero)
            {
                // Capture the error once, before any other call can overwrite it
                int createError = Marshal.GetHRForLastWin32Error();
                if (createError == Kernel32.ERROR_SERVICE_EXISTS)
                {
                    errorMessage = "Service already exists";
                    return false;
                }

                errorMessage = "CreateService returned the error: " + Marshal.GetExceptionForHR(createError).Message;
                return false;
            }

            try
            {
                if (!AdvApi32.StartService(service, 0, null))
                {
                    int startError = Marshal.GetHRForLastWin32Error();

                    // An already running service is not a failure
                    if (startError != Kernel32.ERROR_SERVICE_ALREADY_RUNNING)
                    {
                        errorMessage = "StartService returned the error: " + Marshal.GetExceptionForHR(startError).Message;
                        return false;
                    }
                }
            }
            finally
            {
                AdvApi32.CloseServiceHandle(service);
            }
        }
        finally
        {
            AdvApi32.CloseServiceHandle(manager);
        }

#if !NETSTANDARD2_0
        try
        {
            // restrict the driver access to system (SY) and builtin admins (BA)
            // TODO: replace with a call to IoCreateDeviceSecure in the driver
            FileSecurity fileSecurity = File.GetAccessControl(@"\\.\" + _id);
            fileSecurity.SetSecurityDescriptorSddlForm("O:BAG:SYD:(A;;FA;;;SY)(A;;FA;;;BA)");
            File.SetAccessControl(@"\\.\" + _id, fileSecurity);
        }
        catch
        {
            // Best effort: tightening the ACL is optional and must not fail the install
        }
#endif
        errorMessage = null;
        return true;
    }

    /// <summary>Opens a read/write handle to the driver's \\.\ device.</summary>
    /// <returns>True if a valid handle was obtained</returns>
    public bool Open()
    {
        // 0xC0000000 = GENERIC_READ | GENERIC_WRITE
        _device = new SafeFileHandle(Kernel32.CreateFile(@"\\.\" + _id, 0xC0000000, FileShare.ReadWrite, IntPtr.Zero, FileMode.Open, FileAttributes.Normal, IntPtr.Zero), true);
        if (_device.IsInvalid)
        {
            _device.Close();
            _device.Dispose();
            _device = null;
        }

        return _device != null;
    }

    /// <summary>Sends an IO control request that has no output buffer.</summary>
    /// <param name="ioControlCode">Control code understood by the driver</param>
    /// <param name="inBuffer">Input structure, or null for none</param>
    /// <returns>False if the device is not open or the request failed</returns>
    public bool DeviceIOControl(Kernel32.IOControlCode ioControlCode, object inBuffer)
    {
        if (_device == null)
            return false;

        bool b = Kernel32.DeviceIoControl(_device, ioControlCode, inBuffer, inBuffer == null ? 0 : (uint)Marshal.SizeOf(inBuffer), null, 0, out uint _, IntPtr.Zero);
        return b;
    }

    /// <summary>
    /// Sends an IO control request and copies the driver's reply into
    /// <paramref name="outBuffer"/>. On failure the Win32 error is stored in lastError.
    /// </summary>
    /// <typeparam name="T">Type of the output structure</typeparam>
    /// <param name="ioControlCode">Control code understood by the driver</param>
    /// <param name="inBuffer">Input structure, or null for none</param>
    /// <param name="outBuffer">Receives the output; its marshaled size is the output buffer size</param>
    /// <returns>False if the device is not open or the request failed</returns>
    public bool DeviceIOControl<T>(Kernel32.IOControlCode ioControlCode, object inBuffer, ref T outBuffer)
    {
        if (_device == null)
            return false;

        // Box once so the same object is passed to the call and unboxed afterwards
        object boxedOutBuffer = outBuffer;
        bool b = Kernel32.DeviceIoControl(_device,
            ioControlCode,
            inBuffer,
            inBuffer == null ? 0 : (uint)Marshal.SizeOf(inBuffer),
            boxedOutBuffer,
            (uint)Marshal.SizeOf(boxedOutBuffer),
            out uint _,
            IntPtr.Zero);

        if (!b)
        {
            int error = Marshal.GetLastWin32Error();
            lastError = error;
        }

        outBuffer = (T)boxedOutBuffer;
        return b;
    }

    /// <summary>Releases the device handle, if one is held.</summary>
    public void Close()
    {
        if (_device != null)
        {
            _device.Close();
            _device.Dispose();
            _device = null;
        }
    }

    /// <summary>Stops the driver service and marks it for deletion.</summary>
    /// <returns>
    /// False only if the service control manager could not be opened. True otherwise,
    /// including when the service does not exist.
    /// </returns>
    public bool Delete()
    {
        IntPtr manager = AdvApi32.OpenSCManager(null, null, AdvApi32.SC_MANAGER_ACCESS_MASK.SC_MANAGER_ALL_ACCESS);
        if (manager == IntPtr.Zero)
            return false;

        // try/finally releases the SCM handle even when the service cannot be opened.
        // That path used to return without closing it.
        try
        {
            IntPtr service = AdvApi32.OpenService(manager, _id, AdvApi32.SERVICE_ACCESS_MASK.SERVICE_ALL_ACCESS);
            if (service == IntPtr.Zero)
                return true;

            try
            {
                AdvApi32.SERVICE_STATUS status = new AdvApi32.SERVICE_STATUS();
                AdvApi32.ControlService(service, AdvApi32.SERVICE_CONTROL.SERVICE_CONTROL_STOP, ref status);
                AdvApi32.DeleteService(service);
            }
            finally
            {
                AdvApi32.CloseServiceHandle(service);
            }

            return true;
        }
        finally
        {
            AdvApi32.CloseServiceHandle(manager);
        }
    }
}
178 | }
179 |
--------------------------------------------------------------------------------
/Interop/OpCode.cs:
--------------------------------------------------------------------------------
1 | // From LibreHardwareMonitor
2 | // Mozilla Public License 2.0
3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 | // Copyright (C) LibreHardwareMonitor and Contributors
5 | // All Rights Reserved
6 |
7 | using System;
8 | using System.Reflection;
9 | using System.Runtime.InteropServices;
10 |
11 | namespace PmcReader.Interop
12 | {
/// <summary>
/// Hosts small hand-assembled routines for the cpuid and rdtsc instructions in an
/// executable buffer and exposes them as delegates, plus helpers built on cpuid.
/// </summary>
internal static class OpCode
{
    [UnmanagedFunctionPointer(CallingConvention.StdCall)]
    public delegate bool CpuidDelegate(uint index, uint ecxValue, out uint eax, out uint ebx, out uint ecx, out uint edx);

    [UnmanagedFunctionPointer(CallingConvention.StdCall)]
    public delegate ulong RdtscDelegate();

    public static CpuidDelegate Cpuid;
    public static RdtscDelegate Rdtsc;

    private static IntPtr _codeBuffer;
    private static ulong _size;

    // void __stdcall cpuidex(unsigned int index, unsigned int ecxValue,
    //                        unsigned int* eax, unsigned int* ebx, unsigned int* ecx,
    //                        unsigned int* edx)
    // {
    //     int info[4];
    //     __cpuidex(info, index, ecxValue);
    //     *eax = info[0];
    //     *ebx = info[1];
    //     *ecx = info[2];
    //     *edx = info[3];
    // }

    // One instruction per line; info[] occupies [ebp-10h .. ebp-4]
    private static readonly byte[] CpuId32 =
    {
        0x55,                   // push ebp
        0x8B, 0xEC,             // mov ebp, esp
        0x83, 0xEC, 0x10,       // sub esp, 10h
        0x8B, 0x45, 0x08,       // mov eax, dword ptr [ebp+8]      (index)
        0x8B, 0x4D, 0x0C,       // mov ecx, dword ptr [ebp+0Ch]    (ecxValue)
        0x53,                   // push ebx
        0x0F, 0xA2,             // cpuid
        0x56,                   // push esi
        0x8D, 0x75, 0xF0,       // lea esi, [ebp-10h]              (info)
        0x89, 0x06,             // mov dword ptr [esi], eax
        0x8B, 0x45, 0x10,       // mov eax, dword ptr [ebp+10h]    (eax out pointer)
        0x89, 0x5E, 0x04,       // mov dword ptr [esi+4], ebx
        0x89, 0x4E, 0x08,       // mov dword ptr [esi+8], ecx
        0x89, 0x56, 0x0C,       // mov dword ptr [esi+0Ch], edx
        0x8B, 0x4D, 0xF0,       // mov ecx, dword ptr [ebp-10h]    (info[0])
        0x89, 0x08,             // mov dword ptr [eax], ecx
        0x8B, 0x45, 0x14,       // mov eax, dword ptr [ebp+14h]    (ebx out pointer)
        0x8B, 0x4D, 0xF4,       // mov ecx, dword ptr [ebp-0Ch]    (info[1])
        0x89, 0x08,             // mov dword ptr [eax], ecx
        0x8B, 0x45, 0x18,       // mov eax, dword ptr [ebp+18h]    (ecx out pointer)
        0x8B, 0x4D, 0xF8,       // mov ecx, dword ptr [ebp-8]      (info[2])
        0x89, 0x08,             // mov dword ptr [eax], ecx
        0x8B, 0x45, 0x1C,       // mov eax, dword ptr [ebp+1Ch]    (edx out pointer)
        0x8B, 0x4D, 0xFC,       // mov ecx, dword ptr [ebp-4]      (info[3])
        0x5E,                   // pop esi
        0x89, 0x08,             // mov dword ptr [eax], ecx
        0x5B,                   // pop ebx
        0xC9,                   // leave
        0xC2, 0x18, 0x00        // ret 18h
    };

    // Not selected by Open(), which always uses the Windows variant on 64-bit
    private static readonly byte[] CpuId64Linux =
    {
        0x49, 0x89, 0xD2,       // mov r10, rdx
        0x49, 0x89, 0xCB,       // mov r11, rcx
        0x53,                   // push rbx
        0x89, 0xF8,             // mov eax, edi
        0x89, 0xF1,             // mov ecx, esi
        0x0F, 0xA2,             // cpuid
        0x41, 0x89, 0x02,       // mov dword ptr [r10], eax
        0x41, 0x89, 0x1B,       // mov dword ptr [r11], ebx
        0x41, 0x89, 0x08,       // mov dword ptr [r8], ecx
        0x41, 0x89, 0x11,       // mov dword ptr [r9], edx
        0x5B,                   // pop rbx
        0xC3                    // ret
    };

    private static readonly byte[] CpuId64Windows =
    {
        0x48, 0x89, 0x5C, 0x24, 0x08,   // mov qword ptr [rsp+8], rbx
        0x8B, 0xC1,                     // mov eax, ecx
        0x8B, 0xCA,                     // mov ecx, edx
        0x0F, 0xA2,                     // cpuid
        0x41, 0x89, 0x00,               // mov dword ptr [r8], eax
        0x48, 0x8B, 0x44, 0x24, 0x28,   // mov rax, qword ptr [rsp+28h]
        0x41, 0x89, 0x19,               // mov dword ptr [r9], ebx
        0x48, 0x8B, 0x5C, 0x24, 0x08,   // mov rbx, qword ptr [rsp+8]
        0x89, 0x08,                     // mov dword ptr [rax], ecx
        0x48, 0x8B, 0x44, 0x24, 0x30,   // mov rax, qword ptr [rsp+30h]
        0x89, 0x10,                     // mov dword ptr [rax], edx
        0xC3                            // ret
    };

    // unsigned __int64 __stdcall rdtsc() {
    //     return __rdtsc();
    // }

    private static readonly byte[] Rdtsc32 =
    {
        0x0F, 0x31,             // rdtsc
        0xC3                    // ret
    };

    private static readonly byte[] Rdtsc64 =
    {
        0x0F, 0x31,             // rdtsc
        0x48, 0xC1, 0xE2, 0x20, // shl rdx, 20h
        0x48, 0x0B, 0xC2,       // or rax, rdx
        0xC3                    // ret
    };

    /// <summary>
    /// Copies the rdtsc and cpuid routines matching the process bitness into a freshly
    /// allocated executable buffer and binds the Rdtsc and Cpuid delegates to them.
    /// </summary>
    public static void Open()
    {
        bool is32BitProcess = IntPtr.Size == 4;
        byte[] rdTscCode = is32BitProcess ? Rdtsc32 : Rdtsc64;
        byte[] cpuidCode = is32BitProcess ? CpuId32 : CpuId64Windows;

        _size = (ulong)(rdTscCode.Length + cpuidCode.Length);

        _codeBuffer = Kernel32.VirtualAlloc(IntPtr.Zero,
            (UIntPtr)_size,
            Kernel32.MEM.MEM_COMMIT | Kernel32.MEM.MEM_RESERVE,
            Kernel32.PAGE.PAGE_EXECUTE_READWRITE);

        // rdtsc sits at the start of the buffer, cpuid directly behind it
        Marshal.Copy(rdTscCode, 0, _codeBuffer, rdTscCode.Length);
        Rdtsc = (RdtscDelegate)Marshal.GetDelegateForFunctionPointer(_codeBuffer, typeof(RdtscDelegate));

        IntPtr cpuidEntry = (IntPtr)((long)_codeBuffer + rdTscCode.Length);
        Marshal.Copy(cpuidCode, 0, cpuidEntry, cpuidCode.Length);
        Cpuid = (CpuidDelegate)Marshal.GetDelegateForFunctionPointer(cpuidEntry, typeof(CpuidDelegate));
    }

    /// <summary>
    /// Clears both delegates and releases the code buffer.
    /// </summary>
    public static void Close()
    {
        Rdtsc = null;
        Cpuid = null;

        Kernel32.VirtualFree(_codeBuffer, UIntPtr.Zero, Kernel32.MEM.MEM_RELEASE);
    }

    /// <summary>
    /// Runs cpuid with the calling thread's affinity temporarily set to
    /// <paramref name="threadAffinityMask"/>, then restores the previous affinity.
    /// </summary>
    /// <returns>False (with all outputs zeroed) if the affinity could not be set</returns>
    public static bool CpuidTx(uint index, uint ecxValue, out uint eax, out uint ebx, out uint ecx, out uint edx, ulong threadAffinityMask)
    {
        ulong previousMask = ThreadAffinity.Set(threadAffinityMask);
        if (previousMask != 0)
        {
            Cpuid(index, ecxValue, out eax, out ebx, out ecx, out edx);
            ThreadAffinity.Set(previousMask);
            return true;
        }

        eax = ebx = ecx = edx = 0;
        return false;
    }

    /// <summary>
    /// Gets the CPU manufacturer ID string, from cpuid with eax = 0
    /// </summary>
    /// <returns>Manufacturer ID string</returns>
    public static string GetManufacturerId()
    {
        Cpuid(0, 0, out uint _, out uint ebx, out uint ecx, out uint edx);

        // The 12 characters are stored low byte first in ebx, then edx, then ecx
        uint[] registersInOrder = { ebx, edx, ecx };
        byte[] idBytes = new byte[12];
        for (int i = 0; i < idBytes.Length; i++)
        {
            idBytes[i] = (byte)(registersInOrder[i / 4] >> (8 * (i % 4)));
        }

        return System.Text.Encoding.ASCII.GetString(idBytes);
    }

    /// <summary>
    /// Decodes family, model and stepping from cpuid leaf 1 (eax), folding in the
    /// extended model and extended family fields where they apply.
    /// </summary>
    public static void GetProcessorVersion(out byte family, out byte model, out byte stepping)
    {
        Cpuid(1, 0, out uint eax, out uint _, out uint _, out uint _);

        uint baseFamily = (eax >> 8) & 0xF;
        uint baseModel = (eax >> 4) & 0xF;

        stepping = (byte)(eax & 0xF);
        family = (byte)baseFamily;
        model = (byte)baseModel;

        // For base family 6 or 15 the extended model id (from bit 16) becomes the
        // upper nibble of the model
        if (baseFamily == 6 || baseFamily == 15)
        {
            model = (byte)(model + (byte)((eax >> 12) & 0xF0));
        }

        // For base family 15 the extended family (from bit 20) is added on top
        if (baseFamily == 15)
        {
            family = (byte)(family + (byte)(eax >> 20));
        }
    }
}
309 | }
310 |
--------------------------------------------------------------------------------
/Interop/ThreadAffinity.cs:
--------------------------------------------------------------------------------
1 | // From LibreHardwareMonitor
2 | // Mozilla Public License 2.0
3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 | // Copyright (C) LibreHardwareMonitor and Contributors
5 | // All Rights Reserved
6 |
7 | using System;
8 |
9 | namespace PmcReader.Interop
10 | {
/// <summary>
/// Helper for changing the processor affinity of the calling thread.
/// </summary>
internal static class ThreadAffinity
{
    /// <summary>
    /// Applies <paramref name="mask"/> as the current thread's affinity mask.
    /// </summary>
    /// <param name="mask">Affinity mask to apply; zero is treated as "do nothing"</param>
    /// <returns>
    /// The value returned by SetThreadAffinityMask, or 0 when <paramref name="mask"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The mask does not fit in a native-sized unsigned integer.
    /// </exception>
    public static ulong Set(ulong mask)
    {
        if (mask != 0)
        {
            UIntPtr nativeMask = ToNativeMask(mask);
            return (ulong)Kernel32.SetThreadAffinityMask(Kernel32.GetCurrentThread(), nativeMask);
        }

        return 0;
    }

    // Converts to pointer width, reporting an overflow as an argument error
    private static UIntPtr ToNativeMask(ulong mask)
    {
        try
        {
            return (UIntPtr)mask;
        }
        catch (OverflowException)
        {
            throw new ArgumentOutOfRangeException(nameof(mask));
        }
    }
}
30 | }
31 |
32 |
--------------------------------------------------------------------------------
/Interop/WinRing0.sys:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/Interop/WinRing0.sys
--------------------------------------------------------------------------------
/Interop/WinRing0x64.sys:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/Interop/WinRing0x64.sys
--------------------------------------------------------------------------------
/Interop/winpmem_64.sys:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/PmcReader/bcd7a8a1e82f1d63fecc8f36c0aacf24b1025291/Interop/winpmem_64.sys
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/PmcReader.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | AnyCPU
7 | {E43329E2-1CCB-4276-ADBC-D292EED22170}
8 | WinExe
9 | PmcReader
10 | PmcReader
11 | v4.7.2
12 | 512
13 | true
14 | true
15 |
16 |
17 | x64
18 | true
19 | full
20 | false
21 | bin\Debug\
22 | DEBUG;TRACE
23 | prompt
24 | 4
25 |
26 |
27 | x64
28 | pdbonly
29 | true
30 | bin\Release\
31 | TRACE
32 | prompt
33 | 4
34 |
35 |
36 | app.manifest
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 | Form
81 |
82 |
83 | HaswellForm.cs
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 | HaswellForm.cs
116 |
117 |
118 | ResXFileCodeGenerator
119 | Resources.Designer.cs
120 | Designer
121 |
122 |
123 | True
124 | Resources.resx
125 | True
126 |
127 |
128 |
129 |
130 |
131 |
132 | SettingsSingleFileGenerator
133 | Settings.Designer.cs
134 |
135 |
136 | True
137 | Settings.settings
138 | True
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
--------------------------------------------------------------------------------
/PmcReader.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.30011.22
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PmcReader", "PmcReader.csproj", "{E43329E2-1CCB-4276-ADBC-D292EED22170}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Release|Any CPU = Release|Any CPU
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {E43329E2-1CCB-4276-ADBC-D292EED22170}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | {E43329E2-1CCB-4276-ADBC-D292EED22170}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | {E43329E2-1CCB-4276-ADBC-D292EED22170}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | {E43329E2-1CCB-4276-ADBC-D292EED22170}.Release|Any CPU.Build.0 = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {C6109254-7E89-4EA3-865F-09E37919EA09}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/Program.cs:
--------------------------------------------------------------------------------
1 | using PmcReader.Interop;
2 | using System;
3 | using System.Windows.Forms;
4 |
5 | namespace PmcReader
6 | {
7 | static class Program
8 | {
9 | /// <summary>
10 | /// The main entry point for the application: opens Ring0 and OpCode, runs the HaswellForm message loop, then closes both in reverse order.
11 | /// </summary>
12 | [STAThread]
13 | static void Main()
14 | {
15 | Ring0.Open(); // presumably loads the WinRing0 driver interface (see Interop/Ring0.cs) - TODO confirm
16 | OpCode.Open(); // opened after Ring0 and closed before it below
17 | Application.EnableVisualStyles();
18 | Application.SetCompatibleTextRenderingDefault(false);
19 | Application.Run(new HaswellForm()); // returns once the main form is closed
20 | OpCode.Close(); // NOTE(review): not inside a finally block, so both Close calls are skipped if Application.Run throws
21 | Ring0.Close();
22 | }
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("PmcReader")]
9 | [assembly: AssemblyDescription("Reads CPU Performance Monitoring Events")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("")]
12 | [assembly: AssemblyProduct("PmcReader")]
13 | [assembly: AssemblyCopyright("Copyright © 2020")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("e43329e2-1ccb-4276-adbc-d292eed22170")] // same GUID that PmcReader.sln lists for PmcReader.csproj
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/Properties/Resources.Designer.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | // <auto-generated>
3 | // This code was generated by a tool.
4 | // Runtime Version:4.0.30319.42000
5 | //
6 | // Changes to this file may cause incorrect behavior and will be lost if
7 | // the code is regenerated.
8 | // </auto-generated>
9 | //------------------------------------------------------------------------------
10 |
11 | namespace PmcReader.Properties {
12 | using System;
13 |
14 |
15 | /// <summary>
16 | /// A strongly-typed resource class, for looking up localized strings, etc.
17 | /// </summary>
18 | // This class was auto-generated by the StronglyTypedResourceBuilder
19 | // class via a tool like ResGen or Visual Studio.
20 | // To add or remove a member, edit your .ResX file then rerun ResGen
21 | // with the /str option, or rebuild your VS project.
22 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "16.0.0.0")]
23 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
24 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
25 | internal class Resources {
26 |
27 | private static global::System.Resources.ResourceManager resourceMan;
28 |
29 | private static global::System.Globalization.CultureInfo resourceCulture;
30 |
31 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
32 | internal Resources() {
33 | }
34 |
35 | /// <summary>
36 | /// Returns the cached ResourceManager instance used by this class.
37 | /// </summary>
38 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
39 | internal static global::System.Resources.ResourceManager ResourceManager {
40 | get {
41 | if (object.ReferenceEquals(resourceMan, null)) { // create the manager on first access, then reuse it
42 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("PmcReader.Properties.Resources", typeof(Resources).Assembly);
43 | resourceMan = temp;
44 | }
45 | return resourceMan;
46 | }
47 | }
48 |
49 | /// <summary>
50 | /// Overrides the current thread's CurrentUICulture property for all
51 | /// resource lookups using this strongly typed resource class.
52 | /// </summary>
53 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
54 | internal static global::System.Globalization.CultureInfo Culture {
55 | get {
56 | return resourceCulture;
57 | }
58 | set {
59 | resourceCulture = value;
60 | }
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/Properties/Resources.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 | text/microsoft-resx
107 |
108 |
109 | 2.0
110 |
111 |
112 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
113 |
114 |
115 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
--------------------------------------------------------------------------------
/Properties/Settings.Designer.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | // <auto-generated>
3 | // This code was generated by a tool.
4 | // Runtime Version:4.0.30319.42000
5 | //
6 | // Changes to this file may cause incorrect behavior and will be lost if
7 | // the code is regenerated.
8 | // </auto-generated>
9 | //------------------------------------------------------------------------------
10 |
11 | namespace PmcReader.Properties {
12 |
13 |
14 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
15 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "16.5.0.0")]
16 | internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase { // generated settings class; declares no setting properties of its own
17 |
18 | private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); // single shared instance, wrapped by ApplicationSettingsBase.Synchronized
19 |
20 | public static Settings Default { // accessor for the shared Settings instance
21 | get {
22 | return defaultInstance;
23 | }
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/Properties/Settings.settings:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MsrUtil
2 | Performance Counter Reader
3 |
4 | # THIS SOFTWARE IS CONSIDERED EXPERIMENTAL. OUTPUT FROM THE APPLICATION MAY BE INACCURATE. NOT ALL CPU ARCHITECTURES ARE SUPPORTED.
5 |
6 | A messy attempt at reading performance counters for various CPUs and displaying derived metrics in real time. Probably due for a rewrite/rethink of how I approach this pretty soon, whenever I have time. The current structure is a bit messy. WinRing0 interface code is adapted from LibreHardwareMonitor at https://github.com/LibreHardwareMonitor/LibreHardwareMonitor
7 |
8 | ## Building
9 | Open the sln in Visual Studio, hit build.
10 |
11 | ## Running
12 | Right click, run as admin. It needs admin privileges to use the winring0 driver.
13 |
14 | ## Supported Platforms
15 | Every CPU has tons of performance monitoring events, and in most cases it's not practical to cover them all. CPUs have been largely supported on an ad-hoc basis whenever I (Clam) wanted to investigate performance characteristics on that platform.
16 |
17 | ### AMD, Core Events
18 | Zen 2 has the most thorough coverage. Piledriver events are also covered, though in a more limited way because of counter restrictions.
19 | Code is present for Zen 1 and 3, but testing has been minimal on those CPUs because I don't have examples of them.
20 |
21 | ### AMD, Non-Core Events
22 | Basic L3 counter support is implemented for all Zen generations, but data fabric (Infinity Fabric) support is mostly not present because those counters are largely undocumented, especially on client platforms.
23 |
24 | Piledriver's northbridge is decently well covered.
25 | ### Intel, Core Events
26 | Sandy Bridge and Haswell have the best core event coverage. Skylake and Goldmont Plus are a work in progress, with most basic events covered. On other Intel cores, I have code that can read "architectural" events (instructions, cycles, branch mispredicts, last level cache misses), but other events won't be supported.
27 |
28 | There might be some code for Alder Lake, but we don't talk about that. Because it has never been tested.
29 |
30 | ### Intel, Non-Core Events
31 | The program can read basic counters on Haswell client/HEDT and Skylake client uncores for L3 hitrate and system agent arbitration queue events.
32 |
33 | There's pretty extensive support for Sandy Bridge HEDT L3 performance counters. Sandy Bridge's Power Control Unit (PCU) can be monitored as well.
34 |
35 | ## Use of Undocumented Events
36 | In some places, I use events and unit mask combinations not explicitly documented by AMD or Intel. In some cases, I use a combination of unit mask bits that isn't directly in Intel's docs (since they provide umask values, and don't document what's selected by individual bits). Or, I set combinations of edge/count mask fields that aren't directly documented. I expect those cases to work fine.
37 |
38 | In others, I might use a completely undocumented event/umask bit, with basic testing to ensure it does count what I think it counts. I think I've marked most of these cases with a '?', but I may have missed some.
39 |
40 | Anyway, it's best to do your own verification before taking the results as truth. For example, you can verify L3 hitrate is reported correctly by reading from an array that fits within L3, and seeing that the hitrate is indeed high.
41 |
42 | ## General Disclaimer
43 | Even documented performance monitoring events may be inaccurate. There's *plenty* of errata around performance monitoring events, and they're often never fixed by the manufacturers because an incorrectly counting perf event won't cause crashes or break user programs. And inaccuracies are usually small enough to not seriously affect code optimization efforts.
44 |
45 | Also, it's good to read about the events in use in Intel/AMD's docs before interpreting them. I don't expect everyone to do this because documentation can be really hard to parse, so here are the major things to be aware of:
46 | - Cache requests and misses are generally tracked per cache line. For example, if three instructions miss L1D but requested data from the same 64B cache line, that'll count as one L1D miss/fill request in the cache hierarchy.
47 | - Many events are "speculative" meaning that counts could be triggered by instructions that are never retired (committed, or have their results made final). For example, instructions could be fetched, pass through rename/execute and cause event count increments there, but then be thrown away before retirement because they came down a mispredicted path. In some cases, similar events on AMD and Intel cannot be directly compared because one is speculative and the other is not.
48 | - Non-core events should always be considered speculative.
49 |
50 | ## Other
51 |
52 | There are testing controls under the 'Do not push these buttons' section. They may or may not work and I generally recommend avoiding them unless you really know what you're doing. They'll most likely decrease performance, and could cause weird behavior.
53 |
54 | ### Intel, Testing Controls
55 | Prefetchers can be turned on and off, using MSRs documented by Intel. Specifically:
56 | - L2 HW PF: L2 hardware prefetcher
57 | - L2 Adj PF: L2 adjacent cache line prefetcher. On a L2 miss, this prefetcher fetches an adjacent cache line as well, taking advantage of spatial locality.
58 | - L1D Adj PF: Adjacent line prefetcher for L1D misses
59 | - L1D IP PF: Instruction pointer based prefetcher that tracks the address of previous load instructions and uses that to prefetch extra cache lines.
60 |
61 | ### AMD, Testing Controls
62 | For 17h and newer CPUs (Zen stuff):
63 | - Op Cache: Can be used to disable the micro-op cache. Not documented by AMD, generally drops performance by a few percent. Use at your own risk.
64 | - Core Performance Boost: Can be used to disable Core Performance Boost, which will prevent the CPU from raising frequencies beyond base clock. Potentially useful for ensuring clock consistency when microbenchmarking, or just making your CPU more power efficient.
65 | - L1D Stream Prefetcher, L2 Stream Prefetcher: Toggles MSR bits that should request the respective prefetchers to be disabled, but I'm not sure if it works.
66 | - Set CPU Name String: Can be used to set the CPU name reported by the CPUID instruction. This can be funny, but can also cause strange behavior. Benchmark apps and CPU-Z may misidentify your CPU. Ryzen Master may think you're on a different CPU and not show your saved profiles.
--------------------------------------------------------------------------------
/app.manifest:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
25 |
26 |
27 | true
28 | true
29 |
30 |
31 |
32 |
33 |
34 |
35 |
43 |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------