├── Results ├── PermSelectMasks_SKX_16-slice.tbl ├── BaseSequence_SKX_16-slice.tbl ├── PermSelectMasks_SKX_20-slice.tbl ├── PermSelectMasks_ICX_40-slice.tbl ├── PermSelectMasks_SKX_24-slice.tbl ├── PermSelectMasks_SKX_14-slice.tbl ├── PermSelectMasks_SKX_18-slice.tbl ├── PermSelectMasks_ICX_28-slice.tbl ├── PermSelectMasks_KNL_38-slice.tbl ├── PermSelectMasks_SKX_22-slice.tbl ├── PermSelectMasks_SKX_28-slice.tbl ├── PermSelectMasks_SPR_56-slice.tbl ├── PermSelectMasks_SKX_26-slice.tbl ├── PermSelectMasks_SPR_60-slice.tbl ├── BaseSequence_SKX_20-slice.tbl ├── README.md ├── BaseSequence_SKX_24-slice.tbl ├── BaseSequence_ICX_40-slice.tbl ├── BaseSequence_SKX_18-slice.tbl ├── BaseSequence_SKX_28-slice.tbl └── BaseSequence_KNL_38-slice.tbl ├── Makefile ├── PCI_cfg_index.c ├── MSR_Architectural.h ├── cpuid_check_inline.c ├── MSR_ArchPerfMon_v3.h ├── LICENSE ├── read_CHA_counter.c ├── va2pa_lib.c ├── program_CHA_counters.c ├── MSR_defs.h ├── README.md ├── low_overhead_timers.c ├── Map_Addresses_to_L3_Slices.c └── Map_Addresses_to_L3_Slices_ICX.c /Results/PermSelectMasks_SKX_16-slice.tbl: -------------------------------------------------------------------------------- 1 | 10 37 2 | 0x1b5f575400 0x2eb5faa800 0x3cccc93000 0x31aeeb1000 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 3 | -------------------------------------------------------------------------------- /Results/BaseSequence_SKX_16-slice.tbl: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | 10 12 | 11 13 | 12 14 | 13 15 | 14 16 | 15 17 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SKX_20-slice.tbl: -------------------------------------------------------------------------------- 1 | 14 37 2 | 0x3ecbad4000 0x35cf7c000 0x387242c000 0xe2f28c000 0x1c5e518000 0x38bca30000 0xfb2eb4000 0x1f65d68000 0x0 0x0 0x0 0x0 0x0 0x0 3 | 
-------------------------------------------------------------------------------- /Results/PermSelectMasks_ICX_40-slice.tbl: -------------------------------------------------------------------------------- 1 | 15 37 2 | 0x3e074e8000 0x390140000 0x38bea10000 0xee3cb0000 0x1c5e518000 0x38bca30000 0xf7e088000 0x1f65d68000 0xcce38000 0x0 0x0 0x0 0x0 0x0 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SKX_24-slice.tbl: -------------------------------------------------------------------------------- 1 | 15 37 2 | 0x2b72c98000 0x16e5930000 0x2dcb260000 0x1b964c0000 0x1c5e518000 0x38bca30000 0x1a0b8f8000 0x1f65d68000 0x15b9648000 0x0 0x0 0x0 0x0 0x0 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SKX_14-slice.tbl: -------------------------------------------------------------------------------- 1 | 20 37 2 | 0x3880c00000 0x263a700000 0x1d14c00000 0x41f500000 0x1025400000 0x2cd5100000 0x1d90300000 0x0 0x3a03500000 0xc7b100000 0x0 0x1469b00000 0x0 0x3c48300000 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SKX_18-slice.tbl: -------------------------------------------------------------------------------- 1 | 18 37 2 | 0x4c8fc0000 0x1d05380000 0x262b8c0000 0x41f500000 0x2c6d780000 0x2cd5140000 0x21d80c0000 0x3b3f480000 0x3a03500000 0x3033280000 0x0 0x1469b40000 0x0 0x0 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_ICX_28-slice.tbl: -------------------------------------------------------------------------------- 1 | 20 37 2 | 0x3880c00000 0x390100000 0x38bea00000 0x41f500000 0x1025400000 0x2cd5100000 0x1d90300000 0x25aa600000 0x3a03500000 0xc7b100000 0x0 0x1469b00000 0x0 0x3c48300000 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_KNL_38-slice.tbl: 
-------------------------------------------------------------------------------- 1 | 18 37 2 | 0x32770C0000 0x2BBAC80000 0x39A2900000 0x1B964C0000 0x055B940000 0x05E3F80000 0x1767FC0000 0x3B3F480000 0x258A4C0000 0x3033280000 0x2936EC0000 0x1469B40000 0x0 0x0 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SKX_22-slice.tbl: -------------------------------------------------------------------------------- 1 | 20 37 2 | 0x3880c00000 0x3433d00000 0xf1d600000 0x41f500000 0x1025400000 0x2cd5100000 0x1d90300000 0x1209a00000 0x3a03500000 0xc7b100000 0x0 0x1469b00000 0x0 0x3c48300000 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SKX_28-slice.tbl: -------------------------------------------------------------------------------- 1 | 18 37 2 | 0x32770c0000 0x3433d40000 0x39a2900000 0x3857680000 0x1ad2880000 0x1a6ae40000 0x2b2fc40000 0x24b6540000 0x3a03500000 0xc7b100000 0xaf7c80000 0x28218c0000 0x0 0x0 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SPR_56-slice.tbl: -------------------------------------------------------------------------------- 1 | 20 36 2 | 0x10ba00000 0x390100000 0x135c00000 0x41f500000 0x9ae200000 0x155e700000 0x41b500000 0x1c21000000 0x1a03500000 0xc7b100000 0x198b600000 0x1469b00000 0x0 0x1c48300000 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SKX_26-slice.tbl: -------------------------------------------------------------------------------- 1 | 20 37 2 | 0xe3f300000 0x3433d00000 0x39a2900000 0x41f500000 0x269ab00000 0x1a6ae00000 0x2b2fc00000 0x24b6500000 0x3a03500000 0xc7b100000 0x36bff00000 0x1469b00000 0x0 0x3c48300000 3 | -------------------------------------------------------------------------------- /Results/PermSelectMasks_SPR_60-slice.tbl: 
// ===========================================================================================================================================================================
// Convert PCI(bus:device.function,offset) to uint32_t array index
//
// The four fields are packed into a byte address as
//     Bus in bits 27:20, Device in bits 19:15, Function in bits 14:12, Offset in bits 11:0
// and the byte address is divided by 4 to give an index in units of 32-bit words
// (presumably into a uint32_t view of the memory-mapped PCI configuration space --
// confirm against the caller).  An Offset that is not a multiple of 4 maps to the
// index of the 32-bit word that contains it.
//
// Each field is range-checked with assert(), so an out-of-range argument aborts the
// program unless NDEBUG is defined.
uint32_t PCI_cfg_index(unsigned int Bus, unsigned int Device, unsigned int Function, unsigned int Offset)
{
    uint32_t byteaddress;

    // All arguments are unsigned, so only the upper bounds can ever fail.
    assert (Bus < (1u<<8));         // a wider Bus would spill above bit 27 of the byte address
    assert (Device < (1u<<5));
    assert (Function < (1u<<3));
    assert (Offset < (1u<<12));

    byteaddress = ((uint32_t)Bus<<20) | ((uint32_t)Device<<15) | ((uint32_t)Function<<12) | (uint32_t)Offset;
    return ( byteaddress / 4 );
}
// -----------------------------------------------------------------
// Part 2: Performance-related MSRs from "Architectural MSRs"
//         (Volume 3B, Table 35-2) excludes those listed above in
//         "Architectural Performance Monitoring"
//
// Name, MSR_Address
#define IA32_TIME_STAMP_COUNTER 0x10L
#define IA32_MPERF 0xE7L
#define IA32_APERF 0xE8L
#define IA32_CLOCK_MODULATION 0x19AL
#define IA32_ENERGY_PERF_BIAS 0x1B0L
#define IA32_PACKAGE_THERM_STATUS 0x1B1L
#define IA32_DEBUGCTL 0x1D9L
#define IA32_PLATFORM_DCA_CAP 0x1F8L
#define IA32_CPU_DCA_CAP 0x1F9L
#define IA32_DCA_0_CAP 0x1FAL
#define IA32_PERF_CAPABILITIES 0x345L
#define IA32_PEBS_ENABLE 0x3F1L
// Full-width alias counters: eight consecutive MSRs, 0x4C1 (PMC0) through 0x4C8 (PMC7).
#define IA32_A_PMC0 0x4C1L
#define IA32_A_PMC1 0x4C2L
#define IA32_A_PMC2 0x4C3L
#define IA32_A_PMC3 0x4C4L
#define IA32_A_PMC4 0x4C5L
#define IA32_A_PMC5 0x4C6L
#define IA32_A_PMC6 0x4C7L
#define IA32_A_PMC7 0x4C8L
// IA32_TSC_AUX is MSR C000_0103H (the value returned in ECX by RDTSCP).
#define IA32_TSC_AUX 0xC0000103L
Lake Xeon 22 | printf("ICX\n"); 23 | } 24 | else if (ModelInfo == CPUID_SIGNATURE_SPR) { // expected values for Sapphire Rapids Xeon 25 | printf("SPR\n"); 26 | } else { 27 | printf("Unknown processor 0x%x\n",ModelInfo); 28 | } 29 | #endif 30 | 31 | return ModelInfo; 32 | } 33 | -------------------------------------------------------------------------------- /MSR_ArchPerfMon_v3.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Intel Arch SW Developer's Manual, Volume 3, document 325384-060, September 2016 3 | //------------------------------------------------- 4 | // Part 1: Architectural performance monitoring version 3, Volume 3B, section 18.2 5 | // and Section 35.1 6 | //------------------------------------------------- 7 | #define IA32_PMC0 0xC1L 8 | #define IA32_PMC1 0xC2L 9 | #define IA32_PMC2 0xC3L 10 | #define IA32_PMC3 0xC4L 11 | #define IA32_PMC4 0xC5L 12 | #define IA32_PMC5 0xC6L 13 | #define IA32_PMC6 0xC7L 14 | #define IA32_PMC7 0xC8L 15 | #define IA32_PERFEVTSEL0 0x186L 16 | #define IA32_PERFEVTSEL1 0x187L 17 | #define IA32_PERFEVTSEL2 0x188L 18 | #define IA32_PERFEVTSEL3 0x189L 19 | #define IA32_PERFEVTSEL4 0x18AL 20 | #define IA32_PERFEVTSEL5 0x18BL 21 | #define IA32_PERFEVTSEL6 0x18CL 22 | #define IA32_PERFEVTSEL7 0x18DL 23 | #define IA32_PERF_STATUS 0x198L 24 | #define IA32_THERM_STATUS 0x19CL 25 | #define IA32_PERF_CTL 0x199L 26 | #define IA32_MISC_ENABLE 0x1A0L 27 | #define IA32_FIXED_CTR0 0x309L 28 | #define IA32_FIXED_CTR1 0x30AL 29 | #define IA32_FIXED_CTR2 0x30BL 30 | #define IA32_FIXED_CTR_CTRL 0x38DL 31 | #define IA32_PERF_GLOBAL_STATUS 0x38EL 32 | #define IA32_PERF_GLOBAL_CTRL 0x38FL 33 | #define IA32_PERF_GLOBAL_OVF_CTRL 0x390L 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | 
Copyright (c) 2023, John D McCalpin 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /Results/BaseSequence_SKX_20-slice.tbl: -------------------------------------------------------------------------------- 1 | 0 2 | 11 3 | 2 4 | 9 5 | 7 6 | 12 7 | 5 8 | 14 9 | 1 10 | 10 11 | 3 12 | 8 13 | 6 14 | 13 15 | 4 16 | 15 17 | 1 18 | 10 19 | 3 20 | 8 21 | 6 22 | 13 23 | 4 24 | 15 25 | 0 26 | 11 27 | 18 28 | 17 29 | 7 30 | 12 31 | 17 32 | 18 33 | 8 34 | 3 35 | 10 36 | 1 37 | 15 38 | 4 39 | 13 40 | 6 41 | 9 42 | 2 43 | 19 44 | 16 45 | 14 46 | 5 47 | 16 48 | 19 49 | 9 50 | 2 51 | 11 52 | 0 53 | 14 54 | 5 55 | 12 56 | 7 57 | 8 58 | 3 59 | 18 60 | 17 61 | 15 62 | 4 63 | 17 64 | 18 65 | 10 66 | 1 67 | 8 68 | 3 69 | 13 70 | 6 71 | 15 72 | 4 73 | 11 74 | 0 75 | 9 76 | 2 77 | 12 78 | 7 79 | 14 80 | 5 81 | 11 82 | 0 83 | 9 84 | 2 85 | 12 86 | 7 87 | 14 88 | 5 89 | 18 90 | 17 91 | 8 92 | 3 93 | 17 94 | 18 95 | 15 96 | 4 97 | 2 98 | 9 99 | 0 100 | 11 101 | 5 102 | 14 103 | 7 104 | 12 105 | 19 106 | 16 107 | 1 108 | 10 109 | 16 110 | 19 111 | 6 112 | 13 113 | 3 114 | 8 115 | 1 116 | 10 117 | 4 118 | 15 119 | 6 120 | 13 121 | 18 122 | 17 123 | 0 124 | 11 125 | 17 126 | 18 127 | 7 128 | 12 129 | 4 130 | 15 131 | 6 132 | 13 133 | 3 134 | 8 135 | 1 136 | 10 137 | 5 138 | 14 139 | 7 140 | 12 141 | 2 142 | 9 143 | 0 144 | 11 145 | 5 146 | 14 147 | 7 148 | 12 149 | 2 150 | 9 151 | 0 152 | 11 153 | 16 154 | 19 155 | 6 156 | 13 157 | 19 158 | 16 159 | 1 160 | 10 161 | 12 162 | 7 163 | 14 164 | 5 165 | 11 166 | 0 167 | 9 168 | 2 169 | 17 170 | 18 171 | 15 172 | 4 173 | 18 174 | 17 175 | 8 176 | 3 177 | 13 178 | 6 179 | 15 180 | 4 181 | 10 182 | 1 183 | 8 184 | 3 185 | 16 186 | 19 187 | 14 188 | 5 189 | 19 190 | 16 191 | 9 192 | 2 193 | 14 194 | 5 195 | 12 196 | 7 197 | 9 198 | 2 199 | 11 200 | 0 201 | 15 202 | 4 203 | 13 204 | 6 205 | 8 206 | 3 207 | 10 208 | 1 209 | 15 210 | 4 211 | 13 212 | 6 213 | 8 214 | 3 215 | 10 216 | 1 217 | 14 218 | 5 219 | 16 220 | 19 221 | 9 222 | 2 223 | 19 224 | 16 
// read_CHA_counter() encapsulates the MSR addressing patterns for various Intel processors and
// reads the specified performance counter number in the specified CHA of the specified socket.
//
// Arguments:
//   CurrentCPUIDSignature  CPUID signature, matched against the CPUID_SIGNATURE_* constants
//   socket                 index into msr_fd[]
//   cha_number             CHA whose counter is to be read
//   counter                counter number within that CHA
//   msr_fd                 array of open file descriptors indexed by socket; the counter is fetched
//                          with an 8-byte pread() whose file offset is the MSR number
//                          (presumably /dev/cpu/<n>/msr handles -- confirm against the caller)
//
// Returns the 64-bit counter value.
// Haswell EP is recognized but has no addressing rule here, so it returns 0 without touching msr_fd.
// An unsupported CPUID signature, or a failed or short read, prints a message to stderr and exits:
// a silently returned zero would be indistinguishable from a real count.
uint64_t read_CHA_counter(uint32_t CurrentCPUIDSignature, int socket, int cha_number, int counter, int *msr_fd)
{
    uint64_t msr_val = 0;
    uint64_t msr_base = 0;
    uint64_t msr_stride = 0;
    uint64_t msr_num;
    ssize_t nread;

    switch(CurrentCPUIDSignature) {
        // ------------ Haswell EP -- Xeon E5-2xxx v3 --------------
        case CPUID_SIGNATURE_HASWELL:
            return (msr_val);               // not implemented: report 0 instead of falling off the end
        // ------------ Skylake Xeon and Cascade Lake Xeon -- 1st and 2nd generation Xeon Scalable Processors ------------
        case CPUID_SIGNATURE_SKX:
            msr_base = 0xe00;
            msr_stride = 0x10;              // specific to SKX/CLX
            break;
        // ------------- Ice Lake Xeon -- 3rd generation Xeon Scalable Processors ------------
        case CPUID_SIGNATURE_ICX:
            msr_stride = 0x0e;              // ICX-specific
            if (cha_number >= 34) {
                msr_base = 0x0e00 - 0x47c;  // ICX MSRs skip backwards for CHAs 34-39
            } else if (cha_number >= 18) {
                msr_base = 0x0e00 + 0x0e;   // ICX MSRs skip forward for CHAs 18-33
            } else {
                msr_base = 0x0e00;          // MSRs for the first 18 CHAs
            }
            break;
        // ------------------ Sapphire Rapids -- 4th generation Xeon Scalable Processors and Xeon CPU Max Processors ------------
        case CPUID_SIGNATURE_SPR:
            msr_base = 0x2000;
            msr_stride = 0x10;
            break;
        default:
            fprintf(stderr,"CHA counters not yet supported for CPUID Signature 0x%x\n",CurrentCPUIDSignature);
            exit(1);
    }

    // The count register for a counter sits at offset 8+counter within the CHA's block of MSRs.
    msr_num = msr_base + msr_stride*cha_number + counter + 8;

    nread = pread(msr_fd[socket],&msr_val,sizeof(msr_val),msr_num);
    if (nread != (ssize_t) sizeof(msr_val)) {
        fprintf(stderr,"ERROR: read_CHA_counter could not read MSR 0x%llx (socket %d, CHA %d, counter %d)\n",
                (unsigned long long) msr_num,socket,cha_number,counter);
        exit(1);
    }
    return (msr_val);
}
8 | 9 | ## File format information for Intel_Address_Hash/Results/ files 10 | 11 | ### Base Sequence files 12 | Each file named "BaseSequence_\<proc\>_\<N\>-slice.tbl" contains the "base sequence" of 13 | L3/CHA numbers (as reported by the hardware performance counter in the processor 14 | "uncore") for one of the processor configurations tested. The "base sequence" 15 | in each case is the set of L3/CHA numbers for cache line addresses starting at 16 | physical address zero in the processor's memory space. 17 | 18 | - "\<proc\>" values are 19 | -- "KNL" Xeon Phi x200 Processor "Knights Landing" 20 | -- "SKX" Xeon Scalable Processor (1st or 2nd gen) "Skylake Xeon" or "Cascade Lake Xeon" 21 | -- "ICX" Xeon Scalable Processor (3rd gen) "Ice Lake Xeon" 22 | -- "SPR" Xeon Scalable Processor (4th gen) "Sapphire Rapids Xeon" or Xeon CPU Max Processor ("Sapphire Rapids with HBM"). 23 | 24 | - "\<N\>" values are the number of L3/CHA "slices" for the processor (or the number of CHA slices for the KNL processor, which has no L3) 25 | The number of L3/CHA slices is always at least as large as the number of cores. 26 | 27 | Each of the "BaseSequence" files is a text file containing one decimal number per line 28 | and one line for each element of the base sequence. The length of the base sequence 29 | varies from 16 to 16384 lines for these processors. 
30 | 31 | **FileNames and Lengths** 32 | - BaseSequence_ICX_28-slice.tbl 16384 33 | - BaseSequence_ICX_40-slice.tbl 512 34 | - BaseSequence_KNL_38-slice.tbl 4096 35 | - BaseSequence_SKX_14-slice.tbl 16384 36 | - BaseSequence_SKX_16-slice.tbl 16 37 | - BaseSequence_SKX_18-slice.tbl 4096 38 | - BaseSequence_SKX_20-slice.tbl 256 39 | - BaseSequence_SKX_22-slice.tbl 16384 40 | - BaseSequence_SKX_24-slice.tbl 512 41 | - BaseSequence_SKX_26-slice.tbl 16384 42 | - BaseSequence_SKX_28-slice.tbl 4096 43 | - BaseSequence_SPR_56-slice.tbl 16384 44 | - BaseSequence_SPR_60-slice.tbl 16384 45 | 46 | ### Permutation Select Mask files 47 | Each file named "PermSelectMasks_\<proc\>_\<N\>-slice.tbl" (processor name and slice count, as in the file names listed above) contains two lines related to the "permutation selector masks". 48 | - The first line contains two decimal numbers, the lowest address bit to which the permutation selector masks apply, and the highest address bit to which the permutation selector masks apply. 49 | - The lowest address bit is the first physical address bit above the top of the base sequence and should be 6+log2(BaseSequenceLength). 50 | - The highest address bit indicates the address range over which the mask set provides correct answers. E.g., the value "37" indicates that the masks are valid for all physical addresses for which the highest bit set is 37 or lower -- addresses below 2^38 = 256 GiB. 51 | - The second line contains 14 hexadecimal numbers. 52 | - These are the permutation selector masks for permutation bit 0 (on the left) to permutation bit 13 (on the right). 53 | - All of the files contain 14 mask values, even if fewer are used. (A permutation selector mask of zero corresponds to the identity permutation, so results are identical if permutation selector masks of 0x0 are used or ignored.) 54 | 55 | ## Reference and Citation: 56 | 57 | John D. 
McCalpin, "Mapping Addresses to L3/CHA Slices in Intel Processors", Texas Advanced 58 | Computing Center, University of Texas at Austin, Austin, TX, USA, ACELab TR-2021-03, 59 | September 10, 2021, doi: https://dx.doi.org/10.26153/tsw/14539 60 | 61 | (Note that this doi contains links to both the technical report describing the methodology and results and to the data files 62 | that were completed as of the original publication date of 2021-09-10.) 63 | -------------------------------------------------------------------------------- /Results/BaseSequence_SKX_24-slice.tbl: -------------------------------------------------------------------------------- 1 | 0 2 | 3 3 | 10 4 | 9 5 | 7 6 | 20 7 | 13 8 | 22 9 | 5 10 | 6 11 | 15 12 | 12 13 | 2 14 | 17 15 | 8 16 | 19 17 | 1 18 | 2 19 | 11 20 | 8 21 | 6 22 | 21 23 | 12 24 | 23 25 | 4 26 | 23 27 | 14 28 | 21 29 | 3 30 | 16 31 | 9 32 | 18 33 | 0 34 | 3 35 | 10 36 | 9 37 | 23 38 | 4 39 | 21 40 | 14 41 | 21 42 | 6 43 | 23 44 | 12 45 | 18 46 | 1 47 | 16 48 | 11 49 | 1 50 | 2 51 | 11 52 | 8 53 | 22 54 | 5 55 | 20 56 | 15 57 | 20 58 | 7 59 | 22 60 | 13 61 | 19 62 | 0 63 | 17 64 | 10 65 | 14 66 | 13 67 | 4 68 | 7 69 | 17 70 | 10 71 | 19 72 | 0 73 | 11 74 | 8 75 | 1 76 | 2 77 | 20 78 | 15 79 | 22 80 | 5 81 | 15 82 | 12 83 | 5 84 | 6 85 | 16 86 | 11 87 | 18 88 | 1 89 | 18 90 | 9 91 | 16 92 | 3 93 | 21 94 | 14 95 | 23 96 | 4 97 | 14 98 | 13 99 | 4 100 | 7 101 | 9 102 | 18 103 | 3 104 | 16 105 | 11 106 | 16 107 | 1 108 | 18 109 | 12 110 | 23 111 | 6 112 | 21 113 | 15 114 | 12 115 | 5 116 | 6 117 | 8 118 | 19 119 | 2 120 | 17 121 | 10 122 | 17 123 | 0 124 | 19 125 | 13 126 | 22 127 | 7 128 | 20 129 | 8 130 | 19 131 | 2 132 | 17 133 | 15 134 | 12 135 | 5 136 | 6 137 | 13 138 | 22 139 | 7 140 | 20 141 | 10 142 | 9 143 | 0 144 | 3 145 | 9 146 | 18 147 | 3 148 | 16 149 | 14 150 | 13 151 | 4 152 | 7 153 | 12 154 | 23 155 | 6 156 | 21 157 | 11 158 | 16 159 | 1 160 | 18 161 | 16 162 | 11 163 | 18 164 | 1 165 | 15 166 | 12 167 | 5 168 | 6 169 
| 21 170 | 14 171 | 23 172 | 4 173 | 18 174 | 9 175 | 16 176 | 3 177 | 17 178 | 10 179 | 19 180 | 0 181 | 14 182 | 13 183 | 4 184 | 7 185 | 20 186 | 15 187 | 22 188 | 5 189 | 19 190 | 8 191 | 17 192 | 2 193 | 22 194 | 5 195 | 20 196 | 15 197 | 1 198 | 2 199 | 11 200 | 8 201 | 19 202 | 0 203 | 17 204 | 10 205 | 4 206 | 7 207 | 14 208 | 13 209 | 23 210 | 4 211 | 21 212 | 14 213 | 0 214 | 3 215 | 10 216 | 9 217 | 18 218 | 1 219 | 16 220 | 11 221 | 21 222 | 6 223 | 23 224 | 12 225 | 6 226 | 21 227 | 12 228 | 23 229 | 1 230 | 2 231 | 11 232 | 8 233 | 3 234 | 16 235 | 9 236 | 18 237 | 4 238 | 23 239 | 14 240 | 21 241 | 7 242 | 20 243 | 13 244 | 22 245 | 0 246 | 3 247 | 10 248 | 9 249 | 2 250 | 17 251 | 8 252 | 19 253 | 5 254 | 22 255 | 15 256 | 20 257 | 1 258 | 18 259 | 11 260 | 16 261 | 6 262 | 5 263 | 12 264 | 15 265 | 4 266 | 23 267 | 14 268 | 21 269 | 3 270 | 0 271 | 9 272 | 10 273 | 0 274 | 19 275 | 10 276 | 17 277 | 7 278 | 20 279 | 13 280 | 22 281 | 5 282 | 22 283 | 15 284 | 20 285 | 2 286 | 1 287 | 8 288 | 11 289 | 17 290 | 2 291 | 19 292 | 8 293 | 22 294 | 5 295 | 20 296 | 15 297 | 20 298 | 7 299 | 22 300 | 13 301 | 3 302 | 0 303 | 9 304 | 10 305 | 16 306 | 3 307 | 18 308 | 9 309 | 23 310 | 4 311 | 21 312 | 14 313 | 21 314 | 6 315 | 23 316 | 12 317 | 2 318 | 1 319 | 8 320 | 11 321 | 23 322 | 12 323 | 21 324 | 6 325 | 8 326 | 11 327 | 2 328 | 1 329 | 18 330 | 9 331 | 16 332 | 3 333 | 13 334 | 14 335 | 7 336 | 4 337 | 22 338 | 13 339 | 20 340 | 7 341 | 17 342 | 10 343 | 19 344 | 0 345 | 19 346 | 8 347 | 17 348 | 2 349 | 12 350 | 15 351 | 6 352 | 5 353 | 15 354 | 20 355 | 5 356 | 22 357 | 8 358 | 19 359 | 2 360 | 17 361 | 10 362 | 17 363 | 0 364 | 19 365 | 13 366 | 14 367 | 7 368 | 4 369 | 14 370 | 21 371 | 4 372 | 23 373 | 9 374 | 18 375 | 3 376 | 16 377 | 11 378 | 16 379 | 1 380 | 18 381 | 12 382 | 15 383 | 6 384 | 5 385 | 9 386 | 10 387 | 3 388 | 0 389 | 14 390 | 21 391 | 4 392 | 23 393 | 12 394 | 15 395 | 6 396 | 5 397 | 11 398 | 16 399 | 1 400 | 18 401 | 8 402 
| 19 403 | 2 404 | 17 405 | 15 406 | 20 407 | 5 408 | 22 409 | 13 410 | 14 411 | 7 412 | 4 413 | 10 414 | 17 415 | 0 416 | 19 417 | 17 418 | 10 419 | 19 420 | 0 421 | 22 422 | 13 423 | 20 424 | 7 425 | 12 426 | 15 427 | 6 428 | 5 429 | 19 430 | 8 431 | 17 432 | 2 433 | 16 434 | 11 435 | 18 436 | 1 437 | 23 438 | 12 439 | 21 440 | 6 441 | 13 442 | 14 443 | 7 444 | 4 445 | 18 446 | 9 447 | 16 448 | 3 449 | 7 450 | 4 451 | 13 452 | 14 453 | 16 454 | 3 455 | 18 456 | 9 457 | 2 458 | 1 459 | 8 460 | 11 461 | 21 462 | 6 463 | 23 464 | 12 465 | 22 466 | 5 467 | 20 468 | 15 469 | 17 470 | 2 471 | 19 472 | 8 473 | 3 474 | 0 475 | 9 476 | 10 477 | 20 478 | 7 479 | 22 480 | 13 481 | 7 482 | 20 483 | 13 484 | 22 485 | 0 486 | 19 487 | 10 488 | 17 489 | 2 490 | 1 491 | 8 492 | 11 493 | 5 494 | 22 495 | 15 496 | 20 497 | 6 498 | 21 499 | 12 500 | 23 501 | 1 502 | 18 503 | 11 504 | 16 505 | 3 506 | 0 507 | 9 508 | 10 509 | 4 510 | 23 511 | 14 512 | 21 513 | -------------------------------------------------------------------------------- /Results/BaseSequence_ICX_40-slice.tbl: -------------------------------------------------------------------------------- 1 | 0 2 | 11 3 | 2 4 | 9 5 | 27 6 | 32 7 | 25 8 | 34 9 | 1 10 | 10 11 | 3 12 | 8 13 | 26 14 | 33 15 | 24 16 | 35 17 | 1 18 | 10 19 | 3 20 | 8 21 | 26 22 | 33 23 | 24 24 | 35 25 | 0 26 | 11 27 | 18 28 | 17 29 | 27 30 | 32 31 | 37 32 | 38 33 | 28 34 | 23 35 | 30 36 | 21 37 | 15 38 | 4 39 | 13 40 | 6 41 | 29 42 | 22 43 | 39 44 | 36 45 | 14 46 | 5 47 | 16 48 | 19 49 | 29 50 | 22 51 | 31 52 | 20 53 | 14 54 | 5 55 | 12 56 | 7 57 | 28 58 | 23 59 | 38 60 | 37 61 | 15 62 | 4 63 | 17 64 | 18 65 | 10 66 | 1 67 | 8 68 | 3 69 | 33 70 | 26 71 | 35 72 | 24 73 | 11 74 | 0 75 | 9 76 | 2 77 | 32 78 | 27 79 | 34 80 | 25 81 | 11 82 | 0 83 | 9 84 | 2 85 | 32 86 | 27 87 | 34 88 | 25 89 | 18 90 | 17 91 | 8 92 | 3 93 | 37 94 | 38 95 | 35 96 | 24 97 | 22 98 | 29 99 | 20 100 | 31 101 | 5 102 | 14 103 | 7 104 | 12 105 | 39 106 | 36 107 | 21 108 | 30 109 | 
16 110 | 19 111 | 6 112 | 13 113 | 23 114 | 28 115 | 21 116 | 30 117 | 4 118 | 15 119 | 6 120 | 13 121 | 38 122 | 37 123 | 20 124 | 31 125 | 17 126 | 18 127 | 7 128 | 12 129 | 4 130 | 15 131 | 6 132 | 13 133 | 23 134 | 28 135 | 21 136 | 30 137 | 5 138 | 14 139 | 7 140 | 12 141 | 22 142 | 29 143 | 20 144 | 31 145 | 5 146 | 14 147 | 7 148 | 12 149 | 22 150 | 29 151 | 20 152 | 31 153 | 16 154 | 19 155 | 6 156 | 13 157 | 39 158 | 36 159 | 21 160 | 30 161 | 32 162 | 27 163 | 34 164 | 25 165 | 11 166 | 0 167 | 9 168 | 2 169 | 37 170 | 38 171 | 35 172 | 24 173 | 18 174 | 17 175 | 8 176 | 3 177 | 33 178 | 26 179 | 35 180 | 24 181 | 10 182 | 1 183 | 8 184 | 3 185 | 36 186 | 39 187 | 34 188 | 25 189 | 19 190 | 16 191 | 9 192 | 2 193 | 14 194 | 5 195 | 12 196 | 7 197 | 29 198 | 22 199 | 31 200 | 20 201 | 15 202 | 4 203 | 13 204 | 6 205 | 28 206 | 23 207 | 30 208 | 21 209 | 15 210 | 4 211 | 13 212 | 6 213 | 28 214 | 23 215 | 30 216 | 21 217 | 14 218 | 5 219 | 16 220 | 19 221 | 29 222 | 22 223 | 39 224 | 36 225 | 26 226 | 33 227 | 24 228 | 35 229 | 1 230 | 10 231 | 3 232 | 8 233 | 27 234 | 32 235 | 37 236 | 38 237 | 0 238 | 11 239 | 18 240 | 17 241 | 27 242 | 32 243 | 25 244 | 34 245 | 0 246 | 11 247 | 2 248 | 9 249 | 26 250 | 33 251 | 36 252 | 39 253 | 1 254 | 10 255 | 19 256 | 16 257 | 5 258 | 14 259 | 7 260 | 12 261 | 22 262 | 29 263 | 20 264 | 31 265 | 4 266 | 15 267 | 6 268 | 13 269 | 23 270 | 28 271 | 21 272 | 30 273 | 4 274 | 15 275 | 18 276 | 17 277 | 23 278 | 28 279 | 37 280 | 38 281 | 5 282 | 14 283 | 7 284 | 12 285 | 22 286 | 29 287 | 20 288 | 31 289 | 33 290 | 26 291 | 39 292 | 36 293 | 10 294 | 1 295 | 16 296 | 19 297 | 32 298 | 27 299 | 34 300 | 25 301 | 11 302 | 0 303 | 9 304 | 2 305 | 32 306 | 27 307 | 38 308 | 37 309 | 11 310 | 0 311 | 17 312 | 18 313 | 33 314 | 26 315 | 35 316 | 24 317 | 10 318 | 1 319 | 8 320 | 3 321 | 15 322 | 4 323 | 13 324 | 6 325 | 28 326 | 23 327 | 30 328 | 21 329 | 14 330 | 5 331 | 12 332 | 7 333 | 29 334 | 22 335 | 31 336 | 20 337 | 18 
338 | 17 339 | 12 340 | 7 341 | 37 342 | 38 343 | 31 344 | 20 345 | 15 346 | 4 347 | 13 348 | 6 349 | 28 350 | 23 351 | 30 352 | 21 353 | 39 354 | 36 355 | 25 356 | 34 357 | 16 358 | 19 359 | 2 360 | 9 361 | 26 362 | 33 363 | 24 364 | 35 365 | 1 366 | 10 367 | 3 368 | 8 369 | 38 370 | 37 371 | 24 372 | 35 373 | 17 374 | 18 375 | 3 376 | 8 377 | 27 378 | 32 379 | 25 380 | 34 381 | 0 382 | 11 383 | 2 384 | 9 385 | 1 386 | 10 387 | 3 388 | 8 389 | 26 390 | 33 391 | 24 392 | 35 393 | 0 394 | 11 395 | 2 396 | 9 397 | 27 398 | 32 399 | 25 400 | 34 401 | 16 402 | 19 403 | 2 404 | 9 405 | 39 406 | 36 407 | 25 408 | 34 409 | 1 410 | 10 411 | 3 412 | 8 413 | 26 414 | 33 415 | 24 416 | 35 417 | 37 418 | 38 419 | 31 420 | 20 421 | 18 422 | 17 423 | 12 424 | 7 425 | 28 426 | 23 427 | 30 428 | 21 429 | 15 430 | 4 431 | 13 432 | 6 433 | 36 434 | 39 435 | 30 436 | 21 437 | 19 438 | 16 439 | 13 440 | 6 441 | 29 442 | 22 443 | 31 444 | 20 445 | 14 446 | 5 447 | 12 448 | 7 449 | 11 450 | 0 451 | 9 452 | 2 453 | 32 454 | 27 455 | 34 456 | 25 457 | 10 458 | 1 459 | 8 460 | 3 461 | 33 462 | 26 463 | 35 464 | 24 465 | 10 466 | 1 467 | 16 468 | 19 469 | 33 470 | 26 471 | 39 472 | 36 473 | 11 474 | 0 475 | 9 476 | 2 477 | 32 478 | 27 479 | 34 480 | 25 481 | 23 482 | 28 483 | 37 484 | 38 485 | 4 486 | 15 487 | 18 488 | 17 489 | 22 490 | 29 491 | 20 492 | 31 493 | 5 494 | 14 495 | 7 496 | 12 497 | 22 498 | 29 499 | 36 500 | 39 501 | 5 502 | 14 503 | 19 504 | 16 505 | 23 506 | 28 507 | 21 508 | 30 509 | 4 510 | 15 511 | 6 512 | 13 513 | -------------------------------------------------------------------------------- /va2pa_lib.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | 3 | #define _XOPEN_SOURCE 500 // required by pread 4 | #include // required by printf 5 | #include // required by pread, close, getpid 6 | #include // required by open, getpid 7 | #include // required by open 8 | #include // required by open 9 | 10 | // 
// declarations that calling routines will need
void print_pagemap_entry(unsigned long long pagemap_entry);
unsigned long long get_pagemap_entry( void * va );

// -----------------------------------------------------------------------------------------
// Function to take any pointer and look up the entry in /proc/$pid/pagemap
// No error handling -- caller should check errno on a 0 return value.
// Does not attempt to interpret bits -- returns them all in an unsigned long long
// Returns 0 if the page is not currently mapped, or if an error occurs.
// Note that the page shift bits are wrong in 2.6.32 kernels (and 2.6.34 on MIC).
// I have not been able to figure out if these bits are correct in any Linux version.
//
// The pagemap file descriptor is opened on the first successful call and cached in a
// function-local static for all later calls; it is never closed by this routine.
// If the open fails, 0 is returned and the open is attempted again on the next call.
//
// John D. McCalpin, mccalpin@tacc.utexas.edu
// Revised to 2013-04-18

unsigned long long get_pagemap_entry( void * va )
{
    static int pagemap_fd = -1;         // cached descriptor; negative until open() succeeds
    unsigned long long result = 0;
    off_t myoffset;
    ssize_t ret;

    // on first call: get process pid, open /proc/$pid/pagemap, and save the file descriptor for subsequent calls
    if (pagemap_fd < 0) {
        char filename[32];              // needs 15 characters for the "/proc/" and "/pagemap", plus enough for the PID

        snprintf(filename, sizeof filename, "/proc/%d/pagemap", (int) getpid());
        pagemap_fd = open(filename, O_RDONLY);
        if (pagemap_fd < 0) {           // open() reports failure with -1, not 0
            return 0ULL;                // user must check errno if a zero value is returned
        }
    }

    // One 8-byte entry per page: page index is va >> 12 (assumes 4 KiB base pages),
    // byte offset into the pagemap file is index * 8.  Use an unsigned type so the
    // right shift is well defined for every pointer value.
    myoffset = (off_t) (((unsigned long long) va >> 12) << 3);

    ret = pread(pagemap_fd, &result, sizeof result, myoffset);
    if (ret != (ssize_t) sizeof result) {
        return 0ULL;                    // user must check errno if a zero value is returned
    }
    return result;
}
// -----------------------------------------------------------------------------------------


//
// -----------------------------------------------------------------------------------------
// McCalpin's function to print the PFN (Page Frame Number) from entries returned by get_pagemap_entry()
// Warnings are printed if
//    a. the page is not present (bit 63 != 1), or
//    b. the page is swapped (bit 62 == 1)
// Note that the page shift bits are wrong in 2.6.25 through 2.6.32 kernels, fixed in 2.6.33
// When OLDKERNEL is defined this therefore only prints the page frame number; the code
// below keeps the low 55 bits (bits 0..54) of the entry as the frame number.
// Without OLDKERNEL the 6-bit field in bits 55..60 is additionally printed as log2(page size).
// NOTE(review): newer kernels reuse bits 55 and up as flag bits -- confirm against the
// kernel's pagemap documentation before trusting the printed page size.
// The PFN value will not make sense if the page is swapped (bit 62 set)
// The value should be all 0's if the page is unmapped, or if get_pagemap_entry() returned an error.
//
// John D. McCalpin, mccalpin@tacc.utexas.edu
// Revised to 2013-04-18

// 1ULL (not 1UL) so that testing bits up to 63 is defined even where long is 32 bits
#define BIT_IS_SET(x, n)   (((x) & (1ULL<<(n)))?1:0)   // more convenient 0/1 result

void print_pagemap_entry(unsigned long long pagemap_entry)
{
    unsigned long long framenumber;

    // could also use ( (pagemap_entry >> 63) != 1 ) as the test
    if (BIT_IS_SET(pagemap_entry, 63) == 0) {
        printf("WARNING in print_pagemap_entry: page is not present. Result = %.16llx\n",pagemap_entry);
    }
    if (BIT_IS_SET(pagemap_entry, 62) != 0) {
        printf("WARNING in print_pagemap_entry: page is swapped. Result = %.16llx\n",pagemap_entry);
    }

    framenumber = ( (pagemap_entry<<9) >> 9);           // clear upper 9 bits -- only works for unsigned types

#ifdef OLDKERNEL
    // Page size bits are broken in 2.6.32 kernels (Stampede) and 2.6.34 kernels (MIC)
    printf("print_pagemap_entry: argument = 0x%.16llx, framenumber = 0x%.16llx\n",pagemap_entry,framenumber);
#else
    // Dunno if this is fixed in newer kernels -- clearly broken in 2.6.32 and 2.6.34 (MIC)
    // clear bits 61-63, then shift (original) bit 55 down to 0: yields a value in 0..63
    int logpagesize = (int) ( (pagemap_entry<<3) >> 58);
    // 64-bit shift: a 6-bit exponent would overflow a plain int for values >= 31
    unsigned long long pagesize = 1ULL << logpagesize;
    printf("print_pagemap_entry: logpagesize = %d, pagesize = %llu, framenumber = 0x%.16llx\n",logpagesize,pagesize,framenumber);
#endif
}

// --------------------------------------------------------------------------------
// /program_CHA_counters.c:
// --------------------------------------------------------------------------------

// program_CHA_counters() encapsulates the MSR addressing patterns for various Intel
// processors and programs the counters in each CHA box with the provided PerfEvtSel values.
4 | 5 | int program_CHA_counters(uint32_t CurrentCPUIDSignature, int num_chas, uint64_t *cha_perfevtsel, int num_counters, int *msr_fd, int num_sockets) 6 | { 7 | int pkg,tile,counter; 8 | uint64_t msr_val, msr_num, msr_base; 9 | uint64_t msr_stride; 10 | 11 | switch(CurrentCPUIDSignature) { 12 | case CPUID_SIGNATURE_HASWELL: 13 | // ------------ Haswell EP -- Xeon E5-2xxx v3 -------------- 14 | printf("CPUID Signature 0x%x identified as Haswell EP\n",CurrentCPUIDSignature); 15 | break; 16 | // ------------ Skylake Xeon and Cascade Lake Xeon -- 1st and 2nd generation Xeon Scalable Processors ------------ 17 | case CPUID_SIGNATURE_SKX: 18 | printf("CPUID Signature 0x%x identified as Skylake Xeon/Cascade Lake Xeon\n",CurrentCPUIDSignature); 19 | msr_base = 0xe00; 20 | msr_stride = 0x10; // specific to SKX/CLX 21 | for (pkg=0; pkg= 34) { 51 | msr_base = 0x0e00 - 0x47c; // ICX MSRs skip backwards for CHAs 34-39 52 | } else if (tile >= 18) { 53 | msr_base = 0x0e00 + 0x0e; // ICX MSRs skip forward for CHAs 18-33 54 | } else { 55 | msr_base = 0x0e00; // MSRs for the first 18 CHAs 56 | } 57 | 58 | // unit control register -- optional write bit 1 (value 0x2) to clear counters 59 | msr_num = msr_base + msr_stride*tile; 60 | msr_val = 0x2; 61 | pwrite(msr_fd[pkg],&msr_val,sizeof(msr_val),msr_num); 62 | 63 | // program the control registers for counters 0..num_counters-1 64 | for (counter=0; counter>12; 57 | *core = c & 0xFFFUL; 58 | 59 | return (a | (d << 32)); 60 | } 61 | 62 | 63 | extern inline __attribute__((always_inline)) int get_core_number() 64 | { 65 | unsigned long a, d, c; 66 | 67 | __asm__ volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c)); 68 | 69 | return ( c & 0xFFFUL ); 70 | } 71 | 72 | extern inline __attribute__((always_inline)) int get_socket_number() 73 | { 74 | unsigned long a, d, c; 75 | 76 | __asm__ volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c)); 77 | 78 | return ( (c & 0xF000UL)>>12 ); 79 | } 80 | 81 | 82 | extern inline __attribute__((always_inline)) 
unsigned long rdpmc_instructions(void)
{
    // Executes RDPMC with ECX = (1<<30) + 0; the function name indicates this selects
    // the fixed-function "instructions" counter.  Result is EDX:EAX combined into 64 bits.
    unsigned long a, d, c;

    c = (1UL<<30);
    __asm__ volatile("rdpmc" : "=a" (a), "=d" (d) : "c" (c));

    return (a | (d << 32));
}

// Executes RDPMC with ECX = (1<<30) + 1 (named for the "actual cycles" fixed counter).
extern inline __attribute__((always_inline)) unsigned long rdpmc_actual_cycles(void)
{
    unsigned long a, d, c;

    c = (1UL<<30)+1;
    __asm__ volatile("rdpmc" : "=a" (a), "=d" (d) : "c" (c));

    return (a | (d << 32));
}

// Executes RDPMC with ECX = (1<<30) + 2 (named for the "reference cycles" fixed counter).
extern inline __attribute__((always_inline)) unsigned long rdpmc_reference_cycles(void)
{
    unsigned long a, d, c;

    c = (1UL<<30)+2;
    __asm__ volatile("rdpmc" : "=a" (a), "=d" (d) : "c" (c));

    return (a | (d << 32));
}

// Executes RDPMC with the caller-supplied counter selector in ECX and returns
// the EDX:EAX result as a single 64-bit value.
extern inline __attribute__((always_inline)) unsigned long rdpmc(int c)
{
    unsigned long a, d;

    __asm__ volatile("rdpmc" : "=a" (a), "=d" (d) : "c" (c));

    return (a | (d << 32));
}

// number of core performance counters per logical processor
// varies by model and mode of operation (HT often splits the
// counters across threads).
// The number of counters per logical processor is contained in
// bits 15:8 of EAX after executing the CPUID instruction
// with an initial EAX value of 0x0a (optional input in ECX is not used).
int get_num_core_counters(void)
{
    // ebx/ecx/edx are listed as outputs only so the compiler knows CPUID overwrites them
    unsigned int eax, ebx, ecx, edx;
    unsigned int leaf, subleaf;

    leaf = 0x0000000a;
    subleaf = 0x0;
    __asm__ __volatile__ ("cpuid" : \
      "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (leaf), "c" (subleaf));

    return((eax & 0x0000ff00) >> 8);
}

// core performance counter width varies by processor
// the width is contained in bits 23:16 of the EAX register
// after executing the CPUID instruction with an initial EAX
// argument of 0x0a (subleaf 0x0 in ECX).
int get_core_counter_width(void)
{
    // ebx/ecx/edx are listed as outputs only so the compiler knows CPUID overwrites them
    unsigned int eax, ebx, ecx, edx;
    unsigned int leaf, subleaf;

    leaf = 0x0000000a;
    subleaf = 0x0;
    __asm__ __volatile__ ("cpuid" : \
      "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (leaf), "c" (subleaf));

    return((eax & 0x00ff0000) >> 16);
}

// fixed-function performance counter width varies by processor
// the width is contained in bits 12:5 of the EDX register
// after executing the CPUID instruction with an initial EAX
// argument of 0x0a (subleaf 0x0 in ECX).
int get_fixed_counter_width(void)
{
    unsigned int eax, ebx, ecx, edx;
    unsigned int leaf, subleaf;

    leaf = 0x0000000a;
    subleaf = 0x0;
    __asm__ __volatile__ ("cpuid" : \
      "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (leaf), "c" (subleaf));

    return((edx & 0x00001fe0) >> 5);
}

// assume that these functions will automatically do the right thing if they are
// included more than once....
// NOTE(review): the header names on the next two lines were missing from this copy of
// the file; <stdio.h> (needed for fprintf/stderr below) and <stdlib.h> are presumed --
// confirm against the upstream source.
#include <stdio.h>
#include <stdlib.h>

// Utility routine to compute counter differences taking into account rollover
// when the performance counter width is not known at compile time.
// Use the "get_counter_width()" function to get the counter width on the
// current system, then use that as the third argument to this function.
186 | // 64-bit counters don't generally roll over, but I added a special case 187 | // for this 188 | unsigned long corrected_pmc_delta(unsigned long end, unsigned long start, int pmc_width) 189 | { 190 | unsigned long error_return=0xffffffffffffffff; 191 | unsigned long result; 192 | // sanity checks 193 | if ((pmc_width <= 0) || (pmc_width > 64)) { 194 | fprintf(stderr,"ERROR: corrected_pmc_delta() called with illegal performance counter width %d\n",pmc_width); 195 | return(error_return); 196 | } 197 | // Due to the specifics of unsigned arithmetic, for pmc_width == sizeof(unsigned long), 198 | // the simple calculation (end-start) gives the correct delta even if the counter has 199 | // rolled (leaving end < start). 200 | if (pmc_width == 64) { 201 | return (end - start); 202 | } else { 203 | // for pmc_width < sizeof(unsigned long), rollover must be detected and corrected explicitly 204 | if (end >= start) { 205 | result = end - start; 206 | } else { 207 | // I think this works independent of ordering, but this makes the most intuitive sense 208 | result = (end + (1UL<0; base--){ 264 | if (buffer[base] == 0x7a) { 265 | // printf("Found z at location %d\n",base); 266 | if (buffer[base-1] == 0x48) { 267 | // printf("Found H at location %d\n",base-1); 268 | if (buffer[base-2] == 0x47) { 269 | // printf("Found G at location %d\n",base-2); 270 | // printf(" -- need to extract string now\n"); 271 | i = base-3; 272 | stop = base-3; 273 | // printf("begin reverse search at stop character location %d\n",i); 274 | while(buffer[i] != 0x20) { 275 | // printf("found a non-blank character %c (%x) at location %d\n",buffer[i],buffer[i],i); 276 | i--; 277 | } 278 | start = i+1; 279 | length = stop - start + 1; 280 | k = length+1; 281 | // for (j=stop; j // printf, etc 6 | #include // standard integer types, e.g., uint32_t 7 | #include // for signal handler 8 | #include // exit() and EXIT_FAILURE 9 | #include // strerror() function converts errno to a text string for printing 
10 | #include // for open() 11 | #include // errno support 12 | #include // assert() function 13 | #include // sysconf() function, sleep() function 14 | #include // support for mmap() function 15 | #include // required for 1GiB page support in mmap() 16 | #include // for pow() function used in RAPL computations 17 | #include 18 | #include // for gettimeofday 19 | 20 | #define MYPAGESIZE 2097152L 21 | #define NUMPAGES 2048L // 40960L (80 GiB) for big production runs 22 | #define PAGES_MAPPED 16L // 128L or 256L for production runs 23 | 24 | 25 | // interfaces for va2pa_lib.c 26 | void print_pagemap_entry(unsigned long long pagemap_entry); 27 | unsigned long long get_pagemap_entry( void * va ); 28 | 29 | double *array; // array pointer to mmap on 1GiB pages 30 | double *page_pointers[NUMPAGES]; // one pointer for each page allocated 31 | uint64_t pageframenumber[NUMPAGES]; // one PFN entry for each page allocated 32 | 33 | // constant value defines for pre-allocated arrays 34 | # define NUM_SOCKETS 2 35 | # define NUM_CHA_BOXES 60 // largest number of CHAs per socket in current product line (2023-07-30) 36 | # define NUM_CHA_COUNTERS 4 37 | 38 | long cha_counts[NUM_SOCKETS][NUM_CHA_BOXES][NUM_CHA_COUNTERS][2]; // 2 sockets, 28 tiles per socket, 4 counters per tile, 2 times (before and after) 39 | uint64_t cha_perfevtsel[NUM_CHA_COUNTERS]; 40 | long cha_pkg_sums[NUM_SOCKETS][NUM_CHA_COUNTERS]; 41 | 42 | int8_t cha_by_page[NUMPAGES][32768]; // L3 numbers for each of the 32,768 cache lines in each of the first PAGES_MAPPED 2MiB pages 43 | uint64_t paddr_by_page[NUMPAGES]; // physical addresses of the base of each of the first PAGES_MAPPED 2MiB pages used 44 | long lines_by_cha[NUM_CHA_BOXES]; // bulk count of lines assigned to each CHA 45 | 46 | # ifndef MIN 47 | # define MIN(x,y) ((x)<(y)?(x):(y)) 48 | # endif 49 | # ifndef MAX 50 | # define MAX(x,y) ((x)>(y)?(x):(y)) 51 | # endif 52 | 53 | #include "MSR_defs.h" // includes MSR_Architectural.h and MSR_ArchPerfMon_v3.h 
-- very few of these defines are used here 54 | #include "low_overhead_timers.c" // probably need to link this to my official github version 55 | #include "cpuid_check_inline.c" // CPUID "signatures" (CPUID/leaf 0x01, return value in eax with stepping masked out) 56 | // #include "program_CHA_PMC_ICX.c" // off-loading code with details of CHA PMON MSR indexing 57 | #include "program_CHA_counters.c" // program all CHA counters -- contains model-specific code 58 | #include "read_CHA_counter.c" // read one CHA counter from one CHA in one socket -- contains model-specific code 59 | 60 | // =========================================================================================================================================================================== 61 | int main(int argc, char *argv[]) 62 | { 63 | // local declarations 64 | // int cpuid_return[4]; 65 | int i; 66 | int tag; 67 | int rc; 68 | ssize_t rc64; 69 | size_t len; 70 | unsigned long pagemapentry; 71 | unsigned long paddr, basephysaddr; 72 | uint32_t socket, counter; 73 | long count,delta; 74 | long j,k,page_number,page_base_index,line_number; 75 | uint32_t low_0, high_0, low_1, high_1; 76 | char filename[100]; 77 | int pkg, tile; 78 | int nr_cpus; 79 | int CHA_per_socket; 80 | uint64_t msr_val, msr_num; 81 | int mem_fd; 82 | int msr_fd[2]; // one for each socket 83 | int proc_in_pkg[2]; // one Logical Processor number for each socket 84 | uid_t my_uid; 85 | gid_t my_gid; 86 | double sum; 87 | unsigned long tsc_start; 88 | 89 | uint32_t CurrentCPUIDSignature; // CPUID Signature for the current system -- save for later processor-dependent conditionals 90 | 91 | // =============================================================================================================================== 92 | // allocate working array on a huge pages -- either 1GiB or 2MiB 93 | len = NUMPAGES * MYPAGESIZE; // Bytes 94 | rc = posix_memalign((void **)&array, (size_t) 2097152, (size_t) len); 95 | if (rc != 0) { 96 | 
printf("ERROR: posix_memalign call failed with error code %d\n",rc); 97 | exit(3); 98 | } 99 | if (array == (void *)(-1)) { 100 | perror("ERROR: mmap of array a failed! "); 101 | exit(1); 102 | } 103 | // initialize working array 104 | for (j=0; j 100) { 347 | backoffs += 1; 348 | sleep(1); 349 | if ( backoffs > 10 ) { 350 | printf("ERROR: No good results for line %d after %d tries and %d backoffs\n",line_number,numtries,backoffs); 351 | exit(101); 352 | } 353 | } 354 | totaltries++; 355 | 356 | // 1. read L3 counters before starting test 357 | for (tile=0; tile= 34) { 365 | msr_base = 0x0e00 - 0x47c; // ICX MSRs skip backwards for CHAs 34-39 366 | } else if (tile >= 18) { // ICX MSRs skil forward for CHAs 18-33 367 | msr_base = 0x0e00 + 0x0e; 368 | } else { 369 | msr_base = 0x0e00; 370 | } 371 | msr_num = msr_base + msr_stride*tile + 0x8 + 1; // counter 1 is the LLC_LOOKUPS.READ event 372 | pread(msr_fd[socket_under_test],&msr_val,sizeof(msr_val),msr_num); 373 | cha_counts[socket_under_test][tile][1][0] = msr_val; // use the array I have already declared for cha counts 374 | // printf("DEBUG: page %ld line %ld msr_num 0x%x msr_val %ld cha_counter1 %lu\n", 375 | // page_number,line_number,msr_num,msr_val,cha_counts[0][tile][1][0]); 376 | } 377 | #endif 378 | 379 | // 2. 
Access the line NFLUSHES times 380 | sum = 0; 381 | for (i=0; i= 34) { 402 | msr_base = 0x0e00 - 0x47c; // ICX MSRs skip backwards for CHAs 34-39 403 | } else if (tile >= 18) { // ICX MSRs skil forward for CHAs 18-33 404 | msr_base = 0x0e00 + 0x0e; 405 | } else { 406 | msr_base = 0x0e00; 407 | } 408 | msr_num = msr_base + msr_stride*tile + 0x8 + 1; // counter 1 is the LLC_LOOKUPS.READ event 409 | pread(msr_fd[socket_under_test],&msr_val,sizeof(msr_val),msr_num); 410 | cha_counts[socket_under_test][tile][1][1] = msr_val; // use the array I have already declared for cha counts 411 | } 412 | #endif 413 | 414 | 415 | 416 | 417 | #ifdef VERBOSE 418 | for (tile=0; tile95%) 431 | // goodness2 = min/NFLUSHES (pass if <20%) 432 | // goodness3 = avg/NFLUSHES (pass if <40%) 433 | max_count = 0; 434 | min_count = 1<<30; 435 | sum_count = 0; 436 | for (tile=0; tile 0.95 ) pass1 = 1; 451 | if ( goodness2 < 0.20 ) pass2 = 1; 452 | if ( goodness3 < 0.40 ) pass3 = 1; 453 | good_new = pass1 * pass2 * pass3; 454 | #ifdef VERBOSE 455 | printf("GOODNESS: line_number %ld max_count %d min_count %d sum_count %d avg_count %f goodness1 %f goodness2 %f goodness3 %f pass123 %d %d %d\n", 456 | line_number, max_count, min_count, sum_count, avg_count, goodness1, goodness2, goodness3, pass1, pass2, pass3); 457 | if (good_new == 0) printf("DEBUG: one or more of the sanity checks failed for line=%ld: %d %d %d goodness values %f %f %f\n", 458 | line_number,pass1,pass2,pass3,goodness1,goodness2,goodness3); 459 | #endif // VERBOSE 460 | 461 | // test to see if more than one CHA reports > 0.95*NFLUSHES events 462 | found = 0; 463 | old_cha = -1; 464 | int min_counts = (NFLUSHES*19)/20; 465 | for (tile=0; tile= min_counts) { 468 | old_cha = cha_by_page[page_number][line_number]; 469 | cha_by_page[page_number][line_number] = tile; 470 | found++; 471 | #ifdef VERBOSE 472 | if (found > 1) { 473 | printf("WARNING: Multiple (%d) CHAs found using counter 1 for cache line %ld, index %ld: old_cha %d new_cha 
%d\n",found,line_number,page_base_index+line_number*8,old_cha,cha_by_page[page_number][line_number]); 474 | } 475 | #endif // VERBOSE 476 | } 477 | } 478 | if (found == 0) { 479 | good_old = 0; 480 | #ifdef VERBOSE 481 | printf("WARNING: no CHA entry has been found for line %ld!\n",line_number); 482 | printf("DEBUG dump for no CHA found\n"); 483 | for (tile=0; tile= PAGES_MAPPED) break; 533 | } 534 | } 535 | printf("INFO: %d new 2MiB pages have been mapped\n",new_pages_mapped); 536 | printf("DUMMY: globalsum %d\n",globalsum); 537 | printf("VERBOSE: L3 Mapping Complete in %ld tries for %d cache lines ratio %f\n",totaltries,32768*PAGES_MAPPED,(double)totaltries/(double)(32768*PAGES_MAPPED)); 538 | 539 | // Accumulate the number of lines mapped to each CHA slice in each of the new pages mapped 540 | for (i=0; i // printf, etc 6 | #include // standard integer types, e.g., uint32_t 7 | #include // for signal handler 8 | #include // exit() and EXIT_FAILURE 9 | #include // strerror() function converts errno to a text string for printing 10 | #include // for open() 11 | #include // errno support 12 | #include // assert() function 13 | #include // sysconf() function, sleep() function 14 | #include // support for mmap() function 15 | #include // required for 1GiB page support in mmap() 16 | #include // for pow() function used in RAPL computations 17 | #include 18 | #include // for gettimeofday 19 | 20 | #define MYPAGESIZE 2097152L 21 | #define NUMPAGES 2048L // 40960L (80 GiB) for big production runs 22 | #define PAGES_MAPPED 16L // 128L or 256L for production runs 23 | 24 | 25 | #define SPECIAL_VALUE (-1) 26 | 27 | // interfaces for va2pa_lib.c 28 | void print_pagemap_entry(unsigned long long pagemap_entry); 29 | unsigned long long get_pagemap_entry( void * va ); 30 | 31 | double *array; // array pointer to mmap on 1GiB pages 32 | double *page_pointers[NUMPAGES]; // one pointer for each page allocated 33 | uint64_t pageframenumber[NUMPAGES]; // one PFN entry for each page 
allocated 34 | 35 | // constant value defines 36 | # define NUM_SOCKETS 2 // 37 | # define NUM_CHA_BOXES 40 38 | # define NUM_CHA_USED 40 39 | # define NUM_CHA_COUNTERS 4 40 | 41 | long cha_counts[NUM_SOCKETS][NUM_CHA_BOXES][NUM_CHA_COUNTERS][2]; // 2 sockets, 28 tiles per socket, 4 counters per tile, 2 times (before and after) 42 | uint64_t cha_perfevtsel[NUM_CHA_COUNTERS]; 43 | long cha_pkg_sums[NUM_SOCKETS][NUM_CHA_COUNTERS]; 44 | 45 | int8_t cha_by_page[NUMPAGES][32768]; // L3 numbers for each of the 32,768 cache lines in each of the first PAGES_MAPPED 2MiB pages 46 | uint64_t paddr_by_page[NUMPAGES]; // physical addresses of the base of each of the first PAGES_MAPPED 2MiB pages used 47 | long lines_by_cha[NUM_CHA_USED]; // bulk count of lines assigned to each CHA 48 | 49 | # ifndef MIN 50 | # define MIN(x,y) ((x)<(y)?(x):(y)) 51 | # endif 52 | # ifndef MAX 53 | # define MAX(x,y) ((x)>(y)?(x):(y)) 54 | # endif 55 | 56 | #include "MSR_defs.h" // includes MSR_Architectural.h and MSR_ArchPerfMon_v3.h -- very few of these defines are used here 57 | #include "low_overhead_timers.c" // probably need to link this to my official github version 58 | #include "cpuid_check_inline.c" // CPUID "signatures" (CPUID/leaf 0x01, return value in eax with stepping masked out) 59 | 60 | // =========================================================================================================================================================================== 61 | int main(int argc, char *argv[]) 62 | { 63 | // local declarations 64 | // int cpuid_return[4]; 65 | int i; 66 | int tag; 67 | int rc; 68 | ssize_t rc64; 69 | size_t len; 70 | unsigned long pagemapentry; 71 | unsigned long paddr, basephysaddr; 72 | uint32_t socket, counter; 73 | long count,delta; 74 | long j,k,page_number,page_base_index,line_number; 75 | uint32_t low_0, high_0, low_1, high_1; 76 | char filename[100]; 77 | int pkg, tile; 78 | int nr_cpus; 79 | uint64_t msr_val, msr_num; 80 | int mem_fd; 81 | int msr_fd[2]; 
// one for each socket 82 | int proc_in_pkg[2]; // one Logical Processor number for each socket 83 | uid_t my_uid; 84 | gid_t my_gid; 85 | double sum; 86 | unsigned long tsc_start; 87 | 88 | uint32_t CurrentCPUIDSignature; // CPUID Signature for the current system -- save for later processor-dependent conditionals 89 | 90 | // =============================================================================================================================== 91 | // allocate working array on a huge pages -- either 1GiB or 2MiB 92 | len = NUMPAGES * MYPAGESIZE; // Bytes 93 | rc = posix_memalign((void **)&array, (size_t) 2097152, (size_t) len); 94 | if (rc != 0) { 95 | printf("ERROR: posix_memalign call failed with error code %d\n",rc); 96 | exit(3); 97 | } 98 | if (array == (void *)(-1)) { 99 | perror("ERROR: mmap of array a failed! "); 100 | exit(1); 101 | } 102 | // initialize working array 103 | for (j=0; j= 34) { 246 | msr_base = 0x0e00 - 0x47c; // ICX MSRs skip backwards for CHAs 34-39 247 | } else if (tile >= 18) { // ICX MSRs skiward for CHAs 18-33 248 | msr_base = 0x0e00 + 0x0e; 249 | } else { 250 | msr_base = 0x0e00; 251 | } 252 | msr_num = msr_base + msr_stride*tile; // unit control register -- write bit 2 to clear counters 253 | msr_val = 0x2; 254 | // printf("DEBUG: %d %lx %ld %lx\n",msr_fd[pkg],msr_val,sizeof(msr_val),msr_num); 255 | pwrite(msr_fd[pkg],&msr_val,sizeof(msr_val),msr_num); 256 | msr_num = msr_base + msr_stride*tile + 1; // ctl0 257 | msr_val = cha_perfevtsel[0]; 258 | // printf("DEBUG: %d %lx %ld %lx\n",msr_fd[pkg],msr_val,sizeof(msr_val),msr_num); 259 | pwrite(msr_fd[pkg],&msr_val,sizeof(msr_val),msr_num); 260 | msr_num = msr_base + msr_stride*tile + 2; // ctl1 261 | msr_val = cha_perfevtsel[1]; 262 | // printf("DEBUG: %d %lx %ld %lx\n",msr_fd[pkg],msr_val,sizeof(msr_val),msr_num); 263 | pwrite(msr_fd[pkg],&msr_val,sizeof(msr_val),msr_num); 264 | msr_num = msr_base + msr_stride*tile + 3; // ctl2 265 | msr_val = cha_perfevtsel[2]; 266 | // 
printf("DEBUG: %d %lx %ld %lx\n",msr_fd[pkg],msr_val,sizeof(msr_val),msr_num); 267 | pwrite(msr_fd[pkg],&msr_val,sizeof(msr_val),msr_num); 268 | msr_num = msr_base + msr_stride*tile + 4; // ctl3 269 | msr_val = cha_perfevtsel[3]; 270 | // printf("DEBUG: %d %lx %ld %lx\n",msr_fd[pkg],msr_val,sizeof(msr_val),msr_num); 271 | pwrite(msr_fd[pkg],&msr_val,sizeof(msr_val),msr_num); 272 | // msr_num = msr_base + msr_stride*tile + 5; // filter0 # NOT USED IN THE SAME WAY IN ICX 273 | // msr_val = cha_filter0; // bits 24:21,17 FMESI -- all LLC lookups, not not SF lookups 274 | // pwrite(msr_fd[pkg],&msr_val,sizeof(msr_val),msr_num); 275 | } 276 | } 277 | // document CHA counter programming in output 278 | for (counter=0; counter 100) { 385 | backoffs += 1; 386 | sleep(1); 387 | if ( backoffs > 10 ) { 388 | printf("ERROR: No good results for line %d after %d tries and %d backoffs\n",line_number,numtries,backoffs); 389 | exit(101); 390 | } 391 | } 392 | totaltries++; 393 | // 1. read L3 counters before starting test 394 | for (tile=0; tile= 34) { 396 | msr_base = 0x0e00 - 0x47c; // ICX MSRs skip backwards for CHAs 34-39 397 | } else if (tile >= 18) { // ICX MSRs skil forward for CHAs 18-33 398 | msr_base = 0x0e00 + 0x0e; 399 | } else { 400 | msr_base = 0x0e00; 401 | } 402 | msr_num = msr_base + msr_stride*tile + 0x8 + 1; // counter 1 is the LLC_LOOKUPS.READ event 403 | pread(msr_fd[socket_under_test],&msr_val,sizeof(msr_val),msr_num); 404 | cha_counts[socket_under_test][tile][1][0] = msr_val; // use the array I have already declared for cha counts 405 | // printf("DEBUG: page %ld line %ld msr_num 0x%x msr_val %ld cha_counter1 %lu\n", 406 | // page_number,line_number,msr_num,msr_val,cha_counts[0][tile][1][0]); 407 | } 408 | 409 | // 2. 
Access the line many times 410 | sum = 0; 411 | for (i=0; i= 34) { 424 | msr_base = 0x0e00 - 0x47c; // ICX MSRs skip backwards for CHAs 34-39 425 | } else if (tile >= 18) { // ICX MSRs skil forward for CHAs 18-33 426 | msr_base = 0x0e00 + 0x0e; 427 | } else { 428 | msr_base = 0x0e00; 429 | } 430 | msr_num = msr_base + msr_stride*tile + 0x8 + 1; // counter 1 is the LLC_LOOKUPS.READ event 431 | pread(msr_fd[socket_under_test],&msr_val,sizeof(msr_val),msr_num); 432 | cha_counts[socket_under_test][tile][1][1] = msr_val; // use the array I have already declared for cha counts 433 | } 434 | 435 | #ifdef VERBOSE 436 | for (tile=0; tile95%) 449 | // goodness2 = min/NFLUSHES (pass if <20%) 450 | // goodness3 = avg/NFLUSHES (pass if <40%) 451 | max_count = 0; 452 | min_count = 1<<30; 453 | sum_count = 0; 454 | for (tile=0; tile 0.95 ) pass1 = 1; 469 | if ( goodness2 < 0.20 ) pass2 = 1; 470 | if ( goodness3 < 0.40 ) pass3 = 1; 471 | good_new = pass1 * pass2 * pass3; 472 | #ifdef VERBOSE 473 | printf("GOODNESS: line_number %ld max_count %d min_count %d sum_count %d avg_count %f goodness1 %f goodness2 %f goodness3 %f pass123 %d %d %d\n", 474 | line_number, max_count, min_count, sum_count, avg_count, goodness1, goodness2, goodness3, pass1, pass2, pass3); 475 | if (good_new == 0) printf("DEBUG: one or more of the sanity checks failed for line=%ld: %d %d %d goodness values %f %f %f\n", 476 | line_number,pass1,pass2,pass3,goodness1,goodness2,goodness3); 477 | #endif // VERBOSE 478 | 479 | // test to see if more than one CHA reports > 0.95*NFLUSHES events 480 | found = 0; 481 | old_cha = -1; 482 | int min_counts = (NFLUSHES*19)/20; 483 | for (tile=0; tile= min_counts) { 486 | old_cha = cha_by_page[page_number][line_number]; 487 | cha_by_page[page_number][line_number] = tile; 488 | found++; 489 | #ifdef VERBOSE 490 | if (found > 1) { 491 | printf("WARNING: Multiple (%d) CHAs found using counter 1 for cache line %ld, index %ld: old_cha %d new_cha 
%d\n",found,line_number,page_base_index+line_number*8,old_cha,cha_by_page[page_number][line_number]); 492 | } 493 | #endif // VERBOSE 494 | } 495 | } 496 | if (found == 0) { 497 | good_old = 0; 498 | #ifdef VERBOSE 499 | printf("WARNING: no CHA entry has been found for line %ld!\n",line_number); 500 | printf("DEBUG dump for no CHA found\n"); 501 | for (tile=0; tile= PAGES_MAPPED) break; 551 | } 552 | } 553 | printf("INFO: %d new 2MiB pages have been mapped\n",new_pages_mapped); 554 | printf("DUMMY: globalsum %d\n",globalsum); 555 | printf("VERBOSE: L3 Mapping Complete in %ld tries for %d cache lines ratio %f\n",totaltries,32768*PAGES_MAPPED,(double)totaltries/(double)(32768*PAGES_MAPPED)); 556 | 557 | // Accumulate the number of lines mapped to each CHA slice in each of the new pages mapped 558 | for (i=0; i