├── AUTHORS
├── .gitignore
├── include
│   ├── cache.h
│   ├── histogram.h
│   ├── cachequery.h
│   ├── lists.h
│   ├── parser.h
│   ├── msrdrv.h
│   ├── config.h
│   └── x86.h
├── Makefile
├── tool
│   ├── cachequery.ini
│   └── cachequery.py
├── codemeta.json
├── src
│   ├── cache.c
│   ├── histogram.c
│   ├── msrdrv.c
│   ├── lists.c
│   ├── config.c
│   ├── parser.c
│   └── main.c
├── config
│   ├── i7-8550u.h
│   ├── i5-6500.h
│   ├── i7-4790.h
│   └── settings.h
├── README.md
└── LICENSE

/AUTHORS:
--------------------------------------------------------------------------------
1 | - Pepe Vila
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # hidden files
2 | .*
3 |
4 | # binary compile stuff
5 | *.ko
6 | *.o
7 | *.mod.c
8 | modules.order
9 | Module.symvers
10 |
11 | # vscode
12 | .vscode/
--------------------------------------------------------------------------------
/include/cache.h:
--------------------------------------------------------------------------------
1 | #ifndef __CACHE_H
2 | #define __CACHE_H
3 |
4 | unsigned int get_l3_slice(void *vaddr);
5 | unsigned int get_l3_set(void *vaddr);
6 | unsigned int get_l2_set(void *vaddr);
7 | unsigned int get_l1_set(void *vaddr);
8 |
9 | #endif /* __CACHE_H */
10 |
--------------------------------------------------------------------------------
/include/histogram.h:
--------------------------------------------------------------------------------
1 | #ifndef __HISTOGRAM_H
2 | #define __HISTOGRAM_H
3 |
4 | int get_threshold(int *h, int perc);
5 | int get_min(int *h);
6 | int get_mean(int *h, int n);
7 | int get_mode(int *h);
8 | void print_hist(int *h);
9 | int get_n_below(int *h, int threshold);
10 |
11 | #endif /* __HISTOGRAM_H */
12 |
--------------------------------------------------------------------------------
/include/cachequery.h:
--------------------------------------------------------------------------------
1 | #ifndef __CACHEQUERY_H
2 | #define __CACHEQUERY_H
3 |
4 | // Definitions
5 | typedef struct block Block;
6 |
7 | struct block
8 | {
9 | Block *next;
10 | Block *prev;
11 | unsigned int set1, set2, set3;
12 | unsigned int slice;
13 | Block **evict2, **evict1;
14 | unsigned int evict2_sz, evict1_sz;
15 | char pad[8]; // up to 64B
16 | };
17 |
18 | // Macros
19 | #define TRUE 1
20 | #define FALSE 0
21 |
22 | #define MIN(a,b) (((a)<(b))?(a):(b))
23 | #define MAX(a,b) (((a)>(b))?(a):(b))
24 | #endif /* __CACHEQUERY_H */
--------------------------------------------------------------------------------
/include/lists.h:
--------------------------------------------------------------------------------
1 | #ifndef __LISTS_H
2 | #define __LISTS_H
3 |
4 | #include <linux/types.h> /* ssize_t */
5 | #include "../config/settings.h"
6 | #include "cachequery.h"
7 |
8 | void init_lists(char *pool_l3, char *pool_l2, char *pool_l1);
9 | void init_evictionsets(void);
10 | void reset_sets(void);
11 |
12 | Block **get_sets_l1(void);
13 | Block *get_set_l1(int i);
14 | Block **get_sets_l2(void);
15 | Block *get_set_l2(int i);
16 | Block **get_sets_l3(void);
17 | Block *get_set_l3(int i);
18 |
19 | ssize_t list_cacheset_addresses (char *buf, Block *set);
20 | void find_l2_eviction_set(Block *set, Block **l2);
21 | void find_l1_eviction_set(Block *set, Block **l1);
22 | void clean_l3_set(int i);
23 | void clean_l2_set(int i);
24 | int list_length(Block *ptr);
25 |
26 | #endif /* __LISTS_H */
27 |
--------------------------------------------------------------------------------
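A note on `struct block` above (include/cachequery.h): the trailing `pad[8]` rounds each node up to exactly 64 bytes, so every list node occupies a single cache line and doubles as the memory block being accessed. A minimal compile-time check, as a sketch (the assert is illustrative, not part of the sources):

```c
#include "include/cachequery.h"

/* On x86-64: 2 pointers (16B) + 4 unsigned ints (16B) + 2 pointers (16B)
 * + 2 unsigned ints (8B) + pad[8] = 64 bytes, i.e. one cache line. */
_Static_assert(sizeof(Block) == 64, "Block must span exactly one 64B cache line");
```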
/Makefile:
--------------------------------------------------------------------------------
1 | TARGET = mod_cachequery
2 |
3 | KERNEL := /lib/modules/$(shell uname -r)/build
4 |
5 | PWD := $(shell pwd)
6 |
7 | SOURCES := $(addprefix src/, main.c msrdrv.c parser.c cache.c config.c histogram.c lists.c)
8 | $(TARGET)-objs += $(SOURCES:.c=.o)
9 |
10 | # fix error with new gcc version
11 | ccflags-y += -I/usr/lib/gcc/x86_64-pc-linux-gnu/9.2.0/include
12 | ccflags-y += -Wall
13 |
14 | obj-m := $(TARGET).o
15 |
16 | # list of supported cpus: i7-4790 i5-6500 i7-8550u
17 | cpu ?= i5-6500
18 | EXTRA_CFLAGS += -DCPU=$(cpu)
19 |
20 | all:
21 | make -C $(KERNEL) M=$(PWD) modules
22 | cp *.ko $(PWD)/out
23 |
24 | qemu:
25 | make -C ../../linux/obj/linux-x86-basic M=$(PWD) modules
26 | cp *.ko $(PWD)/out
27 |
28 | clean:
29 | make -C $(KERNEL) M=$(PWD) clean
30 |
31 | install:
32 | sudo rmmod mod_cachequery || true
33 | sudo insmod mod_cachequery.ko
34 | sudo chown -R $(USER) /sys/kernel/cachequery/
35 |
36 | uninstall:
37 | sudo rmmod mod_cachequery
--------------------------------------------------------------------------------
/tool/cachequery.ini:
--------------------------------------------------------------------------------
1 | [General]
2 | # Default level
3 | level = L1
4 | # Repeat same query several times
5 | # (cachequery has another inner loop for repetitions)
6 | repeat = 100
7 | # Set to False to repeat w/o regenerating the code each time
8 | refresh = False
9 | # Alphabet used for macros '@' and wildcard '_'
10 | alphabet = abcdefghijklmnopqrstuvwxyz
11 | # Path to LevelDB cache mechanism (disables repeat)
12 | db_cache =
13 | # Path to session log file
14 | log_file =
15 |
16 | [System]
17 | # Requires sudo and the 'msr' and 'acpi-cpufreq' modules
18 | # Write into MSR 0x1a4 to disable HW prefetchers
19 | disable_prefetch = False
20 | # Write into `/sys/devices/system/cpu/cpu*/online` to disable hyperthreading and multi-core
21 | disable_ht = False
22 | # Write into MSR 0x1a0 to disable Intel's TurboBoost
23 | disable_turboboost = False
24 | # Uses `cpupower frequency-set` to pin the CPU frequency (in MHz); 0 to skip
25 | frequency_set = 0
26 |
27 | [L3]
28 | ways = 12
29 | set = 264
30 | [L2]
31 | ways = 4
32 | set = 24
33 | [L1]
34 | ways = 8
35 | set = 16
--------------------------------------------------------------------------------
/codemeta.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
3 | "@type": "SoftwareSourceCode",
4 | "license": "https://spdx.org/licenses/Apache-2.0",
5 | "codeRepository": "https://github.com/cgvwzq/cachequery/",
6 | "dateCreated": "2019-12-17",
7 | "datePublished": "2019-12-17",
8 | "issueTracker": "https://github.com/cgvwzq/cachequery/issues",
9 | "name": "CacheQuery",
10 | "description": "A tool for interacting with hardware memory caches in modern Intel CPUs.",
11 | "developmentStatus": "concept",
12 | "referencePublication": "https://doi.org/10.1145/3385412.3386008",
13 | "programmingLanguage": [
14 | "C",
15 | "x86",
16 | "Python3"
17 | ],
18 | "operatingSystem": [
19 | "Linux"
20 | ],
21 | "softwareRequirements": [
22 | "lark-parser, LevelDB, Plyvel"
23 | ],
24 | "author": [
25 | {
26 | "@type": "Person",
27 | "givenName": "Pepe",
28 | "familyName": "Vila",
29 | "email": "pepe.vila@imdea.org",
30 | "affiliation": {
31 | "@type": "Organization",
32 | "name": "IMDEA Software Institute"
33 | }
34 | }
35 | ]
36 | }
37 |
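The `[System]` options in `tool/cachequery.ini` above toggle the low-noise knobs by writing model-specific registers. As a rough illustration of what `disable_prefetch` amounts to, here is a user-space sketch using the standard `msr` driver interface (`/dev/cpu/*/msr`), with the value `0xf` taken from the `wrmsr -a 0x1a4 15` recipe in the README; error handling is trimmed and the snippet is not part of the tool itself:

```c
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

/* Disable the four HW prefetchers on cpu0 by setting the low four bits
 * of MSR 0x1a4 (requires the 'msr' kernel module and root). */
int main(void)
{
    uint64_t val = 0xf;                   /* 15 = all four prefetchers off */
    int fd = open("/dev/cpu/0/msr", O_WRONLY);
    if (fd < 0)
        return 1;
    pwrite(fd, &val, sizeof(val), 0x1a4); /* file offset = MSR address */
    close(fd);
    return 0;
}
```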
-------------------------------------------------------------------------------- /src/cache.c: -------------------------------------------------------------------------------- 1 | #include "../include/cache.h" 2 | 3 | #include 4 | 5 | #include "../config/settings.h" 6 | 7 | unsigned long long vtop(void *vaddr) 8 | { 9 | return (vmalloc_to_pfn (vaddr) << 12) | ((unsigned long long)vaddr & (PAGE_SIZE-1)); 10 | } 11 | 12 | unsigned int count_bits(unsigned long long n) 13 | { 14 | unsigned int count = 0; 15 | while (n) 16 | { 17 | n &= (n-1); 18 | count++; 19 | } 20 | return count; 21 | } 22 | 23 | unsigned int get_l3_slice(void *vaddr) 24 | { 25 | int i = L3_SLICE_BITS - 1; 26 | unsigned int ret = 0; 27 | unsigned long long paddr = vtop (vaddr); 28 | unsigned long long mask[3] = {0x1b5f575440ULL, 0x2eb5faa880ULL, 0x3cccc93100ULL}; 29 | while (i >= 0 && i < 3) 30 | { 31 | ret = (ret << 1) | (count_bits (mask[i] & paddr) % 2); 32 | i--; 33 | } 34 | return ret; 35 | } 36 | 37 | unsigned int get_l1_set(void *vaddr) 38 | { 39 | return ((unsigned long long) (vaddr) >> L1_LINE_BITS) & ((L1_CACHE_SETS/L1_CACHE_SLICES) - 1); 40 | } 41 | 42 | unsigned int get_l2_set(void *vaddr) 43 | { 44 | // is virtual or physically indexed? 45 | #if L2_PHYS_INDEXED == 1 46 | return (vtop (vaddr) >> L2_LINE_BITS) & ((L2_CACHE_SETS/L2_CACHE_SLICES) - 1); 47 | #else 48 | return ((unsigned long long)(vaddr) >> L2_LINE_BITS) & ((L2_CACHE_SETS/L2_CACHE_SLICES) - 1); 49 | #endif 50 | } 51 | 52 | unsigned int get_l3_set(void *vaddr) 53 | { 54 | return (vtop (vaddr) >> L3_LINE_BITS) & ((L3_CACHE_SETS/L3_CACHE_SLICES) - 1); 55 | } 56 | 57 | -------------------------------------------------------------------------------- /config/i7-8550u.h: -------------------------------------------------------------------------------- 1 | /* KabyLake R laptop i7-8550U */ 2 | #define SERIAL 1 // 0 - lfence, 1 - mfence, 2 - cpuid 3 | #define USE_RDTSCP 0 // 0 - not supported, 1 - supported 4 | 5 | #define L3_CACHE_WAYS 16 6 | #define L3_WAYS_BITS 4 7 | #define L3_SET_BITS 10 8 | #define L3_LINE_BITS 6 9 | #define L3_SLICE_BITS 3 10 | 11 | #define L2_PHYS_INDEXED 1 12 | #define L2_CACHE_WAYS 4 13 | #define L2_WAYS_BITS 3 14 | #define L2_SET_BITS 10 15 | #define L2_LINE_BITS 6 16 | #define L2_SLICE_BITS 0 17 | 18 | #define L1_CACHE_WAYS 8 19 | #define L1_WAYS_BITS 3 20 | #define L1_SET_BITS 6 21 | #define L1_LINE_BITS 6 22 | #define L1_SLICE_BITS 0 23 | 24 | //#define IS_LEADER_SET(x,s) (FALSE) 25 | #define IS_LEADER_SET(x,s) ((((x>>5)^(x&0x1f)) == (x&0x2))) 26 | 27 | /* 28 | // Automatic calibration 29 | #define L3_HIT_THRESHOLD 0 30 | #define L3_MISS_THRESHOLD 0 31 | #define L2_HIT_THRESHOLD 0 32 | #define L2_MISS_THRESHOLD 0 33 | #define L1_HIT_THRESHOLD 0 34 | #define L1_MISS_THRESHOLD 0 35 | */ 36 | 37 | // core cycles 38 | #define L3_HIT_THRESHOLD 150 // 168-174 39 | #define L3_MISS_THRESHOLD 250 // > 290 40 | #define L2_HIT_THRESHOLD 141 // 144-145 41 | #define L2_MISS_THRESHOLD 160 // 175-.. 
42 | #define L1_HIT_THRESHOLD 1 // 139-141
43 | #define L1_MISS_THRESHOLD 144 // 144-145
44 |
45 | /*
46 | // rdtscp + 2ghz
47 | #define L3_HIT_THRESHOLD 78 // 79-96
48 | #define L3_MISS_THRESHOLD 197 // >197
49 | #define L2_HIT_THRESHOLD 52 // 53-65
50 | #define L2_MISS_THRESHOLD 88 // 88-102
51 | #define L1_HIT_THRESHOLD 1 // 47-57
52 | #define L1_MISS_THRESHOLD 55 // 53-65
53 | */
--------------------------------------------------------------------------------
/config/i5-6500.h:
--------------------------------------------------------------------------------
1 | /* Skylake desktop i5-6500 */
2 | #define SERIAL 1 // 0 - lfence, 1 - mfence, 2 - cpuid
3 | #define USE_RDTSCP 0 // 0 - not supported, 1 - supported
4 |
5 | #define L3_CACHE_WAYS 12
6 | #define L3_WAYS_BITS 4 // actually 3.58
7 | #define L3_SET_BITS 10
8 | #define L3_LINE_BITS 6
9 | #define L3_SLICE_BITS 3
10 |
11 | #define L2_PHYS_INDEXED 1
12 | #define L2_WAYS_BITS 2
13 | #define L2_CACHE_WAYS 4
14 | #define L2_SET_BITS 10
15 | #define L2_LINE_BITS 6
16 | #define L2_SLICE_BITS 0
17 |
18 | #define L1_CACHE_WAYS 8
19 | #define L1_WAYS_BITS 3
20 | #define L1_SET_BITS 6
21 | #define L1_LINE_BITS 6
22 | #define L1_SLICE_BITS 0
23 |
24 | //#define IS_LEADER_SET(x,s) (FALSE)
25 | #define IS_LEADER_SET(x,s) ((((x>>5)^(x&0x1f)) == (x&0x2)))
26 |
27 | /*
28 | // Automatic calibration
29 | #define L3_HIT_THRESHOLD 0
30 | #define L3_MISS_THRESHOLD 0
31 | #define L2_HIT_THRESHOLD 0
32 | #define L2_MISS_THRESHOLD 0
33 | #define L1_HIT_THRESHOLD 0
34 | #define L1_MISS_THRESHOLD 0
35 | */
36 |
37 | // core cycles
38 | #define L3_HIT_THRESHOLD 160 // 113-131
39 | #define L3_MISS_THRESHOLD 300 // 290-...
40 | #define L2_HIT_THRESHOLD 140 // 79-91
41 | #define L2_MISS_THRESHOLD 160 // 123-237
42 | #define L1_HIT_THRESHOLD 1 // 69-79
43 | #define L1_MISS_THRESHOLD 144 // 79-91
44 |
45 | /*
46 | // rdtsc + 2ghz
47 | #define L3_HIT_THRESHOLD 112 // 113-131
48 | #define L3_MISS_THRESHOLD 250 // 290-...
49 | #define L2_HIT_THRESHOLD 78 // 79-91
50 | #define L2_MISS_THRESHOLD 113 // 123-237
51 | #define L1_HIT_THRESHOLD 1 // 69-79
52 | #define L1_MISS_THRESHOLD 79 // 79-91
53 | */
--------------------------------------------------------------------------------
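The per-CPU headers above and below end with hit/miss latency thresholds for each cache level. How they are consumed can be pictured with a sketch along the following lines; the helper name and the exact comparisons are illustrative assumptions, not code taken from this repository:

```c
/* Illustrative only: classify one measured access latency against a
 * level's thresholds, e.g. L2_HIT_THRESHOLD / L2_MISS_THRESHOLD. */
enum outcome { OUTCOME_HIT, OUTCOME_MISS, OUTCOME_UNKNOWN };

static enum outcome classify(unsigned int cycles,
                             unsigned int hit_thr, unsigned int miss_thr)
{
    if (cycles <= hit_thr)
        return OUTCOME_HIT;     /* fast: at or below the HIT threshold  */
    if (cycles >= miss_thr)
        return OUTCOME_MISS;    /* slow: at or above the MISS threshold */
    return OUTCOME_UNKNOWN;     /* in the noise band between the two    */
}
```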
/config/i7-4790.h:
--------------------------------------------------------------------------------
1 | /* Haswell desktop i7-4790 */
2 | #define SERIAL 1 // 0 - lfence, 1 - mfence, 2 - cpuid
3 | #define USE_RDTSCP 0 // 0 - not supported, 1 - supported
4 |
5 | #define L3_CACHE_WAYS 16
6 | #define L3_WAYS_BITS 4
7 | #define L3_SET_BITS 11
8 | #define L3_LINE_BITS 6
9 | #define L3_SLICE_BITS 2
10 |
11 | #define L2_PHYS_INDEXED 1
12 | #define L2_CACHE_WAYS 8
13 | #define L2_WAYS_BITS 3
14 | #define L2_SET_BITS 9
15 | #define L2_LINE_BITS 6
16 | #define L2_SLICE_BITS 0
17 |
18 | #define L1_CACHE_WAYS 8
19 | #define L1_WAYS_BITS 3
20 | #define L1_SET_BITS 6
21 | #define L1_LINE_BITS 6
22 | #define L1_SLICE_BITS 0
23 |
24 | #define IS_LEADER_SET(x,s) ((s == 0) && (((x>>5) == 8) || ((x>>5) == 12))) // 512-575 & 768-831 in slice0
25 |
26 | /*
27 | // Automatic calibration
28 | #define L3_HIT_THRESHOLD 0
29 | #define L3_MISS_THRESHOLD 0
30 | #define L2_HIT_THRESHOLD 0
31 | #define L2_MISS_THRESHOLD 0
32 | #define L1_HIT_THRESHOLD 0
33 | #define L1_MISS_THRESHOLD 0
34 | */
35 |
36 | // core cycles
37 | #define L3_HIT_THRESHOLD 190 //231 // 232-240
38 | #define L3_MISS_THRESHOLD 250 //400 // 440-...
39 | #define L2_HIT_THRESHOLD 170 // 192-200
40 | #define L2_MISS_THRESHOLD 194 // 232-240-248
41 | #define L1_HIT_THRESHOLD 1 // 184
42 | #define L1_MISS_THRESHOLD 173 // 192-200
43 |
44 | /*
45 | // rdtsc + 2ghz
46 | #define L3_HIT_THRESHOLD 231 // 232-240
47 | #define L3_MISS_THRESHOLD 400 // 440-...
48 | #define L2_HIT_THRESHOLD 191 // 192-200
49 | #define L2_MISS_THRESHOLD 231 // 232-240-248
50 | #define L1_HIT_THRESHOLD 1 // 184
51 | #define L1_MISS_THRESHOLD 192 // 192-200
52 | */
--------------------------------------------------------------------------------
/include/parser.h:
--------------------------------------------------------------------------------
1 | #ifndef __PARSER_H
2 | #define __PARSER_H
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #include "cachequery.h"
9 | #include "config.h"
10 |
11 | /* token */
12 |
13 | typedef struct block_t {
14 | unsigned int id;
15 | unsigned int set;
16 | char ask;
17 | char flush;
18 | char invalidate;
19 | } block_t;
20 |
21 | /* lexer */
22 |
23 | typedef struct block_list {
24 | block_t block;
25 | struct block_list *next;
26 | } block_list;
27 |
28 | typedef struct lexer_state {
29 | const char *ptr;
30 | const char *eob;
31 | block_list *head;
32 | block_list *tail;
33 | int size;
34 | } lexer_state;
35 |
36 | /* generator */
37 |
38 | struct smart_buffer {
39 | char *start;
40 | char *p;
41 | char *limit;
42 | size_t len;
43 | size_t asks;
44 | };
45 |
46 | /* funcs */
47 |
48 | block_list *read_block(lexer_state *state, int max_set);
49 |
50 | void lexer_init(lexer_state *lexer);
51 |
52 | int parse(lexer_state *lexer, const char *source, int size, int max_set);
53 |
54 | void *allocate_code_pages(struct smart_buffer *code, Block **sets, unsigned char level, int s, int pages);
55 |
56 | void free_code_pages(struct smart_buffer *code);
57 |
58 | void smart_buffer_init(struct smart_buffer *code);
59 |
60 | int generate_code(lexer_state *lexer, unsigned int target_set, struct smart_buffer *code, Block **sets, unsigned char level, int t_up, int t_low, Config *conf);
61 |
62 | int opcode(struct smart_buffer *code, char *ops, size_t len);
63 | #define OPCODE(code, ops) { if (opcode(code, ops, sizeof(ops)/sizeof(char)) != 0) { goto err; } }
64 |
65 | #endif /* __PARSER_H */
66 |
--------------------------------------------------------------------------------
/src/histogram.c:
--------------------------------------------------------------------------------
1 | #include "../include/histogram.h"
2 |
3 | #include <linux/vmalloc.h> /* vmalloc/vfree */
4 |
5 | #include "../config/settings.h"
6 | #include "../include/cachequery.h"
7 |
8 | int get_threshold(int *h, int perc)
9 | {
10 | size_t sum_h = 0, p_sum_h = 0;
11 | int i;
12 | for (i = 0; i < MAX_TIME; i++)
13 | {
14 | sum_h += h[i];
15 | }
16 | if (sum_h == 0)
17 | {
18 | return -1;
19 | }
20 | for (i = 0; i < MAX_TIME; i++)
21 | {
22 | p_sum_h += h[i];
23 | if ((100 * p_sum_h / sum_h) >= perc)
24 | {
25 | return i;
26 | }
27 | }
28 | return -1;
29 | }
30 |
31 | int get_min(int *h)
32 | {
33 | int i;
34 | for (i = 0; i < MAX_TIME; i++)
35 | {
36 | if (h[i] > 0)
37 | {
38 | return i;
39 | }
40 | }
41 | return i;
42 | }
43 |
44 | int get_mean(int *h, int n)
45 | {
46 | size_t sum = 0;
47 | int i;
48 | for (i = 0; i < MAX_TIME; i++)
49 | {
50 | sum += i * h[i];
51 | }
52 | return sum / n;
53 | }
54 |
55 | int get_mode(int *h)
56 | {
57 | int i, j = 0, m = 0;
58 | for (i = 0; i < MAX_TIME; i++)
59 | {
60 | if (h[i] > m)
61 | {
62 | m = h[i];
63 | j = i;
64 | }
65 | }
66 | return j;
67 | }
68 |
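/* Note: h is a latency histogram with MAX_TIME bins, where h[i] counts the
 * measured accesses that took exactly i cycles. get_threshold() above
 * returns the first bin at which the cumulative count reaches perc percent
 * of all samples, and print_hist() below dumps the non-empty bins. */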
69 | void print_hist(int *h)
70 | {
71 | char *out;
72 | int i, l = 0;
73 | out = vmalloc (12096);
74 | for (i = 0; i < MAX_TIME; i++)
75 | {
76 | if (h[i] > 0)
77 | {
78 | l += sprintf (&out[l], "%d(%d) ", i, h[i]);
79 | }
80 | }
81 | PRINT ("[debug] %s\n", out);
82 | vfree (out);
83 | }
84 |
85 | int get_n_below(int *h, int threshold)
86 | {
87 | int i, ret = 0;
88 | for (i = 0; i < MIN(threshold, MAX_TIME); i++)
89 | {
90 | ret += h[i];
91 | }
92 | return ret;
93 | }
94 |
--------------------------------------------------------------------------------
/config/settings.h:
--------------------------------------------------------------------------------
1 | #ifndef __SETTINGS_H
2 | #define __SETTINGS_H
3 |
4 | /* SELECT ARCHITECTURE */
5 | #define STR(s) STR2(s)
6 | #define STR2(s) #s
7 | #define EXPAND(s) s
8 |
9 | #include STR(EXPAND(CPU).h) // e.g. building with -DCPU=i5-6500 expands to #include "i5-6500.h"
10 | /* END OF SELECTION */
11 |
12 | /* DEFAULT RUNTIME OPTIONS */
13 | /* We ignore memory accesses taking longer than MAX_TIME */
14 | #define MAX_TIME 10000
15 | /* Number of repetitions used during calibration phase */
16 | #define NUM_CALIBRATIONS 10000
17 | /* Number of repetitions for distinguishing HIT/MISS */
18 | #define NUM_REPETITIONS 100
19 | /* Force a TLB preload for the address to profile */
20 | #define TLB_PRELOAD TRUE
21 | /* Choose a cache set to thrash before any memory access in the query */
22 | #define THRASH_SET -1
23 | /* Number of blocks used for the thrashing sequence */
24 | #define THRASHING_SIZE 32
25 | /* Only return the concrete result for the latest access. Ignores profiling. */
26 | #define ONLY_ONE_TIME FALSE
27 | /* Use performance counters instead of rdtsc */
28 | #define USE_PMC FALSE
29 | /* Use core cycles instead of TSC */
30 | #define CORE_CYCLES TRUE
31 |
32 | /* COMPILE TIME OPTIONS */
33 | /* Maximum number of pages used for code allocation */
34 | #define JIT_CODE_SZ 12
35 | /* Size of eviction sets */
36 | #define EVICTION_SZ(LVL) ((LVL##_CACHE_WAYS/2)*(LVL##_WAYS_BITS+1)) // for PLRU (e.g. L1: (8/2)*(3+1) = 16) http://www.rw.cdl.uni-saarland.de/~grund/papers/rts07-predictability.pdf
37 | /* Multiplicative factor for size of eviction sets, required for non-temporal accesses */
38 | #define MARGIN 6
39 | /* Extra L1D non-temporal memory accesses after a cache eviction (clean LFB) */
40 | #define LFB 0
41 | /* Extra L2 non-temporal memory accesses after cache victim (clean SQ) */
42 | #define SQ 0
43 | /* VERBOSE MODE */
44 | #define VERBOSE 1 // 0 - disabled, 1 - verbose, 2 - very verbose
45 | #define PRINT(...) if (VERBOSE > 0) printk(__VA_ARGS__);
46 | #define WPRINT(...) if (VERBOSE > 1) printk(__VA_ARGS__);
47 |
48 | /* POOL SIZES FOR EVICTION SETS */
49 | #define LVL_3 3
50 | #define L3_POOL_SZ (18*1024*1024)
51 | #define L3_CACHE_SLICES (1<<L3_SLICE_BITS)
52 | #define L3_CACHE_SETS ((1<<L3_SET_BITS)*L3_CACHE_SLICES)
/* [gap: the analogous LVL_2/LVL_1, L2_POOL_SZ/L1_POOL_SZ and L2/L1
   CACHE_SLICES/CACHE_SETS definitions relied upon by src/lists.c were
   lost in extraction] */
#endif /* __SETTINGS_H */
--------------------------------------------------------------------------------
/include/msrdrv.h:
--------------------------------------------------------------------------------
1 | #ifndef __MSRDRV_H
2 | #define __MSRDRV_H
3 |
4 | #include
5 | #include
6 |
7 | // List of MSR event selection registers IA32_PERFEVTSELx
8 | #define MSR_IA32_PERFEVTSEL0 0x00000186
9 | #define MSR_IA32_PERFEVTSEL1 0x00000187
10 | #define MSR_IA32_PERFEVTSEL2 0x00000188
11 | #define MSR_IA32_PERFEVTSEL3 0x00000189
12 |
13 | // List of MSR counter registers IA32_PMCx
14 | #define _MSR_IA32_PMC0 0x000000c1
15 | #define _MSR_IA32_PMC1 0x000000c2
16 | #define _MSR_IA32_PMC2 0x000000c3
17 | #define _MSR_IA32_PMC3 0x000000c4
18 |
19 | //
20 | #define MSR_IA32_PERF_CAPABILITIES 0x00000345
21 | #define MSR_IA32_PERF_GLOBAL_STATUS 0x0000038e
22 | #define MSR_IA32_PERF_GLOBAL_CTRL 0x0000038f
23 | #define MSR_IA32_PERF_GLOBAL_OVF_CTRL 0x00000390
24 |
25 | // Precise Events for the Skylake, Kaby Lake and Coffee Lake Microarchitectures
26 | // This subset is also the same for Haswell
27 | // EVTSEL | UMASK
28 | #define MEM_LOAD_RETIRED_L1_HIT() PERFEVTSELx(0xD1, 0x01)
29 | #define MEM_LOAD_RETIRED_L1_MISS() PERFEVTSELx(0xD1, 0x08)
30 | #define MEM_LOAD_RETIRED_L2_HIT() PERFEVTSELx(0xD1, 0x02)
31 | #define MEM_LOAD_RETIRED_L2_MISS() PERFEVTSELx(0xD1, 0x10)
32 | #define MEM_LOAD_RETIRED_L3_HIT() PERFEVTSELx(0xD1, 0x04)
33 | #define MEM_LOAD_RETIRED_L3_MISS() PERFEVTSELx(0xD1, 0x20)
34 | #define CPU_CLK_UNHALTED() PERFEVTSELx(0x3c, 0x00)
35 |
36 |
37 | // IA32_PERFEVTSELx layout (PMC v3)
38 | // 7:0 Event Select (event number from event tables, for the event we are interested in)
39 | // 15:8 Unit Mask UMASK (umask value from event tables, for the event we are interested in)
40 | #define EVTSEL_USR BIT(16) // if set, counts during exec in ring != 0
41 | #define EVTSEL_OS BIT(17) // if set, counts during exec in ring = 0
42 | #define EVTSEL_EDGE BIT(18) // if set, enables edge detection of the event
43 | #define EVTSEL_PC BIT(19) // pin control
44 | #define EVTSEL_INT BIT(20) // generate interrupt through APIC on overflow (usually 48-bit)
45 | #define EVTSEL_ANY BIT(21) // increment the counter on events from any hardware thread of the same physical core
46 | #define EVTSEL_EN BIT(22) // enable the counter
47 | #define EVTSEL_INV BIT(23) // invert counter mask, changes meaning of CMASK
48 | // 31:24 CMASK
49 | #define EVTSEL_CMASK 0x0 // if non-zero, the PMC only increments when the event triggers >= (or <, if INV is set) CMASK times in a single cycle
50 | // 63:32 reserved
51 |
52 | // EVT = [EVTSEL, UMASK]
53 | #define PERFEVTSELx(EVTSEL,UMASK) (EVTSEL|(UMASK<<8)|EVTSEL_USR|EVTSEL_OS|EVTSEL_EN)
54 |
55 | enum MsrOperation {
56 | MSR_NOP = 0,
57 | MSR_READ = 1,
58 | MSR_WRITE = 2,
59 | MSR_STOP = 3,
60 | MSR_RDTSC = 4
61 | };
62 |
63 | struct MsrInOut {
64 | unsigned int op; // MsrOperation
65 | unsigned int ecx; // msr identifier
66 | union {
67 | struct {
68 | unsigned int eax; // low double word
69 | unsigned int edx; // high double word
70 | };
71 | unsigned long long value; // quad word
72 | };
73 | }; // msrdrv.h:27:1: warning: packed attribute is unnecessary for ‘MsrInOut’ [-Wpacked]
74 |
75 | void prepare_counters(int lvl);
76 | void disable_counters(void);
77 |
78 | #endif /* __MSRDRV_H */
--------------------------------------------------------------------------------
/src/msrdrv.c:
--------------------------------------------------------------------------------
1 | #include "../include/msrdrv.h"
2 |
3 | static struct MsrInOut msr_start_l3[] = {
4 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERFEVTSEL0, {
.eax=MEM_LOAD_RETIRED_L3_MISS(), .edx=0x00 }}, 5 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERF_GLOBAL_CTRL, { .eax=0x1, .edx=0x0 }}, // enables PMC0 6 | { .op=MSR_STOP, .ecx=0x00, .value=0x00 }, 7 | }; 8 | static struct MsrInOut msr_start_l2[] = { 9 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERFEVTSEL0, { .eax=MEM_LOAD_RETIRED_L2_MISS(), .edx=0x00 }}, 10 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERF_GLOBAL_CTRL, { .eax=0x1, .edx=0x0 }}, // enables PMC0 11 | { .op=MSR_STOP, .ecx=0x00, .value=0x00 }, 12 | }; 13 | static struct MsrInOut msr_start_l1[] = { 14 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERFEVTSEL0, { .eax=MEM_LOAD_RETIRED_L1_MISS(), .edx=0x00 }}, 15 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERF_GLOBAL_CTRL, { .eax=0x1, .edx=0x0 }}, // enables PMC0 16 | { .op=MSR_STOP, .ecx=0x00, .value=0x00 }, 17 | }; 18 | static struct MsrInOut msr_start_cycles[] = { 19 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERFEVTSEL0, { .eax=CPU_CLK_UNHALTED(), .edx=0x00 }}, 20 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERF_GLOBAL_CTRL, { .eax=0x1, .edx=0x0 }}, // enables PMC0 21 | { .op=MSR_STOP, .ecx=0x00, .value=0x00 }, 22 | }; 23 | static struct MsrInOut msr_stop[] = { 24 | { .op=MSR_READ, .ecx=_MSR_IA32_PMC0, { .eax=0x00, .edx=0x00 }}, 25 | { .op=MSR_WRITE, .ecx=MSR_IA32_PERF_GLOBAL_CTRL, { .eax=0x00, .edx=0x00 }}, 26 | { .op=MSR_STOP, .ecx=0x00, .value=0x00 }, 27 | }; 28 | 29 | long long read_msr(unsigned int ecx) { 30 | unsigned int edx = 0, eax = 0; 31 | unsigned long long result = 0; 32 | __asm__ __volatile__("rdmsr" : "=a"(eax), "=d"(edx) : "c"(ecx)); 33 | result = eax | (unsigned long long)edx << 0x20; 34 | // printk(KERN_ALERT "Module msrdrv: Read 0x%016llx (0x%08x:0x%08x) from MSR 0x%08x\n", result, edx, eax, ecx); 35 | return result; 36 | } 37 | 38 | void write_msr(int ecx, unsigned int eax, unsigned int edx) { 39 | // printk(KERN_ALERT "Module msrdrv: Writing 0x%08x:0x%08x to MSR 0x%04x\n", edx, eax, ecx); 40 | __asm__ __volatile__("wrmsr" : : "c"(ecx), "a"(eax), "d"(edx)); 41 | } 42 | 43 | long msrdrv_run(struct MsrInOut *msrops, int n) 44 | { 45 | int i; 46 | for (i = 0; i < n; i++) 47 | { 48 | switch (msrops[i].op) 49 | { 50 | case MSR_NOP: 51 | // printk(KERN_ALERT "Module msrdrv: seen MSR_NOP command\n"); 52 | break; 53 | case MSR_STOP: 54 | // printk(KERN_ALERT "Module msrdrv: seen MSR_STOP command\n"); 55 | goto label_end; 56 | case MSR_READ: 57 | // printk(KERN_ALERT "Module msrdrv: seen MSR_READ command\n"); 58 | msrops[i].value = read_msr(msrops[i].ecx); 59 | break; 60 | case MSR_WRITE: 61 | // printk(KERN_ALERT "Module msrdrv: seen MSR_WRITE command\n"); 62 | write_msr(msrops[i].ecx, msrops[i].eax, msrops[i].edx); 63 | break; 64 | default: 65 | // printk(KERN_ALERT "Module msrdrv: Unknown option 0x%x\n", msrops[i].op); 66 | return 1; 67 | } 68 | 69 | } 70 | label_end: 71 | 72 | return 0; 73 | } 74 | 75 | void 76 | prepare_counters (int level) 77 | { 78 | switch (level) 79 | { 80 | case 1: 81 | msrdrv_run (msr_start_l1, 2); 82 | break; 83 | case 2: 84 | msrdrv_run (msr_start_l2, 2); 85 | break; 86 | case 3: 87 | msrdrv_run (msr_start_l3, 2); 88 | break; 89 | default: 90 | msrdrv_run (msr_start_cycles, 2); 91 | break; 92 | } 93 | } 94 | 95 | void 96 | disable_counters (void) 97 | { 98 | msrdrv_run (msr_stop, 2); 99 | } 100 | -------------------------------------------------------------------------------- /include/config.h: -------------------------------------------------------------------------------- 1 | #ifndef __CONFIG_H 2 | #define __CONFIG_H 3 | 4 | #include 5 | 6 | // Definitions 7 | typedef struct config Config; 8 | 9 | enum Debug 
{DISABLED = 1, ENABLED = 2, VERBOSE = 3};
10 | enum Serialization { LFENCE = 1, MFENCE = 2, CPUID = 3};
11 |
12 | struct conf_property {
13 | const char *name;
14 | ssize_t (*cb_read)(Config*,char*);
15 | void (*cb_write)(Config*,char*,ssize_t);
16 | } typedef t_property;
17 |
18 | struct config
19 | {
20 | t_property *properties;
21 | unsigned int length;
22 |
23 | /* maximum time considered for a memory access (remove outliers) */
24 | unsigned int max_access_time;
25 |
26 | /* number of repetitions during calibration */
27 | unsigned int num_calibrations;
28 |
29 | /* number of repetitions per query */
30 | unsigned int num_repetitions;
31 |
32 | /* preload TLB entry */
33 | unsigned char tlb_preload;
34 |
35 | /* cause thrashing on set (-1 for disabled) */
36 | int thrash_set;
37 |
38 | /* number of addresses used for thrashing */
39 | unsigned int thrash_size;
40 |
41 | /* if true print in syslog the time distribution of last access */
42 | unsigned char only_one_time;
43 |
44 | /* if true use cache related PMC instead of time */
45 | unsigned char use_pmc;
46 |
47 | /* if true use PMC CTR1 instead of RDTSC */
48 | unsigned char core_cycles;
49 |
50 | /* DISABLED, ENABLED, VERBOSE */
51 | enum Debug debug;
52 |
53 | /* LFENCE, MFENCE, CPUID */
54 | enum Serialization serialization;
55 |
56 | /* time thresholds */
57 | unsigned int l3_hit_threshold;
58 | unsigned int l3_miss_threshold;
59 | unsigned int l2_hit_threshold;
60 | unsigned int l2_miss_threshold;
61 | unsigned int l1_hit_threshold;
62 | unsigned int l1_miss_threshold;
63 |
64 | };
65 |
66 | /* Initialize config with default values given in settings.h */
67 | void init_config(Config *conf);
68 |
69 | /* Wrappers for sysfs */
70 | ssize_t conf_show_property (Config *conf, unsigned int index, char *buf);
71 | ssize_t conf_store_property (Config *conf, unsigned int index, char *buf, ssize_t count);
72 |
73 | /* Getters and setters */
74 | unsigned int get_max_access_time(Config *conf);
75 | void set_max_access_time(Config *conf, unsigned int max_access_time);
76 |
77 | unsigned int get_num_calibrations(Config *conf);
78 | void set_num_calibrations(Config *conf, unsigned int num_calibrations);
79 |
80 | unsigned int get_num_repetitions(Config *conf);
81 | void set_num_repetitions(Config *conf, unsigned int num_repetitions);
82 |
83 | unsigned char get_tlb_preload(Config *conf);
84 | void set_tlb_preload(Config *conf, unsigned char tlb_preload);
85 |
86 | int get_thrash_set(Config *conf);
87 | void set_thrash_set(Config *conf, int thrash_set);
88 |
89 | unsigned int get_thrash_size(Config *conf);
90 | void set_thrash_size(Config *conf, unsigned int thrash_size);
91 |
92 | unsigned char get_only_one_time(Config *conf);
93 | void set_only_one_time(Config *conf, unsigned char only_one_time);
94 |
95 | unsigned char get_use_pmc(Config *conf);
96 | void set_use_pmc(Config *conf, unsigned char use_pmc);
97 |
98 | unsigned char get_core_cycles(Config *conf);
99 | void set_core_cycles(Config *conf, unsigned char core_cycles);
100 |
101 | enum Debug get_debug(Config *conf);
102 | void set_debug(Config *conf, enum Debug debug);
103 |
104 | enum Serialization get_serialization(Config *conf);
105 | void set_serialization(Config *conf, enum Serialization serialization);
106 |
107 | unsigned int get_l3_hit_threshold(Config *conf);
108 | void set_l3_hit_threshold(Config *conf, unsigned int t);
109 |
110 | unsigned int get_l3_miss_threshold(Config *conf);
111 | void set_l3_miss_threshold(Config *conf, unsigned int t);
112 |
113 | unsigned int
get_l2_hit_threshold(Config *conf); 114 | void set_l2_hit_threshold(Config *conf, unsigned int t); 115 | 116 | unsigned int get_l2_miss_threshold(Config *conf); 117 | void set_l2_miss_threshold(Config *conf, unsigned int t); 118 | 119 | unsigned int get_l1_hit_threshold(Config *conf); 120 | void set_l1_hit_threshold(Config *conf, unsigned int t); 121 | 122 | unsigned int get_l1_miss_threshold(Config *conf); 123 | void set_l1_miss_threshold(Config *conf, unsigned int t); 124 | 125 | #endif /* __CONFIG_H */ 126 | -------------------------------------------------------------------------------- /include/x86.h: -------------------------------------------------------------------------------- 1 | #include "../config/settings.h" 2 | #include "parser.h" 3 | 4 | #ifndef __X86_H 5 | #define __X86_H 6 | 7 | #define C(...) (char[]){__VA_ARGS__} 8 | 9 | #define PUSH_RBP() C(0x55) 10 | #define MOV_RBP_RSP() C(0x48, 0x89, 0xe5) 11 | #define PUSH_RBX() C(0x53) 12 | #define MOV_ECX_DWORD(...) C(0xb9, __VA_ARGS__) 13 | #define XOR_RSI_RSI() C(0x48, 0x31, 0xf6) 14 | #define WRMSR() C(0x0f, 0x30) 15 | #define MOV_RDI_DWORD(...) C(0x48, 0xc7, 0xc7, __VA_ARGS__) 16 | 17 | #define LOAD_RAX(Q) C(0x48, 0xa1,\ 18 | Q&0xff, (Q>>8)&0xff, (Q>>16)&0xff, (Q>>24)&0xff,\ 19 | (Q>>32)&0xff, (Q>>40)&0xff, (Q>>48)&0xff, (Q>>56)&0xff) 20 | #define MOV_RAX_CT(Q) C(0x48, 0xb8,\ 21 | (Q&0xff), (Q>>8)&0xff, (Q>>16)&0xff, (Q>>24)&0xff,\ 22 | (Q>>32)&0xff, (Q>>40)&0xff, (Q>>48)&0xff, (Q>>56)&0xff) 23 | #define CLFLUSH_RAX() C(0x0f, 0xae, 0x38) 24 | #define XOR_EAX_EAX() C(0x31, 0xc0) 25 | #define XOR_RAX_RAX() C(0x48, 0x31, 0xc0) 26 | #define XOR_EDX_EDX() C(0x31, 0xd2) 27 | #define XOR_RDX_RDX() C(0x48, 0x31, 0xd2) 28 | #define XOR_RDI_RDI() C(0x48, 0x31, 0xff) 29 | #define NEG_RDI() C(0x48, 0xf7, 0xdf) 30 | #define CPUID() C(0x48, 0x31, 0xc0, 0x0f, 0xa2) 31 | #define LFENCE() C(0x0f, 0xae, 0xe8) 32 | #define MFENCE() C(0x0f, 0xae, 0xf0) 33 | #define SFENCE() C(0x0f, 0xae, 0xf8) 34 | #define RDMSR() C(0x0f, 0x32) 35 | #define RDPMC() C(0x0f, 0x33) 36 | #define SHL_RSI() C(0x48, 0xd1, 0xe6) 37 | #define CMP_EAX_EDI() C(0x39, 0xf8) 38 | #define CMOVGE_EAX_EDI() C(0x0f, 0x4d, 0xc7) 39 | #define CMOVAE_EAX_EDI() C(0x0f, 0x43, 0xc7) 40 | #define CMOVAE_RAX_RDI() C(0x48, 0x0f, 0x43, 0xc7) 41 | #define OR_RSI_RAX() C(0x48, 0x09, 0xc6) 42 | #define MOV_RAX_RSI() C(0x48, 0x89, 0xf0) 43 | #define POP_RBX() C(0x5b) 44 | #define POP_RBP() C(0x5d) 45 | #define RETQ() C(0xc3) 46 | #define RDTSCP() C(0x0f, 0x01, 0xf9) // warning: not supported in old architectures 47 | #define RDTSC() C(0x0f, 0x31) 48 | #define SHL_RDX_CT(_0) C(0x48, 0xc1, 0xe2, _0&0xff) 49 | #define OR_RAX_RDX() C(0x48, 0x09, 0xd0) 50 | #define MOV_RDI_RAX() C(0x48, 0x89, 0xc7) 51 | #define MOV_RDX_RDI() C(0x48, 0x89, 0xfa) 52 | #define MOV_RAX_RDI() C(0x48, 0x89, 0xf8) 53 | #define SUB_RAX_RDI() C(0x48, 0x29, 0xf8) 54 | #define SUB_RDI_RAX() C(0x48, 0x29, 0xc7) 55 | #define MOV_EDX_DWORD(...) C(0xba, __VA_ARGS__) 56 | #define MOV_ECX_DWORD(...) 
C(0xb9, __VA_ARGS__) 57 | #define CMP_RDX_CT(_0) C(0x48, 0x81, 0xfa, _0&0xff, (_0>>8)&0xff, (_0>>16)&0xff, (_0>>24)&0xff) 58 | #define MOV_RDX_RAX() C(0x48, 0x89, 0xc2) 59 | #define JBE_NEAR(_0) C(0x0f, 0x86,(_0-6)&0xff, ((_0-6)>>8)&0xff, ((_0-6)>>16)&0xff, ((_0-6)>>24)&0xff) 60 | #define JMP_SHORT(_0) C(0xeb, (_0-2)&0xff) 61 | #define MOVNTDQA_RAX() C(0x66, 0x0f, 0x38, 0x2a, 0x08) 62 | #define MOV_RAX_CR0() C(0x0f, 0x20, 0xc0) 63 | #define MOV_CR0_RAX() C(0x0f, 0x22, 0xc0) 64 | #define WBINVD() C(0x0f, 0x09) 65 | 66 | #if SERIAL == 0 67 | #define SERIALIZE() LFENCE() 68 | #elif SERIAL == 1 69 | #define SERIALIZE() MFENCE() 70 | #else 71 | #define SERIALIZE() CPUID() 72 | #endif 73 | 74 | #define MEASURE_PRE_TSC(code) \ 75 | { \ 76 | OPCODE(code, CPUID()); \ 77 | OPCODE(code, RDTSC()); \ 78 | OPCODE(code, SERIALIZE()); \ 79 | OPCODE(code, SHL_RDX_CT(32)); \ 80 | OPCODE(code, OR_RAX_RDX()); \ 81 | OPCODE(code, MOV_RDI_RAX()); \ 82 | } 83 | 84 | #if USE_RDTSCP == 1 85 | #define MEASURE_POST_TSC(code) \ 86 | { \ 87 | OPCODE(code, RDTSCP()); \ 88 | OPCODE(code, SHL_RDX_CT(32)); \ 89 | OPCODE(code, OR_RAX_RDX()); \ 90 | OPCODE(code, SUB_RDI_RAX()); \ 91 | OPCODE(code, NEG_RDI()); \ 92 | } 93 | #else 94 | #define MEASURE_POST_TSC(code) \ 95 | { \ 96 | OPCODE(code, SERIALIZE()); \ 97 | OPCODE(code, RDTSC()); \ 98 | OPCODE(code, SHL_RDX_CT(32)); \ 99 | OPCODE(code, OR_RAX_RDX()); \ 100 | OPCODE(code, SUB_RDI_RAX()); \ 101 | OPCODE(code, NEG_RDI()); \ 102 | } 103 | #endif 104 | 105 | // wrmsr does not serialize well in all archs, add fence anyway 106 | #define RESET_PMC0(code) \ 107 | { \ 108 | OPCODE(code, MOV_ECX_DWORD(_MSR_IA32_PMC0, 0x00, 0x00, 0x00)); \ 109 | OPCODE(code, XOR_EAX_EAX()); \ 110 | OPCODE(code, XOR_EDX_EDX()); \ 111 | OPCODE(code, SERIALIZE()); \ 112 | OPCODE(code, WRMSR()); \ 113 | } 114 | 115 | #define MEASURE_POST_CORE(code) \ 116 | { \ 117 | OPCODE(code, SERIALIZE()); \ 118 | OPCODE(code, MOV_ECX_DWORD(_MSR_IA32_PMC0, 0x00, 0x00, 0x00)); \ 119 | OPCODE(code, RDMSR()); \ 120 | OPCODE(code, SHL_RDX_CT(32)); \ 121 | OPCODE(code, OR_RAX_RDX()); \ 122 | OPCODE(code, MOV_RDI_RAX()); \ 123 | } 124 | 125 | #endif /* __X86_H */ 126 | -------------------------------------------------------------------------------- /src/lists.c: -------------------------------------------------------------------------------- 1 | #include "../include/lists.h" 2 | #include "../config/settings.h" 3 | #include "../include/cache.h" 4 | #include "../include/cachequery.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | static Block *sets_l3[L3_CACHE_SETS]; 14 | static Block *sets_l2[L2_CACHE_SETS]; 15 | static Block *sets_l1[L1_CACHE_SETS]; 16 | 17 | Block** get_sets_l3(void) 18 | { 19 | return sets_l3; 20 | } 21 | 22 | Block* get_set_l3(int i) 23 | { 24 | return sets_l3[i]; 25 | } 26 | 27 | Block** get_sets_l2(void) 28 | { 29 | return sets_l2; 30 | } 31 | 32 | Block* get_set_l2(int i) 33 | { 34 | return sets_l2[i]; 35 | } 36 | 37 | Block** get_sets_l1(void) 38 | { 39 | return sets_l1; 40 | } 41 | 42 | Block* get_set_l1(int i) 43 | { 44 | return sets_l1[i]; 45 | } 46 | 47 | void buffer_to_cachesets(char *pool, size_t length, Block **sets, int lvl) 48 | { 49 | Block *ptr = NULL; 50 | size_t i = 0; 51 | unsigned int set = 0; 52 | while (i < length) 53 | { 54 | ptr = (Block*)(&pool[i]); 55 | ptr->set3 = get_l3_set (ptr); 56 | ptr->slice = get_l3_slice (ptr); 57 | ptr->set2 = get_l2_set (ptr); 58 | ptr->set1 = get_l1_set (ptr); 59 | ptr->evict2 = NULL; 60 | 
ptr->evict2_sz = 0; 61 | ptr->evict1 = NULL; 62 | ptr->evict1_sz = 0; 63 | switch (lvl) 64 | { 65 | case LVL_1: 66 | set = ptr->set1; 67 | break; 68 | case LVL_2: 69 | set = ptr->set2; 70 | break; 71 | case LVL_3: 72 | default: 73 | set = (ptr->set3 << L3_SLICE_BITS) | ptr->slice; 74 | break; 75 | } 76 | // add virtual address to corresponding list 77 | if (sets[set]) 78 | { 79 | ptr->next = sets[set]; 80 | sets[set]->prev = ptr; 81 | } 82 | else 83 | { 84 | ptr->next = NULL; 85 | } 86 | ptr->prev = NULL; 87 | sets[set] = ptr; 88 | i += sizeof(Block); 89 | } 90 | } 91 | 92 | void shuffle_list(Block **ptr, int n) 93 | { 94 | Block *a, *b, *tmp; 95 | int i, j, r, c; 96 | if (*ptr && n > 2) 97 | { 98 | for (i = n - 1; i > 1; i--) 99 | { 100 | get_random_bytes(&r, sizeof(r)); 101 | j = 1 + ((r & 0x7FFFFFFF) % (i - 1)); 102 | for (b = *ptr, c = 0; b && c < i; b = b->next, c++); // b points to i-th element 103 | for (a = *ptr, c = 0; a && c < j; a = a->next, c++); // a points to j-th element 104 | if (a->next == b || b->prev == a || a->next == b->prev) continue; 105 | // swap siblings 106 | if (a->next) 107 | a->next->prev = b; 108 | if (a->prev) 109 | a->prev->next = b; 110 | if (b->next) 111 | b->next->prev = a; 112 | if (b->prev) 113 | b->prev->next = a; 114 | // swap elements 115 | tmp = a->prev; 116 | a->prev = b->prev; 117 | b->prev = tmp; 118 | tmp = a->next; 119 | a->next = b->next; 120 | b->next = tmp; 121 | } 122 | } 123 | } 124 | 125 | void init_lists(char *pool_l3, char *pool_l2, char *pool_l1) 126 | { 127 | int i; 128 | // Empty lists 129 | for (i = 0; i < L3_CACHE_SETS; i++) 130 | { 131 | sets_l3[i] = NULL; 132 | } 133 | for (i = 0; i < L2_CACHE_SETS; i++) 134 | { 135 | sets_l2[i] = NULL; 136 | } 137 | for (i = 0; i < L1_CACHE_SETS; i++) 138 | { 139 | sets_l1[i] = NULL; 140 | } 141 | 142 | // Init structure 143 | buffer_to_cachesets (pool_l3, L3_POOL_SZ, sets_l3, LVL_3); 144 | for (i = 0; i < L3_CACHE_SETS; i++) 145 | { 146 | shuffle_list (&sets_l3[i], list_length (sets_l3[i])); 147 | } 148 | buffer_to_cachesets (pool_l2, L2_POOL_SZ, sets_l2, LVL_2); 149 | for (i = 0; i < L2_CACHE_SETS; i++) 150 | { 151 | shuffle_list (&sets_l2[i], list_length (sets_l2[i])); 152 | } 153 | buffer_to_cachesets (pool_l1, L1_POOL_SZ, sets_l1, LVL_1); 154 | for (i = 0; i < L1_CACHE_SETS; i++) 155 | { 156 | shuffle_list (&sets_l1[i], list_length (sets_l1[i])); 157 | } 158 | } 159 | 160 | void init_evictionsets(void) 161 | { 162 | int i; 163 | Block *head = NULL, *ptr = NULL; 164 | // Find L1 eviction sets for L3 addresses, L2's are computed lazily on code generation 165 | for (i = 0; i < L3_CACHE_SETS; i++) 166 | { 167 | head = sets_l3[i]; 168 | find_l1_eviction_set (head, sets_l1); 169 | ptr = head; 170 | while (ptr) 171 | { 172 | // L3 congruent addresses share L1 eviction sets 173 | ptr->evict1 = head->evict1; 174 | ptr->evict1_sz = head->evict1_sz; 175 | ptr = ptr->next; 176 | } 177 | } 178 | // Find eviction sets for L2 addresses 179 | head = NULL; 180 | ptr = NULL; 181 | for (i = 0; i < L2_CACHE_SETS; i++) 182 | { 183 | head = sets_l2[i]; 184 | find_l1_eviction_set (head, sets_l1); 185 | ptr = head; 186 | while (ptr) 187 | { 188 | // L2 congruent addresses share L1 eviction sets 189 | ptr->evict1 = head->evict1; 190 | ptr->evict1_sz = head->evict1_sz; 191 | ptr = ptr->next; 192 | } 193 | } 194 | } 195 | 196 | void clean_l3_set (int i) 197 | { 198 | Block *ptr = sets_l3[i];; 199 | while (ptr) 200 | { 201 | kfree (ptr->evict2); 202 | ptr = ptr->next; 203 | } 204 | kfree (sets_l3[i]->evict1); 205 | } 
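/* Note: congruent blocks share a single evict1 array installed by
 * init_evictionsets() above, so the cleanup functions free the head's
 * pointer exactly once instead of once per node. */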
206 |
207 | void clean_l2_set(int i)
208 | {
209 | Block *ptr = sets_l2[i];
210 | kfree (ptr->evict1);
211 | }
212 |
213 | ssize_t list_cacheset_addresses (char *buf, Block *set)
214 | {
215 | ssize_t ret = 0;
216 | Block *ptr = set;
217 | while (ptr && ret < PAGE_SIZE - 1)
218 | {
219 | ret += sprintf(&buf[ret], "%px ", ptr);
220 | ptr = ptr->next;
221 | }
222 | ret += sprintf(&buf[ret], "\n");
223 | return ret;
224 | }
225 |
226 | void find_l2_eviction_set(Block *set, Block **l2)
227 | {
228 | unsigned int i, count;
229 | Block *ptr;
230 | // Eviction set for L2
231 | set->evict2 = kmalloc (MARGIN * EVICTION_SZ(L2) * sizeof(Block*), GFP_KERNEL);
232 | count = 0;
233 | for (i = 0; i < L2_CACHE_SETS && count < MARGIN * EVICTION_SZ(L2); i++)
234 | {
235 | ptr = l2[i];
236 | while (ptr && count < MARGIN * EVICTION_SZ(L2))
237 | {
238 | if ((ptr->set2 == set->set2) && (ptr->set3 != set->set3 || ptr->slice != set->slice))
239 | {
240 | set->evict2[count] = ptr;
241 | count++;
242 | }
243 | ptr = ptr->next;
244 | }
245 | }
246 | set->evict2_sz = count;
247 | }
248 |
249 | void find_l1_eviction_set(Block *set, Block **l1)
250 | {
251 | unsigned int i, count;
252 | Block *ptr;
253 | set->evict1 = kmalloc (MARGIN * EVICTION_SZ(L1) * sizeof(Block*), GFP_KERNEL);
254 | count = 0;
255 | for (i = 0; i < L1_CACHE_SETS && count < MARGIN * EVICTION_SZ(L1); i++)
256 | {
257 | ptr = l1[i];
258 | while (ptr && count < MARGIN * EVICTION_SZ(L1))
259 | {
260 | // find same L1 set, but different L3 and L2
261 | if ((ptr->set1 == set->set1) && (ptr->set2 != set->set2) && (ptr->set3 != set->set3 || ptr->slice != set->slice))
262 | {
263 | set->evict1[count] = ptr;
264 | count++;
265 | }
266 | ptr = ptr->next;
267 | }
268 | }
269 | set->evict1_sz = count;
270 | }
271 |
272 | int list_length(Block *ptr)
273 | {
274 | int l = 0;
275 | while (ptr)
276 | {
277 | l = l + 1;
278 | ptr = ptr->next;
279 | }
280 | return l;
281 | }
282 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CacheQuery
2 | [![DOI](https://zenodo.org/badge/227137498.svg)](https://zenodo.org/badge/latestdoi/227137498)
3 |
4 | A tool for interacting with hardware memory caches in modern Intel CPUs.
5 |
6 | * Linux Kernel module: generates non-interfering x86 code for arbitrary memory access sequences, which are profiled automatically.
7 | * Low noise environment: disables hardware prefetchers, hyperthreading, frequency scaling, etc.
8 | * Support for TSC, core cycles (default), and performance counters (L3, L2, and L1 misses) (see `config/settings.h` or the `/sys/kernel/cachequery/config/[use_pmc|core_cycles]/val` booleans).
9 | * Sysfs at `/sys/kernel/cachequery/<level>/<set>/run` accepts queries of logical blocks produced by the frontend and returns the sequence of hits and misses for the target cache set and level. Note that `<set>` is `((index << slice_bits) | slice)` (see the sketch below).
10 | * `tool/cachequery.py` provides a high-level interface with a REPL environment.
11 |
12 |
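The `<set>` identifier mentioned in the sysfs bullet above combines the slice-local set index with the slice id. A small illustrative helper (not part of the sources), which for 3 slice bits reproduces the L3 set `33` queried in the example below:

```c
/* set = (index << slice_bits) | slice; e.g. index 4, slice 1,
 * slice_bits 3  ->  (4 << 3) | 1 = 33. */
static unsigned int set_id(unsigned int index, unsigned int slice,
                           unsigned int slice_bits)
{
    return (index << slice_bits) | slice;
}
```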
13 | # Run
14 |
15 | The following command runs a single MemBlockLang (MBL) query against L3's set 33:
16 |
17 | ```
18 | $ cd tool/
19 | $ ./cachequery.py -l l3 -s 33 @ M _?
20 | (L3:33) r @ M _?
21 | 0 1 2 3 4 5 6 7 8 9 10 11 12 0? -> 0
22 | 0 1 2 3 4 5 6 7 8 9 10 11 12 1? -> 100
23 | 0 1 2 3 4 5 6 7 8 9 10 11 12 2? -> 100
24 | 0 1 2 3 4 5 6 7 8 9 10 11 12 3? -> 100
25 | 0 1 2 3 4 5 6 7 8 9 10 11 12 4? -> 100
26 | 0 1 2 3 4 5 6 7 8 9 10 11 12 5? -> 100
27 | 0 1 2 3 4 5 6 7 8 9 10 11 12 6? -> 100
28 | 0 1 2 3 4 5 6 7 8 9 10 11 12 7? -> 100
29 | 0 1 2 3 4 5 6 7 8 9 10 11 12 8? -> 100
30 | 0 1 2 3 4 5 6 7 8 9 10 11 12 9? -> 100
31 | 0 1 2 3 4 5 6 7 8 9 10 11 12 10? -> 100
32 | 0 1 2 3 4 5 6 7 8 9 10 11 12 11? -> 100
33 | ```
34 |
35 | An example on a 12-way L3 cache set, where the LRU block is evicted by `M`. The output value is the number of measured HITs (the number of repetitions can be changed in `config/settings.h` or via `/sys/kernel/cachequery/config/num_repetitions/val`).
36 |
37 |
38 | # Install
39 |
40 | Tested on Linux kernel >= 4.9.x branches.
41 |
42 | Modify `config/settings.h` as required and select the specific architecture. Some settings can be dynamically modified later on via `/sys/kernel/cachequery/config/`.
43 |
44 | If no timing thresholds are given, it will automatically compute some, but calibration takes time and is done on each execution.
45 |
46 | (WARNING: The code is unstable and can crash your system. Use it at your own risk.)
47 |
48 | ```
49 | $ make cpu=iX-yyyy
50 | $ make install
51 | ```
52 |
53 | Current support for `i7-4790`, `i5-6500` (default), and `i7-8550u`. Add a header file in `config/` and build with the corresponding `make cpu=iX-yyyy`.
54 |
55 | ### Create new config file
56 |
57 | The main parameters required for building a new config file are the cache associativity (`L?_CACHE_WAYS`), the number of set index bits (`L?_SET_BITS`), and the number of bits used for slicing (`L?_SLICE_BITS`). The associativity (or ways) and the number of cache sets can be obtained with the `cpuid` command, although the sets need to be divided by the number of slices. Slices are not documented and might require manual inference, but for post-Skylake Intel machines the count seems to be `8`.
58 |
59 | We recommend the default values (copy an existing file) for everything else, and manually tune them if required.
60 |
61 | Initially, we recommend using the automatic calibration for the thresholds, performing some test runs, and checking the computed threshold in the system logs. Once we are confident with the threshold, we can set it statically in the config file or dynamically via the virtual file system.
62 |
63 |
64 | ## Dependencies
65 |
66 | Lark parser: `pip3 install lark-parser`
67 |
68 | LevelDB + Plyvel: https://plyvel.readthedocs.io/en/latest/installation.html
69 |
70 |
71 | ## Help:
72 |
73 | ```
74 | $ ./cachequery.py -h
75 |
76 | [!] ./cachequery [options]
77 |
78 | Options:
79 | -h --help
80 | -i --interactive
81 | -v --verbose
82 |
83 | -c --config=filename path to filename with config (default: 'cachequery.ini')
84 | -b --batch path to filename with list of commands
85 | -o --output path to output file for session log
86 |
87 | -l --level target cache level: L3|L2|L1
88 | -s --set target cache set number
89 | ```
90 |
91 | By default it loads the `tool/cachequery.ini` configuration file.
92 |
93 |
94 |
95 |
96 | ## Uninstall
97 | ```
98 | $ make uninstall
99 | ```
100 |
101 | # MemBlockLang
102 |
103 | A simple language to facilitate the manual writing of cache queries.
104 |
105 | A query is a sequence of one or more memory operations. Each memory operation is specified as a *block* (represented by arbitrary identifiers), and it is decorated with an optional *tag* (`?` for profiling, or `!` for flushing; no tag means just access). For example, `a b b? a!` accesses blocks `a` and `b`, then accesses `b` again while profiling it, and finally flushes `a`.
106 |
107 | MBL features several macros:
108 |
109 | * An *expansion* macro `@`, which produces a sequence of associativity many different blocks in increasing order. For example, for associativity 8, `@` expands to `a b c d e f g h`.
110 | * A *wildcard* macro `_`, which produces associativity many different queries, each one consisting of a different block. For example, for associativity 8, `_` expands to the set of single-block queries `a, b, c, d, e, f, g, h`.
111 | * Concatenation of queries is implicit.
112 | * An *extension* macro `s1 [s2]`, which takes as input queries `s1` and `s2` and creates `|s2|` copies of `s1`, extending each of them with a different element of `s2`. For example, `(a b c d)[e f]` expands to `a b c d e, a b c d f`.
113 | * A *power* operator `(s1)N`, which repeats a query `N` times. For example, `(a b c)3` expands to `a b c a b c a b c`.
114 | * A tag over `(s1)` or `[s1]` applies to every block. For example, `(a b)?` expands to `a? b?`.
115 |
116 | Extensions:
117 |
118 | * A single `!` without a preceding block executes `wbinvd`.
119 |
120 | # Reduce system's noise
121 |
122 | Install `msr-tools` and `acpi-cpufreq`, and load the corresponding modules with `modprobe`.
123 |
124 | Set the options to `True` in `tool/cachequery.ini` to load the modules and enable/disable the noise sources by default.
125 |
126 | ## Disable multi-core and hyperthreading
127 |
128 | Disable: `echo 0 | sudo tee /sys/devices/system/cpu/cpu*/online`
129 |
130 | Enable: `echo 1 | sudo tee /sys/devices/system/cpu/cpu*/online`
131 |
132 | ## HW prefetching
133 |
134 | Disable: `wrmsr -a 0x1a4 15`
135 |
136 | Enable: `wrmsr -a 0x1a4 0`
137 |
138 | ## Turbo Boost
139 |
140 | Disable: `wrmsr -a 0x1a0 0x4000850089`
141 |
142 | Enable: `wrmsr -a 0x1a0 0x850089`
143 |
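The two Turbo Boost values above differ only in bit 38 of `IA32_MISC_ENABLE` (MSR `0x1a0`), the turbo-disable flag; a quick stand-alone sanity check in C (illustrative, not part of the tool):

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t enabled  = 0x850089ULL;             /* Turbo Boost on  */
    uint64_t disabled = enabled | (1ULL << 38);  /* Turbo Boost off */
    assert(disabled == 0x4000850089ULL);         /* value used above */
    return 0;
}
```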
144 | ## Disable frequency scaling
145 |
146 | Recommended when using RDTSC.
147 |
148 | Disable: `sudo cpupower frequency-set -d 2000MHz; sudo cpupower frequency-set -u 2000MHz`
149 |
150 | Enable: `sudo cpupower frequency-set -d 1MHz; sudo cpupower frequency-set -u 5000MHz` (use hw default limits)
151 |
152 | ## CAT of L3 cache sets
153 |
154 | Reduce to assoc 4: `wrmsr -a 0xc90 0x000f`
155 |
156 | Restore to assoc 16: `wrmsr -a 0xc90 0xffff`
157 |
--------------------------------------------------------------------------------
/src/config.c:
--------------------------------------------------------------------------------
1 | #include "../include/config.h"
2 |
3 | #include
4 | #include
5 |
6 | #include "../config/settings.h"
7 | #include "../include/cachequery.h"
8 |
9 | #define WRAP_GET_UINT(name) ssize_t wrap_get_##name(Config *conf, char *input) { return (ssize_t) sprintf (input, "%u\n", get_##name(conf)); }
10 | #define WRAP_SET_UINT(name) void wrap_set_##name(Config *conf, char *input, ssize_t count) { unsigned int res; if (kstrtouint(input,10,&res) == 0) { set_##name(conf, res);} }
11 |
12 | // TO FIX: Not working with negatives
13 | #define WRAP_GET_INT(name) ssize_t wrap_get_##name(Config *conf, char *input) { return (ssize_t) sprintf (input, "%d\n", get_##name(conf)); }
14 | #define WRAP_SET_INT(name) void wrap_set_##name(Config *conf, char *input, ssize_t count) { int res; if (kstrtoint(input,10,&res) == 0) { set_##name(conf, res);} }
15 |
16 | #define WRAP_GET_BOOL(name) ssize_t wrap_get_##name(Config *conf, char *input) { return (ssize_t) sprintf (input, "%x\n", get_##name(conf)); }
17 | #define WRAP_SET_BOOL(name) void wrap_set_##name(Config *conf, char *input, ssize_t count) { unsigned char res; if (input[0] == '0') { res = 0; } else { res = 1; } set_##name(conf, res); }
18 |
19 | /* Wrappers to parse/return strings */
20 | // TODO: refactor macros, still missing some for enums
21 | WRAP_GET_UINT(max_access_time)
22 | WRAP_SET_UINT(max_access_time);
23 | WRAP_GET_UINT(num_calibrations)
24 | WRAP_SET_UINT(num_calibrations);
25 | WRAP_GET_UINT(num_repetitions);
26 | WRAP_SET_UINT(num_repetitions);
27 | WRAP_GET_BOOL(tlb_preload);
28 | WRAP_SET_BOOL(tlb_preload);
29 | WRAP_GET_INT(thrash_set);
30 | WRAP_SET_INT(thrash_set);
31 | WRAP_GET_UINT(thrash_size);
32 | WRAP_SET_UINT(thrash_size);
33 | WRAP_GET_BOOL(only_one_time);
34 | WRAP_SET_BOOL(only_one_time);
35 | WRAP_GET_BOOL(use_pmc);
36 | WRAP_SET_BOOL(use_pmc);
37 | WRAP_GET_BOOL(core_cycles);
38 | WRAP_SET_BOOL(core_cycles);
39 | WRAP_GET_UINT(l3_hit_threshold);
40 | WRAP_SET_UINT(l3_hit_threshold);
41 | WRAP_GET_UINT(l3_miss_threshold);
42 | WRAP_SET_UINT(l3_miss_threshold);
43 | WRAP_GET_UINT(l2_hit_threshold);
44 | WRAP_SET_UINT(l2_hit_threshold);
45 | WRAP_GET_UINT(l2_miss_threshold);
46 | WRAP_SET_UINT(l2_miss_threshold);
47 | WRAP_GET_UINT(l1_hit_threshold);
48 | WRAP_SET_UINT(l1_hit_threshold);
49 | WRAP_GET_UINT(l1_miss_threshold);
50 | WRAP_SET_UINT(l1_miss_threshold);
51 |
52 | static t_property conf_properties[] = {
53 | { .name = "max_access_time", .cb_read = &wrap_get_max_access_time, .cb_write = &wrap_set_max_access_time },
54 | { .name = "num_calibrations", .cb_read = &wrap_get_num_calibrations, .cb_write = &wrap_set_num_calibrations },
55 | { .name = "num_repetitions", .cb_read = &wrap_get_num_repetitions, .cb_write = &wrap_set_num_repetitions },
56 | { .name = "tlb_preload", .cb_read = &wrap_get_tlb_preload, .cb_write = &wrap_set_tlb_preload },
57 | { .name = "thrash_set", .cb_read = &wrap_get_thrash_set, .cb_write = &wrap_set_thrash_set },
58 | { .name = "thrash_size", .cb_read =
&wrap_get_thrash_size, .cb_write = &wrap_set_thrash_size }, 59 | { .name = "only_one_time", .cb_read = &wrap_get_only_one_time, .cb_write = &wrap_set_only_one_time}, 60 | { .name = "use_pmc", .cb_read = &wrap_get_use_pmc, .cb_write = &wrap_set_use_pmc }, 61 | { .name = "core_cycles", .cb_read = &wrap_get_core_cycles, .cb_write = &wrap_set_core_cycles }, 62 | { .name = "debug", .cb_read = NULL, .cb_write = NULL }, 63 | { .name = "serialization", .cb_read = NULL, .cb_write = NULL }, 64 | { .name = "l3_hit_threshold", .cb_read = &wrap_get_l3_hit_threshold, .cb_write = &wrap_set_l3_hit_threshold }, 65 | { .name = "l3_miss_threshold", .cb_read = &wrap_get_l3_miss_threshold, .cb_write = &wrap_set_l3_miss_threshold }, 66 | { .name = "l2_hit_threshold", .cb_read = &wrap_get_l2_hit_threshold, .cb_write = &wrap_set_l2_hit_threshold }, 67 | { .name = "l2_miss_threshold", .cb_read = &wrap_get_l2_miss_threshold, .cb_write = &wrap_set_l2_miss_threshold }, 68 | { .name = "l1_hit_threshold", .cb_read = &wrap_get_l1_hit_threshold, .cb_write = &wrap_set_l1_hit_threshold }, 69 | { .name = "l1_miss_threshold", .cb_read = &wrap_get_l1_miss_threshold, .cb_write = &wrap_set_l1_miss_threshold }, 70 | }; 71 | 72 | ssize_t conf_show_property (Config *conf, unsigned int index, char *buf) 73 | { 74 | // write string on buf and return number of bytes written 75 | if (conf_properties[index].cb_read) 76 | { 77 | return conf_properties[index].cb_read(conf, buf); 78 | } 79 | return 0; 80 | } 81 | 82 | ssize_t conf_store_property (Config *conf, unsigned int index, char *buf, ssize_t count) 83 | { 84 | // read from buf and return bytes read 85 | if (conf_properties[index].cb_write) 86 | { 87 | conf_properties[index].cb_write(conf, buf, count); 88 | } 89 | return 0; 90 | } 91 | 92 | /* Initialize config with default values */ 93 | void init_config(Config *conf) 94 | { 95 | conf->properties = conf_properties; 96 | conf->length = sizeof(conf_properties)/sizeof(t_property); 97 | set_max_access_time(conf, MAX_TIME); 98 | set_num_calibrations(conf, NUM_CALIBRATIONS); 99 | set_num_repetitions(conf, NUM_REPETITIONS); 100 | set_tlb_preload(conf, TLB_PRELOAD); 101 | set_thrash_set(conf, THRASH_SET); 102 | set_thrash_size(conf, THRASHING_SIZE); 103 | set_only_one_time(conf, ONLY_ONE_TIME); 104 | set_use_pmc(conf, USE_PMC); 105 | set_core_cycles(conf, CORE_CYCLES); 106 | set_debug(conf, ENABLED); // TODO 107 | set_serialization(conf, MFENCE); // TODO 108 | set_l3_hit_threshold(conf, L3_HIT_THRESHOLD); 109 | set_l3_miss_threshold(conf, L3_MISS_THRESHOLD); 110 | set_l2_hit_threshold(conf, L2_HIT_THRESHOLD); 111 | set_l2_miss_threshold(conf, L2_MISS_THRESHOLD); 112 | set_l1_hit_threshold(conf, L1_HIT_THRESHOLD); 113 | set_l1_miss_threshold(conf, L1_MISS_THRESHOLD); 114 | } 115 | 116 | /* Getters and setters */ 117 | unsigned int get_max_access_time(Config *conf) 118 | { 119 | return conf->max_access_time; 120 | } 121 | void set_max_access_time(Config *conf, unsigned int max_access_time) 122 | { 123 | conf->max_access_time = max_access_time; 124 | } 125 | 126 | unsigned int get_num_calibrations(Config *conf) 127 | { 128 | return conf->num_calibrations; 129 | } 130 | void set_num_calibrations(Config *conf, unsigned int num_calibrations) 131 | { 132 | conf->num_calibrations = num_calibrations; 133 | } 134 | 135 | unsigned int get_num_repetitions(Config *conf) 136 | { 137 | return conf->num_repetitions; 138 | } 139 | void set_num_repetitions(Config *conf, unsigned int num_repetitions) 140 | { 141 | conf->num_repetitions = num_repetitions; 142 
| } 143 | 144 | unsigned char get_tlb_preload(Config *conf) 145 | { 146 | return conf->tlb_preload; 147 | } 148 | void set_tlb_preload(Config *conf, unsigned char tlb_preload) 149 | { 150 | conf->tlb_preload = tlb_preload; 151 | } 152 | 153 | int get_thrash_set(Config *conf) 154 | { 155 | return conf->thrash_set; 156 | } 157 | void set_thrash_set(Config *conf, int thrash_set) 158 | { 159 | conf->thrash_set = thrash_set; 160 | } 161 | 162 | unsigned int get_thrash_size(Config *conf) 163 | { 164 | return conf->thrash_size; 165 | } 166 | void set_thrash_size(Config *conf, unsigned int thrash_size) 167 | { 168 | conf->thrash_size = thrash_size; 169 | } 170 | 171 | unsigned char get_only_one_time(Config *conf) 172 | { 173 | return conf->only_one_time; 174 | } 175 | void set_only_one_time(Config *conf, unsigned char only_one_time) 176 | { 177 | conf->only_one_time = only_one_time; 178 | } 179 | 180 | unsigned char get_use_pmc(Config *conf) 181 | { 182 | return conf->use_pmc; 183 | } 184 | void set_use_pmc(Config *conf, unsigned char use_pmc) 185 | { 186 | conf->use_pmc = use_pmc; 187 | } 188 | 189 | unsigned char get_core_cycles(Config *conf) 190 | { 191 | return conf->core_cycles; 192 | } 193 | void set_core_cycles(Config *conf, unsigned char core_cycles) 194 | { 195 | conf->core_cycles = core_cycles; 196 | } 197 | 198 | enum Debug get_debug(Config *conf) 199 | { 200 | return conf->debug; 201 | } 202 | void set_debug(Config *conf, enum Debug debug) 203 | { 204 | conf->debug = debug; 205 | } 206 | 207 | enum Serialization get_serialization(Config *conf) 208 | { 209 | return conf->serialization; 210 | } 211 | void set_serialization(Config *conf, enum Serialization serialization) 212 | { 213 | conf->serialization = serialization; 214 | } 215 | 216 | unsigned int get_l3_hit_threshold(Config *conf) 217 | { 218 | return conf->l3_hit_threshold; 219 | } 220 | void set_l3_hit_threshold(Config *conf, unsigned int t) 221 | { 222 | conf->l3_hit_threshold = t; 223 | } 224 | 225 | unsigned int get_l3_miss_threshold(Config *conf) 226 | { 227 | return conf->l3_miss_threshold; 228 | } 229 | void set_l3_miss_threshold(Config *conf, unsigned int t) 230 | { 231 | conf->l3_miss_threshold = t; 232 | } 233 | 234 | unsigned int get_l2_hit_threshold(Config *conf) 235 | { 236 | return conf->l2_hit_threshold; 237 | } 238 | void set_l2_hit_threshold(Config *conf, unsigned int t) 239 | { 240 | conf->l2_hit_threshold = t; 241 | } 242 | 243 | unsigned int get_l2_miss_threshold(Config *conf) 244 | { 245 | return conf->l2_miss_threshold; 246 | } 247 | void set_l2_miss_threshold(Config *conf, unsigned int t) 248 | { 249 | conf->l2_miss_threshold = t; 250 | } 251 | 252 | unsigned int get_l1_hit_threshold(Config *conf) 253 | { 254 | return conf->l1_hit_threshold; 255 | } 256 | void set_l1_hit_threshold(Config *conf, unsigned int t) 257 | { 258 | conf->l1_hit_threshold = t; 259 | } 260 | 261 | unsigned int get_l1_miss_threshold(Config *conf) 262 | { 263 | return conf->l1_miss_threshold; 264 | } 265 | void set_l1_miss_threshold(Config *conf, unsigned int t) 266 | { 267 | conf->l1_miss_threshold = t; 268 | } 269 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/parser.c: -------------------------------------------------------------------------------- 1 | #include "../include/parser.h" 2 | 3 | #include <linux/slab.h> /* kmalloc/kfree (angle-bracket include reconstructed; it was blank in the dump) */ 4 | #include <linux/vmalloc.h> /* __vmalloc/vfree (angle-bracket include reconstructed) */ 5 | 6 | #include "../include/x86.h" 7 | #include "../include/msrdrv.h" 8 | #include "../include/config.h" 9 | #include "../include/cache.h" 10 | #include "../include/lists.h" 11 | 12 | static int is_digit_char(char c) { 13 | return c >= '0' && c <= '9'; 14 | } 15 | 16 | static int is_separator_char(char c) 17 | { 18 | return c == '_'; 19 | } 20 | 21 | static int is_query_char(char c) 22 | { 23 | return c == '?'; 24 | } 25 | 26 | static int is_flush_char(char c) 27 | { 28 | return c == '!'; 29 | } 30 | 31 | #define MAX_NUM_DIGITS 6 32 | int str_to_num(const char *ptr, int len) 33 | { 34 | int res; 35 | char buf[MAX_NUM_DIGITS + 1]; // +1: room for the terminator written at buf[len] 36 | if (!ptr || len > MAX_NUM_DIGITS) 37 | { 38 | return -1; 39 | } 40 | strncpy (buf, ptr, len); 41 | buf[len] = 0; 42 | if (!kstrtoint (buf, 10, &res)) 43 | { 44 | return res; 45 | } 46 | return -1; 47 | } 48 | 49 | block_list *read_block(lexer_state *state, int max_set) 50 | { 51 | const char *left, *right, *eob; 52 | char c; 53 | block_list *t; 54 | 55 | if (!state || !state->ptr || !(state->ptr < state->eob) || (*(state->ptr) == '\0')) 56 | { 57 | return NULL; 58 | } 59 | t = kmalloc (sizeof(block_list), GFP_KERNEL); 60 | if (!t) 61 | { 62 | printk (KERN_INFO "Error on block allocation\n"); 63 | return NULL; 64 | } 65 | left = right = state->ptr; 66 | eob = state->eob; 67 | c = 0; 68 | // ignore padding 69 | while ((left < eob) && (c = *left) && ((c == ' ') || (c == '\n') || (c == '\t'))) left++; 70 | right = left; 71 | if (!(left < eob) || (*left == '\0')) 72 | { 73 | goto sink; 74 | } 75 | if (is_flush_char(c)) 76 | { 77 | t->block.id = 0; // set to something 78 | t->block.ask = 0; 79 | t->block.flush = 0; 80 | t->block.invalidate = 1; 81 | left = ++right; 82 | // insert in list 83 | state->ptr = left; 84 | if (state->tail) 85 | { 86 | state->tail->next = t; 87 | state->tail = t; 88 | } 89 | else 90 | { 91 | state->tail = state->head = t; 92 | } 93 | } 94 | // TODO: remove the code that ignores the cacheset after _, it is now defined by the path 95 | // or add proper support to interleave mem accesses 96 | else if (is_digit_char(c)) 97 | { 98 | while ((right < eob) && 
is_digit_char(c)) c = *(++right); 99 | // assign unique id for name from left-to-right 100 | t->block.id = str_to_num(left, right-left); 101 | left = right; 102 | if (!is_separator_char(c)) { 103 | goto err; 104 | } 105 | left = ++right; 106 | if ((left < eob) && (c = *left) && is_digit_char(c)) 107 | { 108 | while ((right < eob) && is_digit_char(c)) c = *(++right); 109 | t->block.set = str_to_num(left, right-left); 110 | // fix this! :S 111 | if (t->block.set >= max_set) 112 | { 113 | goto err; 114 | } 115 | left = right; 116 | if (is_query_char(c)) 117 | { 118 | t->block.ask = 1; 119 | t->block.flush = 0; 120 | t->block.invalidate = 0; 121 | left = ++right; 122 | } 123 | else if (is_flush_char(c)) 124 | { 125 | t->block.flush = 1; 126 | t->block.ask = 0; 127 | t->block.invalidate = 0; 128 | left = ++right; 129 | } 130 | else 131 | { 132 | t->block.ask = 0; 133 | t->block.flush = 0; 134 | t->block.invalidate = 0; 135 | } 136 | // insert in list 137 | state->ptr = left; 138 | if (state->tail) 139 | { 140 | state->tail->next = t; 141 | state->tail = t; 142 | } 143 | else 144 | { 145 | state->tail = state->head = t; 146 | } 147 | } 148 | else 149 | { 150 | goto err; 151 | } 152 | } 153 | else 154 | { 155 | goto err; 156 | } 157 | t->next = NULL; 158 | return t; 159 | err: 160 | printk (KERN_ERR "Invalid syntax\n"); 161 | sink: 162 | kfree (t); 163 | return NULL; 164 | } 165 | 166 | void lexer_init(lexer_state *lexer) 167 | { 168 | lexer->ptr = NULL; 169 | lexer->eob = NULL; 170 | lexer->size = 0; 171 | lexer->head = NULL; 172 | lexer->tail = NULL; 173 | } 174 | 175 | void lexer_clean(lexer_state *lexer) 176 | { 177 | block_list *t, *tmp; 178 | 179 | if (lexer == NULL) 180 | { 181 | return; 182 | } 183 | t = lexer->head; 184 | while (t) 185 | { 186 | tmp = t->next; 187 | kfree (t); 188 | t = tmp; 189 | } 190 | lexer->size = 0; 191 | lexer->ptr = lexer->eob = NULL; 192 | lexer->tail = lexer->head = NULL; 193 | } 194 | 195 | int parse(lexer_state *lexer, const char *source, int size, int max_set) 196 | { 197 | unsigned int n = 0; 198 | block_list *b; 199 | 200 | // reset for reuse 201 | lexer_clean (lexer); 202 | 203 | lexer->ptr = source; 204 | lexer->eob = source + size; 205 | lexer->size = size; 206 | while ((b = read_block (lexer, max_set))) n++; 207 | return n; 208 | } 209 | 210 | void* allocate_code_pages(struct smart_buffer *code, Block **sets, unsigned char level, int s, int pages) 211 | { 212 | void *ret; 213 | int i, code_l3, code_slice, code_l2, code_l1, count = 0, max_attempts = 10000; 214 | 215 | alloc: 216 | count++; 217 | if (!(ret = __vmalloc (pages*PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC))) 218 | { 219 | return NULL; 220 | } 221 | 222 | // check that code doesn't interfere with eviction sets 223 | for (i = 0; i < pages && count < max_attempts; i++) 224 | { 225 | code_l3 = get_l3_set ((void*) (ret+(i*PAGE_SIZE))); 226 | code_slice = get_l3_slice ((void*) (ret+(i*PAGE_SIZE))); 227 | code_l2 = get_l2_set ((void*) (ret+(i*PAGE_SIZE))); 228 | code_l1 = get_l1_set ((void*) (ret+(i*PAGE_SIZE))); 229 | 230 | // can't create large portions of code w/o filling l2 231 | if (level == 3) 232 | { 233 | if (((code_l3>>6) == (sets[s]->set3>>6)) && (code_slice == sets[s]->slice)) 234 | { 235 | WPRINT ("[debug] Warning: code interfering with debugged cache set\n"); 236 | vfree (ret); 237 | goto alloc; 238 | } 239 | } 240 | else if (level == 2 || level == 1) 241 | { 242 | if ((((code_l3>>6) == (sets[s]->set3>>6)) && (code_slice == sets[s]->slice)) 243 | || ((code_l2>>6) == (sets[s]->set2>>6))) 244 
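// A code page that maps into the set under study would let instruction fetches
// pollute the experiment. The >>6 compares set indices at page granularity: a
// 4KB page covers 64 consecutive 64-byte lines, so its blocks span a 64-set
// aligned group, and matching the upper bits of the set index is enough to
// detect a collision with the target set.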
| { 245 | WPRINT ("[debug] Warning: code interfering with debugged cache set\n"); 246 | vfree (ret); 247 | goto alloc; 248 | } 249 | } 250 | } 251 | 252 | code->start = ret; 253 | code->p = ret; 254 | code->limit = (char*)((unsigned long long)(ret) + pages*PAGE_SIZE); 255 | code->len = 0; 256 | 257 | if (!(count < max_attempts)) 258 | { 259 | PRINT ("[debug] couldn't find a non-interfering blob\n"); 260 | } 261 | PRINT ("[debug] code allocation at 0x%p-0x%p\n", code->p, code->limit); 262 | 263 | return ret; 264 | } 265 | 266 | void free_code_pages (struct smart_buffer *code) 267 | { 268 | vfree (code->start); 269 | smart_buffer_init (code); 270 | } 271 | 272 | void smart_buffer_init (struct smart_buffer *code) 273 | { 274 | code->start = NULL; 275 | code->p = NULL; 276 | code->limit = NULL; 277 | code->len = 0; 278 | code->asks = 0; 279 | } 280 | 281 | // Just use this region for all sets to avoid wasting time with dynamic memory 282 | static unsigned int banned_l3_for_l1[L3_CACHE_SETS]; 283 | static unsigned int banned_l3_for_l2[L3_CACHE_SETS]; 284 | static unsigned int banned_l2_for_l1[L2_CACHE_SETS]; 285 | static Block* query_blocks[128]; 286 | 287 | // copy array of opcodes (x86.h) into buffer if it still fits, if not return -1 288 | int opcode(struct smart_buffer *code, char *ops, size_t len) 289 | { 290 | if ((code->p + len) >= code->limit) 291 | { 292 | return -1; 293 | } 294 | memcpy (code->p, ops, len); 295 | code->p += len; 296 | code->len += len; 297 | return 0; 298 | } 299 | 300 | int generate_code(lexer_state *lexer, unsigned int target_set, struct smart_buffer *code, Block **sets, unsigned char level, int t_up, int t_low, Config *conf) 301 | { 302 | block_list *b; 303 | Block *tmp, *sub; 304 | int i, w1 = 0, w2 = 0; 305 | size_t asks = 0; 306 | int error_label = 0, jmp_at, s; 307 | unsigned long long err_code = -1; 308 | Block *others; 309 | 310 | // Check lexer state 311 | if (!lexer || !lexer->head || !lexer->tail) 312 | { 313 | goto err; 314 | } 315 | b = lexer->head; 316 | 317 | // If necessary, compute L2 eviction sets on demand 318 | s = target_set; 319 | if (level == 3 && sets[s]->evict2 == NULL) 320 | { 321 | tmp = sets[s]; 322 | while (tmp) 323 | { 324 | find_l2_eviction_set (tmp, get_sets_l2()); 325 | tmp = tmp->next; 326 | } 327 | } 328 | 329 | // If the cacheset already had code, free it and generate it anew 330 | if (code->start != NULL) 331 | { 332 | free_code_pages (code); 333 | } 334 | 335 | if (allocate_code_pages (code, sets, level, s, JIT_CODE_SZ) == NULL) 336 | { 337 | goto err; 338 | } 339 | 340 | // Preamble 341 | OPCODE(code, PUSH_RBP()); 342 | OPCODE(code, MOV_RBP_RSP()); 343 | OPCODE(code, PUSH_RBX()); 344 | 345 | jmp_at = 15; 346 | OPCODE(code, JMP_SHORT(jmp_at)); // skip error handler 347 | // exception handler 348 | error_label = code->len; 349 | OPCODE(code, MOV_RAX_CT(err_code)); 350 | OPCODE(code, POP_RBX()); 351 | OPCODE(code, POP_RBP()); 352 | OPCODE(code, RETQ()); 353 | // end-of-exception (total size: 2 + 10 + 1 + 1 + 1) 354 | 355 | OPCODE(code, XOR_RSI_RSI()); // set return value to zero 356 | 357 | // Identify cache sets that need to be avoided by eviction sets 358 | memset (banned_l3_for_l1, 0, sizeof(banned_l3_for_l1)); 359 | memset (banned_l3_for_l2, 0, sizeof(banned_l3_for_l2)); 360 | memset (banned_l2_for_l1, 0, sizeof(banned_l2_for_l1)); 361 | memset (query_blocks, 0, sizeof(query_blocks)); 362 | 363 | // ban sets used by code to avoid them when forming eviction sets 364 | for (i = 0; i < JIT_CODE_SZ; i+=sizeof(Block)) 365 | { 366 | 
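// The ban arrays are indexed by the combined id (set3 << L3_SLICE_BITS) | slice,
// the same encoding used by every lookup later in generate_code; a non-zero
// entry excludes that L3 (or L2) set from eviction-set construction because the
// JIT'd code itself already occupies lines there.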
banned_l3_for_l2[(get_l3_set ((void*) (code->start+i)) << L3_SLICE_BITS) | get_l3_slice ((void*) (code->start+i))]++; 367 | banned_l3_for_l1[(get_l3_set ((void*) (code->start+i)) << L3_SLICE_BITS) | get_l3_slice ((void*) (code->start+i))]++; 368 | banned_l2_for_l1[get_l2_set ((void*) (code->start+i))]++; 369 | } 370 | 371 | // NOTE: RIGHT NOW WE IGNORE THE CACHE_SET 372 | // Save pointers to query blocks 373 | tmp = sets[s]; 374 | 375 | while (b) 376 | { 377 | if (query_blocks[b->block.id]) 378 | { 379 | b = b->next; 380 | continue; 381 | } 382 | while (tmp) 383 | { 384 | // if block doesn't collide in lower levels with previous query block 385 | // (for L2 blocks we fix the slice to L3_CACHE_SLICES-1) 386 | if (((level == 1) && banned_l3_for_l1[(tmp->set3 << L3_SLICE_BITS) | tmp->slice] == 0 && banned_l2_for_l1[tmp->set2] == 0) 387 | || ((level == 2) && (tmp->slice == L3_CACHE_SLICES-1) && banned_l3_for_l2[(tmp->set3 << L3_SLICE_BITS) | tmp->slice] == 0) 388 | || (level == 3)) 389 | { 390 | break; 391 | } 392 | else 393 | { 394 | tmp = tmp->next; 395 | } 396 | } 397 | if (!tmp) 398 | { 399 | goto beach; 400 | } 401 | // store it 402 | query_blocks[b->block.id] = tmp; 403 | // ban it 404 | if (tmp->evict1_sz > 0) 405 | { 406 | banned_l3_for_l1[((tmp->set3 << L3_SLICE_BITS) | tmp->slice)]++; 407 | banned_l2_for_l1[tmp->set2]++; 408 | } 409 | if (tmp->evict2_sz > 0) 410 | { 411 | banned_l3_for_l2[((tmp->set3 << L3_SLICE_BITS) | tmp->slice)]++; 412 | } 413 | // next 414 | tmp = tmp->next; 415 | b = b->next; 416 | } 417 | 418 | // Invalidate cache hierarchy 419 | // OPCODE(code, WBINVD()); 420 | 421 | // Access and flush all blocks in set 422 | tmp = sets[s]; 423 | while (tmp) 424 | { 425 | OPCODE(code, LOAD_RAX((unsigned long long)tmp)); 426 | OPCODE(code, SERIALIZE()); 427 | OPCODE(code, LOAD_RAX((unsigned long long)tmp)); 428 | OPCODE(code, SERIALIZE()); 429 | OPCODE(code, LOAD_RAX((unsigned long long)tmp)); 430 | OPCODE(code, SERIALIZE()); 431 | tmp = tmp->next; 432 | } 433 | tmp = sets[s]; 434 | while (tmp) 435 | { 436 | OPCODE(code, MOV_RAX_CT((unsigned long long)tmp)); 437 | OPCODE(code, CLFLUSH_RAX()); 438 | tmp = tmp->next; 439 | } 440 | OPCODE(code, SERIALIZE()); 441 | 442 | PRINT("[debug] ---------------------------------\n"); 443 | 444 | // Start query 445 | b = lexer->head; 446 | while (b) 447 | { 448 | // Set pointer into right block 449 | tmp = query_blocks[b->block.id]; 450 | if (!tmp) 451 | { 452 | goto beach; 453 | } 454 | 455 | OPCODE(code, CPUID()); 456 | 457 | if (b->block.invalidate) 458 | { 459 | PRINT ("[debug] invalidate cache\n"); 460 | OPCODE(code, WBINVD()); 461 | OPCODE(code, SERIALIZE()); 462 | goto cont; 463 | } 464 | // If a flush request 465 | else if (b->block.flush) 466 | { 467 | PRINT ("[debug] flush %p (%u):\ts1=%d\ts2=%d\ts3=%d\th=%d\n", 468 | tmp, b->block.id, tmp->set1, tmp->set2, tmp->set3, tmp->slice); 469 | OPCODE(code, MOV_RAX_CT((unsigned long long)tmp)); 470 | OPCODE(code, CLFLUSH_RAX()); 471 | OPCODE(code, SERIALIZE()); 472 | goto cont; // continue w/o accessing block 473 | } 474 | // Do we profile this memory access? 
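// A profiled access is timed with an RDTSC delta or, when use_pmc/core_cycles
// is enabled, with performance counter PMC0. With several '?' marks, results
// are packed into RSI one bit per ask (SHL + OR, so the last ask lands in bit
// 0): bit = 1 means the latency reached t_up (a miss), a latency <= t_low
// aborts through the error handler (returning -1), and with only_one_time the
// raw latency of the final access is returned instead of a bitmask.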
475 | else if ((get_only_one_time(conf) && b->next == NULL) 476 | || (b->block.ask && asks < 63)) 477 | { 478 | // Perform TLB preload if set 479 | if (get_tlb_preload(conf) && tmp->evict1_sz > 0) 480 | { 481 | unsigned long long tlb = ((unsigned long long)tmp); 482 | tlb = ((tlb >> 12) << 12) | ((tlb & 0xfff) ^ 0x7e0); // same page, different cache set 483 | PRINT("[debug] tlb refresh %llx s1=%d s2=%d s3=%d h=%d\n", 484 | tlb, get_l1_set((void*)tlb), get_l2_set((void*)tlb), get_l3_set((void*)tlb), get_l3_slice((void*)tlb)); 485 | OPCODE(code, LOAD_RAX(tlb)); 486 | OPCODE(code, SERIALIZE()); 487 | } 488 | if (get_use_pmc(conf) || get_core_cycles(conf)) 489 | { 490 | // Reset performance counters 491 | RESET_PMC0(code); // (wrmsr serializes) 492 | } 493 | else 494 | { 495 | // RAX,RDX <- RDTSC 496 | // RDI <- RAX | (RDX << 32) 497 | MEASURE_PRE_TSC(code); 498 | } 499 | } 500 | 501 | // Access block 502 | OPCODE(code, LOAD_RAX((unsigned long long)tmp)); 503 | 504 | // Do we profile this memory access? 505 | if ((get_only_one_time(conf) && b->next == NULL && !b->block.flush) 506 | || (b->block.ask && asks < 63)) 507 | { 508 | if (get_use_pmc(conf)) 509 | { 510 | // Read performance counters 511 | // rdmsr PMC0 512 | OPCODE(code, SERIALIZE()); 513 | OPCODE(code, MOV_ECX_DWORD(_MSR_IA32_PMC0, 0x00, 0x00, 0x00)); 514 | OPCODE(code, RDMSR()); 515 | if (!get_only_one_time(conf)) 516 | { 517 | // if the counter was incremented 518 | OPCODE(code, MOV_RDI_DWORD(0x01, 0x00, 0x00, 0x00)); 519 | OPCODE(code, CMP_EAX_EDI()); 520 | // update bit 521 | OPCODE(code, CMOVAE_EAX_EDI()); 522 | OPCODE(code, SHL_RSI()); 523 | } 524 | else 525 | { 526 | // result already in RAX 527 | } 528 | } 529 | else 530 | { 531 | if (get_core_cycles(conf)) 532 | { 533 | // RDI <- cycles since RESET_PMC() 534 | MEASURE_POST_CORE(code); 535 | } 536 | else 537 | { 538 | // RAX,RDX <- RDTSC 539 | // RDI <- RDI - RAX 540 | // NEG RDI 541 | MEASURE_POST_TSC(code); 542 | } 543 | if (!get_only_one_time(conf)) 544 | { 545 | // Compare with threshold and update result bitmask 546 | OPCODE(code, MOV_RDX_RDI()); 547 | OPCODE(code, XOR_RAX_RAX()); 548 | OPCODE(code, MOV_RDI_DWORD(0x01, 0x00, 0x00, 0x00)); 549 | // if time is less than or equal to t_low, goto err 550 | OPCODE(code, CMP_RDX_CT(t_low)); 551 | jmp_at = error_label - code->len; // calc relative position 552 | OPCODE(code, JBE_NEAR(jmp_at)); 553 | // if time is greater than or equal to t_up, set bit to 1 554 | OPCODE(code, CMP_RDX_CT(t_up)); 555 | // update bit 556 | OPCODE(code, CMOVAE_RAX_RDI()); 557 | OPCODE(code, SHL_RSI()); 558 | } 559 | else 560 | { 561 | // Just return the load time delay 562 | OPCODE(code, MOV_RAX_RDI()); 563 | } 564 | } 565 | // RSI contains ret value, never modified during execution 566 | OPCODE(code, OR_RSI_RAX()); 567 | asks += 1; 568 | } 569 | else 570 | { 571 | OPCODE(code, SERIALIZE()); 572 | } 573 | 574 | PRINT ("[debug] load %p (%u):\ts1=%d\ts2=%d\ts3=%d\th=%d\n", 575 | tmp, b->block.id, tmp->set1, tmp->set2, tmp->set3, tmp->slice); 576 | 577 | // Evict block from L1 and LFB 578 | if (tmp->evict1_sz > 0) 579 | { 580 | for (i = 0, w1 = 0; i < tmp->evict1_sz && w1 < EVICTION_SZ(L1); i++) 581 | { 582 | sub = tmp->evict1[i]; 583 | if ((banned_l3_for_l1[((sub->set3 << L3_SLICE_BITS) | sub->slice)] == 0) 584 | && (banned_l2_for_l1[sub->set2] == 0) && !IS_LEADER_SET(sub->set3,sub->slice)) 585 | { 586 | OPCODE(code, LOAD_RAX((unsigned long long)(sub))); 587 | OPCODE(code, SERIALIZE()); 588 | WPRINT("\teset1: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", sub, sub->set1, sub->set2, 
sub->set3, sub->slice); 589 | w1 += 1; 590 | } 591 | } 592 | if (tmp->evict2_sz < 1) // only if in L2 593 | { 594 | // In theory fences solve this, but keep it for now 595 | for (; i < tmp->evict1_sz && w1 < EVICTION_SZ(L1) + LFB; i++) 596 | { 597 | sub = tmp->evict1[i]; 598 | if ((banned_l3_for_l1[((sub->set3 << L3_SLICE_BITS) | sub->slice)] == 0) 599 | && (banned_l2_for_l1[sub->set2] == 0)) 600 | { 601 | OPCODE(code, MOV_RAX_CT((unsigned long long)sub)); // move pointer to rax 602 | OPCODE (code, MOVNTDQA_RAX()); // movntdqa xmm1, [rax] 603 | WPRINT("\tlfb: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", sub, sub->set1, sub->set2, sub->set3, sub->slice); 604 | w1 += 1; 605 | } 606 | } 607 | OPCODE(code, SERIALIZE()); 608 | } 609 | if (i == tmp->evict1_sz) 610 | { 611 | printk (KERN_INFO "[info] Needs more blocks to evict L1D + LFB\n"); 612 | } 613 | } 614 | 615 | // Evict block from L2 and SQ (must be same slice) 616 | if (tmp->evict2_sz > 0) 617 | { 618 | for (i = 0, w2 = 0; i < tmp->evict2_sz && w2 < EVICTION_SZ(L2) * 2; i++) 619 | { 620 | sub = tmp->evict2[i]; 621 | // L2 policy is complex in some CPUs, we double access to improve eviction 622 | if ((banned_l3_for_l2[((sub->set3 << L3_SLICE_BITS) | sub->slice)] == 0) && (!IS_LEADER_SET(sub->set3, sub->slice) || IS_LEADER_SET(tmp->set3, tmp->slice))) 623 | { 624 | OPCODE(code, LOAD_RAX((unsigned long long)(sub))); 625 | OPCODE(code, SERIALIZE()); 626 | OPCODE(code, LOAD_RAX((unsigned long long)(sub))); 627 | OPCODE(code, SERIALIZE()); 628 | WPRINT("\teset2: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", sub, sub->set1, sub->set2, sub->set3, sub->slice); 629 | w2 += 1; 630 | } 631 | } 632 | // In theory fences solve this, but keep it for now 633 | for (; i < tmp->evict2_sz && w2 < EVICTION_SZ(L2) + SQ; i++) 634 | { 635 | sub = tmp->evict2[i]; 636 | if (banned_l3_for_l2[((sub->set3 << L3_SLICE_BITS) | sub->slice)] == 0) 637 | { 638 | //OPCODE(code, LOAD_RAX((unsigned long long)(sub))); 639 | OPCODE(code, MOV_RAX_CT((unsigned long long)sub)); // move pointer to rax 640 | OPCODE (code, MOVNTDQA_RAX()); // movntdqa xmm1, [rax] 641 | WPRINT("\tsq: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", sub, sub->set1, sub->set2, sub->set3, sub->slice); 642 | w2 += 1; 643 | } 644 | } 645 | OPCODE(code, SERIALIZE()); 646 | if (i == tmp->evict2_sz) 647 | { 648 | printk (KERN_INFO "[info] Need more blocks to evict L2 + SQ\n"); 649 | } 650 | } 651 | PRINT ("[debug] ev1=%d\tev2=%d\n", w1, w2); 652 | 653 | // Thrash leader set 654 | if (get_thrash_set(conf) > -1 && tmp->evict2_sz > 0) 655 | { 656 | others = sets[get_thrash_set(conf)]; // should check bounds 657 | for (i = 0; i < get_thrash_size(conf) && others != NULL; i++) 658 | { 659 | OPCODE(code, LOAD_RAX((unsigned long long)others)); 660 | OPCODE(code, SERIALIZE()); 661 | WPRINT("\tthrashing: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", others, others->set1, others->set2, others->set3, others->slice); 662 | others = others->next; 663 | } 664 | PRINT("[debug] thrash set %d with %d addresses...\n", get_thrash_set(conf), get_thrash_size(conf)); 665 | } 666 | 667 | cont: 668 | b = b->next; 669 | } 670 | // epilogue 671 | OPCODE(code, MOV_RAX_RSI()); // ret value 672 | OPCODE(code, POP_RBX()); 673 | OPCODE(code, POP_RBP()); 674 | OPCODE(code, RETQ()); 675 | 676 | PRINT ("[debug] code length=%zu bytes\n", code->len); 677 | PRINT("[debug] ---------------------------------\n"); 678 | 679 | code->asks = asks; 680 | return code->len; 681 | 682 | beach: 683 | free_code_pages (code); 684 | err: 685 | PRINT ("[debug] err: code length=%zu bytes\n", code->len); 686 | 
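// Both failure paths funnel here: 'beach' (no usable block left, or the code
// buffer filled up) frees the half-generated pages first, while 'err' covers a
// bad lexer state or a failed allocation; either way the caller sees -1 and
// the query is not run.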
return -1; 687 | } 688 | 689 | 690 | -------------------------------------------------------------------------------- /tool/cachequery.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from lark import Lark, Transformer, v_args 4 | from functools import reduce 5 | from datetime import datetime 6 | import os, cmd, sys, getopt, re, subprocess, configparser 7 | import plyvel 8 | 9 | LOG_HEADER_TEMPLATE = '''==================================== 10 | CacheQuery log - {} 11 | ====================================''' 12 | 13 | class CacheQuery(): 14 | 15 | def __init__(self, conf): 16 | self.settings = conf['General'] 17 | self.system = conf['System'] 18 | self.conf = conf 19 | 20 | # open db from the beginning 21 | if self.settings['db_cache']: 22 | self.db = plyvel.DB(self.settings['db_cache'], create_if_missing=True) 23 | else: 24 | self.db = None 25 | 26 | try: 27 | self.check_system() 28 | self.disable_noise() 29 | pass 30 | except: 31 | print("[!] Error: invalid system settings") 32 | sys.exit() 33 | 34 | def check_system(self): 35 | # if not os.access('/sys/kernel/cachequery', os.R_OK): 36 | # print("err: cachequery.ko") 37 | # raise 38 | if self.system.getboolean('disable_prefetch') or self.system.getboolean('disable_turboboost'): 39 | try: 40 | if os.system('sudo modprobe msr') != 0: 41 | print("err: modprobe msr") 42 | raise 43 | except: 44 | print("err: exec modprobe msr") 45 | raise 46 | if self.system.getint('frequency_set') > 0: 47 | try: 48 | if os.system('sudo modprobe acpi-cpufreq') != 0: 49 | print("err: modprobe acpi-cpufreq") 50 | raise 51 | except: 52 | print("err: modprobe acpi-cpufreq") 53 | raise 54 | 55 | def _exec(self, command): 56 | p = subprocess.Popen(command, shell=True, stdout=subprocess.DEVNULL) 57 | p.wait() 58 | return p.returncode 59 | 60 | def disable_noise(self): 61 | try: 62 | if self.system.getboolean('disable_ht'): 63 | if self._exec('echo 0 | sudo tee /sys/devices/system/cpu/cpu*/online') == 0: 64 | print(" [debug] disabling hyperthreading and multi-core") 65 | else: 66 | print(" ERR: hyperthreads", file=sys.stderr) 67 | if self.system.getboolean('disable_prefetch'): 68 | if self._exec('sudo wrmsr -a 0x1a4 15') == 0: 69 | print(" [debug] disabling hardware prefetchers") 70 | else: 71 | print(" ERR: hw prefetch", file=sys.stderr) 72 | if self.system.getboolean('disable_turboboost'): 73 | if self._exec('sudo wrmsr -a 0x1a0 0x4000850089') == 0: 74 | print(" [debug] disabling intel's turboboost") 75 | else: 76 | print(" ERR: turboboost", file=sys.stderr) 77 | freq = self.system.getint('frequency_set') 78 | if freq > 100: 79 | if self._exec('sudo cpupower frequency-set -d {}MHz'.format(freq)) == 0 and self._exec('sudo cpupower frequency-set -u {}MHz'.format(freq)) == 0 and self._exec('sudo cpupower frequency-set -g performance') == 0: 80 | print(" [debug] fixing cpu frequency") 81 | else: 82 | print(" ERR: fix freq", file=sys.stderr) 83 | except: 84 | raise 85 | 86 | def reenable_noise(self): 87 | try: 88 | if self.system.getboolean('disable_prefetch'): 89 | if self._exec('sudo wrmsr -a 0x1a4 0') != 0: 90 | print(" ERR: hw prefetch", file=sys.stderr) 91 | if self.system.getboolean('disable_turboboost'): 92 | if self._exec('sudo wrmsr -a 0x1a0 0x850089') != 0: 93 | print(" ERR: turboboost", file=sys.stderr) 94 | if int(self.system['frequency_set']) > 100: 95 | if self._exec('sudo cpupower frequency-set -d 1MHz') != 0 or self._exec('sudo cpupower frequency-set -u 5000MHz') != 0 or self._exec('sudo 
cpupower frequency-set -g powersave') != 0: 96 | print(" ERR: fix freq") 97 | if self.system.getboolean('disable_ht'): 98 | if self._exec('echo 1 | sudo tee /sys/devices/system/cpu/cpu*/online') != 0: 99 | print(" ERR: hyperthreads", file=sys.stderr) 100 | except: 101 | raise 102 | 103 | def parse(self, input): 104 | 105 | memblock_grammar = ''' 106 | ID: /[a-zA-Z]+/ 107 | QUESTION: "?" 108 | FLUSH: "!" 109 | NUMBER: /[0-9]+/ 110 | 111 | SPACE: (" " | /\t/ )+ 112 | 113 | AT: "@" 114 | WILDCARD: "_" 115 | INVD: "!" 116 | 117 | flag: (QUESTION | FLUSH) -> string 118 | 119 | block: ID -> trans 120 | 121 | addresses: block -> first 122 | | addresses SPACE block -> append 123 | 124 | group: block -> singleton 125 | | "(" addresses ")" -> group 126 | | "[" addresses "]" -> parallel 127 | | AT -> expand 128 | | WILDCARD -> unfold 129 | 130 | query: group flag? -> modify 131 | | group NUMBER flag? -> modify_power 132 | | INVD -> invalidate 133 | 134 | trace: query -> list 135 | | trace SPACE query -> concat 136 | 137 | start: trace -> out 138 | 139 | ''' 140 | 141 | class MemBlockToQuery(Transformer): 142 | 143 | def __init__(self, alfa, conf, debug=False): 144 | self.dir = [] 145 | self.alphabet = alfa 146 | self.ways = int(conf.cache('ways')) 147 | self.cacheset = int(conf.cache('set')) 148 | self.debug = debug 149 | 150 | def print(self, *args): 151 | if self.debug: 152 | print(*args) 153 | 154 | # Cast Tree to string 155 | @v_args(inline=True) 156 | def string(self, _): 157 | return str(_) 158 | 159 | # Cast Tree to list 160 | @v_args(inline=True) 161 | def list(self, _): 162 | self.print("list: ", list(_)) 163 | return list(_) 164 | 165 | # First 166 | @v_args(inline=True) 167 | def first(self, _): 168 | self.print("first: ", [_]) 169 | return [_] 170 | 171 | # Propagate identity 172 | @v_args(inline=True) 173 | def id(self, _): 174 | self.print("id: ", _) 175 | return _ 176 | 177 | # Returns block 178 | @v_args(inline=True) 179 | def trans(self, block): 180 | id=str(block) 181 | try: 182 | ret = str(self.dir.index(id)) 183 | except ValueError: 184 | self.dir.append(id) 185 | ret = str(len(self.dir)-1) 186 | self.print("block: ", block, ret) 187 | return ret 188 | 189 | # Appends block into list 190 | @v_args(inline=True) 191 | def append(self, addresses, _, block): 192 | self.print("append: ", addresses, block) 193 | addresses.append(block) 194 | return addresses 195 | 196 | # Returns group of lists 197 | @v_args(inline=True) 198 | def group(self, addresses): 199 | self.print("group: ", [addresses]) 200 | return [addresses] 201 | 202 | # Returns group of lists 203 | @v_args(inline=True) 204 | def singleton(self, addresses): 205 | self.print("singleton: ", [[addresses]]) 206 | return [[addresses]] 207 | 208 | # Returns group of lists 209 | @v_args(inline=True) 210 | def invalidate(self, _): 211 | self.print("invalidate") 212 | return _ 213 | 214 | # Returns group of groups 215 | @v_args(inline=True) 216 | def parallel(self, addresses): 217 | ret = [[g] for g in addresses] 218 | self.print("parallel: ", ret) 219 | return ret 220 | 221 | # Expands '@' into query of assoc blocks 222 | @v_args(inline=True) 223 | def expand(self, _): 224 | tmp = self.alphabet[:self.ways] 225 | ret = [] 226 | for i in tmp: 227 | ret.append(self.trans(i)) 228 | ret = self.group(ret) 229 | self.print("expand: ", ret) 230 | return ret 231 | 232 | # Unfolds '_' into assoc blocks 233 | @v_args(inline=True) 234 | def unfold(self, _): 235 | names = self.alphabet[:self.ways] 236 | ret = [] 237 | for e in names: 238 | try: 
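# Block names are interned left-to-right in self.dir, so the first letter seen
# becomes id 0, the next new one id 1, and so on; '@' and '_' both draw from
# alphabet[:ways], hence they reuse the same ids. E.g. with ways = 4, "@ _?"
# expands to the four queries "0 1 2 3 0?", "0 1 2 3 1?", "0 1 2 3 2?" and
# "0 1 2 3 3?": fill the set's associativity, then probe each way separately.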
239 | ret.append(str(self.dir.index(e))) 240 | except ValueError: 241 | self.dir.append(e) 242 | ret.append(str(len(self.dir)-1)) 243 | ret = self.parallel(ret) 244 | self.print("unfold: ", ret) 245 | return ret 246 | 247 | # Adds flag to groups 248 | @v_args(inline=True) 249 | def modify(self, groups, flag=''): 250 | ret = [] 251 | for g in groups: 252 | if isinstance(g, str): 253 | ret.append(g + flag) 254 | else: 255 | ret.append([(e + flag) for e in g]) 256 | ret = [' '.join(g) for g in ret] 257 | self.print("modify: ", ret) 258 | return ret 259 | 260 | # Applies power and modifies flag to groups 261 | @v_args(inline=True) 262 | def modify_power(self, groups, n, flag=''): 263 | ret = self.modify(groups, flag) * int(n) 264 | ret = [' '.join(ret)] 265 | self.print("modify_power: ", ret) 266 | return ret 267 | 268 | # Converts groups into strings 269 | @v_args(inline=True) 270 | def join(self, queries): 271 | ret = [' '.join(queries)] 272 | self.print("join: ", ret) 273 | return ret 274 | 275 | # Cartesian product of groups 276 | @v_args(inline=True) 277 | def concat(self, traces, _, queries): 278 | ret = [(x+" "+y) for x in traces for y in queries] 279 | self.print("concat: ", ret) 280 | return ret 281 | 282 | 283 | # Flatten final list of queries 284 | @v_args(inline=True) 285 | def out(self, traces): 286 | # append cacheset number to block ids 287 | ret = [[re.sub(r'(\d+)', r'\1_0', b) for b in t.split()] for t in traces] 288 | return [traces, [' '.join(q) for q in ret]] 289 | 290 | 291 | memblock_parser = Lark(memblock_grammar, parser='lalr', transformer=MemBlockToQuery(self.settings['alphabet'], self.conf, debug=False)) 292 | parser = memblock_parser.parse 293 | 294 | try: 295 | parser_query = parser(input) 296 | return parser_query 297 | except Exception: 298 | print("syntax error", file=sys.stderr) 299 | 300 | def query(self, query, refresh=True): 301 | path = '/sys/kernel/cachequery/{}_sets/{}/run'.format(self.settings['level'].lower(), self.conf.cache('set')) 302 | try: 303 | if refresh: 304 | with open(path, 'w') as endpoint: 305 | endpoint.write('{}\n'.format(query)) 306 | endpoint.close() 307 | with open(path, 'r') as endpoint: 308 | res = endpoint.readline() 309 | endpoint.close() 310 | return res.rstrip() 311 | except: 312 | raise 313 | 314 | def command(self, input): 315 | print('({}:{}) r {}'.format(self.settings['level'], self.conf.cache('set'), input)) 316 | self.run(input) 317 | 318 | def run(self, input, bypass_cache=False, refresh=True): 319 | try: 320 | query, query_raw = self.parse(input) 321 | except: 322 | return 323 | answer = [] 324 | for i in range(len(query)): 325 | q_raw = query_raw[i] 326 | q = query[i] 327 | ret = '' 328 | # Check the cache to safely avoid a real query (only for deterministic policies) 329 | if self.db and not bypass_cache: 330 | ret = self.db.get(str.encode(q)) 331 | if not ret: 332 | ret = self.query(q_raw, refresh) 333 | if self.db: 334 | self.db.put(str.encode(q), str.encode(ret)) 335 | else: 336 | ret = ret.decode('utf-8') 337 | print('{} -> {}'.format(q, ret)) 338 | answer.append('{} -> {}'.format(q, ret)) 339 | return answer 340 | 341 | def interactive(self): 342 | cq = self 343 | 344 | class CQShell(cmd.Cmd): 345 | 346 | intro = 'CacheQuery interactive shell. Type "help" or "?" 
to list commands.\n' 347 | prompt = '({}:{}) '.format(cq.settings['level'], cq.conf.cache('set')) 348 | response = None 349 | if cq.settings['log_file']: 350 | file = open(cq.settings['log_file'], 'a') 351 | print(LOG_HEADER_TEMPLATE.format(datetime.now().strftime("%Y/%m/%d %H:%M:%S")), file=file) 352 | else: 353 | file = None 354 | 355 | def do_r(self, line): 356 | 'Execute query. Example: r @ M _? (default, `r` is optional)' 357 | self.response = [cq.run(line)] 358 | 359 | def default(self, line): 360 | self.response = [cq.run(line)] 361 | 362 | def do_rr(self, line): 363 | 'Execute query bypassing cache if present. Example: rr @ M _?' 364 | self.response = [cq.run(line, True)] 365 | 366 | def do_set(self, line): 367 | 'Overwrite target cacheset. Example: set 12' 368 | try: 369 | if int(line) > -1: 370 | cq.conf.set_cache('set', line) 371 | except: 372 | pass 373 | # refresh prompt 374 | self.prompt = '({}:{}) '.format(cq.settings['level'], cq.conf.cache('set')) 375 | 376 | def do_level(self, line): 377 | 'Overwrite target level. Example: level L1' 378 | if re.match(r'^L(3|2|1)$', line, re.IGNORECASE) is None: 379 | pass 380 | else: 381 | cq.settings['level'] = line.upper() 382 | # refresh prompt 383 | self.prompt = '({}:{}) '.format(cq.settings['level'], cq.conf.cache('set')) 384 | 385 | def do_R(self, line): 386 | 'Repeat query `config.repeat` times. Example: R @ M N b?' 387 | self.response = [cq.run(line, False, True)] # first to generate code 388 | for i in range(cq.settings.getint('repeat')): 389 | self.response.append(cq.run(line, True, cq.settings.getboolean('refresh'))) # bypass cache 390 | 391 | # log session 392 | def do_log(self, arg): 393 | 'Log session into file. Example: log test.cqlog' 394 | if not self.file: 395 | self.file = open(arg, 'a') 396 | print(LOG_HEADER_TEMPLATE.format(datetime.now().strftime("%Y/%m/%d %H:%M:%S")), file=self.file) 397 | def do_unlog(self, arg): 398 | 'Stop log session' 399 | self.close() 400 | def close(self): 401 | if self.file: 402 | self.file.close() 403 | self.file = None 404 | if cq.db: 405 | cq.db.close() 406 | cq.db = None 407 | 408 | # execute before any command, write into session log file 409 | def precmd(self, line): 410 | if self.file: 411 | print('{} {}'.format(self.prompt, line), file=self.file) 412 | return line 413 | # execute after command, write into session log file 414 | def postcmd(self, stop, line): 415 | if self.file and self.response: 416 | for answers in self.response: 417 | for r in answers: 418 | print(r, file=self.file) 419 | self.response = None 420 | return stop 421 | 422 | # exit commands 423 | def do_EOF(self, line): 424 | 'Quit' 425 | self.close() 426 | return True 427 | 428 | def do_q(self, line): 429 | 'Quit' 430 | self.close() 431 | return True 432 | 433 | return CQShell() 434 | 435 | def usage(): 436 | help = ''' 437 | [!] 
./cachequery [options] 438 | 439 | Options: 440 | -h --help 441 | -i --interactive 442 | -v --verbose 443 | 444 | -c --config=filename path to filename with config (default: 'cachequery.ini') 445 | -b --batch path to filename with list of commands 446 | -o --output path to output file for session log 447 | 448 | -l --level target cache level: L3|L2|L1 449 | -s --set target cache set number 450 | 451 | ''' 452 | print(help) 453 | 454 | def main(): 455 | 456 | try: 457 | opts, args = getopt.getopt(sys.argv[1:], "ho:c:ivb:s:l:", ["help", "output=", "config=", "interactive", "batch=","set=","level="]) 458 | except getopt.GetoptError as err: 459 | print(str(err)) 460 | usage() 461 | sys.exit(2) 462 | 463 | # flags 464 | output = None 465 | verbose = False 466 | interactive = False 467 | # options 468 | config_path = 'cachequery.ini' # default path 469 | batch = None 470 | 471 | # config overwrite 472 | cacheset = None 473 | level = None 474 | 475 | # TODO: allow overwrite some config values 476 | for o, a in opts: 477 | if o == "-v": 478 | verbose = True 479 | elif o in ("-h", "--help"): 480 | usage() 481 | sys.exit() 482 | elif o in ("-c", "--config"): 483 | config_path = a 484 | elif o in ("-i", "--interactive"): 485 | interactive = True 486 | elif o in ("-b", "--batch"): 487 | batch = a 488 | elif o in ("-o", "--output"): 489 | output = a 490 | elif o in ("-l", "--level"): 491 | if re.match(r'^L(3|2|1)$', a, re.IGNORECASE) is None: 492 | assert False, "invalid level" 493 | else: 494 | level = a.upper() 495 | elif o in ("-s", "--set"): 496 | try: 497 | if int(a) > -1: 498 | cacheset = a 499 | except: 500 | assert False, "invalid cacheset" 501 | else: 502 | assert False, "unhandled option" 503 | 504 | # read config 505 | try: 506 | config = configparser.ConfigParser() 507 | config.read(config_path) 508 | # add method for dynamic cache check 509 | def cache(self, prop): 510 | return self.get(self.get('General', 'level'), prop) 511 | def set_cache(self, prop, val): 512 | return self.set(self.get('General', 'level'), prop, val) 513 | setattr(configparser.ConfigParser, 'cache', cache) 514 | setattr(configparser.ConfigParser, 'set_cache', set_cache) 515 | except: 516 | print("[!] Error: invalid config file") 517 | sys.exit(1) 518 | 519 | # overwrite options 520 | if level: 521 | config.set('General', 'level', level) 522 | if cacheset: 523 | config.set_cache('set', cacheset) 524 | if output: 525 | config.set('General', 'log_file', output) 526 | 527 | # instantiate cq 528 | CQ = CacheQuery(config) 529 | 530 | # exec single command 531 | if not interactive and not batch: 532 | if len(args) < 1: 533 | usage() 534 | sys.exit(2) 535 | query = ' '.join(args) 536 | CQ.command(query) 537 | 538 | # run batch of commands from file 539 | elif batch: 540 | try: 541 | with open(batch, 'r') as endpoint: 542 | query = endpoint.readline().rstrip() 543 | CQ.command(query) 544 | endpoint.close() 545 | except: 546 | print("[!] 
Err: invalid filename") 547 | 548 | # start interactive mod 549 | else: 550 | CQ.interactive().cmdloop() 551 | 552 | # destroyy 553 | CQ.reenable_noise() 554 | 555 | if __name__ == '__main__': 556 | main() 557 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #undef __KERNEL__ 2 | #define __KERNEL__ 3 | #undef MODULE 4 | #define MODULE 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "../include/msrdrv.h" 19 | #include "../include/parser.h" 20 | #include "../include/cache.h" 21 | #include "../include/config.h" 22 | #include "../include/histogram.h" 23 | #include "../include/lists.h" 24 | #include "../include/cachequery.h" 25 | #include "../include/x86.h" 26 | #include "../config/settings.h" 27 | 28 | // Static global vars 29 | static Config conf; 30 | 31 | static char *pool_l3 = NULL; 32 | static char *pool_l2 = NULL; 33 | static char *pool_l1 = NULL; 34 | 35 | static int cal[MAX_TIME]; 36 | static int hist[MAX_TIME]; 37 | 38 | // Main Kobjects 39 | static struct kobject *cachequery_kobj; 40 | static struct cacheset_obj *cacheset_l3_objs[L3_CACHE_SETS]; 41 | static struct cacheset_obj *cacheset_l2_objs[L2_CACHE_SETS]; 42 | static struct cacheset_obj *cacheset_l1_objs[L1_CACHE_SETS]; 43 | static struct config_obj **config_objs; 44 | 45 | static struct kset *l3cachesets_kset; 46 | static struct kset *l2cachesets_kset; 47 | static struct kset *l1cachesets_kset; 48 | static struct kset *config_kset; 49 | 50 | // START OF SYSFS MESS 51 | // this needs to be rewritten, right now it's a mix of legacy copy-paste stuff that makes little sense 52 | struct cacheset_obj { 53 | struct kobject kobj; 54 | unsigned int index; 55 | unsigned char level; 56 | lexer_state lexer; 57 | struct smart_buffer code; 58 | char name[20]; 59 | }; 60 | #define to_cacheset_obj(x) container_of(x, struct cacheset_obj, kobj) 61 | struct config_obj { 62 | struct kobject kobj; 63 | unsigned int index; 64 | char name[20]; 65 | }; 66 | #define to_config_obj(x) container_of(x, struct config_obj, kobj) 67 | 68 | struct cacheset_attribute { 69 | struct attribute attr; 70 | ssize_t (*show)(struct cacheset_obj *cacheset, struct cacheset_attribute *attr, char *buf); 71 | ssize_t (*store)(struct cacheset_obj *cacheset, struct cacheset_attribute *attr, char *buf, size_t count); 72 | }; 73 | #define to_cacheset_attr(x) container_of(x, struct cacheset_attribute, attr) 74 | 75 | struct config_attribute { 76 | struct attribute attr; 77 | ssize_t (*show)(struct config_obj *config, struct config_attribute *attr, char *buf); 78 | ssize_t (*store)(struct config_obj *config, struct config_attribute *attr, char *buf, size_t count); 79 | }; 80 | #define to_config_attr(x) container_of(x, struct config_attribute, attr) 81 | 82 | static ssize_t cacheset_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) 83 | { 84 | struct cacheset_attribute *attribute; 85 | struct cacheset_obj *x; 86 | 87 | attribute = to_cacheset_attr(attr); 88 | x = to_cacheset_obj(kobj); 89 | 90 | if (!attribute->show) 91 | return -EIO; 92 | 93 | return attribute->show(x, attribute, buf); 94 | } 95 | 96 | static ssize_t cacheset_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len) 97 | { 98 | struct cacheset_attribute *attribute; 99 | struct cacheset_obj *x; 100 | 101 | attribute = 
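// sysfs only hands back generic struct kobject/attribute pointers; the
// to_*_attr/to_*_obj macros above are container_of wrappers that recover the
// enclosing cacheset_obj or config_obj, which is how each per-set 'run' file
// knows its cache level and set index.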
to_cacheset_attr(attr); 102 | x = to_cacheset_obj(kobj); 103 | 104 | if (!attribute->store) 105 | return -EIO; 106 | 107 | return attribute->store(x, attribute, (char*)buf, len); 108 | } 109 | 110 | static ssize_t config_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) 111 | { 112 | struct config_attribute *attribute; 113 | struct config_obj *x; 114 | 115 | attribute = to_config_attr(attr); 116 | x = to_config_obj(kobj); 117 | 118 | if (!attribute->show) 119 | return -EIO; 120 | 121 | return attribute->show(x, attribute, buf); 122 | } 123 | 124 | static ssize_t config_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len) 125 | { 126 | struct config_attribute *attribute; 127 | struct config_obj *x; 128 | 129 | attribute = to_config_attr(attr); 130 | x = to_config_obj(kobj); 131 | 132 | if (!attribute->store) 133 | return -EIO; 134 | 135 | return attribute->store(x, attribute, (char*)buf, len); 136 | } 137 | 138 | static const struct sysfs_ops config_sysfs_ops = { 139 | .show = config_attr_show, 140 | .store = config_attr_store, 141 | }; 142 | 143 | static const struct sysfs_ops cacheset_sysfs_ops = { 144 | .show = cacheset_attr_show, 145 | .store = cacheset_attr_store, 146 | }; 147 | 148 | static void cacheset_release(struct kobject *kobj) 149 | { 150 | struct cacheset_obj *cacheset; 151 | cacheset = to_cacheset_obj (kobj); 152 | } 153 | 154 | static void config_release(struct kobject *kobj) 155 | { 156 | struct config_obj *config; 157 | config = to_config_obj (kobj); 158 | } 159 | 160 | // Declare functions 161 | ssize_t val_show(struct cacheset_obj *kobj, char *out, Block **sets, size_t set_length, size_t evict_size, int t_hit, int t_miss, unsigned char level); 162 | int calibrate (Block **set, int nsets, int ways, unsigned char measure_miss); 163 | 164 | static ssize_t cacheset_show(struct cacheset_obj *kobj, struct cacheset_attribute *attr, char *buf) 165 | { 166 | if (kobj->level == 3) 167 | { 168 | return val_show (kobj, buf, get_sets_l3(), L3_CACHE_SETS, 2*L3_CACHE_WAYS, get_l3_hit_threshold(&conf), get_l3_miss_threshold(&conf), 3); 169 | } 170 | else if (kobj->level == 2) 171 | { 172 | return val_show (kobj, buf, get_sets_l2(), L2_CACHE_SETS, EVICTION_SZ(L2), get_l2_hit_threshold(&conf), get_l2_miss_threshold(&conf), 2); 173 | } 174 | else if (kobj->level == 1) 175 | { 176 | return val_show (kobj, buf, get_sets_l1(), L1_CACHE_SETS, EVICTION_SZ(L1), get_l1_hit_threshold(&conf), get_l1_miss_threshold(&conf), 1); 177 | } 178 | else 179 | { 180 | return 0; 181 | } 182 | } 183 | 184 | static ssize_t code_show(struct cacheset_obj *kobj, struct cacheset_attribute *attr, char *buf) 185 | { 186 | ssize_t len = MIN(kobj->code.len , PAGE_SIZE); 187 | // hope everything is consistent here 188 | if (kobj->code.start != NULL && kobj->code.len > 0) 189 | { 190 | memcpy (buf, kobj->code.start, len); 191 | } 192 | return len; 193 | } 194 | 195 | static ssize_t cacheset_store(struct cacheset_obj *cacheset_obj, struct cacheset_attribute *attr, char *buf, size_t count) 196 | { 197 | if (cacheset_obj->level == 3) 198 | { 199 | parse (&cacheset_obj->lexer, buf, count, L3_CACHE_SETS); 200 | } 201 | else if (cacheset_obj->level == 2) 202 | { 203 | parse (&cacheset_obj->lexer, buf, count, L2_CACHE_SETS); 204 | } 205 | else if (cacheset_obj->level == 1) 206 | { 207 | parse (&cacheset_obj->lexer, buf, count, L1_CACHE_SETS); 208 | } 209 | else 210 | { 211 | return 0; 212 | } 213 | // free the code so it is regenerated for the new sequence 214 | free_code_pages 
(&cacheset_obj->code); 215 | return count; 216 | } 217 | 218 | static ssize_t config_show(struct config_obj *config_obj, struct config_attribute *attr, char *buf) 219 | { 220 | return conf_show_property (&conf, config_obj->index, buf); 221 | } 222 | 223 | static ssize_t config_store(struct config_obj *config_obj, struct config_attribute *attr, char *buf, size_t count) 224 | { 225 | conf_store_property (&conf, config_obj->index, buf, count); 226 | return count; 227 | } 228 | 229 | static struct cacheset_attribute cacheset_attribute = __ATTR(run, 0664, cacheset_show, cacheset_store); 230 | static struct cacheset_attribute code_attribute = __ATTR(code, 0664, code_show, NULL); 231 | static struct config_attribute conf_attribute = __ATTR(value, 0664, config_show, config_store); 232 | 233 | // Group of attributes 234 | static struct attribute *attrs[] = { 235 | NULL, 236 | }; 237 | 238 | // Specifying a name creates a directory under /sys/kernel 239 | static struct attribute_group attr_group = { 240 | .attrs = attrs, 241 | }; 242 | 243 | // Attributes for general cacheset_kobjs 244 | static struct attribute *cacheset_default_attrs[] = { 245 | &cacheset_attribute.attr, 246 | &code_attribute.attr, 247 | NULL, 248 | }; 249 | 250 | // Attributes for general config_kobjs 251 | static struct attribute *config_default_attrs[] = { 252 | &conf_attribute.attr, 253 | NULL, 254 | }; 255 | 256 | static struct kobj_type cacheset_ktype = { 257 | .sysfs_ops = &cacheset_sysfs_ops, 258 | .release = cacheset_release, 259 | .default_attrs = cacheset_default_attrs, 260 | }; 261 | 262 | static struct kobj_type config_ktype = { 263 | .sysfs_ops = &config_sysfs_ops, 264 | .release = config_release, 265 | .default_attrs = config_default_attrs, 266 | }; 267 | 268 | static struct cacheset_obj *create_cacheset_obj(unsigned int index, unsigned char level, struct kset *kset) 269 | { 270 | struct cacheset_obj *x; 271 | int retval; 272 | 273 | x = kzalloc(sizeof(*x), GFP_KERNEL); 274 | if (!x) 275 | return NULL; 276 | 277 | x->kobj.kset = kset; 278 | x->index = index; 279 | x->level = level; 280 | lexer_init (&x->lexer); 281 | smart_buffer_init (&x->code); 282 | snprintf (x->name, 20, "%d", index); 283 | 284 | retval = kobject_init_and_add (&x->kobj, &cacheset_ktype, NULL, "%s", x->name); 285 | if (retval) 286 | { 287 | kobject_put(&x->kobj); 288 | return NULL; 289 | } 290 | 291 | kobject_uevent(&x->kobj, KOBJ_ADD); 292 | return x; 293 | } 294 | 295 | static struct config_obj *create_config_obj(unsigned int index, t_property *prop, struct kset *kset) 296 | { 297 | struct config_obj *x; 298 | int retval; 299 | 300 | x = kzalloc(sizeof(*x), GFP_KERNEL); 301 | if (!x) 302 | return NULL; 303 | 304 | x->kobj.kset = kset; 305 | x->index = index; 306 | snprintf (x->name, 20, "%s", prop->name); 307 | 308 | retval = kobject_init_and_add (&x->kobj, &config_ktype, NULL, "%s", x->name); 309 | if (retval) 310 | { 311 | kobject_put(&x->kobj); 312 | return NULL; 313 | } 314 | 315 | kobject_uevent(&x->kobj, KOBJ_ADD); 316 | return x; 317 | } 318 | 319 | static void destroy_config_obj(struct config_obj *config) 320 | { 321 | kobject_put(&config->kobj); 322 | } 323 | 324 | static void destroy_cacheset_obj(struct cacheset_obj *cacheset) 325 | { 326 | free_code_pages (&cacheset->code); 327 | kobject_put(&cacheset->kobj); 328 | } 329 | 330 | 331 | // Rest of function definitions 332 | int calibrate (Block **set, int nsets, int ways, unsigned char measure_miss) 333 | { 334 | size_t i, s, c; 335 | int t_val, diff, kk; 336 | int w1, w2; 337 | Block *evict, 
331 | // Rest of function definitions
332 | int calibrate (Block **set, int nsets, int ways, unsigned char measure_miss)
333 | {
334 |     size_t i, s, c;
335 |     int t_val, diff, kk;
336 |     int w1, w2;
337 |     Block *evict, **evict1, **evict2, *tmp;
338 |     struct smart_buffer code;
339 |     unsigned long long (*run)(void) = NULL, ret = 0;
340 |     unsigned long flags;
341 | 
342 |     memset (cal, 0, sizeof(cal));
343 | 
344 |     // Calibrate with set 0
345 |     s = 0;
346 |     // Lazily compute the L2 eviction sets
347 |     if (nsets == L3_CACHE_SETS && set[s]->evict2 == NULL)
348 |     {
349 |         tmp = set[s];
350 |         while (tmp)
351 |         {
352 |             find_l2_eviction_set (tmp, get_sets_l2());
353 |             tmp = tmp->next;
354 |         }
355 |     }
356 |     // Allocate non-conflicting pages
357 |     if (!allocate_code_pages (&code, set, nsets, s, JIT_CODE_SZ))
358 |     {
359 |         goto err;
360 |     }
361 | 
362 |     evict1 = set[s]->evict1;
363 |     evict2 = set[s]->evict2;
364 |     evict = set[s]->next;
365 | 
366 |     // preamble
367 |     OPCODE(&code, PUSH_RBP());
368 |     OPCODE(&code, MOV_RBP_RSP());
369 |     OPCODE(&code, PUSH_RBX());
370 | 
371 |     // flush block
372 |     OPCODE(&code, MOV_RAX_CT((unsigned long long)(set[s])));
373 |     OPCODE(&code, CLFLUSH_RAX());
374 |     OPCODE(&code, CPUID());
375 | 
376 |     // access block
377 |     OPCODE(&code, SERIALIZE());
378 |     OPCODE(&code, LOAD_RAX((unsigned long long)(set[s])));
379 |     OPCODE(&code, SERIALIZE());
380 | 
381 |     PRINT ("[debug] load %p:\ts1=%d\ts2=%d\ts3=%d\th=%d\n",
382 |         set[s], set[s]->set1, set[s]->set2, set[s]->set3, set[s]->slice);
383 | 
384 |     // evict block from L1 and LFB
385 |     if (set[s]->evict1_sz > 0)
386 |     {
387 |         for (i = 0, w1 = 0; i < set[s]->evict1_sz && w1 < EVICTION_SZ(L1); i++)
388 |         {
389 |             if (((evict1[i]->set3 != set[s]->set3) || (evict1[i]->slice != set[s]->slice))
390 |                 && (evict1[i]->set2 != set[s]->set2))
391 |             {
392 |                 OPCODE(&code, LOAD_RAX((unsigned long long)(evict1[i])));
393 |                 OPCODE(&code, SERIALIZE());
394 |                 WPRINT("\teset1: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", evict1[i], evict1[i]->set1, evict1[i]->set2, evict1[i]->set3, evict1[i]->slice);
395 |                 w1 += 1;
396 |             }
397 |         }
398 |         if (set[s]->evict2_sz < 1) // only if in L2
399 |         {
400 |             for (w1 = 0; i < set[s]->evict1_sz && w1 < LFB; i++)
401 |             {
402 |                 if (((evict1[i]->set3 != set[s]->set3) || (evict1[i]->slice != set[s]->slice))
403 |                     && (evict1[i]->set2 != set[s]->set2))
404 |                 {
405 |                     // OPCODE(code, LOAD_RAX((unsigned long long)(evict1[i])));
406 |                     OPCODE(&code, MOV_RAX_CT((unsigned long long)(evict1[i]))); // move pointer to rax
407 |                     OPCODE (&code, MOVNTDQA_RAX()); // movntdqa xmm1, [rax]
408 |                     WPRINT("\tlfb: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", evict1[i], evict1[i]->set1, evict1[i]->set2, evict1[i]->set3, evict1[i]->slice);
409 |                     w1 += 1;
410 |                 }
411 | 
412 |             }
413 |             OPCODE(&code, SERIALIZE());
414 |         }
415 |     }
416 | 
417 |     // evict block from L2 and SQ
418 |     if (set[s]->evict2_sz > 0)
419 |     {
420 |         for (i = 0, w2 = 0; i < set[s]->evict2_sz && w2 < EVICTION_SZ(L2); i++)
421 |         {
422 |             if ((evict2[i]->set3 != set[s]->set3) || (evict2[i]->slice != set[s]->slice))
423 |             {
424 |                 OPCODE(&code, LOAD_RAX((unsigned long long)(evict2[i])));
425 |                 OPCODE(&code, SERIALIZE());
426 |                 WPRINT("\teset2: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", evict2[i], evict2[i]->set1, evict2[i]->set2, evict2[i]->set3, evict2[i]->slice);
427 |                 w2 += 1;
428 |             }
429 |         }
430 |         for (w2 = 0; i < set[s]->evict2_sz && w2 < SQ; i++)
431 |         {
432 |             if ((evict2[i]->set3 != set[s]->set3) || (evict2[i]->slice != set[s]->slice))
433 |             {
434 |                 // OPCODE(code, LOAD_RAX((unsigned long long)(evict2[i])));
435 |                 OPCODE(&code, MOV_RAX_CT((unsigned long long)(evict2[i]))); // move pointer to rax
436 |                 OPCODE (&code, MOVNTDQA_RAX()); // movntdqa xmm1, [rax]
437 |                 WPRINT("\tsq: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", evict2[i], evict2[i]->set1, evict2[i]->set2, evict2[i]->set3, evict2[i]->slice);
438 |                 w2 += 1;
439 |             }
440 |         }
441 |         OPCODE(&code, SERIALIZE());
442 |     }
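    /*
     * The MOVNTDQA loads above are bounded by the LFB and SQ constants: beyond
     * evicting the block from L1/L2 proper, they appear intended to cycle the
     * line fill buffer and super queue entries, so no stale buffered copy can
     * serve the timed access.
     */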
443 | 
444 |     if (measure_miss)
445 |     {
446 |         // evict current cache level
447 |         c = 0;
448 |         while (evict && c < ways)
449 |         {
450 |             if ((nsets == L1_CACHE_SETS && ((evict->set2 != set[s]->set2) && (evict->set3 != set[s]->set3 || evict->slice != set[s]->slice)))
451 |                 || ((nsets == L2_CACHE_SETS) && (evict->set3 != set[s]->set3 || evict->slice != set[s]->slice))
452 |                 || (nsets == L3_CACHE_SETS))
453 |             {
454 |                 OPCODE(&code, LOAD_RAX((unsigned long long)(evict)));
455 |                 OPCODE(&code, SERIALIZE());
456 |                 WPRINT("\teset: %p\ts1=%d\ts2=%d\ts3=%d\th=%d\n", evict, evict->set1, evict->set2, evict->set3, evict->slice);
457 |                 c += 1;
458 |             }
459 |             evict = evict->next;
460 |         }
461 |     }
462 | 
463 |     // refresh address TLB if L2 or L3
464 |     if (get_tlb_preload(&conf) && set[s]->evict1_sz > 0)
465 |     {
466 |         unsigned long long tlb = ((unsigned long long)set[s]);
467 |         tlb = ((tlb >> 12) << 12) | ((tlb & 0xfff) ^ 0x7e0); // different set index-bits
468 |         WPRINT("[debug] tlb refresh %llx s1=%d s2=%d s3=%d h=%d\n",
469 |             tlb, get_l1_set((void*)tlb), get_l2_set((void*)tlb), get_l3_set((void*)tlb), get_l3_slice((void*)tlb));
470 |         OPCODE(&code, LOAD_RAX(tlb));
471 |         OPCODE(&code, SERIALIZE());
472 |     }
473 | 
474 |     if (get_core_cycles(&conf))
475 |     {
476 |         RESET_PMC0(&code);
477 | 
478 |     }
479 |     else
480 |     {
481 |         MEASURE_PRE_TSC(&code);
482 |     }
483 | 
484 |     // Access block
485 |     OPCODE(&code, LOAD_RAX((unsigned long long)(set[s])));
486 | 
487 |     if (get_core_cycles(&conf))
488 |     {
489 |         MEASURE_POST_CORE(&code);
490 |     }
491 |     else
492 |     {
493 |         MEASURE_POST_TSC(&code);
494 |     }
495 | 
496 |     // ret diff (rax)
497 |     OPCODE(&code, MOV_RAX_RDI());
498 |     OPCODE(&code, POP_RBX());
499 |     OPCODE(&code, POP_RBP());
500 |     OPCODE(&code, RETQ());
501 | 
502 |     // warm
503 |     run = (unsigned long long(*)(void))(code.start);
504 |     run ();
505 | 
506 |     for (i=0, kk=0; i < get_num_repetitions(&conf); i++) /* NOTE: original loop bound lost in this dump; the configured repetition count is an assumed stand-in */
507 |     {
508 |         // Prepare counters
509 |         if (get_core_cycles(&conf))
510 |         {
511 |             prepare_counters (4);
512 |             // > 3 for core cycles instead of cache miss
513 |         }
514 |         preempt_disable ();
515 |         raw_local_irq_save (flags);
516 |         //
517 |         ret = run ();
518 |         //
519 |         raw_local_irq_restore (flags);
520 |         preempt_enable ();
521 |         // Disable counters
522 |         if (get_core_cycles(&conf))
523 |         {
524 |             disable_counters ();
525 |         }
526 |         //EOC
527 |         diff = (int)ret;
528 |         if (diff < 0)
529 |         {
530 |             diff = 0;
531 |         }
532 |         if (diff < get_max_access_time(&conf) && i > 10)
533 |         {
534 |             cal[diff]++;
535 |             kk++;
536 |         }
537 |     }
538 |     print_hist (cal);
539 |     t_val = get_min (cal);
540 | 
541 |     free_code_pages (&code);
542 |     return t_val;
543 | 
544 | err:
545 |     printk (KERN_ERR "err: calibration\n");
546 |     return -1;
547 | }
548 | 
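/*
 * Interpreting the calibration histogram: `cal` counts occurrences per
 * observed latency, and the returned threshold is its fastest populated
 * bucket. A user-side analogue of get_min() (a sketch only; the real
 * implementation lives in src/histogram.c):
 *
 *     int min_latency(const int *h, int max_time)
 *     {
 *         int t;
 *         for (t = 0; t < max_time; t++)
 *             if (h[t] > 0)
 *                 return t;   // first non-empty bucket = fastest observed access
 *         return -1;          // empty histogram
 *     }
 */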
%d\n", t_miss); 573 | } 574 | } 575 | 576 | // We reuse the code until there's a new sequence (write triggers parse()) 577 | // TODO: add check for change on config 578 | if (code->start == NULL) 579 | { 580 | if (generate_code (&kobj->lexer, kobj->index, code, sets, level, t_miss, t_hit, &conf) < 0) 581 | { 582 | printk (KERN_INFO "Err: Unitialized code\n"); 583 | return 0; 584 | } 585 | } 586 | 587 | // Fix ptr to generated code 588 | run = (unsigned long long(*)(void))code->start; 589 | 590 | // Reset counters 591 | memset (trace, 0, sizeof(trace)); 592 | memset (hist, 0, sizeof(hist)); 593 | 594 | // warmup code 595 | run(); 596 | 597 | for (rep = 0, fails = 0; rep < num_repetitions && fails < num_repetitions; rep++) 598 | { 599 | // EXEC TEST CODE 600 | // Prepare performance counters 601 | if (get_use_pmc(&conf)) 602 | { 603 | prepare_counters (level); 604 | } 605 | else if (get_core_cycles(&conf)) 606 | { 607 | prepare_counters (4); 608 | 609 | } 610 | preempt_disable (); 611 | raw_local_irq_save (flags); 612 | // 613 | retmask = run(); 614 | // 615 | raw_local_irq_restore (flags); 616 | preempt_enable (); 617 | // Disable counters 618 | if (get_use_pmc(&conf) || get_core_cycles(&conf)) 619 | { 620 | disable_counters (); 621 | } 622 | // EOC 623 | if (retmask == 0xffffffffffffffff) 624 | { 625 | printk (KERN_INFO "[err] code exec exception\n"); 626 | ret = sprintf (out, "Runtime exception!\n"); 627 | goto err; 628 | // repeat instead of abort 629 | rep--; 630 | fails++; 631 | } 632 | if (get_only_one_time(&conf)) 633 | { 634 | /* safe distribution of timing measurements for last block */ 635 | if (retmask < MAX_TIME) 636 | { 637 | hist[retmask] += 1; 638 | } 639 | else 640 | { 641 | PRINT ("MAX_TIME exceeded\n"); 642 | } 643 | } 644 | else if (code->asks > 0) 645 | { 646 | i = 0; 647 | bit = 1ULL << (code->asks - 1); 648 | while (bit != 0ULL) 649 | { 650 | // count hit 651 | if ((retmask & bit) == 0ULL) 652 | { 653 | trace[i] += 1; 654 | } 655 | bit >>= 1; 656 | i += 1; 657 | } 658 | } 659 | // yield (); // force preemption 660 | } 661 | 662 | if (fails >= num_repetitions) 663 | { 664 | printk (KERN_INFO "Error: exceeded fails max\n"); 665 | ret = 0; 666 | goto err; 667 | } 668 | 669 | // return length of string 670 | ret = 0; 671 | if (get_only_one_time(&conf)) 672 | { 673 | PRINT ("[time] Total reps: %lu (%lu fails)\n", rep, fails); 674 | print_hist (hist); 675 | PRINT ("[time] Mean: %d\n", get_mean(hist, i-1)); 676 | PRINT ("[time] Mode: %d\n", get_mode(hist)); 677 | PRINT ("[time] Below MISS threshold: %d\n", get_n_below(hist, t_miss)); 678 | } 679 | else 680 | { 681 | // buffer is of size PAGE_SIZE 682 | for (i = 0; i < code->asks; i++) 683 | { 684 | ret += sprintf (&out[ret], "%ld ", trace[i]); 685 | } 686 | ret += sprintf (&out[ret], "\n"); 687 | } 688 | 689 | err: 690 | return ret; 691 | 692 | } 693 | 694 | static int __init cachequery_init(void) 695 | { 696 | int ret; 697 | unsigned int i, j; 698 | 699 | // Create and add object 700 | cachequery_kobj = kobject_create_and_add ("cachequery", kernel_kobj); 701 | if (!cachequery_kobj) 702 | { 703 | return -ENOMEM; 704 | } 705 | 706 | // Create files for group 707 | ret = sysfs_create_group (cachequery_kobj, &attr_group); 708 | if (ret) 709 | { 710 | goto err; 711 | } 712 | 713 | // Allocate pools 714 | if (!(pool_l3 = __vmalloc (L3_POOL_SZ, GFP_KERNEL|__GFP_HIGHMEM|__GFP_COMP, PAGE_KERNEL_LARGE))) 715 | { 716 | ret = -ENOMEM; 717 | goto err; 718 | } 719 | if (!(pool_l2 = __vmalloc (L2_POOL_SZ, 
GFP_KERNEL|__GFP_HIGHMEM|__GFP_COMP, PAGE_KERNEL_LARGE)))
720 |     {
721 |         ret = -ENOMEM;
722 |         goto clean2;
723 |     }
724 |     if (!(pool_l1 = __vmalloc (L1_POOL_SZ, GFP_KERNEL|__GFP_HIGHMEM|__GFP_COMP, PAGE_KERNEL_LARGE)))
725 |     {
726 |         ret = -ENOMEM;
727 |         goto clean1;
728 |     }
729 | 
730 |     // Init and shuffle lists of congruent addresses
731 |     init_lists(pool_l3, pool_l2, pool_l1);
732 | 
733 |     // Start computing eviction sets
734 |     init_evictionsets();
735 | 
736 |     // Create structures (on failure, unwind and report out-of-memory)
737 |     ret = -ENOMEM;
738 |     l3cachesets_kset = kset_create_and_add("l3_sets", NULL, cachequery_kobj);
739 |     if (!l3cachesets_kset)
740 |         goto clean;
741 |     l2cachesets_kset = kset_create_and_add("l2_sets", NULL, cachequery_kobj);
742 |     if (!l2cachesets_kset)
743 |         goto unreg3;
744 |     l1cachesets_kset = kset_create_and_add("l1_sets", NULL, cachequery_kobj);
745 |     if (!l1cachesets_kset)
746 |         goto unreg2;
747 |     config_kset = kset_create_and_add("config", NULL, cachequery_kobj);
748 |     if (!config_kset)
749 |         goto unreg1;
750 |     for (i = 0; i < L3_CACHE_SETS; i++)
751 |     {
752 |         // on failure, unwind every object created so far
753 |         cacheset_l3_objs[i] = create_cacheset_obj(i, 3, l3cachesets_kset);
754 |         if (!cacheset_l3_objs[i])
755 |             goto obj3;
756 |     }
757 |     for (i = 0; i < L2_CACHE_SETS; i++)
758 |     {
759 |         // on failure, unwind every object created so far
760 |         cacheset_l2_objs[i] = create_cacheset_obj(i, 2, l2cachesets_kset);
761 |         if (!cacheset_l2_objs[i])
762 |             goto obj2;
763 |     }
764 |     for (i = 0; i < L1_CACHE_SETS; i++)
765 |     {
766 |         // on failure, unwind every object created so far
767 |         cacheset_l1_objs[i] = create_cacheset_obj(i, 1, l1cachesets_kset);
768 |         if (!cacheset_l1_objs[i])
769 |             goto obj1;
770 |     }
771 | 
772 |     // Initialize config
773 |     printk (KERN_INFO "Creating config...\n");
774 |     init_config (&conf);
775 |     if (!(config_objs = vmalloc (sizeof(struct config_obj*)*conf.length)))
776 |     {
777 |         ret = -ENOMEM;
778 |         goto obj1;
779 |     }
780 |     for (i = 0; i < conf.length; i++)
781 |     {
782 |         // on failure, unwind every object created so far
783 |         config_objs[i] = create_config_obj(i, &conf.properties[i], config_kset);
784 |         if (!config_objs[i])
785 |             goto conf;
786 |     }
787 | 
788 |     // Success
789 |     printk (KERN_INFO "Loaded cachequery.ko\n");
790 |     return 0;
791 | 
792 |     // Beach: error unwinding
793 | conf:
794 |     for (j = 0; j < i; j++)
795 |         destroy_config_obj (config_objs[j]);
796 |     i = L1_CACHE_SETS;
797 | obj1:
798 |     for (j = 0; j < i; j++)
799 |         destroy_cacheset_obj (cacheset_l1_objs[j]);
800 |     i = L2_CACHE_SETS;
801 | obj2:
802 |     for (j = 0; j < i; j++)
803 |         destroy_cacheset_obj (cacheset_l2_objs[j]);
804 |     i = L3_CACHE_SETS;
805 | obj3:
806 |     for (j = 0; j < i; j++)
807 |         destroy_cacheset_obj (cacheset_l3_objs[j]);
808 | 
809 |     kset_unregister (config_kset);
810 | unreg1:
811 |     kset_unregister (l1cachesets_kset);
812 | unreg2:
813 |     kset_unregister (l2cachesets_kset);
814 | unreg3:
815 |     kset_unregister (l3cachesets_kset);
816 | clean:
817 |     for (i = 0; i < L3_CACHE_SETS; i++)
818 |     {
819 |         clean_l3_set (i);
820 |     }
821 |     for (i = 0; i < L2_CACHE_SETS; i++)
822 |     {
823 |         clean_l2_set (i);
824 |     }
825 |     vfree (config_objs);
826 |     vfree (pool_l1);
827 | clean1:
828 |     vfree (pool_l2);
829 | clean2:
830 |     vfree (pool_l3);
831 | err:
832 |     kobject_put (cachequery_kobj);
833 |     return ret;
834 | }
835 | 
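/*
 * On success, cachequery_init() above leaves this sysfs layout behind (set
 * counts depend on the CPU configuration header selected at build time):
 *
 *     /sys/kernel/cachequery/
 *         l3_sets/<0..L3_CACHE_SETS-1>/{run,code}
 *         l2_sets/<0..L2_CACHE_SETS-1>/{run,code}
 *         l1_sets/<0..L1_CACHE_SETS-1>/{run,code}
 *         config/<property>/value
 */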
836 | static void __exit cachequery_exit(void)
837 | {
838 |     unsigned int i;
839 |     for (i = 0; i < L2_CACHE_SETS; i++)
840 |     {
841 |         clean_l2_set (i);
842 |     }
843 |     for (i = 0; i < L3_CACHE_SETS; i++)
844 |     {
845 |         clean_l3_set (i);
846 |     }
847 |     vfree (pool_l3);
848 |     vfree (pool_l2);
849 |     vfree (pool_l1);
850 |     for (i = 0; i < L3_CACHE_SETS; i++)
851 |         destroy_cacheset_obj (cacheset_l3_objs[i]);
852 |     for (i = 0; i < L2_CACHE_SETS; i++)
853 |         destroy_cacheset_obj (cacheset_l2_objs[i]);
854 |     for (i = 0; i < L1_CACHE_SETS; i++)
855 |         destroy_cacheset_obj (cacheset_l1_objs[i]);
856 |     for (i = 0; i < conf.length; i++)
857 |         destroy_config_obj (config_objs[i]);
858 |     vfree (config_objs); // the pointer array itself was vmalloc'ed in init
859 |     kset_unregister (l3cachesets_kset);
860 |     kset_unregister (l2cachesets_kset);
861 |     kset_unregister (l1cachesets_kset);
862 |     kset_unregister (config_kset);
863 |     kobject_put (cachequery_kobj);
864 |     printk (KERN_INFO "Unloaded cachequery.ko\n");
865 | }
866 | 
867 | module_init(cachequery_init);
868 | module_exit(cachequery_exit);
869 | 
870 | MODULE_LICENSE("GPL");
871 | MODULE_AUTHOR("Pepe Vila - @cgvwzq");
872 | MODULE_DESCRIPTION("CacheQuery's Kernel module");
--------------------------------------------------------------------------------