├── .gitignore ├── README.md ├── exit_reason.py ├── kvm_vmexit_count.py ├── kvm_vmexit_slower.py ├── kvm_vmexit_time.py └── kvm_vmexit_time_hist.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | *.pyc 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KVM BPF Tools 2 | A collection of scripts that trace KVM events using BPF. 3 | Currently only x86 is supported. 4 | 5 | ## Requirements 6 | - [bcc](https://github.com/iovisor/bcc) 7 | - [fire](https://github.com/google/python-fire) 8 | 9 | ## VMEXIT Counts 10 | Counts the number of KVM VMEXITs per exit reason. 11 | The L2 column shows the number of VMEXITs from nested guests. 12 | 13 | ```sh 14 | % sudo ./kvm_vmexit_count.py 15 | Tracing... Hit Ctrl-C to end. 16 | ^C 17 | Exit reason Total L1 L2 18 | 0 EXCEPTION_OR_NMI 52 0 52 19 | 1 EXTERNAL_INT 68 49 19 20 | 7 INTERRUPT_WINDOW 91 84 7 21 | 12 HLT 2965 2787 178 22 | 15 RDPMC 8106 8106 0 23 | 19 VMCLEAR 118 118 0 24 | 20 VMLAUNCH 104 104 0 25 | 21 VMPTRLD 117 117 0 26 | 23 VMREAD 2887 2887 0 27 | 24 VMRESUME 2418 2418 0 28 | 28 MOV_CR 428 428 0 29 | 29 MOV_DR 579 579 0 30 | 30 IO_INSTRUCTION 134 46 88 31 | 31 RDMSR 1771 1761 10 32 | 32 WRMSR 1987 1294 693 33 | 44 APIC_ACCESS 7657 7657 0 34 | 48 EPT_VIOLATION 1528 5 1523 35 | 49 EPT_MISCONFIG 76 76 0 36 | 50 INVEPT 25 25 0 37 | 52 VMX_PREEMPT_TIMER 51 32 19 38 | 53 INVVPID 17 17 0 39 | Total 31179 28590 2589 40 | ``` 41 | 42 | ## VMEXIT Counts and Handling time 43 | Counts the number of KVM VMEXITs and measures the average handling time. 44 | Reported time is in nano seconds. 45 | 46 | ```sh 47 | $ sudo ./kvm_vmexit_time.py 48 | Tracing... Hit Ctrl-C to end. 
49 | ^C 50 | Exit reason Total (Avg Time) L1 (Avg Time) L2 (Avg Time) 51 | 0 EXCEPTION_OR_NMI 27 ( 24760) 1 ( 4979) 26 ( 25520) 52 | 1 EXTERNAL_INT 25 ( 3304) 21 ( 2797) 4 ( 5967) 53 | 7 INTERRUPT_WINDOW 38 ( 4009) 37 ( 3363) 1 ( 27890) 54 | 12 HLT 1245 ( 3437190) 1164 ( 3674603) 81 ( 25469) 55 | 15 RDPMC 3760 ( 3785) 3760 ( 3785) 0 ( 0) 56 | 19 VMCLEAR 82 ( 12110) 82 ( 12110) 0 ( 0) 57 | 20 VMLAUNCH 69 ( 32072) 69 ( 32072) 0 ( 0) 58 | 21 VMPTRLD 82 ( 8692) 82 ( 8692) 0 ( 0) 59 | 23 VMREAD 1282 ( 3403) 1282 ( 3403) 0 ( 0) 60 | 24 VMRESUME 1031 ( 28250) 1031 ( 28250) 0 ( 0) 61 | 28 MOV_CR 324 ( 6583) 324 ( 6583) 0 ( 0) 62 | 29 MOV_DR 308 ( 2597) 308 ( 2597) 0 ( 0) 63 | 30 IO_INSTRUCTION 68 ( 21263) 19 ( 14930) 49 ( 23719) 64 | 31 RDMSR 897 ( 2858) 892 ( 2744) 5 ( 23206) 65 | 32 WRMSR 1060 ( 9160) 777 ( 3059) 283 ( 25912) 66 | 44 APIC_ACCESS 3414 ( 8813) 3414 ( 8813) 0 ( 0) 67 | 48 EPT_VIOLATION 666 ( 27375) 1 ( 15456) 665 ( 27393) 68 | 49 EPT_MISCONFIG 22 ( 21835) 22 ( 21835) 0 ( 0) 69 | 50 INVEPT 6 ( 6420) 6 ( 6420) 0 ( 0) 70 | 52 VMX_PREEMPT_TIMER 21 ( 12105) 14 ( 4543) 7 ( 27228) 71 | 53 INVVPID 2 ( 6608) 2 ( 6608) 0 ( 0) 72 | Total (Avg Time) 14430 ( 304754) 13309 ( 328184) 1121 ( 26580) 73 | ``` 74 | 75 | ## VMEXIT Handling time histogram 76 | Show VMEXIT handling time histogram. 77 | Reported time is in nano seconds. 78 | 79 | ```sh 80 | % sudo ./kvm_vmexit_time_hist.py 81 | Tracing... Hit Ctrl-C to end. 
82 | ^C 83 | value : count distribution 84 | 0 -> 1 : 0 | | 85 | 2 -> 3 : 0 | | 86 | 4 -> 7 : 0 | | 87 | 8 -> 15 : 0 | | 88 | 16 -> 31 : 0 | | 89 | 32 -> 63 : 0 | | 90 | 64 -> 127 : 0 | | 91 | 128 -> 255 : 0 | | 92 | 256 -> 511 : 55 | | 93 | 512 -> 1023 : 772 |*** | 94 | 1024 -> 2047 : 2497 |********** | 95 | 2048 -> 4095 : 9349 |****************************************| 96 | 4096 -> 8191 : 3823 |**************** | 97 | 8192 -> 16383 : 2762 |*********** | 98 | 16384 -> 32767 : 2912 |************ | 99 | 32768 -> 65535 : 129 | | 100 | 65536 -> 131071 : 16 | | 101 | 131072 -> 262143 : 47 | | 102 | 262144 -> 524287 : 56 | | 103 | 524288 -> 1048575 : 228 | | 104 | 1048576 -> 2097151 : 183 | | 105 | 2097152 -> 4194303 : 1115 |**** | 106 | 4194304 -> 8388607 : 290 |* | 107 | 8388608 -> 16777215 : 201 | | 108 | ``` 109 | 110 | To trace only one kind of VMEXIT, pass its exit reason number as an argument (12 is HLT in the example below). 111 | 112 | ```sh 113 | % sudo ./kvm_vmexit_time_hist.py 12 114 | Tracing... Hit Ctrl-C to end. 115 | ^C 116 | value : count distribution 117 | 0 -> 1 : 0 | | 118 | 2 -> 3 : 0 | | 119 | 4 -> 7 : 0 | | 120 | 8 -> 15 : 0 | | 121 | 16 -> 31 : 0 | | 122 | 32 -> 63 : 0 | | 123 | 64 -> 127 : 0 | | 124 | 128 -> 255 : 0 | | 125 | 256 -> 511 : 5 | | 126 | 512 -> 1023 : 498 |****************************************| 127 | 1024 -> 2047 : 489 |*************************************** | 128 | 2048 -> 4095 : 457 |************************************ | 129 | 4096 -> 8191 : 318 |************************* | 130 | 8192 -> 16383 : 301 |************************ | 131 | 16384 -> 32767 : 137 |*********** | 132 | 32768 -> 65535 : 9 | | 133 | 65536 -> 131071 : 2 | | 134 | 131072 -> 262143 : 1 | | 135 | 262144 -> 524287 : 2 | | 136 | 524288 -> 1048575 : 18 |* | 137 | 1048576 -> 2097151 : 25 |** | 138 | 2097152 -> 4194303 : 99 |******* | 139 | 4194304 -> 8388607 : 29 |** | 140 | 8388608 -> 16777215 : 13 |* | 141 | ``` 142 | 143 | ## VMEXIT Handling time slower 144 | Report VMEXIT events whose handling time is 
longer than the predefined value. 145 | Reported time is in nano seconds. 146 | 147 | ```sh 148 | # Threshold is 1ms (default: 10ms) and excludes exit 12 (HLT) 149 | % sudo ./kvm_vmexit_slower.py 1 --excludes 12, 150 | Excludes: HLT 151 | Tracing... Hit Ctrl-C to end. 152 | 44 APIC_ACCESS 2631024 (L1) 153 | 44 APIC_ACCESS 1442657 (L1) 154 | 1 EXTERNAL_INT 1635030 (L1) 155 | ... 156 | ``` 157 | 158 | ## License 159 | Apache-2.0 160 | -------------------------------------------------------------------------------- /exit_reason.py: -------------------------------------------------------------------------------- 1 | EXIT_REASON = [ 2 | "EXCEPTION_OR_NMI", # 0x0 3 | "EXTERNAL_INT", # 0x1 4 | "TRIPLE_FAULT", # 0x2 5 | "INIT_SIGNAL", # 0x3 6 | "STARTUP_IPI", # 0x4 7 | "IO_SMI", # 0x5 8 | "OTHER_SMI", # 0x6 9 | "INTERRUPT_WINDOW", # 0x7 10 | "NMI_WINDOW", # 0x8 11 | "TASK_SWITCH", # 0x9 12 | "CPUID", # 0xA 13 | "GETSEC", # 0xB 14 | "HLT", # 0xC 15 | "INVD", # 0xD 16 | "INVLPG", # 0xE 17 | "RDPMC", # 0xF 18 | "RDTSC", # 0x10 19 | "RSM", # 0x11 20 | "VMCALL", # 0x12 21 | "VMCLEAR", # 0x13 22 | "VMLAUNCH", # 0x14 23 | "VMPTRLD", # 0x15 24 | "VMPTRST", # 0x16 25 | "VMREAD", # 0x17 26 | "VMRESUME", # 0x18 27 | "VMWRITE", # 0x19 28 | "VMXOFF", # 0x1A 29 | "VMXON", # 0x1B 30 | "MOV_CR", # 0x1C 31 | "MOV_DR", # 0x1D 32 | "IO_INSTRUCTION", # 0x1E 33 | "RDMSR", # 0x1F 34 | "WRMSR", # 0x20 35 | "ENTFAIL_GUEST_STATE", # 0x21 36 | "ENTFAIL_MSR_LOADING", # 0x22 37 | "----UNUSED----", # 0x23 38 | "MWAIT", # 0x24 39 | "MONITOR_TRAP_FLAG", # 0x25 40 | "----UNUSED----", # 0x26 41 | "MONITOR", # 0x27 42 | "PAUSE", # 0x28 43 | "ENTFAIL_MACHINE_CHK", # 0x29 44 | "----UNSUED----", # 0x2A 45 | "TPR_BELOW_THRESHOLD", # 0x2B 46 | "APIC_ACCESS", # 0x2C 47 | "VIRTUALIZED_EOI", # 0x2D 48 | "ACCESS_GDTR_OR_IDTR", # 0x2E 49 | "ACCESS_LDTR_OR_TR", # 0x2F 50 | "EPT_VIOLATION", # 0x30 51 | "EPT_MISCONFIG", # 0x31 52 | "INVEPT", # 0x32 53 | "RDTSCP", # 0x33 54 | "VMX_PREEMPT_TIMER", # 0x34 55 | "INVVPID", 
# 0x35 56 | "WBINVD", # 0x36 57 | "XSETBV", # 0x37 58 | "APIC_WRITE", # 0x38 59 | "RDRAND", # 0x39 60 | "INVPCID", # 0x3A 61 | "VMFUNC", # 0x3B 62 | ] 63 | -------------------------------------------------------------------------------- /kvm_vmexit_count.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from collections import defaultdict 4 | from time import sleep 5 | 6 | from bcc import BPF 7 | 8 | from exit_reason import EXIT_REASON 9 | 10 | text = """ 11 | BPF_HASH(counts, unsigned int, u64); 12 | BPF_HASH(counts_nested, unsigned int, u64); 13 | 14 | TRACEPOINT_PROBE(kvm, kvm_exit) { 15 | u64 zero = 0, *val; 16 | unsigned int key = args->exit_reason; 17 | val = counts.lookup_or_init(&key, &zero); 18 | (*val)++; 19 | return 0; 20 | } 21 | 22 | TRACEPOINT_PROBE(kvm, kvm_nested_vmexit) { 23 | u64 zero = 0, *val; 24 | unsigned int key = args->exit_code; 25 | val = counts_nested.lookup_or_init(&key, &zero); 26 | (*val)++; 27 | return 0; 28 | } 29 | """ 30 | 31 | 32 | def main(): 33 | # load BPF program 34 | b = BPF(text=text) 35 | 36 | print("Tracing... 
Hit Ctrl-C to end.") 37 | try: 38 | sleep(99999999) 39 | except KeyboardInterrupt: 40 | pass 41 | print() 42 | 43 | counts = defaultdict(int) 44 | result = {} 45 | for table_name in ("counts", "counts_nested"): 46 | cs = b.get_table(table_name) 47 | result[table_name] = defaultdict(int) 48 | for k, v in cs.items(): 49 | k, v = k.value, v.value 50 | result[table_name][EXIT_REASON[k]] = v 51 | 52 | print("{:3s} {:18s} {:>8s} {:>8s} {:>8s}".format( 53 | "", "Exit reason", "Total", "L1", "L2")) 54 | total = 0 55 | l2_total = 0 56 | for i, e in enumerate(EXIT_REASON): 57 | c = result["counts"][e] 58 | cn = result["counts_nested"][e] 59 | if c > 0 or cn > 0: 60 | print("{:3d} {:18s} {:8d} {:8d} {:8d}".format(i, e, c, c - cn, cn)) 61 | total += c 62 | l2_total += cn 63 | print("{:3s} {:18s} {:8d} {:8d} {:8d}".format("", "Total", total, 64 | total - l2_total, l2_total)) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /kvm_vmexit_slower.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | import ctypes as ct 4 | import sys 5 | 6 | from bcc import BPF 7 | 8 | if not hasattr(BPF, "perf_buffer_poll"): 9 | BPF.perf_buffer_poll = BPF.kprobe_poll 10 | 11 | from exit_reason import EXIT_REASON 12 | 13 | text = """ 14 | struct data{ 15 | u64 time; 16 | int nested; 17 | unsigned int exit_reason; 18 | }; 19 | 20 | BPF_HASH(start_time, u64, struct data); 21 | BPF_PERF_OUTPUT(events); 22 | 23 | TRACEPOINT_PROBE(kvm, kvm_exit) { 24 | u64 id = bpf_get_current_pid_tgid(); 25 | struct data zero = {.time=0, .nested=0, .exit_reason=0}; 26 | struct data *val = start_time.lookup_or_init(&id, &zero); 27 | val->time = bpf_ktime_get_ns(); 28 | val->exit_reason = args->exit_reason; 29 | val->nested = 0; 30 | return 0; 31 | } 32 | 33 | TRACEPOINT_PROBE(kvm, kvm_nested_vmexit) { 34 | u64 id = 
bpf_get_current_pid_tgid(); 35 | struct data *val = start_time.lookup(&id); 36 | if (val != 0){ 37 | val->nested = 1; 38 | }else{ 39 | // something wrong 40 | } 41 | return 0; 42 | } 43 | 44 | TRACEPOINT_PROBE(kvm, kvm_entry) { 45 | u64 id = bpf_get_current_pid_tgid(); 46 | struct data *st = start_time.lookup(&id); 47 | if (st != 0){ 48 | st->time = bpf_ktime_get_ns() - st->time; 49 | if(st->time >= THRESH){ 50 | events.perf_submit(args, st, sizeof(struct data)); 51 | } 52 | } 53 | return 0; 54 | } 55 | """ 56 | 57 | class Data(ct.Structure): 58 | _fields_ = [("time", ct.c_uint64), ("nested", ct.c_int), ("exit_reason", ct.c_uint)] 59 | 60 | 61 | def main(thresh=10, excludes=[]): 62 | global text 63 | 64 | if (type(excludes) == int): 65 | excludes = [excludes] 66 | 67 | if len(excludes) > 0: 68 | print("Excludes: {}".format(",".join([EXIT_REASON[i] for i in excludes]))) 69 | 70 | # XXX: unit of thresh is milliseconds 71 | # mill to ns 72 | thresh = "{}ULL".format(int(thresh * 1000000)); 73 | text = text.replace("THRESH", thresh) 74 | 75 | def print_event(cpu, data, size): 76 | event = ct.cast(data, ct.POINTER(Data)).contents 77 | if event.exit_reason not in excludes: 78 | print("{:3d} {:18s} {:8.0f} ({})".format(event.exit_reason, EXIT_REASON[event.exit_reason], event.time, "L1" if event.nested == 0 else "L2")) 79 | 80 | # load BPF program 81 | b = BPF(text=text) 82 | b["events"].open_perf_buffer(print_event, page_cnt=64) 83 | 84 | print("Tracing... 
Hit Ctrl-C to end.") 85 | try: 86 | while True: 87 | b.perf_buffer_poll() 88 | except KeyboardInterrupt: 89 | pass 90 | print() 91 | 92 | if __name__ == "__main__": 93 | import fire 94 | fire.Fire(main) 95 | -------------------------------------------------------------------------------- /kvm_vmexit_time.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from collections import defaultdict 4 | from time import sleep 5 | 6 | from bcc import BPF 7 | 8 | from exit_reason import EXIT_REASON 9 | 10 | text = """ 11 | 12 | struct tmp{ 13 | u64 time; 14 | int nested; 15 | unsigned int exit_reason; 16 | }; 17 | 18 | struct value{ 19 | u64 cumulative_time; 20 | u64 count; 21 | }; 22 | 23 | BPF_HASH(start_time, u64, struct tmp); 24 | BPF_HASH(counts, unsigned int, struct value); 25 | BPF_HASH(counts_nested, unsigned int, struct value); 26 | 27 | TRACEPOINT_PROBE(kvm, kvm_exit) { 28 | u64 id = bpf_get_current_pid_tgid(); 29 | struct tmp zero = {.time=0, .nested=0, .exit_reason=0}; 30 | struct tmp *val = start_time.lookup_or_init(&id, &zero); 31 | val->time = bpf_ktime_get_ns(); 32 | val->exit_reason = args->exit_reason; 33 | val->nested = 0; 34 | return 0; 35 | } 36 | 37 | TRACEPOINT_PROBE(kvm, kvm_nested_vmexit) { 38 | u64 id = bpf_get_current_pid_tgid(); 39 | struct tmp *val = start_time.lookup(&id); 40 | if (val != 0){ 41 | val->nested = 1; 42 | }else{ 43 | // something wrong 44 | } 45 | return 0; 46 | } 47 | 48 | TRACEPOINT_PROBE(kvm, kvm_entry) { 49 | u64 id = bpf_get_current_pid_tgid(); 50 | struct tmp *st = start_time.lookup(&id); 51 | if (st != 0){ 52 | unsigned int exit_reason = st->exit_reason; 53 | struct value zero = {.cumulative_time = 0, .count = 0}; 54 | struct value* val; 55 | if(st->nested == 0){ 56 | val = counts.lookup_or_init(&exit_reason, &zero); 57 | }else{ 58 | val = counts_nested.lookup_or_init(&exit_reason, &zero); 59 | } 60 | val->cumulative_time 
+= bpf_ktime_get_ns() - st->time; 61 | val->count += 1; 62 | } 63 | return 0; 64 | } 65 | """ 66 | 67 | 68 | def main(): 69 | # load BPF program 70 | b = BPF(text=text) 71 | 72 | print("Tracing... Hit Ctrl-C to end.") 73 | try: 74 | sleep(99999999) 75 | except KeyboardInterrupt: 76 | pass 77 | print() 78 | 79 | result = {} 80 | for table_name in ("counts", "counts_nested"): 81 | cs = b.get_table(table_name) 82 | result[table_name] = {} 83 | for k, v in cs.items(): 84 | k, c, t = k.value, v.count, v.cumulative_time 85 | result[table_name][EXIT_REASON[k]] = {"count": c, "cumul_time": t} 86 | 87 | print("{:3s} {:18s} {:>8s} {:>10s} {:>8s} {:>10s} {:>8s} {:>10s}".format( 88 | "", "Exit reason", "Total", "(Avg Time)", "L1", "(Avg Time)", "L2", "(Avg Time)")) 89 | l1_total = 1 90 | l2_total = 0 91 | l1_time_total = 0 92 | l2_time_total = 0 93 | for i, e in enumerate(EXIT_REASON): 94 | cc = 0 95 | ct = 0 96 | c_avg = 0 97 | cnc = 0 98 | cnt = 0 99 | cn_avg = 0 100 | if e in result["counts"]: 101 | c = result["counts"][e] 102 | cc, ct = c["count"], c["cumul_time"] 103 | c_avg = ct / float(cc) 104 | if e in result["counts_nested"]: 105 | cn = result["counts_nested"][e] 106 | cnc, cnt = cn["count"], cn["cumul_time"] 107 | cn_avg = cnt / float(cnc) 108 | if cc > 0 or cnc > 0: 109 | t = cc + cnc 110 | t_avg = (ct + cnt) / float(cc + cnc) 111 | print("{:3d} {:18s} {:8d} ({:8.0f}) {:8d} ({:8.0f}) {:8d} ({:8.0f})".format( 112 | i, e, t, t_avg, cc, c_avg, cnc, cn_avg)) 113 | l1_total += cc 114 | l2_total += cnc 115 | l1_time_total += ct 116 | l2_time_total += cnt 117 | print("{:3s} {:18s} {:8d} ({:8.0f}) {:8d} ({:8.0f}) {:8d} ({:8.0f})".format("", "Total (Avg Time)", l1_total + l2_total, (l1_time_total + l2_time_total) / (l1_total + l2_total), 118 | l1_total, l1_time_total / l1_total, 119 | l2_total, l2_time_total / l2_total)) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() 124 | -------------------------------------------------------------------------------- 
#!/usr/bin/env python
"""kvm_vmexit_time_hist.py - histogram of KVM VMEXIT handling time.

For every vCPU thread, measures the time between vmx_handle_exit() being
entered and the next kvm:kvm_entry tracepoint, and prints the distribution
as a log2 histogram (values in nanoseconds) once tracing is stopped with
Ctrl-C.
"""
from __future__ import print_function
from collections import defaultdict
from time import sleep

from bcc import BPF

from exit_reason import EXIT_REASON

# BPF program: measure the time between vmexit and vmentry.
#
# Start timestamps are keyed by bpf_get_current_pid_tgid(), the same key the
# sibling kvm_vmexit_slower.py / kvm_vmexit_time.py scripts use.  A bare
# vcpu_id is only unique within one VM, so with several VMs running the
# timestamps of different guests would overwrite each other.
text = """
#include <uapi/linux/ptrace.h>

BPF_HISTOGRAM(dist);
BPF_HASH(start_time, u64, u64);

// Start of exit handling: remember when this thread entered the handler.
int kprobe__vmx_handle_exit(struct pt_regs *ctx) {
    u64 id = bpf_get_current_pid_tgid();
    u64 now = bpf_ktime_get_ns();
    start_time.update(&id, &now);
    return 0;
}

// End of exit handling: the guest is about to be re-entered.
TRACEPOINT_PROBE(kvm, kvm_entry) {
    u64 id = bpf_get_current_pid_tgid();
    u64 *start = start_time.lookup(&id);
    if (start != 0) {
        dist.increment(bpf_log2l(bpf_ktime_get_ns() - *start));
        // Drop the timestamp so that an entry without a freshly recorded
        // exit is never measured against a stale value.
        start_time.delete(&id);
    }
    return 0;
}
"""


def main():
    """Trace until interrupted, then print the handling-time histogram.

    Loads the BPF program above, blocks until the user presses Ctrl-C and
    finally prints the ``dist`` histogram via ``print_log2_hist()``.
    """
    b = BPF(text=text)

    print("Tracing... Hit Ctrl-C to end.")
    try:
        sleep(99999999)
    except KeyboardInterrupt:
        # Ctrl-C is the documented way to stop tracing, not an error.
        pass
    print()

    b["dist"].print_log2_hist()


if __name__ == "__main__":
    main()