├── README.md ├── readaheadstat.bt ├── readaheadstat.py └── readaheadstat_influxdb.py /README.md: -------------------------------------------------------------------------------- 1 | # Read Ahead Cache Stats 2 | 3 | > :warning: The `readaheadstat.py` tool was originally showcased in the "BPF Performance Tools" book by Brendan Gregg. It has now been upstreamed in the official [BCC repository](https://github.com/iovisor/bcc/blob/master/tools/readahead.py) and will be maintained there. You can still refer to the influxdb version here to see how it works. 4 | 5 | This tool shows the performance of the read ahead mechanism in your FS, specifically 6 | highlighting unused pages in the cache and how long they have remained there. Cache 7 | misses in the read-ahead cache reduce the performance of a FS call and can be an important 8 | metric in understanding how modern architectures affect such metrics in the system. 9 | 10 | ## `readaheadstat.bt` 11 | 12 | This tool requires [`bpftrace`](https://github.com/iovisor/bpftrace) to be installed on the system 13 | 14 | ``` 15 | $ sudo ./readaheadstat.bt 16 | 17 | Attaching 5 probes... 18 | ^C 19 | Readahead unused pages: 15816 20 | 21 | Readahead used page age (ms): 22 | @age_ms: 23 | [0] 2216 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 24 | [1] 74 |@ | 25 | [2, 4) 152 |@@ | 26 | [4, 8) 553 |@@@@@@@ | 27 | [8, 16) 116 |@ | 28 | [16, 32) 81 |@ | 29 | [32, 64) 79 |@ | 30 | [64, 128) 88 |@ | 31 | [128, 256) 601 |@@@@@@@@ | 32 | [256, 512) 157 |@@ | 33 | [512, 1K) 107 |@ | 34 | [1K, 2K) 136 |@ | 35 | [2K, 4K) 3689 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| 36 | [4K, 8K) 82 |@ | 37 | [8K, 16K) 1170 |@@@@@@@@@@@@@@@@ | 38 | ``` 39 | 40 | ## `readaheadstat.py` 41 | 42 | This tool requires [BCC](https://github.com/iovisor/bcc) to be installed on your system 43 | 44 | ``` 45 | $ sudo ./readaheadstat.py 46 | 47 | Tracing... Hit Ctrl-C to end. 
48 | ^C 49 | Read-ahead unused pages: 3630 50 | Histogram of read-ahead used page age (ms) 51 | ========================================== 52 | ms : count distribution 53 | 0 -> 1 : 136 |***************** | 54 | 2 -> 3 : 0 | | 55 | 4 -> 7 : 0 | | 56 | 8 -> 15 : 1 | | 57 | 16 -> 31 : 0 | | 58 | 32 -> 63 : 0 | | 59 | 64 -> 127 : 0 | | 60 | 128 -> 255 : 0 | | 61 | 256 -> 511 : 43 |***** | 62 | 512 -> 1023 : 0 | | 63 | 1024 -> 2047 : 312 |****************************************| 64 | ``` 65 | 66 | -------------------------------------------------------------------------------- /readaheadstat.bt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bpftrace 2 | 3 | # This was originally created for the BPF Performance Tools book 4 | # published by Addison Wesley. ISBN-13: 9780136554820 5 | # When copying or porting, include this comment. 6 | 7 | kprobe:__do_page_cache_readahead { @in_readahead[tid] = 1; } 8 | kretprobe:__do_page_cache_readahead { @in_readahead[tid] = 0; } 9 | 10 | kretprobe:__page_cache_alloc 11 | /@in_readahead[tid]/ 12 | { 13 | @birth[retval] = nsecs; 14 | @rapages++; 15 | } 16 | 17 | kprobe:mark_page_accessed 18 | /@birth[arg0]/ 19 | { 20 | @age_ms = hist((nsecs - @birth[arg0]) / 1000000); 21 | delete(@birth[arg0]); 22 | @rapages--; 23 | } 24 | 25 | END 26 | { 27 | printf("\nReadahead unused pages: %d\n", @rapages); 28 | printf("\nReadahead used page age (ms):\n"); 29 | print(@age_ms); clear(@age_ms); 30 | clear(@birth); clear(@in_readahead); clear(@rapages); 31 | } 32 | -------------------------------------------------------------------------------- /readaheadstat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # @lint-avoid-python-3-compatibility-imports 3 | # 4 | # readahead Show performance of read-ahead cache 5 | # For Linux, uses BCC, eBPF 6 | # 7 | # Copyright (c) 2020 Suchakra Sharma 8 | # Licensed under the Apache License, Version 2.0 
(the "License") 9 | # This was originally created for the BPF Performance Tools book 10 | # published by Addison Wesley. ISBN-13: 9780136554820 11 | # When copying or porting, include this comment. 12 | # 13 | # 20-Aug-2020 Suchakra Sharma Ported from bpftrace to BCC 14 | 15 | from __future__ import print_function 16 | from bcc import BPF 17 | from time import sleep 18 | import ctypes as ct 19 | import argparse 20 | 21 | # arguments 22 | examples = """examples: 23 | ./readahead -d 20 # monitor for 10 seconds and generate stats 24 | """ 25 | 26 | parser = argparse.ArgumentParser( 27 | description="Monitor performance of read ahead cache", 28 | formatter_class=argparse.RawDescriptionHelpFormatter, 29 | epilog=examples) 30 | parser.add_argument("-d", "--duration", type=int, 31 | help="total duration to monitor for, in seconds") 32 | args = parser.parse_args() 33 | if not args.duration: 34 | args.duration = 99999999 35 | 36 | # BPF program 37 | program = """ 38 | #include 39 | #include 40 | BPF_HASH(flag, u32, u8); // used to track if we are in do_page_cache_readahead() 41 | BPF_HASH(birth, struct page*, u64); // used to track timestamps of cache alloc'ed page 42 | BPF_ARRAY(pages); // increment/decrement readahead pages 43 | BPF_HISTOGRAM(dist); 44 | int entry__do_page_cache_readahead(struct pt_regs *ctx) { 45 | u32 pid; 46 | u8 one = 1; 47 | pid = bpf_get_current_pid_tgid(); 48 | flag.update(&pid, &one); 49 | return 0; 50 | } 51 | int exit__do_page_cache_readahead(struct pt_regs *ctx) { 52 | u32 pid; 53 | u8 zero = 0; 54 | pid = bpf_get_current_pid_tgid(); 55 | flag.update(&pid, &zero); 56 | return 0; 57 | } 58 | int exit__page_cache_alloc(struct pt_regs *ctx) { 59 | u32 pid; 60 | u64 ts; 61 | struct page *retval = (struct page*) PT_REGS_RC(ctx); 62 | u32 zero = 0; // static key for accessing pages[0] 63 | pid = bpf_get_current_pid_tgid(); 64 | u8 *f = flag.lookup(&pid); 65 | if (f != NULL && *f == 1) { 66 | ts = bpf_ktime_get_ns(); 67 | birth.update(&retval, &ts); 
68 | u64 *count = pages.lookup(&zero); 69 | if (count) (*count)++; // increment read ahead pages count 70 | } 71 | return 0; 72 | } 73 | int entry_mark_page_accessed(struct pt_regs *ctx) { 74 | u64 ts, delta; 75 | struct page *arg0 = (struct page *) PT_REGS_PARM1(ctx); 76 | u32 zero = 0; // static key for accessing pages[0] 77 | u64 *bts = birth.lookup(&arg0); 78 | if (bts != NULL) { 79 | delta = bpf_ktime_get_ns() - *bts; 80 | dist.increment(bpf_log2l(delta/1000000)); 81 | u64 *count = pages.lookup(&zero); 82 | if (count) (*count)--; // decrement read ahead pages count 83 | birth.delete(&arg0); // remove the entry from hashmap 84 | } 85 | return 0; 86 | } 87 | """ 88 | 89 | b = BPF(text=program) 90 | b.attach_kprobe(event="__do_page_cache_readahead", fn_name="entry__do_page_cache_readahead") 91 | b.attach_kretprobe(event="__do_page_cache_readahead", fn_name="exit__do_page_cache_readahead") 92 | b.attach_kretprobe(event="__page_cache_alloc", fn_name="exit__page_cache_alloc") 93 | b.attach_kprobe(event="mark_page_accessed", fn_name="entry_mark_page_accessed") 94 | 95 | # header 96 | print("Tracing... 
Hit Ctrl-C to end.") 97 | 98 | # print 99 | def print_stats(): 100 | print() 101 | print("Read-ahead unused pages: %d" % (b["pages"][ct.c_ulong(0)].value)) 102 | print("Histogram of read-ahead used page age (ms):") 103 | print("") 104 | b["dist"].print_log2_hist("age (ms)") 105 | b["dist"].clear() 106 | b["pages"].clear() 107 | 108 | while True: 109 | try: 110 | sleep(args.duration) 111 | print_stats() 112 | except KeyboardInterrupt: 113 | print_stats() 114 | break 115 | -------------------------------------------------------------------------------- /readaheadstat_influxdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # @lint-avoid-python-3-compatibility-imports 3 | # 4 | # readaheadstat Count unused pages in read ahead cache with age 5 | # For Linux, uses bpftrace, eBPF 6 | # 7 | # Copyright (c) 2020 Suchakra Sharma 8 | # Licensed under the Apache License, Version 2.0 (the "License") 9 | # 10 | # 20-Aug-2020 Suchakra Sharma Created this. 
11 | 12 | from __future__ import print_function 13 | from bcc import BPF 14 | import bcc 15 | from time import sleep 16 | from influxdb import InfluxDBClient 17 | import ctypes as ct 18 | 19 | program = """ 20 | #include 21 | #include 22 | 23 | BPF_HASH(flag, u32, u8); // used to track if we are in do_page_cache_readahead() 24 | BPF_HASH(birth, struct page*, u64); // used to track timestamps of cache alloc'ed page 25 | BPF_ARRAY(pages); // increment/decrement readahead pages 26 | BPF_HISTOGRAM(dist); 27 | 28 | int entry__do_page_cache_readahead(struct pt_regs *ctx) { 29 | u32 pid; 30 | u8 one = 1; 31 | pid = bpf_get_current_pid_tgid(); 32 | flag.update(&pid, &one); 33 | return 0; 34 | } 35 | 36 | int exit__do_page_cache_readahead(struct pt_regs *ctx) { 37 | u32 pid; 38 | u8 zero = 0; 39 | pid = bpf_get_current_pid_tgid(); 40 | flag.update(&pid, &zero); 41 | return 0; 42 | } 43 | 44 | int exit__page_cache_alloc(struct pt_regs *ctx) { 45 | u32 pid; 46 | u64 ts; 47 | struct page *retval = (struct page*) PT_REGS_RC(ctx); 48 | u32 zero = 0; // static key for accessing pages[0] 49 | pid = bpf_get_current_pid_tgid(); 50 | u8 *f = flag.lookup(&pid); 51 | if (f != NULL && *f == 1) { 52 | ts = bpf_ktime_get_ns(); 53 | birth.update(&retval, &ts); 54 | 55 | u64 *count = pages.lookup(&zero); 56 | if (count) (*count)++; // increment read ahead pages count 57 | } 58 | return 0; 59 | } 60 | 61 | int entry_mark_page_accessed(struct pt_regs *ctx) { 62 | u64 ts, delta; 63 | struct page *arg0 = (struct page *) PT_REGS_PARM1(ctx); 64 | u32 zero = 0; // static key for accessing pages[0] 65 | u64 *bts = birth.lookup(&arg0); 66 | if (bts != NULL) { 67 | delta = bpf_ktime_get_ns() - *bts; 68 | dist.increment(bpf_log2l(delta/1000000)); 69 | 70 | u64 *count = pages.lookup(&zero); 71 | if (count) (*count)--; // decrement read ahead pages count 72 | 73 | birth.delete(&arg0); // remove the entry from hashmap 74 | } 75 | return 0; 76 | } 77 | """ 78 | do_exit = 0 79 | b = BPF(text=program) 80 | 
b.attach_kprobe(event="__do_page_cache_readahead", fn_name="entry__do_page_cache_readahead") 81 | b.attach_kretprobe(event="__do_page_cache_readahead", fn_name="exit__do_page_cache_readahead") 82 | b.attach_kretprobe(event="__page_cache_alloc", fn_name="exit__page_cache_alloc") 83 | b.attach_kprobe(event="mark_page_accessed", fn_name="entry_mark_page_accessed") 84 | 85 | dbClient = InfluxDBClient('localhost', 8086, 'root', 'root', 'ReadAHead') 86 | 87 | # Write the time series data points into database - user login details 88 | dbClient.create_database('ReadAHead') 89 | 90 | while (1): 91 | try: 92 | sleep(5) 93 | except KeyboardInterrupt: 94 | pass; do_exit = 1 95 | usedPage = 0 96 | for k, v in b["dist"].items(): 97 | usedPage = usedPage + v.value 98 | logEvents= [{"measurement":"eBPF", 99 | "fields": 100 | { 101 | "{category}".format(category = k.value):v.value, 102 | } 103 | } 104 | ] 105 | 106 | dbClient.write_points(logEvents) 107 | #print(logEvents) 108 | b["dist"].print_log2_hist("usecs") 109 | logEvents= [{"measurement":"eBPF", 110 | "fields": 111 | { 112 | "unused":b["pages"][ct.c_ulong(0)].value, 113 | "used":usedPage 114 | } 115 | } 116 | ] 117 | dbClient.write_points(logEvents) 118 | if do_exit: 119 | exit() 120 | --------------------------------------------------------------------------------