#!/usr/bin/python # @lint-avoid-python-3-compatibility-imports # # readahead Show performance of read-ahead cache # For Linux, uses BCC, eBPF # # Copyright (c) 2020 Suchakra Sharma # Licensed under the Apache License, Version 2.0 (the "License") # This was originally created for the BPF Performance Tools book # published by Addison Wesley. ISBN-13: 9780136554820 # When copying or porting, include this comment. # # 20-Aug-2020 Suchakra Sharma Ported from bpftrace to BCC # 17-Sep-2021 Hengqi Chen Migrated to kfunc from __future__ import print_function from bcc import BPF from time import sleep import ctypes as ct import argparse # arguments examples = """examples: ./readahead -d 20 # monitor for 20 seconds and generate stats """ parser = argparse.ArgumentParser( description="Monitor performance of read ahead cache", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples) parser.add_argument("-d", "--duration", type=int, help="total duration to monitor for, in seconds") args = parser.parse_args() if not args.duration: args.duration = 99999999 # BPF program bpf_text = """ #include #include BPF_HASH(flag, u32, u8); // used to track if we are in do_page_cache_readahead() BPF_HASH(birth, struct page*, u64); // used to track timestamps of cache alloc'ed page BPF_ARRAY(pages); // increment/decrement readahead pages BPF_HISTOGRAM(dist); """ bpf_text_kprobe = """ int entry__do_page_cache_readahead(struct pt_regs *ctx) { u32 pid; u8 one = 1; pid = bpf_get_current_pid_tgid(); flag.update(&pid, &one); return 0; } int exit__do_page_cache_readahead(struct pt_regs *ctx) { u32 pid; u8 zero = 0; pid = bpf_get_current_pid_tgid(); flag.update(&pid, &zero); return 0; } int exit__page_cache_alloc(struct pt_regs *ctx) { u32 pid; u64 ts; struct page *retval = (struct page*) PT_REGS_RC(ctx); u32 zero = 0; // static key for accessing pages[0] pid = bpf_get_current_pid_tgid(); u8 *f = flag.lookup(&pid); if (f != NULL && *f == 1) { ts = bpf_ktime_get_ns(); birth.update(&retval, &ts); pages.atomic_increment(zero); } return 0; } int entry_mark_page_accessed(struct pt_regs *ctx) { u64 ts, delta; struct page *arg0 = (struct page *) PT_REGS_PARM1(ctx); u32 zero = 0; // static key for accessing pages[0] u64 *bts = birth.lookup(&arg0); if (bts != NULL) { delta = bpf_ktime_get_ns() - *bts; dist.atomic_increment(bpf_log2l(delta/1000000)); pages.atomic_increment(zero, -1); birth.delete(&arg0); // remove the entry from hashmap } return 0; } """ bpf_text_kfunc = """ KFUNC_PROBE(RA_FUNC) { u32 pid = bpf_get_current_pid_tgid(); u8 one = 1; flag.update(&pid, &one); return 0; } KRETFUNC_PROBE(RA_FUNC) { u32 pid = bpf_get_current_pid_tgid(); u8 zero = 0; flag.update(&pid, &zero); return 0; } KRETFUNC_PROBE(__page_cache_alloc, gfp_t gfp, struct page *retval) { u64 ts; u32 zero = 0; // static key for accessing pages[0] u32 pid = bpf_get_current_pid_tgid(); u8 *f = flag.lookup(&pid); if (f != NULL && *f == 1) { ts = bpf_ktime_get_ns(); birth.update(&retval, &ts); pages.atomic_increment(zero); } return 0; } KFUNC_PROBE(mark_page_accessed, struct page *arg0) { u64 ts, delta; u32 zero = 0; // static key for accessing pages[0] u64 *bts = birth.lookup(&arg0); if (bts != NULL) { delta = bpf_ktime_get_ns() - *bts; dist.atomic_increment(bpf_log2l(delta/1000000)); pages.atomic_increment(zero, -1); birth.delete(&arg0); // remove the entry from hashmap } return 0; } """ if BPF.support_kfunc(): if BPF.get_kprobe_functions(b"__do_page_cache_readahead"): ra_func = "__do_page_cache_readahead" else: ra_func = "do_page_cache_ra" bpf_text += bpf_text_kfunc.replace("RA_FUNC", ra_func) b = BPF(text=bpf_text) else: bpf_text += bpf_text_kprobe b = BPF(text=bpf_text) if BPF.get_kprobe_functions(b"__do_page_cache_readahead"): ra_event = "__do_page_cache_readahead" else: ra_event = "do_page_cache_ra" b.attach_kprobe(event=ra_event, fn_name="entry__do_page_cache_readahead") b.attach_kretprobe(event=ra_event, fn_name="exit__do_page_cache_readahead") b.attach_kretprobe(event="__page_cache_alloc", fn_name="exit__page_cache_alloc") b.attach_kprobe(event="mark_page_accessed", fn_name="entry_mark_page_accessed") # header print("Tracing... Hit Ctrl-C to end.") # print def print_stats(): print() print("Read-ahead unused pages: %d" % (b["pages"][ct.c_ulong(0)].value)) print("Histogram of read-ahead used page age (ms):") print("") b["dist"].print_log2_hist("age (ms)") b["dist"].clear() b["pages"].clear() while True: try: sleep(args.duration) print_stats() except KeyboardInterrupt: print_stats() break