#!/usr/bin/env python # # cachestat Count cache kernel function calls. # For Linux, uses BCC, eBPF. See .c file. # # USAGE: cachestat # Taken from funccount by Brendan Gregg # This is a rewrite of cachestat from perf to bcc # https://github.com/brendangregg/perf-tools/blob/master/fs/cachestat # # Copyright (c) 2016 Allan McAleavy. # Copyright (c) 2015 Brendan Gregg. # Licensed under the Apache License, Version 2.0 (the "License") # # 09-Sep-2015 Brendan Gregg Created this. # 06-Nov-2015 Allan McAleavy # 13-Jan-2016 Allan McAleavy run pep8 against program # 02-Feb-2019 Brendan Gregg Column shuffle, bring back %ratio # 15-Feb-2023 Rong Tao Add writeback_dirty_{folio,page} tracepoints from __future__ import print_function from bcc import BPF from time import sleep, strftime import argparse import signal import re from sys import argv # signal handler def signal_ignore(signal, frame): print() # Function to gather data from /proc/meminfo # return dictionary for quicker lookup of both values def get_meminfo(): result = dict() for line in open('/proc/meminfo'): k = line.split(':', 3) v = k[1].split() result[k[0]] = int(v[0]) return result # set global variables mpa = 0 mbd = 0 apcl = 0 apd = 0 total = 0 misses = 0 hits = 0 debug = 0 # arguments parser = argparse.ArgumentParser( description="Count cache kernel function calls", formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("-T", "--timestamp", action="store_true", help="include timestamp on output") parser.add_argument("interval", nargs="?", default=1, help="output interval, in seconds") parser.add_argument("count", nargs="?", default=-1, help="number of outputs") parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) args = parser.parse_args() count = int(args.count) tstamp = args.timestamp interval = int(args.interval) # define BPF program bpf_text = """ #include struct key_t { // NF_{APCL,MPA,MBD,APD} u32 nf; }; enum { NF_APCL, NF_MPA, NF_MBD, NF_APD, }; BPF_HASH(counts, struct key_t); static int __do_count(void *ctx, u32 nf) { struct key_t key = {}; u64 ip; key.nf = nf; counts.atomic_increment(key); // update counter return 0; } int do_count_apcl(struct pt_regs *ctx) { return __do_count(ctx, NF_APCL); } int do_count_mpa(struct pt_regs *ctx) { return __do_count(ctx, NF_MPA); } int do_count_mbd(struct pt_regs *ctx) { return __do_count(ctx, NF_MBD); } int do_count_apd(struct pt_regs *ctx) { return __do_count(ctx, NF_APD); } int do_count_apd_tp(void *ctx) { return __do_count(ctx, NF_APD); } """ if debug or args.ebpf: print(bpf_text) if args.ebpf: exit() # load BPF program b = BPF(text=bpf_text) b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count_apcl") b.attach_kprobe(event="mark_page_accessed", fn_name="do_count_mpa") # Function account_page_dirtied() is changed to folio_account_dirtied() in 5.15. # Both folio_account_dirtied() and account_page_dirtied() are # static functions and they may be gone during compilation and this may # introduce some inaccuracy, use tracepoint writeback_dirty_{page,folio}, # instead when attaching kprobe fails, and report the running # error in time. if BPF.get_kprobe_functions(b'folio_account_dirtied'): b.attach_kprobe(event="folio_account_dirtied", fn_name="do_count_apd") elif BPF.get_kprobe_functions(b'account_page_dirtied'): b.attach_kprobe(event="account_page_dirtied", fn_name="do_count_apd") elif BPF.tracepoint_exists("writeback", "writeback_dirty_folio"): b.attach_tracepoint(tp="writeback:writeback_dirty_folio", fn_name="do_count_apd_tp") elif BPF.tracepoint_exists("writeback", "writeback_dirty_page"): b.attach_tracepoint(tp="writeback:writeback_dirty_page", fn_name="do_count_apd_tp") else: raise Exception("Failed to attach kprobe %s or %s or any tracepoint" % ("folio_account_dirtied", "account_page_dirtied")) b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count_mbd") # header if tstamp: print("%-8s " % "TIME", end="") print("%8s %8s %8s %8s %12s %10s" % ("HITS", "MISSES", "DIRTIES", "HITRATIO", "BUFFERS_MB", "CACHED_MB")) loop = 0 exiting = 0 while 1: if count > 0: loop += 1 if loop > count: exit() try: sleep(interval) except KeyboardInterrupt: exiting = 1 # as cleanup can take many seconds, trap Ctrl-C: signal.signal(signal.SIGINT, signal_ignore) counts = b["counts"] for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): # partial string matches in case of .isra (necessary?) if k.nf == 0: # NF_APCL apcl = max(0, v.value) if k.nf == 1: # NF_MPA mpa = max(0, v.value) if k.nf == 2: # NF_MBD mbd = max(0, v.value) if k.nf == 3: # NF_APD apd = max(0, v.value) # total = total cache accesses without counting dirties # misses = total of add to lru because of read misses total = mpa - mbd misses = apcl - apd if misses < 0: misses = 0 if total < 0: total = 0 hits = total - misses # If hits are < 0, then its possible misses are overestimated # due to possibly page cache read ahead adding more pages than # needed. In this case just assume misses as total and reset hits. if hits < 0: misses = total hits = 0 ratio = 0 if total > 0: ratio = float(hits) / total if debug: print("%d %d %d %d %d %d %d\n" % (mpa, mbd, apcl, apd, total, misses, hits)) counts.clear() # Get memory info mem = get_meminfo() cached = int(mem["Cached"]) / 1024 buff = int(mem["Buffers"]) / 1024 if tstamp: print("%-8s " % strftime("%H:%M:%S"), end="") print("%8d %8d %8d %7.2f%% %12.0f %10.0f" % (hits, misses, mbd, 100 * ratio, buff, cached)) mpa = mbd = apcl = apd = total = misses = hits = cached = buff = 0 if exiting: print("Detaching...") exit()