diff --git a/tools/funcslower.py b/tools/funcslower.py new file mode 100755 index 000000000000..b8ef230ae9e4 --- /dev/null +++ b/tools/funcslower.py @@ -0,0 +1,227 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# funcslower Trace slow kernel or user function calls. +# For Linux, uses BCC, eBPF. +# +# USAGE: funcslower [-h] [-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] +# [-T] [-t] [-v] function [function ...] +# +# WARNING: This tool traces function calls by instrumenting the entry and +# return from each function. For commonly-invoked functions like memory allocs +# or file writes, this can be extremely expensive. Mind the overhead. +# +# By default, a minimum millisecond threshold of 1 is used. +# +# Copyright 2017, Sasha Goldshtein +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 30-Mar-2017 Sasha Goldshtein Created this. + +from __future__ import print_function +from bcc import BPF +import argparse +import ctypes as ct +import time + +examples = """examples: + ./funcslower vfs_write # trace vfs_write calls slower than 1ms + ./funcslower -m 10 vfs_write # same, but slower than 10ms + ./funcslower -u 10 c:open # trace open calls slower than 10us + ./funcslower -p 135 c:open # trace pid 135 only + ./funcslower c:malloc c:free # trace both malloc and free slower than 1ms + ./funcslower -a 2 c:open # show first two arguments to open +""" +parser = argparse.ArgumentParser( + description="Trace slow kernel or user function calls.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid", + help="trace this PID only") +parser.add_argument("-m", "--min-ms", type=float, dest="min_ms", + help="minimum duration to trace (ms)") +parser.add_argument("-u", "--min-us", type=float, dest="min_us", + help="minimum duration to trace (us)") +parser.add_argument("-a", "--arguments", type=int, + help="print this many entry arguments, as hex") +parser.add_argument("-T", "--time", action="store_true", + help="show HH:MM:SS timestamp") +parser.add_argument("-t", "--timestamp", action="store_true", + help="show timestamp in seconds at us resolution") +parser.add_argument("-v", "--verbose", action="store_true", + help="print the BPF program for debugging purposes") +parser.add_argument(metavar="function", nargs="+", dest="functions", + help="function(s) to trace") + +args = parser.parse_args() +# fractions are allowed, but rounded to an integer nanosecond +if args.min_ms: + duration_ns = int(args.min_ms * 1000000) +elif args.min_us: + duration_ns = int(args.min_us * 1000) +else: + duration_ns = 1000000 # default to 1ms + +bpf_text = """ +#include +#include // for TASK_COMM_LEN + +struct entry_t { + u64 id; + u64 start_ns; +#ifdef GRAB_ARGS + u64 args[6]; +#endif +}; + +struct data_t { + u64 id; + u64 tgid_pid; + u64 start_ns; + u64 duration_ns; + u64 retval; + char comm[TASK_COMM_LEN]; +#ifdef GRAB_ARGS + u64 args[6]; +#endif +}; + +BPF_HASH(entryinfo, u64, struct entry_t); +BPF_PERF_OUTPUT(events); + +static int trace_entry(struct pt_regs *ctx, int id) +{ + u64 tgid_pid = bpf_get_current_pid_tgid(); + u32 tgid = tgid_pid >> 32; + if (TGID_FILTER) + return 0; + + u32 pid = tgid_pid; + + struct entry_t entry = {}; + entry.start_ns = bpf_ktime_get_ns(); + entry.id = id; +#ifdef GRAB_ARGS + entry.args[0] = PT_REGS_PARM1(ctx); + entry.args[1] = PT_REGS_PARM2(ctx); + entry.args[2] = PT_REGS_PARM3(ctx); + entry.args[3] = PT_REGS_PARM4(ctx); + entry.args[4] = PT_REGS_PARM5(ctx); + entry.args[5] = PT_REGS_PARM6(ctx); +#endif + + entryinfo.update(&tgid_pid, &entry); + + return 0; +} + +int trace_return(struct pt_regs *ctx) +{ + struct entry_t *entryp; + u64 tgid_pid = bpf_get_current_pid_tgid(); + + entryp = entryinfo.lookup(&tgid_pid); + if (entryp == 0) { + return 0; + } + + u64 delta_ns = bpf_ktime_get_ns() - entryp->start_ns; + entryinfo.delete(&tgid_pid); + + if (delta_ns < DURATION_NS) + return 0; + + struct data_t data = {}; + data.id = entryp->id; + data.tgid_pid = tgid_pid; + data.start_ns = entryp->start_ns; + data.duration_ns = delta_ns; + data.retval = PT_REGS_RC(ctx); +#ifdef GRAB_ARGS + bpf_probe_read(&data.args[0], sizeof(data.args), entryp->args); +#endif + bpf_get_current_comm(&data.comm, sizeof(data.comm)); + events.perf_submit(ctx, &data, sizeof(data)); + + return 0; +} +""" + +bpf_text = bpf_text.replace('DURATION_NS', str(duration_ns)) +if args.arguments: + bpf_text = "#define GRAB_ARGS\n" + bpf_text +if args.tgid: + bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % tgid) +else: + bpf_text = bpf_text.replace('TGID_FILTER', '0') + +for i in range(len(args.functions)): + bpf_text += """ +int trace_%d(struct pt_regs *ctx) { + return trace_entry(ctx, %d); +} +""" % (i, i) + +if args.verbose: + print(bpf_text) + +b = BPF(text=bpf_text) + +for i, function in enumerate(args.functions): + if ":" in function: + library, func = function.split(":") + b.attach_uprobe(name=library, sym=func, fn_name="trace_%d" % i) + b.attach_uretprobe(name=library, sym=func, fn_name="trace_return") + else: + b.attach_kprobe(event=function, fn_name="trace_%d" % i) + b.attach_kretprobe(event=function, fn_name="trace_return") + +TASK_COMM_LEN = 16 # linux/sched.h + +class Data(ct.Structure): + _fields_ = [ + ("id", ct.c_ulonglong), + ("tgid_pid", ct.c_ulonglong), + ("start_ns", ct.c_ulonglong), + ("duration_ns", ct.c_ulonglong), + ("retval", ct.c_ulonglong), + ("comm", ct.c_char * TASK_COMM_LEN) + ] + ([("args", ct.c_ulonglong * 6)] if args.arguments else []) + +time_designator = "us" if args.min_us else "ms" +time_value = args.min_us or args.min_ms or 1 +time_multiplier = 1000 if args.min_us else 1000000 +time_col = args.time or args.timestamp +print("Tracing function calls slower than %g %s... Ctrl+C to quit." % + (time_value, time_designator)) +print((("%-10s " % "TIME" if time_col else "") + "%-14s %-6s %7s %16s %s") % + ("COMM", "PID", "LAT(%s)" % time_designator, "RVAL", + "FUNC" + (" ARGS" if args.arguments else ""))) + +earliest_ts = 0 + +def time_str(event): + if args.time: + return "%-10s " % time.strftime("%H:%M:%S") + if args.timestamp: + global earliest_ts + if earliest_ts == 0: + earliest_ts = event.start_ns + return "%-10.6f " % ((event.start_ns - earliest_ts) / 1000000000.0) + return "" + +def args_str(event): + if not args.arguments: + return "" + return str.join(" ", ["0x%x" % arg for arg in event.args[:args.arguments]]) + +def print_event(cpu, data, size): + event = ct.cast(data, ct.POINTER(Data)).contents + ts = float(event.duration_ns) / time_multiplier + print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") % + (event.comm.decode(), event.tgid_pid >> 32, + ts, event.retval, args.functions[event.id], args_str(event))) + +b["events"].open_perf_buffer(print_event, page_cnt=64) +while True: + b.kprobe_poll()