forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
funcslower: Trace slow kernel or user function calls
This tool attaches to entry and return points of specified functions and traces function calls slower than a given threshold. Resolves iovisor#915.
- Loading branch information
Showing
1 changed file
with
227 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
#!/usr/bin/python | ||
# @lint-avoid-python-3-compatibility-imports | ||
# | ||
# funcslower Trace slow kernel or user function calls. | ||
# For Linux, uses BCC, eBPF. | ||
# | ||
# USAGE: funcslower [-h] [-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] | ||
# [-T] [-t] [-v] function [function ...] | ||
# | ||
# WARNING: This tool traces function calls by instrumenting the entry and | ||
# return from each function. For commonly-invoked functions like memory allocs | ||
# or file writes, this can be extremely expensive. Mind the overhead. | ||
# | ||
# By default, a minimum millisecond threshold of 1 is used. | ||
# | ||
# Copyright 2017, Sasha Goldshtein | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 30-Mar-2017 Sasha Goldshtein Created this. | ||
|
||
from __future__ import print_function | ||
from bcc import BPF | ||
import argparse | ||
import ctypes as ct | ||
import time | ||
|
||
examples = """examples: | ||
./funcslower vfs_write # trace vfs_write calls slower than 1ms | ||
./funcslower -m 10 vfs_write # same, but slower than 10ms | ||
./funcslower -u 10 c:open # trace open calls slower than 10us | ||
./funcslower -p 135 c:open # trace pid 135 only | ||
./funcslower c:malloc c:free # trace both malloc and free slower than 1ms | ||
./funcslower -a 2 c:open # show first two arguments to open | ||
""" | ||
parser = argparse.ArgumentParser( | ||
description="Trace slow kernel or user function calls.", | ||
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
epilog=examples) | ||
parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid", | ||
help="trace this PID only") | ||
parser.add_argument("-m", "--min-ms", type=float, dest="min_ms", | ||
help="minimum duration to trace (ms)") | ||
parser.add_argument("-u", "--min-us", type=float, dest="min_us", | ||
help="minimum duration to trace (us)") | ||
parser.add_argument("-a", "--arguments", type=int, | ||
help="print this many entry arguments, as hex") | ||
parser.add_argument("-T", "--time", action="store_true", | ||
help="show HH:MM:SS timestamp") | ||
parser.add_argument("-t", "--timestamp", action="store_true", | ||
help="show timestamp in seconds at us resolution") | ||
parser.add_argument("-v", "--verbose", action="store_true", | ||
help="print the BPF program for debugging purposes") | ||
parser.add_argument(metavar="function", nargs="+", dest="functions", | ||
help="function(s) to trace") | ||
|
||
args = parser.parse_args() | ||
# fractions are allowed, but rounded to an integer nanosecond | ||
if args.min_ms: | ||
duration_ns = int(args.min_ms * 1000000) | ||
elif args.min_us: | ||
duration_ns = int(args.min_us * 1000) | ||
else: | ||
duration_ns = 1000000 # default to 1ms | ||
|
||
bpf_text = """ | ||
#include <uapi/linux/ptrace.h> | ||
#include <linux/sched.h> // for TASK_COMM_LEN | ||
struct entry_t { | ||
u64 id; | ||
u64 start_ns; | ||
#ifdef GRAB_ARGS | ||
u64 args[6]; | ||
#endif | ||
}; | ||
struct data_t { | ||
u64 id; | ||
u64 tgid_pid; | ||
u64 start_ns; | ||
u64 duration_ns; | ||
u64 retval; | ||
char comm[TASK_COMM_LEN]; | ||
#ifdef GRAB_ARGS | ||
u64 args[6]; | ||
#endif | ||
}; | ||
BPF_HASH(entryinfo, u64, struct entry_t); | ||
BPF_PERF_OUTPUT(events); | ||
static int trace_entry(struct pt_regs *ctx, int id) | ||
{ | ||
u64 tgid_pid = bpf_get_current_pid_tgid(); | ||
u32 tgid = tgid_pid >> 32; | ||
if (TGID_FILTER) | ||
return 0; | ||
u32 pid = tgid_pid; | ||
struct entry_t entry = {}; | ||
entry.start_ns = bpf_ktime_get_ns(); | ||
entry.id = id; | ||
#ifdef GRAB_ARGS | ||
entry.args[0] = PT_REGS_PARM1(ctx); | ||
entry.args[1] = PT_REGS_PARM2(ctx); | ||
entry.args[2] = PT_REGS_PARM3(ctx); | ||
entry.args[3] = PT_REGS_PARM4(ctx); | ||
entry.args[4] = PT_REGS_PARM5(ctx); | ||
entry.args[5] = PT_REGS_PARM6(ctx); | ||
#endif | ||
entryinfo.update(&tgid_pid, &entry); | ||
return 0; | ||
} | ||
int trace_return(struct pt_regs *ctx) | ||
{ | ||
struct entry_t *entryp; | ||
u64 tgid_pid = bpf_get_current_pid_tgid(); | ||
entryp = entryinfo.lookup(&tgid_pid); | ||
if (entryp == 0) { | ||
return 0; | ||
} | ||
u64 delta_ns = bpf_ktime_get_ns() - entryp->start_ns; | ||
entryinfo.delete(&tgid_pid); | ||
if (delta_ns < DURATION_NS) | ||
return 0; | ||
struct data_t data = {}; | ||
data.id = entryp->id; | ||
data.tgid_pid = tgid_pid; | ||
data.start_ns = entryp->start_ns; | ||
data.duration_ns = delta_ns; | ||
data.retval = PT_REGS_RC(ctx); | ||
#ifdef GRAB_ARGS | ||
bpf_probe_read(&data.args[0], sizeof(data.args), entryp->args); | ||
#endif | ||
bpf_get_current_comm(&data.comm, sizeof(data.comm)); | ||
events.perf_submit(ctx, &data, sizeof(data)); | ||
return 0; | ||
} | ||
""" | ||
|
||
bpf_text = bpf_text.replace('DURATION_NS', str(duration_ns)) | ||
if args.arguments: | ||
bpf_text = "#define GRAB_ARGS\n" + bpf_text | ||
if args.tgid: | ||
bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % tgid) | ||
else: | ||
bpf_text = bpf_text.replace('TGID_FILTER', '0') | ||
|
||
for i in range(len(args.functions)): | ||
bpf_text += """ | ||
int trace_%d(struct pt_regs *ctx) { | ||
return trace_entry(ctx, %d); | ||
} | ||
""" % (i, i) | ||
|
||
if args.verbose: | ||
print(bpf_text) | ||
|
||
b = BPF(text=bpf_text) | ||
|
||
for i, function in enumerate(args.functions): | ||
if ":" in function: | ||
library, func = function.split(":") | ||
b.attach_uprobe(name=library, sym=func, fn_name="trace_%d" % i) | ||
b.attach_uretprobe(name=library, sym=func, fn_name="trace_return") | ||
else: | ||
b.attach_kprobe(event=function, fn_name="trace_%d" % i) | ||
b.attach_kretprobe(event=function, fn_name="trace_return") | ||
|
||
TASK_COMM_LEN = 16 # linux/sched.h | ||
|
||
class Data(ct.Structure): | ||
_fields_ = [ | ||
("id", ct.c_ulonglong), | ||
("tgid_pid", ct.c_ulonglong), | ||
("start_ns", ct.c_ulonglong), | ||
("duration_ns", ct.c_ulonglong), | ||
("retval", ct.c_ulonglong), | ||
("comm", ct.c_char * TASK_COMM_LEN) | ||
] + ([("args", ct.c_ulonglong * 6)] if args.arguments else []) | ||
|
||
time_designator = "us" if args.min_us else "ms" | ||
time_value = args.min_us or args.min_ms or 1 | ||
time_multiplier = 1000 if args.min_us else 1000000 | ||
time_col = args.time or args.timestamp | ||
print("Tracing function calls slower than %g %s... Ctrl+C to quit." % | ||
(time_value, time_designator)) | ||
print((("%-10s " % "TIME" if time_col else "") + "%-14s %-6s %7s %16s %s") % | ||
("COMM", "PID", "LAT(%s)" % time_designator, "RVAL", | ||
"FUNC" + (" ARGS" if args.arguments else ""))) | ||
|
||
earliest_ts = 0 | ||
|
||
def time_str(event): | ||
if args.time: | ||
return "%-10s " % time.strftime("%H:%M:%S") | ||
if args.timestamp: | ||
global earliest_ts | ||
if earliest_ts == 0: | ||
earliest_ts = event.start_ns | ||
return "%-10.6f " % ((event.start_ns - earliest_ts) / 1000000000.0) | ||
return "" | ||
|
||
def args_str(event): | ||
if not args.arguments: | ||
return "" | ||
return str.join(" ", ["0x%x" % arg for arg in event.args[:args.arguments]]) | ||
|
||
def print_event(cpu, data, size): | ||
event = ct.cast(data, ct.POINTER(Data)).contents | ||
ts = float(event.duration_ns) / time_multiplier | ||
print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") % | ||
(event.comm.decode(), event.tgid_pid >> 32, | ||
ts, event.retval, args.functions[event.id], args_str(event))) | ||
|
||
b["events"].open_perf_buffer(print_event, page_cnt=64) | ||
while True: | ||
b.kprobe_poll() |