forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request iovisor#1078 from goldshtn/funcslower
funcslower: Trace slow kernel or user function calls
- Loading branch information
Showing
5 changed files
with
452 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
.TH funcslower 8 "2017-03-30" "USER COMMANDS" | ||
.SH NAME | ||
fileslower \- Trace slow kernel or user function calls. | ||
.SH SYNOPSIS | ||
.B fileslower [\-h] [\-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T] [-t] [-v] function [function ...] | ||
.SH DESCRIPTION | ||
This script traces a kernel or user function's entry and return points, and | ||
prints a message when the function's latency exceeded the specified threshold. | ||
Multiple functions are supported, and you can mix kernel functions with user | ||
functions in different libraries. | ||
|
||
WARNING: See the OVERHEAD section. | ||
|
||
By default, a minimum millisecond threshold of 1 is used. Recursive functions | ||
are not supported: only the inner-most recursive invocation will be traced. | ||
|
||
Since this uses BPF, only the root user can use this tool. | ||
.SH REQUIREMENTS | ||
CONFIG_BPF and bcc. | ||
.SH OPTIONS | ||
\-p PID | ||
Trace this PID only. | ||
.TP | ||
\-m MIN_NS | ||
Minimum duration to trace, in milliseconds. Default is 1 ms. | ||
.TP | ||
\-u MIN_US | ||
Minimum duration to trace, in microseconds. | ||
.TP | ||
\-a ARGUMENTS | ||
Print the function's arguments, up to 6. | ||
.TP | ||
\-T | ||
Print a HH:MM:SS timestamp with each entry. | ||
.TP | ||
\-t | ||
Print a seconds timestamp with each entry, at microsecond resolution. | ||
.TP | ||
\-v | ||
Print the resulting BPF program, for debugging purposes. | ||
.TP | ||
function | ||
The function to trace -- multiple functions are supported. If a plain function | ||
name is provided, the function is assumed to be a kernel function. For user | ||
functions, provide the library name and the function name, e.g. bash:readline | ||
or c:malloc. | ||
.SH EXAMPLES | ||
.TP | ||
Trace vfs_write calls slower than 1ms: | ||
# | ||
.B funcslower vfs_write | ||
.TP | ||
Trace open() calls in libc slower than 10us: | ||
# | ||
.B funcslower \-u 10 c:open | ||
.TP | ||
Trace both malloc() and free() slower than 10us, in pid 135 only: | ||
# | ||
.B funcslower \-p 135 \-u 10 c:malloc c:free | ||
.TP | ||
Trace the write syscall and print its first 4 arguments: | ||
# | ||
.B funcslower -a 4 SyS_write | ||
.SH FIELDS | ||
.TP | ||
TIME | ||
Time of the event as a human-readable HH:MM:SS format, or a timestamp in seconds | ||
at microsecond-accuracy from the first event seen. | ||
.TP | ||
COMM | ||
Process name. | ||
.TP | ||
PID | ||
Process ID. | ||
.TP | ||
LAT | ||
Latency of the operation in either microseconds (us) or milliseconds (ms). | ||
.TP | ||
RVAL | ||
The return value from the function. Often useful for diagnosing a relationship | ||
between slow and failed function calls. | ||
.TP | ||
FUNC | ||
The function name, followed by its arguments if requested. | ||
.SH OVERHEAD | ||
Depending on the function(s) being traced, overhead can become severe. For | ||
example, tracing a common function like malloc() can slow down a C/C++ program | ||
by a factor of 2 or more. On the other hand, tracing a low-frequency event like | ||
the SyS_setreuid() function will probably not be as prohibitive, and in fact | ||
negligible for functions that are called up to 100-1000 times per second. | ||
|
||
You should first use the funclatency and argdist tools for investigation, | ||
because they summarize data in-kernel and have a much lower overhead than this | ||
tool. To get a general idea of the number of times a particular function is | ||
called (and estimate the overhead), use the funccount tool, e.g.: | ||
.PP | ||
# funccount c:open | ||
.SH SOURCE | ||
This is from bcc. | ||
.IP | ||
https://github.com/iovisor/bcc | ||
.PP | ||
Also look in the bcc distribution for a companion _examples.txt file containing | ||
example usage, output, and commentary for this tool. | ||
.SH OS | ||
Linux | ||
.SH STABILITY | ||
Unstable - in development. | ||
.SH AUTHOR | ||
Sasha Goldshtein | ||
.SH SEE ALSO | ||
funccount(8), funclatency(8), argdist(8), trace(8) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
#!/usr/bin/python | ||
# @lint-avoid-python-3-compatibility-imports | ||
# | ||
# funcslower Trace slow kernel or user function calls. | ||
# For Linux, uses BCC, eBPF. | ||
# | ||
# USAGE: funcslower [-h] [-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] | ||
# [-T] [-t] [-v] function [function ...] | ||
# | ||
# WARNING: This tool traces function calls by instrumenting the entry and | ||
# return from each function. For commonly-invoked functions like memory allocs | ||
# or file writes, this can be extremely expensive. Mind the overhead. | ||
# | ||
# By default, a minimum millisecond threshold of 1 is used. | ||
# | ||
# Copyright 2017, Sasha Goldshtein | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 30-Mar-2017 Sasha Goldshtein Created this. | ||
|
||
from __future__ import print_function | ||
from bcc import BPF | ||
import argparse | ||
import ctypes as ct | ||
import time | ||
|
||
examples = """examples: | ||
./funcslower vfs_write # trace vfs_write calls slower than 1ms | ||
./funcslower -m 10 vfs_write # same, but slower than 10ms | ||
./funcslower -u 10 c:open # trace open calls slower than 10us | ||
./funcslower -p 135 c:open # trace pid 135 only | ||
./funcslower c:malloc c:free # trace both malloc and free slower than 1ms | ||
./funcslower -a 2 c:open # show first two arguments to open | ||
""" | ||
parser = argparse.ArgumentParser( | ||
description="Trace slow kernel or user function calls.", | ||
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
epilog=examples) | ||
parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid", | ||
help="trace this PID only") | ||
parser.add_argument("-m", "--min-ms", type=float, dest="min_ms", | ||
help="minimum duration to trace (ms)") | ||
parser.add_argument("-u", "--min-us", type=float, dest="min_us", | ||
help="minimum duration to trace (us)") | ||
parser.add_argument("-a", "--arguments", type=int, | ||
help="print this many entry arguments, as hex") | ||
parser.add_argument("-T", "--time", action="store_true", | ||
help="show HH:MM:SS timestamp") | ||
parser.add_argument("-t", "--timestamp", action="store_true", | ||
help="show timestamp in seconds at us resolution") | ||
parser.add_argument("-v", "--verbose", action="store_true", | ||
help="print the BPF program for debugging purposes") | ||
parser.add_argument(metavar="function", nargs="+", dest="functions", | ||
help="function(s) to trace") | ||
|
||
args = parser.parse_args() | ||
# fractions are allowed, but rounded to an integer nanosecond | ||
if args.min_ms: | ||
duration_ns = int(args.min_ms * 1000000) | ||
elif args.min_us: | ||
duration_ns = int(args.min_us * 1000) | ||
else: | ||
duration_ns = 1000000 # default to 1ms | ||
|
||
bpf_text = """ | ||
#include <uapi/linux/ptrace.h> | ||
#include <linux/sched.h> // for TASK_COMM_LEN | ||
struct entry_t { | ||
u64 id; | ||
u64 start_ns; | ||
#ifdef GRAB_ARGS | ||
u64 args[6]; | ||
#endif | ||
}; | ||
struct data_t { | ||
u64 id; | ||
u64 tgid_pid; | ||
u64 start_ns; | ||
u64 duration_ns; | ||
u64 retval; | ||
char comm[TASK_COMM_LEN]; | ||
#ifdef GRAB_ARGS | ||
u64 args[6]; | ||
#endif | ||
}; | ||
BPF_HASH(entryinfo, u64, struct entry_t); | ||
BPF_PERF_OUTPUT(events); | ||
static int trace_entry(struct pt_regs *ctx, int id) | ||
{ | ||
u64 tgid_pid = bpf_get_current_pid_tgid(); | ||
u32 tgid = tgid_pid >> 32; | ||
if (TGID_FILTER) | ||
return 0; | ||
u32 pid = tgid_pid; | ||
struct entry_t entry = {}; | ||
entry.start_ns = bpf_ktime_get_ns(); | ||
entry.id = id; | ||
#ifdef GRAB_ARGS | ||
entry.args[0] = PT_REGS_PARM1(ctx); | ||
entry.args[1] = PT_REGS_PARM2(ctx); | ||
entry.args[2] = PT_REGS_PARM3(ctx); | ||
entry.args[3] = PT_REGS_PARM4(ctx); | ||
entry.args[4] = PT_REGS_PARM5(ctx); | ||
entry.args[5] = PT_REGS_PARM6(ctx); | ||
#endif | ||
entryinfo.update(&tgid_pid, &entry); | ||
return 0; | ||
} | ||
int trace_return(struct pt_regs *ctx) | ||
{ | ||
struct entry_t *entryp; | ||
u64 tgid_pid = bpf_get_current_pid_tgid(); | ||
entryp = entryinfo.lookup(&tgid_pid); | ||
if (entryp == 0) { | ||
return 0; | ||
} | ||
u64 delta_ns = bpf_ktime_get_ns() - entryp->start_ns; | ||
entryinfo.delete(&tgid_pid); | ||
if (delta_ns < DURATION_NS) | ||
return 0; | ||
struct data_t data = {}; | ||
data.id = entryp->id; | ||
data.tgid_pid = tgid_pid; | ||
data.start_ns = entryp->start_ns; | ||
data.duration_ns = delta_ns; | ||
data.retval = PT_REGS_RC(ctx); | ||
#ifdef GRAB_ARGS | ||
bpf_probe_read(&data.args[0], sizeof(data.args), entryp->args); | ||
#endif | ||
bpf_get_current_comm(&data.comm, sizeof(data.comm)); | ||
events.perf_submit(ctx, &data, sizeof(data)); | ||
return 0; | ||
} | ||
""" | ||
|
||
bpf_text = bpf_text.replace('DURATION_NS', str(duration_ns)) | ||
if args.arguments: | ||
bpf_text = "#define GRAB_ARGS\n" + bpf_text | ||
if args.tgid: | ||
bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % tgid) | ||
else: | ||
bpf_text = bpf_text.replace('TGID_FILTER', '0') | ||
|
||
for i in range(len(args.functions)): | ||
bpf_text += """ | ||
int trace_%d(struct pt_regs *ctx) { | ||
return trace_entry(ctx, %d); | ||
} | ||
""" % (i, i) | ||
|
||
if args.verbose: | ||
print(bpf_text) | ||
|
||
b = BPF(text=bpf_text) | ||
|
||
for i, function in enumerate(args.functions): | ||
if ":" in function: | ||
library, func = function.split(":") | ||
b.attach_uprobe(name=library, sym=func, fn_name="trace_%d" % i) | ||
b.attach_uretprobe(name=library, sym=func, fn_name="trace_return") | ||
else: | ||
b.attach_kprobe(event=function, fn_name="trace_%d" % i) | ||
b.attach_kretprobe(event=function, fn_name="trace_return") | ||
|
||
TASK_COMM_LEN = 16 # linux/sched.h | ||
|
||
class Data(ct.Structure): | ||
_fields_ = [ | ||
("id", ct.c_ulonglong), | ||
("tgid_pid", ct.c_ulonglong), | ||
("start_ns", ct.c_ulonglong), | ||
("duration_ns", ct.c_ulonglong), | ||
("retval", ct.c_ulonglong), | ||
("comm", ct.c_char * TASK_COMM_LEN) | ||
] + ([("args", ct.c_ulonglong * 6)] if args.arguments else []) | ||
|
||
time_designator = "us" if args.min_us else "ms" | ||
time_value = args.min_us or args.min_ms or 1 | ||
time_multiplier = 1000 if args.min_us else 1000000 | ||
time_col = args.time or args.timestamp | ||
print("Tracing function calls slower than %g %s... Ctrl+C to quit." % | ||
(time_value, time_designator)) | ||
print((("%-10s " % "TIME" if time_col else "") + "%-14s %-6s %7s %16s %s") % | ||
("COMM", "PID", "LAT(%s)" % time_designator, "RVAL", | ||
"FUNC" + (" ARGS" if args.arguments else ""))) | ||
|
||
earliest_ts = 0 | ||
|
||
def time_str(event): | ||
if args.time: | ||
return "%-10s " % time.strftime("%H:%M:%S") | ||
if args.timestamp: | ||
global earliest_ts | ||
if earliest_ts == 0: | ||
earliest_ts = event.start_ns | ||
return "%-10.6f " % ((event.start_ns - earliest_ts) / 1000000000.0) | ||
return "" | ||
|
||
def args_str(event): | ||
if not args.arguments: | ||
return "" | ||
return str.join(" ", ["0x%x" % arg for arg in event.args[:args.arguments]]) | ||
|
||
def print_event(cpu, data, size): | ||
event = ct.cast(data, ct.POINTER(Data)).contents | ||
ts = float(event.duration_ns) / time_multiplier | ||
print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") % | ||
(event.comm.decode(), event.tgid_pid >> 32, | ||
ts, event.retval, args.functions[event.id], args_str(event))) | ||
|
||
b["events"].open_perf_buffer(print_event, page_cnt=64) | ||
while True: | ||
b.kprobe_poll() |
Oops, something went wrong.