forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
offcputime: Resolve stacks using a StackTrace table
The manual walking for kernel stacks in the eBPF code has been replaced with a `BPF_STACK_TRACE` table; the stack ID is now stored as an integer in the main key. Thanks to the `StackTrace.walk` iterator, the printing code can also be significantly simplified for both folded and full stack modes.
- Loading branch information
Showing
3 changed files
with
224 additions
and
66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
#!/usr/bin/python | ||
# | ||
# offcputime Summarize off-CPU time by kernel stack trace | ||
# For Linux, uses BCC, eBPF. | ||
# | ||
# USAGE: offcputime [-h] [-u] [-p PID] [-v] [-f] [duration] | ||
# | ||
# The current implementation uses an unrolled loop for x86_64, and was written | ||
# as a proof of concept. This implementation should be replaced in the future | ||
# with an appropriate bpf_ call, when available. | ||
# | ||
# Currently limited to a stack trace depth of 21 (maxdepth + 1). | ||
# | ||
# Copyright 2016 Netflix, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 13-Jan-2016 Brendan Gregg Created this. | ||
|
||
from __future__ import print_function | ||
from bcc import BPF | ||
from time import sleep, strftime | ||
import argparse | ||
import signal | ||
|
||
# arguments
examples = """examples:
    ./offcputime             # trace off-CPU stack time until Ctrl-C
    ./offcputime 5           # trace for 5 seconds only
    ./offcputime -f 5        # 5 seconds, and output in folded format
    ./offcputime -u          # don't include kernel threads (user only)
    ./offcputime -p 185      # trace for PID 185 only
"""
parser = argparse.ArgumentParser(
    description="Summarize off-CPU time by kernel stack trace",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-u", "--useronly", action="store_true",
    help="user threads only (no kernel threads)")
parser.add_argument("-p", "--pid",
    help="trace this PID only")
parser.add_argument("-v", "--verbose", action="store_true",
    help="show raw addresses")
parser.add_argument("-f", "--folded", action="store_true",
    help="output folded format")
parser.add_argument("duration", nargs="?", default=99999999,
    help="duration of trace, in seconds")
args = parser.parse_args()
folded = args.folded
duration = int(args.duration)
debug = 0
maxdepth = 20    # must match MAXDEPTH in the BPF program
if args.pid and args.useronly:
    # parser.error() prints usage, the message, and exits non-zero,
    # which is more useful to scripts than a bare print + exit(0).
    parser.error("use either -p or -u.")
|
||
# signal handler
def signal_ignore(sig, frame):
    """SIGINT handler installed once tracing stops.

    Reading and clearing the BPF maps can take seconds, so a second
    Ctrl-C during cleanup is swallowed; just print a newline so the
    ^C echo doesn't mangle the report. Parameters renamed from
    (signal, frame) so they no longer shadow the `signal` module.
    """
    print()
|
||
# define BPF program (C source compiled by BCC at runtime)
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

#define MAXDEPTH	20
#define MINBLOCK_US	1

struct key_t {
    char name[TASK_COMM_LEN];
    // Skip saving the ip
    u64 ret[MAXDEPTH];
};
BPF_HASH(counts, struct key_t);
BPF_HASH(start, u32);

// Fetch one return address from the frame at *bp, then advance *bp to
// the caller's frame. Returns 0 (stop walking) on any read failure or
// when the address is not a kernel-text address.
static u64 get_frame(u64 *bp) {
    if (*bp) {
        // The following stack walker is x86_64 specific
        u64 ret = 0;
        if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
            return 0;
        if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
            *bp = 0;
        if (ret < __START_KERNEL_map)
            return 0;
        return ret;
    }
    return 0;
}

int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
    u32 pid;
    u64 ts, *tsp;

    // record previous thread sleep time
    if (FILTER) {
        pid = prev->pid;
        ts = bpf_ktime_get_ns();
        start.update(&pid, &ts);
    }

    // calculate current thread's delta time
    pid = bpf_get_current_pid_tgid();
    tsp = start.lookup(&pid);
    if (tsp == 0)
        return 0;        // missed start or filtered
    u64 delta = bpf_ktime_get_ns() - *tsp;
    start.delete(&pid);
    delta = delta / 1000;
    if (delta < MINBLOCK_US)
        return 0;

    // create map key
    u64 zero = 0, *val, bp = 0;
    int depth = 0;
    struct key_t key = {};
    bpf_get_current_comm(&key.name, sizeof(key.name));
    bp = ctx->bp;

    // unrolled loop (MAXDEPTH):
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;

out:
    val = counts.lookup_or_init(&key, &zero);
    (*val) += delta;
    return 0;
}
"""

# Substitute the thread filter. Note: the C local `pid` is not yet
# initialized when FILTER is evaluated, so the PID filter must compare
# prev->pid directly (the old 'pid == N' form read an uninitialized
# variable). Also renamed the local from `filter`, which shadowed the
# Python builtin.
if args.pid:
    thread_filter = 'prev->pid == %s' % args.pid
elif args.useronly:
    thread_filter = '!(prev->flags & PF_KTHREAD)'
else:
    thread_filter = '1'
bpf_text = bpf_text.replace('FILTER', thread_filter)
if debug:
    print(bpf_text)
|
||
# initialize BPF: compile the program and probe the scheduler's
# context-switch path, so oncpu() fires on every task switch
b = BPF(text=bpf_text)
b.attach_kprobe(event="finish_task_switch", fn_name="oncpu")
if b.num_open_kprobes() == 0:
    print("0 functions traced. Exiting.")
    exit()
|
||
# header (suppressed in folded mode, whose output is meant for
# flame-graph tooling rather than humans)
if not folded:
    if duration < 99999999:
        trailer = " for %d secs." % duration
    else:
        trailer = "... Hit Ctrl-C to end."
    print("Tracing off-CPU time (us) by kernel stack" + trailer)
|
||
# output
# Despite the while (1), this loop body runs exactly once: it sleeps
# for the trace duration (or until Ctrl-C), prints the report, and
# falls through to exit() at the bottom.
while (1):
    try:
        sleep(duration)
    except KeyboardInterrupt:
        # as cleanup can take many seconds, trap Ctrl-C:
        signal.signal(signal.SIGINT, signal_ignore)

    if not folded:
        print()
    counts = b.get_table("counts")
    # sort by accumulated off-CPU time so the biggest totals print last
    for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
        if folded:
            # print folded stack output: one semicolon-joined line per
            # stack, leaf-last, followed by the total (flame graph input)
            # NOTE(review): b.ksym() may return bytes on Python 3, which
            # would break this str concatenation — confirm bcc version.
            line = k.name.decode() + ";"
            for i in reversed(range(0, maxdepth)):
                if k.ret[i] == 0:
                    continue
                line = line + b.ksym(k.ret[i])
                if i != 0:
                    line = line + ";"
            print("%s %d" % (line, v.value))
        else:
            # print default multi-line stack output: leaf frame first,
            # then the task name, then the off-CPU microseconds total
            for i in range(0, maxdepth):
                if k.ret[i] == 0:
                    # slots past the real stack depth are zero-filled
                    break
                print("    %-16x %s" % (k.ret[i],
                    b.ksym(k.ret[i])))
            print("    %-16s %s" % ("-", k.name))
            print("        %d\n" % v.value)
    counts.clear()

    if not folded:
        print("Detaching...")
    exit()