From ad341c94c7e679ea0915f0f81884f503ce9f9be3 Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Tue, 9 Feb 2016 00:31:24 -0800 Subject: [PATCH] statsnoop --- README.md | 1 + man/man8/statsnoop.8 | 86 ++++++++++++++++++++++++++ tools/statsnoop.py | 118 ++++++++++++++++++++++++++++++++++++ tools/statsnoop_example.txt | 73 ++++++++++++++++++++++ 4 files changed, 278 insertions(+) create mode 100644 man/man8/statsnoop.8 create mode 100755 tools/statsnoop.py create mode 100644 tools/statsnoop_example.txt diff --git a/README.md b/README.md index 8aa6f01d2e86..3bf859902481 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,7 @@ Tools: - tools/[softirqs](tools/softirqs.py): Measure soft IRQ (soft interrupt) event time. [Examples](tools/softirqs_example.txt). - tools/[stackcount](tools/stackcount.py): Count kernel function calls and their stack traces. [Examples](tools/stackcount_example.txt). - tools/[stacksnoop](tools/stacksnoop.py): Trace a kernel function and print all kernel stack traces. [Examples](tools/stacksnoop_example.txt). +- tools/[statsnoop](tools/statsnoop.py): Trace stat() syscalls. [Examples](tools/statsnoop_example.txt). - tools/[syncsnoop](tools/syncsnoop.py): Trace sync() syscall. [Examples](tools/syncsnoop_example.txt). - tools/[tcpaccept](tools/tcpaccept.py): Trace TCP passive connections (accept()). [Examples](tools/tcpaccept_example.txt). - tools/[tcpconnect](tools/tcpconnect.py): Trace TCP active connections (connect()). [Examples](tools/tcpconnect_example.txt). diff --git a/man/man8/statsnoop.8 b/man/man8/statsnoop.8 new file mode 100644 index 000000000000..335895c94778 --- /dev/null +++ b/man/man8/statsnoop.8 @@ -0,0 +1,86 @@ +.TH statsnoop 8 "2016-02-08" "USER COMMANDS" +.SH NAME +statsnoop \- Trace stat() syscalls. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B statsnoop [\-h] [\-t] [\-x] [\-p PID] +.SH DESCRIPTION +statsnoop traces the different stat() syscalls, showing which processes are +attempting to read information about which files. This can be useful for +determining the location of config and log files, or for troubleshooting +applications that are failing, especially on startup. + +This works by tracing various kernel sys_stat() functions using dynamic +tracing, and will need updating to match any changes to these functions. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH OPTIONS +.TP +\-h +Print usage message. +.TP +\-t +Include a timestamp column. +.TP +\-x +Only print failed stats. +.TP +\-p PID +Trace this process ID only (filtered in-kernel). +.SH EXAMPLES +.TP +Trace all stat() syscalls: +# +.B statsnoop +.TP +Trace all stat() syscalls, and include timestamps: +# +.B statsnoop \-t +.TP +Trace only stat() syscalls that failed: +# +.B statsnoop \-x +.TP +Trace PID 181 only: +# +.B statsnoop \-p 181 +.SH FIELDS +.TP +TIME(s) +Time of the call, in seconds. +.TP +PID +Process ID +.TP +COMM +Process name +.TP +FD +File descriptor (if success), or -1 (if failed) +.TP +ERR +Error number (see the system's errno.h) +.TP +PATH +Open path +.SH OVERHEAD +This traces the kernel stat function and prints output for each event. As the +rate of this is generally expected to be low (< 1000/s), the overhead is also +expected to be negligible. If you have an application that is calling a high +rate of stat()s, then test and understand overhead before use. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Brendan Gregg +.SH SEE ALSO +opensnoop(1) diff --git a/tools/statsnoop.py b/tools/statsnoop.py new file mode 100755 index 000000000000..fd6b160764d1 --- /dev/null +++ b/tools/statsnoop.py @@ -0,0 +1,118 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# statsnoop Trace stat() syscalls. +# For Linux, uses BCC, eBPF. Embedded C. +# +# USAGE: statsnoop [-h] [-t] [-x] [-p PID] +# +# Copyright 2016 Netflix, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 08-Feb-2016 Brendan Gregg Created this. + +from __future__ import print_function +from bcc import BPF +import argparse + +# arguments +examples = """examples: + ./statsnoop # trace all stat() syscalls + ./statsnoop -t # include timestamps + ./statsnoop -x # only show failed stats + ./statsnoop -p 181 # only trace PID 181 +""" +parser = argparse.ArgumentParser( + description="Trace stat() syscalls", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-t", "--timestamp", action="store_true", + help="include timestamp on output") +parser.add_argument("-x", "--failed", action="store_true", + help="only show failed stats") +parser.add_argument("-p", "--pid", + help="trace this PID only") +args = parser.parse_args() +debug = 0 + +# define BPF program +bpf_text = """ +#include + +BPF_HASH(args_filename, u32, const char *); + +int trace_entry(struct pt_regs *ctx, const char __user *filename) +{ + u32 pid = bpf_get_current_pid_tgid(); + + FILTER + args_filename.update(&pid, &filename); + + return 0; +}; + +int trace_return(struct pt_regs *ctx) +{ + const char **filenamep; + int ret = ctx->ax; + u32 pid = bpf_get_current_pid_tgid(); + + filenamep = args_filename.lookup(&pid); + if (filenamep == 0) { + // missed entry + return 0; + } + + bpf_trace_printk("%s %d\\n", *filenamep, ret); + args_filename.delete(&pid); + + return 0; +} +""" +if args.pid: + bpf_text = bpf_text.replace('FILTER', + 'if (pid != %s) { return 0; }' % args.pid) +else: + bpf_text = bpf_text.replace('FILTER', '') +if debug: + print(bpf_text) + +# initialize BPF +b = BPF(text=bpf_text) +b.attach_kprobe(event="sys_stat", fn_name="trace_entry") +b.attach_kprobe(event="sys_statfs", fn_name="trace_entry") +b.attach_kprobe(event="sys_newstat", fn_name="trace_entry") +b.attach_kretprobe(event="sys_stat", fn_name="trace_return") +b.attach_kretprobe(event="sys_statfs", fn_name="trace_return") +b.attach_kretprobe(event="sys_newstat", fn_name="trace_return") + +# header +if args.timestamp: + print("%-14s" % ("TIME(s)"), end="") +print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH")) + +start_ts = 0 + +# format output +while 1: + (task, pid, cpu, flags, ts, msg) = b.trace_fields() + (filename, ret_s) = msg.split(" ") + + ret = int(ret_s) + if (args.failed and (ret >= 0)): + continue + + # split return value into FD and errno columns + if ret >= 0: + fd_s = ret + err = 0 + else: + fd_s = "-1" + err = - ret + + # print columns + if args.timestamp: + if start_ts == 0: + start_ts = ts + print("%-14.9f" % (ts - start_ts), end="") + print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename)) diff --git a/tools/statsnoop_example.txt b/tools/statsnoop_example.txt new file mode 100644 index 000000000000..45f0e7e6131a --- /dev/null +++ b/tools/statsnoop_example.txt @@ -0,0 +1,73 @@ +Demonstrations of statsnoop, the Linux eBPF/bcc version. + + +statsnoop traces the different stat() syscalls system-wide, and prints various +details. Example output: + +# ./statsnoop +PID COMM FD ERR PATH +31126 bash 0 0 . +31126 bash -1 2 /usr/local/sbin/iconfig +31126 bash -1 2 /usr/local/bin/iconfig +31126 bash -1 2 /usr/sbin/iconfig +31126 bash -1 2 /usr/bin/iconfig +31126 bash -1 2 /sbin/iconfig +31126 bash -1 2 /bin/iconfig +31126 bash -1 2 /usr/games/iconfig +31126 bash -1 2 /usr/local/games/iconfig +31126 bash -1 2 /apps/python/bin/iconfig +31126 bash -1 2 /mnt/src/llvm/build/bin/iconfig +8902 command-not-fou -1 2 /usr/bin/Modules/Setup +8902 command-not-fou -1 2 /usr/bin/lib/python3.4/os.py +8902 command-not-fou -1 2 /usr/bin/lib/python3.4/os.pyc +8902 command-not-fou 0 0 /usr/lib/python3.4/os.py +8902 command-not-fou -1 2 /usr/bin/pybuilddir.txt +8902 command-not-fou -1 2 /usr/bin/lib/python3.4/lib-dynload +8902 command-not-fou 0 0 /usr/lib/python3.4/lib-dynload +8902 command-not-fou 0 0 /apps/python/lib/python2.7/site-packages +8902 command-not-fou 0 0 /apps/python/lib/python2.7/site-packages +8902 command-not-fou 0 0 /apps/python/lib/python2.7/site-packages +8902 command-not-fou 0 0 /usr/lib/python3.4/ +8902 command-not-fou 0 0 /usr/lib/python3.4/ +[...] + +This output has caught me mistyping a command in another shell, "iconfig" +instead of "ifconfig". The first several lines show the bash shell searching +the $PATH, and failing to find it (ERR == 2 is file not found). Then, a +"command-not-found" program executes (the name is truncated to 16 characters +in the COMM field), which begins the process of searching for and suggesting +a package. ie, this: + +# iconfig +No command 'iconfig' found, did you mean: + Command 'vconfig' from package 'vlan' (main) + Command 'fconfig' from package 'redboot-tools' (universe) + Command 'mconfig' from package 'mono-devel' (main) + Command 'iwconfig' from package 'wireless-tools' (main) + Command 'zconfig' from package 'python-zconfig' (universe) + Command 'ifconfig' from package 'net-tools' (main) +iconfig: command not found + +statsnoop can be used for general debugging, to see what file information has +been requested, and whether those files exist. It can be used as a companion +to opensnoop, which shows what files were actually opened. + + +USAGE message: + +# ./statsnoop -h +usage: statsnoop [-h] [-t] [-x] [-p PID] + +Trace stat() syscalls + +optional arguments: + -h, --help show this help message and exit + -t, --timestamp include timestamp on output + -x, --failed only show failed stats + -p PID, --pid PID trace this PID only + +examples: + ./statsnoop # trace all stat() syscalls + ./statsnoop -t # include timestamps + ./statsnoop -x # only show failed stats + ./statsnoop -p 181 # only trace PID 181