updated opensnoop to use bpf_perf_output

devopsleague · Apr 29, 2016 · 3c446c7 · 3c446c7
1 parent 751ac54
commit 3c446c7
Show file tree

Hide file tree

Showing 3 changed files with 215 additions and 28 deletions.
diff --git a/man/man8/opensnoop.8 b/man/man8/opensnoop.8
@@ -12,6 +12,10 @@ on startup.
 This works by tracing the kernel sys_open() function using dynamic tracing, and
 will need updating to match any changes to this function.
 
+This makes use of a Linux 4.5 feature (bpf_perf_event_output());
+for kernels older than 4.5, see the version under tools/old,
+which uses the older bpf_trace_printk() mechanism.
+
 Since this uses BPF, only the root user can use this tool.
 .SH REQUIREMENTS
 CONFIG_BPF and bcc.

diff --git a/tools/old/opensnoop.py b/tools/old/opensnoop.py
@@ -0,0 +1,112 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# opensnoop Trace open() syscalls.
+# For Linux, uses BCC, eBPF. Embedded C.
+#
+# USAGE: opensnoop [-h] [-t] [-x] [-p PID]
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 17-Sep-2015 Brendan Gregg Created this.
+
+from __future__ import print_function
+from bcc import BPF
+import argparse
+
+# arguments
+examples = """examples:
+ ./opensnoop # trace all open() syscalls
+ ./opensnoop -t # include timestamps
+ ./opensnoop -x # only show failed opens
+ ./opensnoop -p 181 # only trace PID 181
+"""
+parser = argparse.ArgumentParser(
+ description="Trace open() syscalls",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("-t", "--timestamp", action="store_true",
+ help="include timestamp on output")
+parser.add_argument("-x", "--failed", action="store_true",
+ help="only show failed opens")
+parser.add_argument("-p", "--pid",
+ help="trace this PID only")
+args = parser.parse_args()
+debug = 0
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+
+BPF_HASH(args_filename, u32, const char *);
+
+int kprobe__sys_open(struct pt_regs *ctx, const char __user *filename)
+{
+ u32 pid = bpf_get_current_pid_tgid();
+
+ FILTER
+ args_filename.update(&pid, &filename);
+
+ return 0;
+};
+
+int kretprobe__sys_open(struct pt_regs *ctx)
+{
+ const char **filenamep;
+ int ret = ctx->ax;
+ u32 pid = bpf_get_current_pid_tgid();
+
+ filenamep = args_filename.lookup(&pid);
+ if (filenamep == 0) {
+ // missed entry
+ return 0;
+ }
+
+ bpf_trace_printk("%d %s\\n", ret, *filenamep);
+ args_filename.delete(&pid);
+
+ return 0;
+}
+"""
+if args.pid:
+ bpf_text = bpf_text.replace('FILTER',
+ 'if (pid != %s) { return 0; }' % args.pid)
+else:
+ bpf_text = bpf_text.replace('FILTER', '')
+if debug:
+ print(bpf_text)
+
+# initialize BPF
+b = BPF(text=bpf_text)
+
+# header
+if args.timestamp:
+ print("%-14s" % ("TIME(s)"), end="")
+print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH"))
+
+start_ts = 0
+
+# format output
+while 1:
+ (task, pid, cpu, flags, ts, msg) = b.trace_fields()
+ (ret_s, filename) = msg.split(" ", 1)
+
+ ret = int(ret_s)
+ if (args.failed and (ret >= 0)):
+ continue
+
+ # split return value into FD and errno columns
+ if ret >= 0:
+ fd_s = ret
+ err = 0
+ else:
+ fd_s = "-1"
+ err = - ret
+
+ # print columns
+ if args.timestamp:
+ if start_ts == 0:
+ start_ts = ts
+ print("%-14.9f" % (ts - start_ts), end="")
+ print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename))
diff --git a/tools/opensnoop.py b/tools/opensnoop.py
@@ -10,10 +10,12 @@
 # Licensed under the Apache License, Version 2.0 (the "License")
 #
 # 17-Sep-2015 Brendan Gregg Created this.
+# 29-Apr-2016 Allan McAleavy updated for BPF_PERF_OUTPUT
 
 from __future__ import print_function
 from bcc import BPF
 import argparse
+import ctypes as ct
 
 # arguments
 examples = """examples:
@@ -38,32 +40,67 @@
 # define BPF program
 bpf_text = """
 #include <uapi/linux/ptrace.h>
+#include <uapi/linux/limits.h>
+#include <linux/sched.h>
+
+struct val_t {
+ u32 pid;
+ u64 ts;
+ char comm[TASK_COMM_LEN];
+ const char *fname;
+};
+
+struct data_t {
+ u32 pid;
+ u64 ts;
+ u64 delta;
+ int ret;
+ char comm[TASK_COMM_LEN];
+ char fname[NAME_MAX];
+};
 
 BPF_HASH(args_filename, u32, const char *);
+BPF_HASH(infotmp, u32, struct val_t);
+BPF_PERF_OUTPUT(events);
 
-int kprobe__sys_open(struct pt_regs *ctx, const char __user *filename)
+int trace_entry(struct pt_regs *ctx, const char __user *filename)
 {
+ struct val_t val = {};
  u32 pid = bpf_get_current_pid_tgid();
 
  FILTER
- args_filename.update(&pid, &filename);
+ if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
+ val.pid = bpf_get_current_pid_tgid();
+ val.ts = bpf_ktime_get_ns();
+ val.fname = filename;
+ infotmp.update(&pid, &val);
+ }
 
  return 0;
 };
 
-int kretprobe__sys_open(struct pt_regs *ctx)
+int trace_return(struct pt_regs *ctx)
 {
- const char **filenamep;
- int ret = ctx->ax;
  u32 pid = bpf_get_current_pid_tgid();
+ struct val_t *valp;
+ struct data_t data = {};
+
+ u64 tsp = bpf_ktime_get_ns();
 
- filenamep = args_filename.lookup(&pid);
- if (filenamep == 0) {
+ valp = infotmp.lookup(&pid);
+ if (valp == 0) {
  // missed entry
  return 0;
  }
-
- bpf_trace_printk("%d %s\\n", ret, *filenamep);
+ bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
+ bpf_probe_read(&data.fname, sizeof(data.fname), (void *)valp->fname);
+ data.pid = valp->pid;
+ data.delta = tsp - valp->ts;
+ data.ts = tsp / 1000;
+ data.ret = ctx->ax;
+
+ events.perf_submit(ctx, &data, sizeof(data));
+ infotmp.delete(&pid);
  args_filename.delete(&pid);
 
  return 0;
@@ -79,34 +116,68 @@
 
 # initialize BPF
 b = BPF(text=bpf_text)
+b.attach_kprobe(event="sys_open", fn_name="trace_entry")
+b.attach_kretprobe(event="sys_open", fn_name="trace_return")
+
+TASK_COMM_LEN = 16 # linux/sched.h
+NAME_MAX = 255 # linux/limits.h
+
+class Data(ct.Structure):
+ _fields_ = [
+ ("pid", ct.c_uint), # u32 in struct data_t; ctypes aligns "ts" after it
+ ("ts", ct.c_ulonglong),
+ ("delta", ct.c_ulonglong),
+ ("ret", ct.c_int),
+ ("comm", ct.c_char * TASK_COMM_LEN),
+ ("fname", ct.c_char * NAME_MAX)
+ ]
+
+start_ts = 0
+prev_ts = 0
+delta = 0
 
 # header
 if args.timestamp:
  print("%-14s" % ("TIME(s)"), end="")
 print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH"))
 
-start_ts = 0
-
-# format output
-while 1:
- (task, pid, cpu, flags, ts, msg) = b.trace_fields()
- (ret_s, filename) = msg.split(" ", 1)
-
- ret = int(ret_s)
- if (args.failed and (ret >= 0)):
- continue
+# process event
+def print_event(cpu, data, size):
+ event = ct.cast(data, ct.POINTER(Data)).contents
+ global start_ts
+ global prev_ts
+ global delta
+ global cont
 
  # split return value into FD and errno columns
- if ret >= 0:
- fd_s = ret
+ if event.ret >= 0:
+ fd_s = event.ret
  err = 0
  else:
- fd_s = "-1"
- err = - ret
+ fd_s = -1
+ err = - event.ret
+
+ if start_ts == 0:
+ prev_ts = start_ts
+
+ if start_ts == 1:
+ delta = float(delta) + (event.ts - prev_ts)
+
+ if (args.failed and (event.ret >= 0)):
+ start_ts = 1
+ prev_ts = event.ts
+ return
 
- # print columns
  if args.timestamp:
- if start_ts == 0:
- start_ts = ts
- print("%-14.9f" % (ts - start_ts), end="")
- print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename))
+ print("%-14.9f" % (delta / 1000000), end="")
+
+ print("%-6d %-16s %4d %3d %s" % (event.pid, event.comm,
+ fd_s, err, event.fname))
+
+ prev_ts = event.ts
+ start_ts = 1
+
+# loop with callback to print_event
+b["events"].open_perf_buffer(print_event)
+while 1:
+ b.kprobe_poll()