From af18bb3c199fce54e5a7709e495df990770d4f71 Mon Sep 17 00:00:00 2001
From: Brendan Gregg <brendan.d.gregg@gmail.com>
Date: Sun, 7 Feb 2016 15:28:50 -0800
Subject: [PATCH] execsnoop

---
 README.md                   |   1 +
 man/man8/execsnoop.8        |  88 ++++++++++++++++++++
 tools/execsnoop.py          | 158 ++++++++++++++++++++++++++++++++++++
 tools/execsnoop_example.txt |  82 +++++++++++++++++++
 4 files changed, 329 insertions(+)
 create mode 100644 man/man8/execsnoop.8
 create mode 100755 tools/execsnoop.py
 create mode 100644 tools/execsnoop_example.txt

diff --git a/README.md b/README.md
index 18e954ccb770..c1d40a3fc7de 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,7 @@ Tools:
 - tools/[biosnoop](tools/biosnoop.py): Trace block device I/O with PID and latency. [Examples](tools/biosnoop_example.txt).
 - tools/[bitesize](tools/bitesize.py): Show per process I/O size histogram. [Examples](tools/bitesize_example.txt).
 - tools/[cachestat](tools/cachestat.py): Trace page cache hit/miss ratio. [Examples](tools/cachestat_example.txt).
+- tools/[execsnoop](tools/execsnoop.py): Trace new processes via exec() syscalls. [Examples](tools/execsnoop_example.txt).
 - tools/[fsslower](tools/fsslower.py): Trace slow file system synchronous reads and writes. [Examples](tools/fsslower_example.txt).
 - tools/[funccount](tools/funccount.py): Count kernel function calls. [Examples](tools/funccount_example.txt).
 - tools/[funclatency](tools/funclatency.py): Time kernel functions and show their latency distribution. [Examples](tools/funclatency_example.txt).
diff --git a/man/man8/execsnoop.8 b/man/man8/execsnoop.8
new file mode 100644
index 000000000000..9ce4a4de637c
--- /dev/null
+++ b/man/man8/execsnoop.8
@@ -0,0 +1,88 @@
+.TH execsnoop 8  "2016-02-07" "USER COMMANDS"
+.SH NAME
+execsnoop \- Trace new processes via exec() syscalls. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B execsnoop [\-h] [\-t] [\-X] [\-n NAME]
+.SH DESCRIPTION
+execsnoop traces new processes, showing the filename executed, argument
+list, and return value (0 for success).
+
+It works by tracing the execve() system call (commonly used exec() variant).
+This catches new processes that follow the fork->exec sequence, as well as
+processes that re-exec() themselves. Some applications fork() but do not
+exec(), eg, for worker processes, which won't be included in the execsnoop
+output.
+
+This works by tracing the kernel sys_execve() function using dynamic tracing,
+and will need updating to match any changes to this function.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-t
+Include a timestamp column.
+.TP
+\-X
+Exclude failed exec()s
+.TP
+\-n NAME
+Only print command lines matching this name (regex), matched anywhere
+.SH EXAMPLES
+.TP
+Trace all exec() syscalls:
+#
+.B execsnoop
+.TP
+Trace all exec() syscalls, and include timestamps:
+#
+.B execsnoop \-t
+.TP
+Only trace successful exec()s:
+#
+.B execsnoop \-X
+.TP
+Only trace exec()s where the filename or arguments contain "mount":
+#
+.B execsnoop \-n mount
+.SH FIELDS
+.TP
+TIME(s)
+Time of exec() return, in seconds.
+.TP
+PCOMM
+Parent process/command name.
+.TP
+PID
+Process ID
+.TP
+RET
+Return value of exec(). 0 == success.
+.TP
+ARGS
+Filename for the exec(), followed by up to 19 arguments. An ellipsis "..." is
+shown if the argument list is known to be truncated.
+.SH OVERHEAD
+This traces the kernel execve function and prints output for each event. As the
+rate of this is generally expected to be low (< 1000/s), the overhead is also
+expected to be negligible. If you have an application that is calling a high
+rate of exec()s, then test and understand overhead before use.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+opensnoop(1)
diff --git a/tools/execsnoop.py b/tools/execsnoop.py
new file mode 100755
index 000000000000..d819d5410a63
--- /dev/null
+++ b/tools/execsnoop.py
@@ -0,0 +1,158 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# execsnoop Trace new processes via exec() syscalls.
+#           For Linux, uses BCC, eBPF. Embedded C.
+#
+# USAGE: execsnoop [-h] [-t] [-X] [-n NAME]
+#
+# This currently will print up to a maximum of 19 arguments, plus the process
+# name, so 20 fields in total (MAXARG).
+#
+# This won't catch all new processes: an application may fork() but not exec().
+#
+# Copyright 2016 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 07-Feb-2016   Brendan Gregg   Created this.
+
+from __future__ import print_function
+from bcc import BPF
+import argparse
+import re
+
+# arguments: CLI options; the examples text becomes the -h epilog
+examples = """examples:
+    ./execsnoop           # trace all exec() syscalls
+    ./execsnoop -X        # only show successful exec()s
+    ./execsnoop -t        # include timestamps
+    ./execsnoop -n main   # only print command lines containing "main"
+"""
+parser = argparse.ArgumentParser(
+    description="Trace exec() syscalls",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument("-t", "--timestamp", action="store_true",
+    help="include timestamp on output")
+parser.add_argument("-X", "--excludefails", action="store_true",
+    help="exclude failed exec()s")
+parser.add_argument("-n", "--name",
+    help="only print commands matching this name (regex), any arg")
+args = parser.parse_args()
+
+# define BPF program: execve() entry emits ARG lines, its return emits RET
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+
+#define MAXARG   20
+#define ARGSIZE  64
+
+static int print_arg(void *ptr) {
+    // Read the argv[] slot at ptr and print the string it points to using
+    // bpf_trace_printk(); returns 0 at the NULL terminator, else 1. This is a
+    // work around until we have a binary trace interface for passing event
+    // data to bcc; exec()s should be low frequency, so overhead is acceptable.
+    const char *argp = NULL;
+    char buf[ARGSIZE] = {};
+
+    bpf_probe_read(&argp, sizeof(argp), ptr);
+    if (argp == NULL) return 0;
+
+    bpf_probe_read(&buf, sizeof(buf), (void *)(argp));
+    bpf_trace_printk("ARG %s\\n", buf);
+
+    return 1;
+}
+
+int kprobe__sys_execve(struct pt_regs *ctx, const char __user *filename,
+    const char __user *const __user *__argv,
+    const char __user *const __user *__envp)
+{
+    char fname[ARGSIZE] = {};
+    bpf_probe_read(&fname, sizeof(fname), (void *)(filename));
+    bpf_trace_printk("ARG %s\\n", fname);
+
+    int i = 1;  // skip first arg, as we printed fname
+
+    // unrolled loop over argv[1..20]; NOTE(review): docs say 19 args (MAXARG)
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++; // X
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++;
+    if (print_arg((void *)&__argv[i]) == 0) goto out; i++; // XX
+    bpf_trace_printk("ARG ...\\n");    // truncated
+
+out:
+    return 0;
+}
+
+int kretprobe__sys_execve(struct pt_regs *ctx)
+{
+    bpf_trace_printk("RET %d\\n", ctx->ax);
+    return 0;
+}
+"""
+
+# initialize BPF from the embedded C program in bpf_text
+b = BPF(text=bpf_text)
+
+# header: column titles use the same widths as the per-event output below
+if args.timestamp:
+    print("%-8s" % ("TIME(s)"), end="")
+print("%-16s %-6s %3s %s" % ("PCOMM", "PID", "RET", "ARGS"))
+
+start_ts = 0    # timestamp of the first trace event; 0 until one is seen
+cmd = {}        # PID -> command line accumulated from ARG events
+pcomm = {}      # PID -> task name captured with the first ARG event
+
+# format output: each exec() arrives as "ARG <string>" lines followed by one
+# "RET <n>" line; ARGs are accumulated per PID and flushed when RET is seen.
+while 1:
+    (task, pid, cpu, flags, ts, msg) = b.trace_fields()
+    # partition() never raises, unlike split(" ", 1) on a space-less message
+    (kind, _, arg) = msg.partition(" ")
+
+    # timestamps are printed relative to the first event seen
+    if start_ts == 0:
+        start_ts = ts
+
+    if kind == "RET":
+        # pop() with defaults: a RET can arrive for a PID with no recorded
+        # ARGs, where cmd[pid] would raise KeyError; popping also releases
+        # the per-PID state whether or not the line is printed.
+        cmdline = cmd.pop(pid, "")
+        parent = pcomm.pop(pid, task)
+        if args.name and not re.search(args.name, cmdline):
+            continue
+        if args.excludefails and int(arg) < 0:
+            continue
+
+        # output
+        if args.timestamp:
+            print("%-8.3f" % (ts - start_ts), end="")
+        print("%-16s %-6s %3s %s" % (parent, pid, arg, cmdline))
+    elif kind == "ARG":
+        # build command line string; the task name seen with the first ARG
+        # (at execve() entry) is kept for the PCOMM column
+        if pid in cmd:
+            cmd[pid] = cmd[pid] + " " + arg
+        else:
+            cmd[pid] = arg
+        pcomm.setdefault(pid, task)
diff --git a/tools/execsnoop_example.txt b/tools/execsnoop_example.txt
new file mode 100644
index 000000000000..b689f06225a1
--- /dev/null
+++ b/tools/execsnoop_example.txt
@@ -0,0 +1,82 @@
+Demonstrations of execsnoop, the Linux eBPF/bcc version.
+
+
+execsnoop traces new processes. For example:
+
+# ./execsnoop 
+PCOMM            PID    RET ARGS
+supervise        9660     0 ./run
+supervise        9661     0 ./run
+mkdir            9662     0 /bin/mkdir -p ./main
+run              9663     0 ./run
+chown            9664     0 /bin/chown nobody:nobody ./main
+run              9665     0 /bin/mkdir -p ./main
+supervise        9667     0 ./run
+run              9660    -2 /usr/local/bin/setuidgid nobody /command/multilog t ./main
+chown            9668     0 /bin/chown nobody:nobody ./main
+run              9666     0 /bin/chmod 0777 main
+run              9663    -2 /usr/local/bin/setuidgid nobody /command/multilog t ./main
+run              9669     0 /bin/mkdir -p ./main
+run              9661    -2 /usr/local/bin/setuidgid nobody /command/multilog t ./main
+supervise        9670     0 ./run
+[...]
+
+The output shows the parent process/command name (PCOMM), the PID, the return
+value of the exec() (RET), and the filename with arguments (ARGS). The example
+above shows various regular system daemon activity, including some failures
+(trying to execute a /usr/local/bin/setuidgid, which I just noticed doesn't
+exist).
+
+It works by tracing the execve() system call (commonly used exec() variant), and
+shows details of the arguments and return value. This catches new processes
+that follow the fork->exec sequence, as well as processes that re-exec()
+themselves. Some applications fork() but do not exec(), eg, for worker
+processes, which won't be included in the execsnoop output.
+
+
+The -X option can be used to only show successful exec()s. For example, tracing
+a "man ls":
+
+# ./execsnoop -X
+PCOMM            PID    RET ARGS
+bash             15887    0 /usr/bin/man ls
+preconv          15894    0 /usr/bin/preconv -e UTF-8
+man              15896    0 /usr/bin/tbl
+man              15897    0 /usr/bin/nroff -mandoc -rLL=169n -rLT=169n -Tutf8
+man              15898    0 /usr/bin/pager -s
+nroff            15900    0 /usr/bin/locale charmap
+nroff            15901    0 /usr/bin/groff -mtty-char -Tutf8 -mandoc -rLL=169n -rLT=169n
+groff            15902    0 /usr/bin/troff -mtty-char -mandoc -rLL=169n -rLT=169n -Tutf8
+groff            15903    0 /usr/bin/grotty
+
+This shows the various commands used to process the "man ls" command.
+
+
+A -t option can be used to include a timestamp column, and a -n option to match
+on a name or substring from the full command line (filename + args). Regular
+expressions are allowed. For example, matching commands containing "mount":
+
+# ./execsnoop -tn mount
+TIME(s) PCOMM            PID    RET ARGS
+2.849   bash             18049    0 /bin/mount -p
+
+
+USAGE message:
+
+# ./execsnoop -h
+usage: execsnoop [-h] [-t] [-X] [-n NAME]
+
+Trace exec() syscalls
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -t, --timestamp       include timestamp on output
+  -X, --excludefails    exclude failed exec()s
+  -n NAME, --name NAME  only print commands matching this name (regex), any
+                        arg
+
+examples:
+    ./execsnoop           # trace all exec() syscalls
+    ./execsnoop -X        # only show successful exec()s
+    ./execsnoop -t        # include timestamps
+    ./execsnoop -n main   # only print command lines containing "main"