diff --git a/README.md b/README.md index 50d6db0a2fc2..54733ebb58d1 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,8 @@ pair of .c and .py files, and some are directories of files. - tools/[runqlat](tools/runqlat.py): Run queue (scheduler) latency as a histogram. [Examples](tools/runqlat_example.txt). - tools/[runqlen](tools/runqlen.py): Run queue length as a histogram. [Examples](tools/runqlen_example.txt). - tools/[runqslower](tools/runqslower.py): Trace long process scheduling delays. [Examples](tools/runqslower_example.txt). +- tools/[shmsnoop](tools/shmsnoop.py): Trace System V shared memory syscalls. [Examples](tools/shmsnoop_example.txt). +- tools/[sofdsnoop](tools/sofdsnoop.py): Trace FDs passed through unix sockets. [Examples](tools/sofdsnoop_example.txt). - tools/[slabratetop](tools/slabratetop.py): Kernel SLAB/SLUB memory cache allocation rate top. [Examples](tools/slabratetop_example.txt). - tools/[softirqs](tools/softirqs.py): Measure soft IRQ (soft interrupt) event time. [Examples](tools/softirqs_example.txt). - tools/[solisten](tools/solisten.py): Trace TCP socket listen. [Examples](tools/solisten_example.txt). diff --git a/man/man8/shmsnoop.8 b/man/man8/shmsnoop.8 new file mode 100644 index 000000000000..390974f6f7ea --- /dev/null +++ b/man/man8/shmsnoop.8 @@ -0,0 +1,74 @@ +.TH shmsnoop 8 "2018-09-24" "USER COMMANDS" +.SH NAME +shmsnoop \- Trace System V shared memory syscalls. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B shmsnoop [\-h] [\-T] [\-p] [\-t] [\-d DURATION] [\-n NAME] +.SH DESCRIPTION +shmsnoop traces System V shared memory syscalls: shmget, shmat, shmdt, shmctl + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH OPTIONS +.TP +\-h +Print usage message. +.TP +\-T +Include a timestamp column. +.TP +\-p PID +Trace this process ID only (filtered in-kernel). +.TP +\-t TID +Trace this thread ID only (filtered in-kernel). +.TP +\-d DURATION +Total duration of trace in seconds. +.TP +\-n NAME +Only print command lines matching this command name (regex) +.SH EXAMPLES +.TP +Trace all shm* syscalls: +# +.B shmsnoop +.TP +Trace all shm* syscalls, and include timestamps: +# +.B shmsnoop \-T +.TP +Only trace shm* syscalls where the process contains "server": +# +.B shmsnoop \-n server +.SH FIELDS +.TP +TIME(s) +Time of shm syscall return, in seconds. +.TP +PID +Process ID +.TP +COMM +Parent process/command name. +.TP +RET +Return value of shm syscall. +.TP +ARGS +"arg: value" couples that represent given syscall arguments as described in their manpage +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Jiri Olsa +.SH SEE ALSO +opensnoop(1) diff --git a/man/man8/spfdsnoop.8 b/man/man8/spfdsnoop.8 new file mode 100644 index 000000000000..ffad57c591c1 --- /dev/null +++ b/man/man8/spfdsnoop.8 @@ -0,0 +1,85 @@ +.TH sofdsnoop 8 "2018-11-08" "USER COMMANDS" +.SH NAME +sofdsnoop \- Trace FDs passed through unix sockets. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B sofdsnoop [-h] [-T] [-p PID] [-t TID] [-n NAME] [-d DURATION] +.SH DESCRIPTION +sofdsnoop traces FDs passed through unix sockets + +Every file descriptor that is passed via unix sockets os displayed +on separate line together with process info (TID/COMM columns), +ACTION details (SEND/RECV), file descriptor number (FD) and its +translation to file if available (NAME). + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH OPTIONS +.TP +\-h +Print usage message. +.TP +\-T +Include a timestamp column. +.TP +\-p PID +Trace this process ID only (filtered in-kernel). +.TP +\-t TID +Trace this thread ID only (filtered in-kernel). +.TP +\-d DURATION +Total duration of trace in seconds. +.TP +\-n NAME +Only print command lines matching this command name (regex) +.SH EXAMPLES +.TP +Trace all sockets: +# +.B sofdsnoop +.TP +Trace all sockets, and include timestamps: +# +.B sofdsnoop \-T +.TP +Only trace sockets where the process contains "server": +# +.B sofdsnoop \-n server +.SH FIELDS +.TP +TIME(s) +Time of SEDN/RECV actions, in seconds. +.TP +ACTION +Operation on the fd SEND/RECV. +.TP +TID +Process TID +.TP +COMM +Parent process/command name. +.TP +SOCKET +The socket carrier. +.TP +FD +file descriptor number +.TP +NAME +file name for SEND lines +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Jiri Olsa +.SH SEE ALSO +opensnoop(1) diff --git a/snapcraft/snapcraft.yaml b/snapcraft/snapcraft.yaml index e4acdb27a102..e408686f2553 100644 --- a/snapcraft/snapcraft.yaml +++ b/snapcraft/snapcraft.yaml @@ -189,6 +189,12 @@ apps: perlstat: command: wrapper perlstat aliases: [perlstat] + shmsnoop: + command: wrapper shmsnoop + aliases: [shmsnoop] + sofdsnoop: + command: wrapper sofdsnoop + aliases: [sofdsnoop] phpcalls: command: wrapper phpcalls aliases: [phpcalls] diff --git a/tests/python/test_tools_smoke.py b/tests/python/test_tools_smoke.py index ab80ecf250ec..f6fca6376740 100755 --- a/tests/python/test_tools_smoke.py +++ b/tests/python/test_tools_smoke.py @@ -262,6 +262,14 @@ def test_runqlat(self): def test_runqlen(self): self.run_with_duration("runqlen.py 1 1") + @skipUnless(kernel_version_ge(4,8), "requires kernel >= 4.8") + def test_shmsnoop(self): + self.run_with_int("shmsnoop.py") + + @skipUnless(kernel_version_ge(4,8), "requires kernel >= 4.8") + def test_sofdsnoop(self): + self.run_with_int("sofdsnoop.py") + def test_slabratetop(self): self.run_with_duration("slabratetop.py 1 1") diff --git a/tools/shmsnoop.py b/tools/shmsnoop.py new file mode 100755 index 000000000000..ef990645d300 --- /dev/null +++ b/tools/shmsnoop.py @@ -0,0 +1,319 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# shmsnoop Trace shm*() syscalls. +# For Linux, uses BCC, eBPF. Embedded C. +# +# USAGE: shmsnoop [-h] [-T] [-x] [-p PID] [-d DURATION] [-t TID] [-n NAME] +# +# Copyright (c) 2018 Jiri Olsa. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 08-Oct-2018 Jiri Olsa Created this. + +from __future__ import print_function +from bcc import ArgString, BPF +import argparse +import ctypes as ct +from datetime import datetime, timedelta + +# arguments +examples = """examples: + ./shmsnoop # trace all shm*() syscalls + ./shmsnoop -T # include timestamps + ./shmsnoop -p 181 # only trace PID 181 + ./shmsnoop -t 123 # only trace TID 123 + ./shmsnoop -d 10 # trace for 10 seconds only + ./shmsnoop -n main # only print process names containing "main" +""" +parser = argparse.ArgumentParser( + description="Trace shm*() syscalls", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-T", "--timestamp", action="store_true", + help="include timestamp on output") +parser.add_argument("-p", "--pid", + help="trace this PID only") +parser.add_argument("-t", "--tid", + help="trace this TID only") +parser.add_argument("-d", "--duration", + help="total duration of trace in seconds") +parser.add_argument("-n", "--name", + type=ArgString, + help="only print process names containing this name") +parser.add_argument("--ebpf", action="store_true", + help=argparse.SUPPRESS) +args = parser.parse_args() +debug = 0 +if args.duration: + args.duration = timedelta(seconds=int(args.duration)) + +# define BPF program +bpf_text = """ +#include +#include +#include + +struct val_t { + u64 id; + u64 ts; + int sys; + unsigned long key; + unsigned long size; + unsigned long shmflg; + unsigned long shmid; + unsigned long cmd; + unsigned long buf; + unsigned long shmaddr; + unsigned long ret; + char comm[TASK_COMM_LEN]; +}; + +BPF_HASH(infotmp, u64, struct val_t); +BPF_PERF_OUTPUT(events); + +enum { + SYS_SHMGET, + SYS_SHMAT, + SYS_SHMDT, + SYS_SHMCTL, +}; + +static int enter(struct val_t *val) +{ + u64 id = bpf_get_current_pid_tgid(); + u32 pid = id >> 32; // PID is higher part + u32 tid = id; // Cast and get the lower part + + FILTER + + val->id = id; + infotmp.update(&id, val); + return 0; +} + +int trace_return(struct pt_regs *ctx) +{ + u64 id = bpf_get_current_pid_tgid(); + u64 tsp = bpf_ktime_get_ns(); + struct val_t *val; + + val = infotmp.lookup(&id); + if (val == 0) + return 0; + + if (bpf_get_current_comm(&val->comm, sizeof(val->comm)) != 0) + goto out; + + val->ts = tsp / 1000; + val->ret = PT_REGS_RC(ctx); + events.perf_submit(ctx, val, sizeof(*val)); + +out: + infotmp.delete(&id); + return 0; +} + +int syscall__shmget(struct pt_regs *ctx, u64 key, u64 size, u64 shmflg) +{ + struct val_t val = { + .sys = SYS_SHMGET, + }; + + val.key = key; + val.size = size; + val.shmflg = shmflg; + return enter(&val); +}; + +int syscall__shmat(struct pt_regs *ctx, u64 shmid, u64 shmaddr, u64 shmflg) +{ + struct val_t val = { + .sys = SYS_SHMAT, + }; + + val.shmid = shmid; + val.shmaddr = shmaddr; + val.shmflg = shmflg; + return enter(&val); +}; + +int syscall__shmdt(struct pt_regs *ctx, u64 shmaddr) +{ + struct val_t val = { + .sys = SYS_SHMDT, + }; + + val.shmaddr = shmaddr; + return enter(&val); +}; + +int syscall__shmctl(struct pt_regs *ctx, u64 shmid, u64 cmd, u64 buf) +{ + struct val_t val = { + .sys = SYS_SHMCTL, + }; + + val.shmid = shmid; + val.cmd = cmd; + val.buf = buf; + return enter(&val); +}; + +""" +if args.tid: # TID trumps PID + bpf_text = bpf_text.replace('FILTER', + 'if (tid != %s) { return 0; }' % args.tid) +elif args.pid: + bpf_text = bpf_text.replace('FILTER', + 'if (pid != %s) { return 0; }' % args.pid) +else: + bpf_text = bpf_text.replace('FILTER', '') + +if debug or args.ebpf: + print(bpf_text) + if args.ebpf: + exit() + +# initialize BPF +b = BPF(text=bpf_text) + +syscall_fnname = b.get_syscall_fnname("shmget") +if BPF.ksymname(syscall_fnname) != -1: + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__shmget") + b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return") + +syscall_fnname = b.get_syscall_fnname("shmat") +if BPF.ksymname(syscall_fnname) != -1: + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__shmat") + b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return") + +syscall_fnname = b.get_syscall_fnname("shmdt") +if BPF.ksymname(syscall_fnname) != -1: + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__shmdt") + b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return") + +syscall_fnname = b.get_syscall_fnname("shmctl") +if BPF.ksymname(syscall_fnname) != -1: + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__shmctl") + b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return") + +TASK_COMM_LEN = 16 # linux/sched.h + +SYS_SHMGET = 0 +SYS_SHMAT = 1 +SYS_SHMDT = 2 +SYS_SHMCTL = 3 + +initial_ts = 0 + +class Data(ct.Structure): + _fields_ = [ + ("id", ct.c_ulonglong), + ("ts", ct.c_ulonglong), + ("sys", ct.c_int), + ("key", ct.c_ulong), + ("size", ct.c_ulong), + ("shmflg", ct.c_ulong), + ("shmid", ct.c_ulong), + ("cmd", ct.c_ulong), + ("buf", ct.c_ulong), + ("shmaddr", ct.c_ulong), + ("ret", ct.c_ulong), + ("comm", ct.c_char * TASK_COMM_LEN), + ] + +# header +if args.timestamp: + print("%-14s" % ("TIME(s)"), end="") +print("%-6s %-16s %6s %16s ARGs" % + ("TID" if args.tid else "PID", "COMM", "SYS", "RET")) + +def sys_name(sys): + switcher = { + SYS_SHMGET: "SHMGET", + SYS_SHMAT: "SHMAT", + SYS_SHMDT: "SHMDT", + SYS_SHMCTL: "SHMCTL", + } + return switcher.get(sys, "N/A") + +shmget_flags = [ + { 'name' : 'IPC_CREAT', 'value' : 0o1000 }, + { 'name' : 'IPC_EXCL', 'value' : 0o2000 }, + { 'name' : 'SHM_HUGETLB', 'value' : 0o4000 }, + { 'name' : 'SHM_HUGE_2MB', 'value' : 21 << 26 }, + { 'name' : 'SHM_HUGE_1GB', 'value' : 30 << 26 }, + { 'name' : 'SHM_NORESERVE', 'value' : 0o10000 }, + { 'name' : 'SHM_EXEC', 'value' : 0o100000 } +] + +shmat_flags = [ + { 'name' : 'SHM_RDONLY', 'value' : 0o10000 }, + { 'name' : 'SHM_RND', 'value' : 0o20000 }, + { 'name' : 'SHM_REMAP', 'value' : 0o40000 }, + { 'name' : 'SHM_EXEC', 'value' : 0o100000 }, +] + +def shmflg_str(val, flags): + cur = filter(lambda x : x['value'] & val, flags) + str = "0x%x" % val + + if (not val): + return str + + str += " (" + cnt = 0 + for x in cur: + if cnt: + str += "|" + str += x['name'] + val &= ~x['value'] + cnt += 1 + + if val != 0 or not cnt: + if cnt: + str += "|" + str += "0%o" % val + + str += ")" + return str + +# process event +def print_event(cpu, data, size): + event = ct.cast(data, ct.POINTER(Data)).contents + global initial_ts + + if not initial_ts: + initial_ts = event.ts + + if args.name and bytes(args.name) not in event.comm: + return + + if args.timestamp: + delta = event.ts - initial_ts + print("%-14.9f" % (float(delta) / 1000000), end="") + + print("%-6d %-16s %6s %16lx " % + (event.id & 0xffffffff if args.tid else event.id >> 32, + event.comm.decode(), sys_name(event.sys), event.ret), end = '') + + if event.sys == SYS_SHMGET: + print("key: 0x%lx, size: %lu, shmflg: %s" % + (event.key, event.size, shmflg_str(event.shmflg, shmget_flags))) + + if event.sys == SYS_SHMAT: + print("shmid: 0x%lx, shmaddr: 0x%lx, shmflg: %s" % + (event.shmid, event.shmaddr, shmflg_str(event.shmflg, shmat_flags))) + + if event.sys == SYS_SHMDT: + print("shmaddr: 0x%lx" % (event.shmaddr)) + + if event.sys == SYS_SHMCTL: + print("shmid: 0x%lx, cmd: %lu, buf: 0x%x" % (event.shmid, event.cmd, event.buf)) + +# loop with callback to print_event +b["events"].open_perf_buffer(print_event, page_cnt=64) +start_time = datetime.now() +while not args.duration or datetime.now() - start_time < args.duration: + b.perf_buffer_poll(timeout=1000) diff --git a/tools/shmsnoop_example.txt b/tools/shmsnoop_example.txt new file mode 100644 index 000000000000..53bbe7091ece --- /dev/null +++ b/tools/shmsnoop_example.txt @@ -0,0 +1,66 @@ +Demonstrations of shmsnoop, the Linux eBPF/bcc version. + +shmsnoop traces shm*() syscalls, for example: + +# ./shmsnoop.py +PID COMM SYS RET ARGs +19813 server SHMGET 10000 key: 0x78020001, size: 20, shmflg: 0x3b6 (IPC_CREAT|0666) +19813 server SHMAT 7f1cf8b1f000 shmid: 0x10000, shmaddr: 0x0, shmflg: 0x0 +19816 client SHMGET 10000 key: 0x78020001, size: 20, shmflg: 0x1b6 (0666) +19816 client SHMAT 7f4fd8ee7000 shmid: 0x10000, shmaddr: 0x0, shmflg: 0x0 +19816 client SHMDT 0 shmaddr: 0x7f4fd8ee7000 +19813 server SHMDT 0 shmaddr: 0x7f1cf8b1f000 +19813 server SHMCTL 0 shmid: 0x10000, cmd: 0, buf: 0x0 + + +Every call the shm* syscall (SHM column) is displayed +on separate line together with process info (PID/COMM +columns) and argument details: return value (RET column) +and syscall arguments (ARGs column). + +The ARGs column contains 'arg: value' couples that represent +given syscall arguments as described in their manpage. + +This works by tracing shm* system calls and sending +argument details to the python script. + +A -T option can be used to include a timestamp column, +and a -n option to match on a command name. Regular +expressions are allowed. For example, matching commands +containing "server" with timestamps: + +# ./shmsnoop.py -T -n server +TIME(s) PID COMM SYS RET ARGs +0.563194000 19825 server SHMDT 0 shmaddr: 0x7f74362e4000 +0.563237000 19825 server SHMCTL 0 shmid: 0x18000, cmd: 0, buf: 0x0 + + +A -p option can be used to trace only selected process: + +# ./shmsnoop.py -p 19855 +PID COMM SYS RET ARGs +19855 server SHMDT 0 shmaddr: 0x7f4329ff8000 +19855 server SHMCTL 0 shmid: 0x20000, cmd: 0, buf: 0x0 + +USAGE message: +# ./shmsnoop.py -h +usage: shmsnoop.py [-h] [-T] [-p PID] [-t TID] [-d DURATION] [-n NAME] + +Trace shm*() syscalls + +optional arguments: + -h, --help show this help message and exit + -T, --timestamp include timestamp on output + -p PID, --pid PID trace this PID only + -t TID, --tid TID trace this TID only + -d DURATION, --duration DURATION + total duration of trace in seconds + -n NAME, --name NAME only print process names containing this name + +examples: + ./shmsnoop # trace all shm*() syscalls + ./shmsnoop -T # include timestamps + ./shmsnoop -p 181 # only trace PID 181 + ./shmsnoop -t 123 # only trace TID 123 + ./shmsnoop -d 10 # trace for 10 seconds only + ./shmsnoop -n main # only print process names containing "main" diff --git a/tools/sofdsnoop.py b/tools/sofdsnoop.py new file mode 100755 index 000000000000..d6e131e8bfec --- /dev/null +++ b/tools/sofdsnoop.py @@ -0,0 +1,345 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# sofdsnoop traces file descriptors passed via socket +# For Linux, uses BCC, eBPF. Embedded C. +# +# USAGE: sofdsnoop +# +# Copyright (c) 2018 Jiri Olsa. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 30-Jul-2018 Jiri Olsa Created this. + +from __future__ import print_function +from bcc import ArgString, BPF +import os +import argparse +import ctypes as ct +from datetime import datetime, timedelta + +# arguments +examples = """examples: + ./sofdsnoop # trace file descriptors passes + ./sofdsnoop -T # include timestamps + ./sofdsnoop -p 181 # only trace PID 181 + ./sofdsnoop -t 123 # only trace TID 123 + ./sofdsnoop -d 10 # trace for 10 seconds only + ./sofdsnoop -n main # only print process names containing "main" + +""" +parser = argparse.ArgumentParser( + description="Trace file descriptors passed via socket", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-T", "--timestamp", action="store_true", + help="include timestamp on output") +parser.add_argument("-p", "--pid", + help="trace this PID only") +parser.add_argument("-t", "--tid", + help="trace this TID only") +parser.add_argument("-n", "--name", + type=ArgString, + help="only print process names containing this name") +parser.add_argument("-d", "--duration", + help="total duration of trace in seconds") +args = parser.parse_args() +debug = 0 + +ACTION_SEND=0 +ACTION_RECV=1 +MAX_FD=10 + +if args.duration: + args.duration = timedelta(seconds=int(args.duration)) + +# define BPF program +bpf_text = """ +#include +#include +#include +#include +#include + +#define MAX_FD 10 +#define ACTION_SEND 0 +#define ACTION_RECV 1 + +struct val_t { + u64 id; + u64 ts; + int action; + int sock_fd; + int fd_cnt; + int fd[MAX_FD]; + char comm[TASK_COMM_LEN]; +}; + +BPF_HASH(detach_ptr, u64, struct cmsghdr *); +BPF_HASH(sock_fd, u64, int); +BPF_PERF_OUTPUT(events); + +static void set_fd(int fd) +{ + u64 id = bpf_get_current_pid_tgid(); + + sock_fd.update(&id, &fd); +} + +static int get_fd(void) +{ + u64 id = bpf_get_current_pid_tgid(); + int *fd; + + fd = sock_fd.lookup(&id); + return fd ? *fd : -1; +} + +static void put_fd(void) +{ + u64 id = bpf_get_current_pid_tgid(); + + sock_fd.delete(&id); +} + +static int sent_1(struct pt_regs *ctx, struct val_t *val, int num, void *data) +{ + val->fd_cnt = min(num, MAX_FD); + + if (bpf_probe_read(&val->fd[0], MAX_FD * sizeof(int), data)) + return -1; + + events.perf_submit(ctx, val, sizeof(*val)); + return 0; +} + +#define SEND_1 \ + if (sent_1(ctx, &val, num, (void *) data)) \ + return 0; \ + \ + num -= MAX_FD; \ + if (num < 0) \ + return 0; \ + \ + data += MAX_FD; + +#define SEND_2 SEND_1 SEND_1 +#define SEND_4 SEND_2 SEND_2 +#define SEND_8 SEND_4 SEND_4 +#define SEND_260 SEND_8 SEND_8 SEND_8 SEND_2 + +static int send(struct pt_regs *ctx, struct cmsghdr *cmsg, int action) +{ + struct val_t val = { 0 }; + int *data, num, fd; + u64 tsp = bpf_ktime_get_ns(); + + data = (void *) ((char *) cmsg + sizeof(struct cmsghdr)); + num = (cmsg->cmsg_len - sizeof(struct cmsghdr)) / sizeof(int); + + val.id = bpf_get_current_pid_tgid(); + val.action = action; + val.sock_fd = get_fd(); + val.ts = tsp / 1000; + + if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) != 0) + return 0; + + SEND_260 + return 0; +} + +static bool allow_pid(u64 id) +{ + u32 pid = id >> 32; // PID is higher part + u32 tid = id; // Cast and get the lower part + + FILTER + + return 1; +} + +int trace_scm_send_entry(struct pt_regs *ctx, struct socket *sock, struct msghdr *hdr) +{ + struct cmsghdr *cmsg = NULL; + + if (!allow_pid(bpf_get_current_pid_tgid())) + return 0; + + if (hdr->msg_controllen >= sizeof(struct cmsghdr)) + cmsg = hdr->msg_control; + + if (!cmsg || (cmsg->cmsg_type != SCM_RIGHTS)) + return 0; + + return send(ctx, cmsg, ACTION_SEND); +}; + +int trace_scm_detach_fds_entry(struct pt_regs *ctx, struct msghdr *hdr) +{ + struct cmsghdr *cmsg = NULL; + u64 id = bpf_get_current_pid_tgid(); + + if (!allow_pid(id)) + return 0; + + if (hdr->msg_controllen >= sizeof(struct cmsghdr)) + cmsg = hdr->msg_control; + + if (!cmsg) + return 0; + + detach_ptr.update(&id, &cmsg); + return 0; +}; + +int trace_scm_detach_fds_return(struct pt_regs *ctx) +{ + struct cmsghdr **cmsgp; + u64 id = bpf_get_current_pid_tgid(); + + if (!allow_pid(id)) + return 0; + + cmsgp = detach_ptr.lookup(&id); + + if (!cmsgp) + return 0; + + return send(ctx, *cmsgp, ACTION_RECV); +} + +int syscall__sendmsg(struct pt_regs *ctx, u64 fd, u64 msg, u64 flags) +{ + struct pt_regs p; + + if (!allow_pid(bpf_get_current_pid_tgid())) + return 0; + + set_fd(fd); + return 0; +} + +int trace_sendmsg_return(struct pt_regs *ctx) +{ + if (!allow_pid(bpf_get_current_pid_tgid())) + return 0; + + put_fd(); + return 0; +} + +int syscall__recvmsg(struct pt_regs *ctx, u64 fd, u64 msg, u64 flags) +{ + struct pt_regs p; + + if (!allow_pid(bpf_get_current_pid_tgid())) + return 0; + + fd = fd; + + set_fd(fd); + return 0; +} + +int trace_recvmsg_return(struct pt_regs *ctx) +{ + if (!allow_pid(bpf_get_current_pid_tgid())) + return 0; + + put_fd(); + return 0; +} + +""" + +if args.tid: # TID trumps PID + bpf_text = bpf_text.replace('FILTER', + 'if (tid != %s) { return 0; }' % args.tid) +elif args.pid: + bpf_text = bpf_text.replace('FILTER', + 'if (pid != %s) { return 0; }' % args.pid) +else: + bpf_text = bpf_text.replace('FILTER', '') + +# initialize BPF +b = BPF(text=bpf_text) + +syscall_fnname = b.get_syscall_fnname("sendmsg") +if BPF.ksymname(syscall_fnname) != -1: + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__sendmsg") + b.attach_kretprobe(event=syscall_fnname, fn_name="trace_sendmsg_return") + +syscall_fnname = b.get_syscall_fnname("recvmsg") +if BPF.ksymname(syscall_fnname) != -1: + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__recvmsg") + b.attach_kretprobe(event=syscall_fnname, fn_name="trace_recvmsg_return") + +b.attach_kprobe(event="__scm_send", fn_name="trace_scm_send_entry") +b.attach_kprobe(event="scm_detach_fds", fn_name="trace_scm_detach_fds_entry") +b.attach_kretprobe(event="scm_detach_fds", fn_name="trace_scm_detach_fds_return") + +TASK_COMM_LEN = 16 # linux/sched.h + +initial_ts = 0 + +class Data(ct.Structure): + _fields_ = [ + ("id", ct.c_ulonglong), + ("ts", ct.c_ulonglong), + ("action", ct.c_int), + ("sock_fd", ct.c_int), + ("fd_cnt", ct.c_int), + ("fd", ct.c_int * MAX_FD), + ("comm", ct.c_char * TASK_COMM_LEN), + ] + +# header +if args.timestamp: + print("%-14s" % ("TIME(s)"), end="") +print("%-6s %-6s %-16s %-25s %-5s %s" % + ("ACTION", "TID", "COMM", "SOCKET", "FD", "NAME")) + +def get_file(pid, fd): + proc = "/proc/%d/fd/%d" % (pid, fd) + try: + return os.readlink(proc) + except OSError as err: + return "N/A" + +# process event +def print_event(cpu, data, size): + event = ct.cast(data, ct.POINTER(Data)).contents + tid = event.id & 0xffffffff; + + cnt = min(MAX_FD, event.fd_cnt); + + if args.name and bytes(args.name) not in event.comm: + return + + for i in range(0, cnt): + global initial_ts + + if not initial_ts: + initial_ts = event.ts + + if args.timestamp: + delta = event.ts - initial_ts + print("%-14.9f" % (float(delta) / 1000000), end="") + + print("%-6s %-6d %-16s " % + ("SEND" if event.action == ACTION_SEND else "RECV", + tid, event.comm.decode()), end = '') + + sock = "%d:%s" % (event.sock_fd, get_file(tid, event.sock_fd)) + print("%-25s " % sock, end = '') + + fd = event.fd[i] + fd_file = get_file(tid, fd) if event.action == ACTION_SEND else "" + print("%-5d %s" % (fd, fd_file)) + +# loop with callback to print_event +b["events"].open_perf_buffer(print_event, page_cnt=64) +start_time = datetime.now() +while not args.duration or datetime.now() - start_time < args.duration: + b.perf_buffer_poll(timeout=1000) diff --git a/tools/sofdsnoop_example.txt b/tools/sofdsnoop_example.txt new file mode 100644 index 000000000000..740a26fdf4df --- /dev/null +++ b/tools/sofdsnoop_example.txt @@ -0,0 +1,69 @@ +Demonstrations of sofdsnoop, the Linux eBPF/bcc version. + +sofdsnoop traces FDs passed through unix sockets + +# ./sofdsnoop.py +ACTION TID COMM SOCKET FD NAME +SEND 2576 Web Content 24:socket:[39763] 51 /dev/shm/org.mozilla.ipc.2576.23874 +RECV 2576 Web Content 49:socket:[809997] 51 +SEND 2576 Web Content 24:socket:[39763] 58 N/A +RECV 2464 Gecko_IOThread 75:socket:[39753] 55 + +Every file descriptor that is passed via unix sockets os displayed +on separate line together with process info (TID/COMM columns), +ACTION details (SEND/RECV), file descriptor number (FD) and its +translation to file if available (NAME). + +The file descriptor (fd) value is bound to a process. The SEND +lines display the fd value within the sending process. The RECV +lines display the fd value of the sending process. That's why +there's translation to name only on SEND lines, where we are +able to find it in task proc records. + +This works by tracing sendmsg/recvmsg system calls to provide +the socket fds, and scm_send_entry/scm_detach_fds to provide +the file descriptor details. + +A -T option can be used to include a timestamp column, +and a -n option to match on a command name. Regular +expressions are allowed. For example, matching commands +containing "server" with timestamps: + +# ./sofdsnoop.py -T -n Web +TIME(s) ACTION TID COMM SOCKET FD NAME +0.000000000 SEND 2576 Web Content 24:socket:[39763] 51 /dev/shm/org.mozilla.ipc.2576.25404 (deleted) +0.000413000 RECV 2576 Web Content 49:/dev/shm/org.mozilla.ipc.2576.25404 (deleted) 51 +0.000558000 SEND 2576 Web Content 24:socket:[39763] 58 N/A +0.000952000 SEND 2576 Web Content 24:socket:[39763] 58 socket:[817962] + + +A -p option can be used to trace only selected process: + +# ./sofdsnoop.py -p 2576 -T +TIME(s) ACTION TID COMM SOCKET FD NAME +0.000000000 SEND 2576 Web Content 24:socket:[39763] 51 N/A +0.000138000 RECV 2576 Web Content 49:N/A 5 +0.000191000 SEND 2576 Web Content 24:socket:[39763] 58 N/A +0.000424000 RECV 2576 Web Content 51:/dev/shm/org.mozilla.ipc.2576.25319 (deleted) 49 + +USAGE message: +usage: sofdsnoop.py [-h] [-T] [-p PID] [-t TID] [-n NAME] [-d DURATION] + +Trace file descriptors passed via socket + +optional arguments: + -h, --help show this help message and exit + -T, --timestamp include timestamp on output + -p PID, --pid PID trace this PID only + -t TID, --tid TID trace this TID only + -n NAME, --name NAME only print process names containing this name + -d DURATION, --duration DURATION + total duration of trace in seconds + +examples: + ./sofdsnoop # trace file descriptors passes + ./sofdsnoop -T # include timestamps + ./sofdsnoop -p 181 # only trace PID 181 + ./sofdsnoop -t 123 # only trace TID 123 + ./sofdsnoop -d 10 # trace for 10 seconds only + ./sofdsnoop -n main # only print process names containing "main"