diff --git a/libbpf-tools/.gitignore b/libbpf-tools/.gitignore index f1305ba526fa..b5ff0eff086b 100644 --- a/libbpf-tools/.gitignore +++ b/libbpf-tools/.gitignore @@ -11,6 +11,7 @@ /execsnoop /ext4dist /filelife +/fsdist /funclatency /gethostlatency /hardirqs diff --git a/libbpf-tools/Makefile b/libbpf-tools/Makefile index 3b43c3073e24..edf4852f1827 100644 --- a/libbpf-tools/Makefile +++ b/libbpf-tools/Makefile @@ -28,6 +28,7 @@ APPS = \ execsnoop \ ext4dist \ filelife \ + fsdist \ funclatency \ gethostlatency \ hardirqs \ diff --git a/libbpf-tools/fsdist.bpf.c b/libbpf-tools/fsdist.bpf.c new file mode 100644 index 000000000000..4321e3b3950f --- /dev/null +++ b/libbpf-tools/fsdist.bpf.c @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2021 Hengqi Chen */ +#include +#include +#include +#include "bits.bpf.h" +#include "fsdist.h" + +#define MAX_ENTRIES 10240 + +const volatile pid_t target_pid = 0; +const volatile bool in_ms = false; + +/* Entry timestamps (ns) of in-flight operations, keyed by thread id. */ struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u64); +} starts SEC(".maps"); + +struct hist hists[MAX_OP] = {}; /* one log2 latency histogram per fs_file_op */ + +/* Record the entry time of the current thread in `starts`; does nothing when target_pid is set and differs from the upper 32 bits of pid_tgid. */ static int probe_entry(void) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u32 pid = pid_tgid >> 32; + __u32 tid = (__u32)pid_tgid; + __u64 ts; + + if (target_pid && target_pid != pid) + return 0; + + ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&starts, &tid, &ts, BPF_ANY); + return 0; +} + +/* Account one completed operation: compute the time since the matching probe_entry(), scale it to usecs (msecs when in_ms), bump the log2 slot of hists[op], then drop the thread's entry from `starts`. */ static int probe_return(enum fs_file_op op) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u32 pid = pid_tgid >> 32; + __u32 tid = (__u32)pid_tgid; + __u64 ts = bpf_ktime_get_ns(); + __u64 *tsp, slot; + __s64 delta; + + tsp = bpf_map_lookup_elem(&starts, &tid); + if (!tsp) /* no entry recorded for this thread (e.g. filtered out by PID) */ + return 0; + + if (op >= MAX_OP) + goto cleanup; + + delta = (__s64)(ts - *tsp); + if (delta < 0) + goto cleanup; + + if (in_ms) + delta /= 1000000; + else + delta /= 1000; + + slot = log2l(delta); + if (slot >= MAX_SLOTS) + slot =
MAX_SLOTS - 1; + __sync_fetch_and_add(&hists[op].slots[slot], 1); + +cleanup: + bpf_map_delete_elem(&starts, &tid); + return 0; +} + +/* The "dummy_*" section targets below are placeholders; the loader supplies the real function names (see fentry_set_attach_target() and attach_kprobes() in fsdist.c). */ SEC("kprobe/dummy_file_read") +int BPF_KPROBE(file_read_entry) +{ + return probe_entry(); +} + +SEC("kretprobe/dummy_file_read") +int BPF_KRETPROBE(file_read_exit) +{ + return probe_return(READ); +} + +SEC("kprobe/dummy_file_write") +int BPF_KPROBE(file_write_entry) +{ + return probe_entry(); +} + +SEC("kretprobe/dummy_file_write") +int BPF_KRETPROBE(file_write_exit) +{ + return probe_return(WRITE); +} + +SEC("kprobe/dummy_file_open") +int BPF_KPROBE(file_open_entry) +{ + return probe_entry(); +} + +SEC("kretprobe/dummy_file_open") +int BPF_KRETPROBE(file_open_exit) +{ + return probe_return(OPEN); +} + +SEC("kprobe/dummy_file_sync") +int BPF_KPROBE(file_sync_entry) +{ + return probe_entry(); +} + +SEC("kretprobe/dummy_file_sync") +int BPF_KRETPROBE(file_sync_exit) +{ + return probe_return(FSYNC); +} + +SEC("kprobe/dummy_getattr") +int BPF_KPROBE(getattr_entry) +{ + return probe_entry(); +} + +SEC("kretprobe/dummy_getattr") +int BPF_KRETPROBE(getattr_exit) +{ + return probe_return(GETATTR); +} + +SEC("fentry/dummy_file_read") +int BPF_PROG(file_read_fentry) +{ + return probe_entry(); +} + +SEC("fexit/dummy_file_read") +int BPF_PROG(file_read_fexit) +{ + return probe_return(READ); +} + +SEC("fentry/dummy_file_write") +int BPF_PROG(file_write_fentry) +{ + return probe_entry(); +} + +SEC("fexit/dummy_file_write") +int BPF_PROG(file_write_fexit) +{ + return probe_return(WRITE); +} + +SEC("fentry/dummy_file_open") +int BPF_PROG(file_open_fentry) +{ + return probe_entry(); +} + +SEC("fexit/dummy_file_open") +int BPF_PROG(file_open_fexit) +{ + return probe_return(OPEN); +} + +SEC("fentry/dummy_file_sync") +int BPF_PROG(file_sync_fentry) +{ + return probe_entry(); +} + +SEC("fexit/dummy_file_sync") +int BPF_PROG(file_sync_fexit) +{ + return probe_return(FSYNC); +} + +SEC("fentry/dummy_getattr") +int BPF_PROG(getattr_fentry) +{ + return
probe_entry(); +} + +SEC("fexit/dummy_getattr") +int BPF_PROG(getattr_fexit) +{ + return probe_return(GETATTR); +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/libbpf-tools/fsdist.c b/libbpf-tools/fsdist.c new file mode 100644 index 000000000000..c8b6e24888e6 --- /dev/null +++ b/libbpf-tools/fsdist.c @@ -0,0 +1,453 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * fsdist Summarize file system operations latency. + * + * Copyright (c) 2021 Hengqi Chen + * 20-May-2021 Hengqi Chen Created this. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fsdist.h" +#include "fsdist.skel.h" +#include "trace_helpers.h" + +#define warn(...) fprintf(stderr, __VA_ARGS__) + +enum fs_type { + NONE, + BTRFS, + EXT4, + NFS, + XFS, +}; + +/* Per-filesystem kernel function to trace for each operation; a NULL entry means that operation is not traced for that filesystem. */ static struct fs_config { + const char *fs; + const char *op_funcs[MAX_OP]; +} fs_configs[] = { + [BTRFS] = { "btrfs", { + [READ] = "btrfs_file_read_iter", + [WRITE] = "btrfs_file_write_iter", + [OPEN] = "btrfs_file_open", + [FSYNC] = "btrfs_sync_file", + [GETATTR] = NULL, /* not supported */ + }}, + [EXT4] = { "ext4", { + [READ] = "ext4_file_read_iter", + [WRITE] = "ext4_file_write_iter", + [OPEN] = "ext4_file_open", + [FSYNC] = "ext4_sync_file", + [GETATTR] = "ext4_file_getattr", + }}, + [NFS] = { "nfs", { + [READ] = "nfs_file_read", + [WRITE] = "nfs_file_write", + [OPEN] = "nfs_file_open", + [FSYNC] = "nfs_file_fsync", + [GETATTR] = "nfs_getattr", + }}, + [XFS] = { "xfs", { + [READ] = "xfs_file_read_iter", + [WRITE] = "xfs_file_write_iter", + [OPEN] = "xfs_file_open", + [FSYNC] = "xfs_file_fsync", + [GETATTR] = NULL, /* not supported */ + }}, +}; + +static char *file_op_names[] = { + [READ] = "read", + [WRITE] = "write", + [OPEN] = "open", + [FSYNC] = "fsync", + [GETATTR] = "getattr", +}; + +static struct hist zero; +static volatile sig_atomic_t exiting; + +/* options */ +static enum fs_type fs_type = NONE; +static bool emit_timestamp =
false; +static bool timestamp_in_ms = false; +static pid_t target_pid = 0; +static int interval = 99999999; +static int count = 99999999; +static bool verbose = false; + +const char *argp_program_version = "fsdist 0.1"; +const char *argp_program_bug_address = + "https://github.com/iovisor/bcc/tree/master/libbpf-tools"; +const char argp_program_doc[] = +"Summarize file system operations latency.\n" +"\n" +"Usage: fsdist [-h] [-t] [-T] [-m] [-p PID] [interval] [count]\n" +"\n" +"EXAMPLES:\n" +" fsdist -t ext4 # show ext4 operations latency as a histogram\n" +" fsdist -t nfs -p 1216 # trace nfs operations with PID 1216 only\n" +" fsdist -t xfs 1 10 # trace xfs operations, 1s summaries, 10 times\n" +" fsdist -t btrfs -m 5 # trace btrfs operation, 5s summaries, in ms\n"; + +static const struct argp_option opts[] = { + { "timestamp", 'T', NULL, 0, "Print timestamp" }, + { "milliseconds", 'm', NULL, 0, "Millisecond histogram" }, + { "pid", 'p', "PID", 0, "Process ID to trace" }, + { "type", 't', "Filesystem", 0, "Which filesystem to trace, [btrfs/ext4/nfs/xfs]" }, + { "verbose", 'v', NULL, 0, "Verbose debug output" }, + { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, + {}, +}; + +/* argp callback: stores option flags, the filesystem type and the PID filter in the globals above; the first positional argument is the interval, the second the count. Returns ARGP_ERR_UNKNOWN for keys it does not handle. */ static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + static int pos_args; + + switch (key) { + case 'v': + verbose = true; + break; + case 'T': + emit_timestamp = true; + break; + case 'm': + timestamp_in_ms = true; + break; + case 't': + if (!strcmp(arg, "btrfs")) { + fs_type = BTRFS; + } else if (!strcmp(arg, "ext4")) { + fs_type = EXT4; + } else if (!strcmp(arg, "nfs")) { + fs_type = NFS; + } else if (!strcmp(arg, "xfs")) { + fs_type = XFS; + } else { + warn("invalid filesystem\n"); + argp_usage(state); + } + break; + case 'p': + errno = 0; + target_pid = strtol(arg, NULL, 10); + if (errno || target_pid <= 0) { + warn("invalid PID: %s\n", arg); + argp_usage(state); + } + break; + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + case
ARGP_KEY_ARG: + errno = 0; + if (pos_args == 0) { + interval = strtol(arg, NULL, 10); + if (errno || interval <= 0) { /* main() passes this to sleep(); reject 0 and negatives */ + warn("invalid interval\n"); + argp_usage(state); + } + } else if (pos_args == 1) { + count = strtol(arg, NULL, 10); + if (errno || count <= 0) { /* main() stops when --count reaches 0 */ + warn("invalid count\n"); + argp_usage(state); + } + } else { + warn("unrecognized positional argument: %s\n", arg); + argp_usage(state); + } + pos_args++; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +/* Preselect fs_type when the program is invoked under one of the names btrfsdist, ext4dist, nfsdist or xfsdist. */ static void alias_parse(char *prog) +{ + char *name = basename(prog); + + if (!strcmp(name, "btrfsdist")) { + fs_type = BTRFS; + } else if (!strcmp(name, "ext4dist")) { + fs_type = EXT4; + } else if (!strcmp(name, "nfsdist")) { + fs_type = NFS; + } else if (!strcmp(name, "xfsdist")) { + fs_type = XFS; + } +} + +/* libbpf log callback: drops LIBBPF_DEBUG messages unless -v was given, prints everything else to stderr. */ static int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static void sig_handler(int sig) +{ + exiting = 1; +} + +/* Snapshot each per-operation histogram from the BPF .bss, reset it to zero, and print the non-empty ones. Always returns 0. */ static int print_hists(struct fsdist_bpf__bss *bss) +{ + const char *units = timestamp_in_ms ?
"msecs" : "usecs"; + enum fs_file_op op; + + for (op = READ; op < MAX_OP; op++) { + struct hist hist = bss->hists[op]; + + bss->hists[op] = zero; + if (!memcmp(&zero, &hist, sizeof(hist))) + continue; + printf("operation = '%s'\n", file_op_names[op]); + print_log2_hist(hist.slots, MAX_SLOTS, units); + printf("\n"); + } + return 0; +} + +/* Return true only if fentry_exists() succeeds (without a module name, or with the filesystem name as module) for every configured function of the selected filesystem. */ static bool check_fentry(void) +{ + int i; + const char *fn_name, *module; + bool support_fentry = true; + + for (i = 0; i < MAX_OP; i++) { + fn_name = fs_configs[fs_type].op_funcs[i]; + module = fs_configs[fs_type].fs; + if (fn_name && !fentry_exists(fn_name, NULL) + && !fentry_exists(fn_name, module)) { + support_fentry = false; + break; + } + } + return support_fentry; +} + +/* Point every fentry/fexit program at the selected filesystem's function; the getattr pair is excluded from loading when no GETATTR function is configured. Returns the first error from bpf_program__set_attach_target(), or 0. */ static int fentry_set_attach_target(struct fsdist_bpf *obj) +{ + struct fs_config *cfg = &fs_configs[fs_type]; + int err = 0; + + err = err ?: bpf_program__set_attach_target(obj->progs.file_read_fentry, 0, cfg->op_funcs[READ]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_read_fexit, 0, cfg->op_funcs[READ]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_write_fentry, 0, cfg->op_funcs[WRITE]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_write_fexit, 0, cfg->op_funcs[WRITE]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_open_fentry, 0, cfg->op_funcs[OPEN]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_open_fexit, 0, cfg->op_funcs[OPEN]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_sync_fentry, 0, cfg->op_funcs[FSYNC]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_sync_fexit, 0, cfg->op_funcs[FSYNC]); + if (cfg->op_funcs[GETATTR]) { + err = err ?: bpf_program__set_attach_target(obj->progs.getattr_fentry, 0, cfg->op_funcs[GETATTR]); + err = err ?: bpf_program__set_attach_target(obj->progs.getattr_fexit, 0, cfg->op_funcs[GETATTR]); + } else { + bpf_program__set_autoload(obj->progs.getattr_fentry, false); +
bpf_program__set_autoload(obj->progs.getattr_fexit, false); + } + return err; +} + +/* Exclude all fentry/fexit programs from loading (used when falling back to kprobes). */ static void disable_fentry(struct fsdist_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.file_read_fentry, false); + bpf_program__set_autoload(obj->progs.file_read_fexit, false); + bpf_program__set_autoload(obj->progs.file_write_fentry, false); + bpf_program__set_autoload(obj->progs.file_write_fexit, false); + bpf_program__set_autoload(obj->progs.file_open_fentry, false); + bpf_program__set_autoload(obj->progs.file_open_fexit, false); + bpf_program__set_autoload(obj->progs.file_sync_fentry, false); + bpf_program__set_autoload(obj->progs.file_sync_fexit, false); + bpf_program__set_autoload(obj->progs.getattr_fentry, false); + bpf_program__set_autoload(obj->progs.getattr_fexit, false); +} + +/* Exclude all kprobe/kretprobe programs from loading (used when fentry is available). */ static void disable_kprobes(struct fsdist_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.file_read_entry, false); + bpf_program__set_autoload(obj->progs.file_read_exit, false); + bpf_program__set_autoload(obj->progs.file_write_entry, false); + bpf_program__set_autoload(obj->progs.file_write_exit, false); + bpf_program__set_autoload(obj->progs.file_open_entry, false); + bpf_program__set_autoload(obj->progs.file_open_exit, false); + bpf_program__set_autoload(obj->progs.file_sync_entry, false); + bpf_program__set_autoload(obj->progs.file_sync_exit, false); + bpf_program__set_autoload(obj->progs.getattr_entry, false); + bpf_program__set_autoload(obj->progs.getattr_exit, false); +} + +/* Attach a kprobe/kretprobe pair to the selected filesystem's function for each operation, storing the links in obj->links; GETATTR is skipped when not configured. Returns 0 on success, otherwise the libbpf error of the first failed attach (after printing it). */ static int attach_kprobes(struct fsdist_bpf *obj) +{ + long err = 0; + struct fs_config *cfg = &fs_configs[fs_type]; + + /* READ */ + obj->links.file_read_entry = bpf_program__attach_kprobe(obj->progs.file_read_entry, false, cfg->op_funcs[READ]); + err = libbpf_get_error(obj->links.file_read_entry); + if (err) + goto errout; + obj->links.file_read_exit = bpf_program__attach_kprobe(obj->progs.file_read_exit, true, cfg->op_funcs[READ]); + err = libbpf_get_error(obj->links.file_read_exit); + if (err) + goto errout;
+ /* WRITE */ + obj->links.file_write_entry = bpf_program__attach_kprobe(obj->progs.file_write_entry, false, cfg->op_funcs[WRITE]); + err = libbpf_get_error(obj->links.file_write_entry); + if (err) + goto errout; + obj->links.file_write_exit = bpf_program__attach_kprobe(obj->progs.file_write_exit, true, cfg->op_funcs[WRITE]); + err = libbpf_get_error(obj->links.file_write_exit); + if (err) + goto errout; + /* OPEN */ + obj->links.file_open_entry = bpf_program__attach_kprobe(obj->progs.file_open_entry, false, cfg->op_funcs[OPEN]); + err = libbpf_get_error(obj->links.file_open_entry); + if (err) + goto errout; + obj->links.file_open_exit = bpf_program__attach_kprobe(obj->progs.file_open_exit, true, cfg->op_funcs[OPEN]); + err = libbpf_get_error(obj->links.file_open_exit); + if (err) + goto errout; + /* FSYNC */ + obj->links.file_sync_entry = bpf_program__attach_kprobe(obj->progs.file_sync_entry, false, cfg->op_funcs[FSYNC]); + err = libbpf_get_error(obj->links.file_sync_entry); + if (err) + goto errout; + obj->links.file_sync_exit = bpf_program__attach_kprobe(obj->progs.file_sync_exit, true, cfg->op_funcs[FSYNC]); + err = libbpf_get_error(obj->links.file_sync_exit); + if (err) + goto errout; + /* GETATTR */ + if (!cfg->op_funcs[GETATTR]) + return 0; + obj->links.getattr_entry = bpf_program__attach_kprobe(obj->progs.getattr_entry, false, cfg->op_funcs[GETATTR]); + err = libbpf_get_error(obj->links.getattr_entry); + if (err) + goto errout; + obj->links.getattr_exit = bpf_program__attach_kprobe(obj->progs.getattr_exit, true, cfg->op_funcs[GETATTR]); + err = libbpf_get_error(obj->links.getattr_exit); + if (err) + goto errout; + return 0; +errout: + warn("failed to attach kprobe: %ld\n", err); + return err; +} + +/* Entry point: parse options, open/configure/load/attach the BPF skeleton, then print the histograms every `interval` seconds until `count` reports were printed or SIGINT arrived. */ int main(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + struct fsdist_bpf *skel; + struct tm *tm; + char ts[32]; + time_t t; + int err; + bool support_fentry; + +
alias_parse(argv[0]); + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + if (fs_type == NONE) { + warn("filesystem must be specified using -t option.\n"); + return 1; + } + + libbpf_set_print(libbpf_print_fn); + + err = bump_memlock_rlimit(); + if (err) { + warn("failed to increase rlimit: %d\n", err); + return 1; + } + + skel = fsdist_bpf__open(); + if (!skel) { + warn("failed to open BPF object\n"); + return 1; + } + + skel->rodata->target_pid = target_pid; + skel->rodata->in_ms = timestamp_in_ms; + + /* + * before load + * if fentry is supported, we set attach target and disable kprobes + * otherwise, we disable fentry and attach kprobes after loading + */ + support_fentry = check_fentry(); + if (support_fentry) { + err = fentry_set_attach_target(skel); + if (err) { + warn("failed to set attach target: %d\n", err); + goto cleanup; + } + disable_kprobes(skel); + } else { + disable_fentry(skel); + } + + err = fsdist_bpf__load(skel); + if (err) { + warn("failed to load BPF object: %d\n", err); + goto cleanup; + } + + /* + * after load + * if fentry is supported, let libbpf do auto load + * otherwise, we attach to kprobes manually + */ + err = support_fentry ? fsdist_bpf__attach(skel) : attach_kprobes(skel); + if (err) { + warn("failed to attach BPF programs: %d\n", err); + goto cleanup; + } + + signal(SIGINT, sig_handler); + + printf("Tracing %s operation latency...
Hit Ctrl-C to end.\n", + fs_configs[fs_type].fs); + + while (1) { + sleep(interval); + printf("\n"); + + if (emit_timestamp) { + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + printf("%-8s\n", ts); + } + + err = print_hists(skel->bss); + if (err) + break; + + if (exiting || --count == 0) + break; + } + +cleanup: + fsdist_bpf__destroy(skel); + + return err != 0; +} diff --git a/libbpf-tools/fsdist.h b/libbpf-tools/fsdist.h new file mode 100644 index 000000000000..a4184fc78858 --- /dev/null +++ b/libbpf-tools/fsdist.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __FSDIST_H +#define __FSDIST_H + +enum fs_file_op { + READ, + WRITE, + OPEN, + FSYNC, + GETATTR, + MAX_OP, +}; + +#define MAX_SLOTS 32 + +struct hist { + __u32 slots[MAX_SLOTS]; +}; + +#endif /* __FSDIST_H */