From 71c22fcaa688a1aea92a9588b1a60a7e2329f38b Mon Sep 17 00:00:00 2001
From: Wenbo Zhang
Date: Thu, 20 Aug 2020 03:08:14 -0400
Subject: [PATCH] libbpf-tools: add CO-RE biostacks

Add biostacks, based on biostacks(8) from the BPF Performance Tools book.
It records the task, kernel stack and device seen when a block request is
started and, on completion, accounts the latency in a log2 histogram keyed
by that information.

trace_helpers gains a small /proc/partitions parser so that disks can be
looked up by name and by device number.

Signed-off-by: Wenbo Zhang
---
 libbpf-tools/.gitignore      |   1 +
 libbpf-tools/Makefile        |   1 +
 libbpf-tools/biostacks.bpf.c | 130 +++++++++++++++++++
 libbpf-tools/biostacks.c     | 241 +++++++++++++++++++++++++++++++++++
 libbpf-tools/biostacks.h     |  29 +++++
 libbpf-tools/trace_helpers.c | 121 ++++++++++++++++++
 libbpf-tools/trace_helpers.h |  15 +++
 7 files changed, 538 insertions(+)
 create mode 100644 libbpf-tools/biostacks.bpf.c
 create mode 100644 libbpf-tools/biostacks.c
 create mode 100644 libbpf-tools/biostacks.h

diff --git a/libbpf-tools/.gitignore b/libbpf-tools/.gitignore
index 9b681d4cdae8..65d47f9237d2 100644
--- a/libbpf-tools/.gitignore
+++ b/libbpf-tools/.gitignore
@@ -2,6 +2,7 @@
 /biolatency
 /biopattern
 /biosnoop
+/biostacks
 /bitesize
 /cpudist
 /drsnoop
diff --git a/libbpf-tools/Makefile b/libbpf-tools/Makefile
index 232450e41717..b4a444786c1e 100644
--- a/libbpf-tools/Makefile
+++ b/libbpf-tools/Makefile
@@ -13,6 +13,7 @@ APPS = \
 	biolatency \
 	biopattern \
 	biosnoop \
+	biostacks \
 	bitesize \
 	cpudist \
 	drsnoop \
diff --git a/libbpf-tools/biostacks.bpf.c b/libbpf-tools/biostacks.bpf.c
new file mode 100644
index 000000000000..30e5c9d43dca
--- /dev/null
+++ b/libbpf-tools/biostacks.bpf.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Wenbo Zhang
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "biostacks.h"
+#include "bits.bpf.h"
+#include "maps.bpf.h"
+
+#define MAX_ENTRIES	10240
+#define NULL		0
+
+/* Filled in by user space before load; targ_dev == -1 disables filtering. */
+const volatile bool targ_ms = false;
+const volatile dev_t targ_dev = -1;
+
+/* Per-request state: issue timestamp plus the histogram key. */
+struct internal_rqinfo {
+	u64 start_ts;
+	struct rqinfo rqinfo;
+};
+
+/* In-flight requests, keyed by the struct request pointer. */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, struct request *);
+	__type(value, struct internal_rqinfo);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} rqinfos SEC(".maps");
+
+/* Latency histograms, keyed by the issuing task, stack and device. */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, struct rqinfo);
+	__type(value, struct hist);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} hists SEC(".maps");
+
+static struct hist zero;
+
+/*
+ * Record the start of a request: timestamp, current task, kernel stack and
+ * device. With merge_bio set, an existing rqinfos entry for rq is refreshed
+ * in place; otherwise (or when none exists) the entry is written to the map.
+ * Requests on devices other than targ_dev are ignored when a filter is set.
+ */
+static __always_inline
+int trace_start(void *ctx, struct request *rq, bool merge_bio)
+{
+	struct internal_rqinfo *i_rqinfop = NULL, i_rqinfo = {};
+	struct gendisk *disk = BPF_CORE_READ(rq, rq_disk);
+	dev_t dev;
+
+	dev = disk ? MKDEV(BPF_CORE_READ(disk, major),
+			BPF_CORE_READ(disk, first_minor)) : 0;
+	if (targ_dev != -1 && targ_dev != dev)
+		return 0;
+
+	if (merge_bio)
+		i_rqinfop = bpf_map_lookup_elem(&rqinfos, &rq);
+	if (!i_rqinfop)
+		i_rqinfop = &i_rqinfo;
+
+	i_rqinfop->start_ts = bpf_ktime_get_ns();
+	i_rqinfop->rqinfo.pid = bpf_get_current_pid_tgid();
+	i_rqinfop->rqinfo.kern_stack_size =
+		bpf_get_stack(ctx, i_rqinfop->rqinfo.kern_stack,
+			sizeof(i_rqinfop->rqinfo.kern_stack), 0);
+	/* size of the comm array itself, not of a pointer to it */
+	bpf_get_current_comm(&i_rqinfop->rqinfo.comm,
+			sizeof(i_rqinfop->rqinfo.comm));
+	i_rqinfop->rqinfo.dev = dev;
+
+	if (i_rqinfop == &i_rqinfo)
+		bpf_map_update_elem(&rqinfos, &rq, i_rqinfop, 0);
+	return 0;
+}
+
+SEC("fentry/blk_account_io_start")
+int BPF_PROG(blk_account_io_start, struct request *rq)
+{
+	return trace_start(ctx, rq, false);
+}
+
+SEC("kprobe/blk_account_io_merge_bio")
+int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
+{
+	return trace_start(ctx, rq, true);
+}
+
+/*
+ * On completion, bucket the elapsed time (usecs, or msecs when targ_ms is
+ * set) into the log2 histogram for the recorded key and drop the in-flight
+ * entry.
+ */
+SEC("fentry/blk_account_io_done")
+int BPF_PROG(blk_account_io_done, struct request *rq)
+{
+	u64 slot, ts = bpf_ktime_get_ns();
+	struct internal_rqinfo *i_rqinfop;
+	struct hist *histp;
+	s64 delta;
+
+	i_rqinfop = bpf_map_lookup_elem(&rqinfos, &rq);
+	if (!i_rqinfop)
+		return 0;
+	delta = (s64)(ts - i_rqinfop->start_ts);
+	if (delta < 0)
+		goto cleanup;
+	histp = bpf_map_lookup_or_try_init(&hists, &i_rqinfop->rqinfo, &zero);
+	if (!histp)
+		goto cleanup;
+	if (targ_ms)
+		delta /= 1000000;
+	else
+		delta /= 1000;
+	slot = log2l(delta);
+	if (slot >= MAX_SLOTS)
+		slot = MAX_SLOTS - 1;
+	__sync_fetch_and_add(&histp->slots[slot], 1);
+
+cleanup:
+	bpf_map_delete_elem(&rqinfos, &rq);
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/biostacks.c b/libbpf-tools/biostacks.c
new file mode 100644
index 000000000000..ed788944c074
--- /dev/null
+++ b/libbpf-tools/biostacks.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2020 Wenbo Zhang
+//
+// Based on biostacks(8) from BPF-Perf-Tools-Book by Brendan Gregg.
+// 10-Aug-2020   Wenbo Zhang   Created this.
+#include <argp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "biostacks.h"
+#include "biostacks.skel.h"
+#include "trace_helpers.h"
+
+static struct env {
+	char *disk;
+	int duration;
+	bool milliseconds;
+	bool verbose;
+} env = {
+	.duration = -1,
+};
+
+const char *argp_program_version = "biostacks 0.1";
+const char *argp_program_bug_address = "";
+const char argp_program_doc[] =
+"Tracing block I/O with init stacks.\n"
+"\n"
+"USAGE: biostacks [--help] [-d disk] [duration]\n"
+"\n"
+"EXAMPLES:\n"
+"    biostacks              # trace block I/O with init stacks.\n"
+"    biostacks 1            # trace for 1 seconds only\n"
+"    biostacks -d sdc       # trace sdc only\n";
+
+static const struct argp_option opts[] = {
+	{ "disk", 'd', "DISK", 0, "Trace this disk only" },
+	{ "milliseconds", 'm', NULL, 0, "Millisecond histogram" },
+	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	static int pos_args;
+
+	switch (key) {
+	case 'v':
+		env.verbose = true;
+		break;
+	case 'h':
+		argp_usage(state);
+		break;
+	case 'd':
+		env.disk = arg;
+		if (strlen(arg) + 1 > DISK_NAME_LEN) {
+			fprintf(stderr, "invalid disk name: too long\n");
+			argp_usage(state);
+		}
+		break;
+	case 'm':
+		env.milliseconds = true;
+		break;
+	case ARGP_KEY_ARG:
+		if (pos_args++) {
+			fprintf(stderr,
+				"unrecognized positional argument: %s\n", arg);
+			argp_usage(state);
+		}
+		errno = 0;
+		env.duration = strtoll(arg, NULL, 10);
+		if (errno || env.duration <= 0) {
+			fprintf(stderr, "invalid duration (in s): %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+int libbpf_print_fn(enum libbpf_print_level level,
+		    const char *format, va_list args)
+{
+	if (level == LIBBPF_DEBUG && !env.verbose)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static void sig_handler(int sig)
+{
+	/* empty on purpose: SIGINT only has to cut sleep() short in main() */
+}
+
+/*
+ * Walk every key of the hists map behind fd and print, per key, the task,
+ * the disk name, the symbolized kernel stack and the latency histogram.
+ */
+static
+void print_map(struct ksyms *ksyms, struct partitions *partitions, int fd)
+{
+	char *units = env.milliseconds ? "msecs" : "usecs";
+	struct rqinfo lookup_key = {}, next_key;
+	const struct partition *partition;
+	const struct ksym *ksym;
+	int num_stack, i, err;
+	struct hist hist;
+
+	while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
+		err = bpf_map_lookup_elem(fd, &next_key, &hist);
+		if (err < 0) {
+			fprintf(stderr, "failed to lookup hist: %d\n", err);
+			return;
+		}
+		partition = partitions__get_by_dev(partitions, next_key.dev);
+		printf("%-14.14s %-6d %-7s\n",
+			next_key.comm, next_key.pid,
+			partition ? partition->name : "Unknown");
+		num_stack = next_key.kern_stack_size /
+			(int)sizeof(next_key.kern_stack[0]);
+		for (i = 0; i < num_stack; i++) {
+			ksym = ksyms__map_addr(ksyms, next_key.kern_stack[i]);
+			printf("%s\n", ksym ? ksym->name : "Unknown");
+		}
+		print_log2_hist(hist.slots, MAX_SLOTS, units);
+		printf("\n");
+		lookup_key = next_key;
+	}
+
+	return;
+}
+
+int main(int argc, char **argv)
+{
+	struct partitions *partitions = NULL;
+	const struct partition *partition;
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+	};
+	struct ksyms *ksyms = NULL;
+	struct biostacks_bpf *obj;
+	int err;
+
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	libbpf_set_print(libbpf_print_fn);
+
+	err = bump_memlock_rlimit();
+	if (err) {
+		fprintf(stderr, "failed to increase rlimit: %d\n", err);
+		return 1;
+	}
+
+	obj = biostacks_bpf__open();
+	if (!obj) {
+		fprintf(stderr, "failed to open and/or load BPF object\n");
+		return 1;
+	}
+
+	partitions = partitions__load();
+	if (!partitions) {
+		fprintf(stderr, "failed to load partitions info\n");
+		err = 1;
+		goto cleanup;
+	}
+
+	/* initialize global data (filtering options) */
+	if (env.disk) {
+		partition = partitions__get_by_name(partitions, env.disk);
+		if (!partition) {
+			fprintf(stderr, "invalid partition name: does not exist\n");
+			err = 1;
+			goto cleanup;
+		}
+		obj->rodata->targ_dev = partition->dev;
+	}
+
+	obj->rodata->targ_ms = env.milliseconds;
+
+	err = biostacks_bpf__load(obj);
+	if (err) {
+		fprintf(stderr, "failed to load BPF object: %d\n", err);
+		goto cleanup;
+	}
+
+	obj->links.blk_account_io_start =
+		bpf_program__attach(obj->progs.blk_account_io_start);
+	err = libbpf_get_error(obj->links.blk_account_io_start);
+	if (err) {
+		fprintf(stderr, "failed to attach blk_account_io_start: %s\n",
+			strerror(-err));
+		goto cleanup;
+	}
+	ksyms = ksyms__load();
+	if (!ksyms) {
+		fprintf(stderr, "failed to load kallsyms\n");
+		err = 1;
+		goto cleanup;
+	}
+	/* the merge hook is optional: attach only if kallsyms lists it */
+	if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
+		obj->links.blk_account_io_merge_bio =
+			bpf_program__attach(obj->
+					progs.blk_account_io_merge_bio);
+		err = libbpf_get_error(obj->
+				links.blk_account_io_merge_bio);
+		if (err) {
+			fprintf(stderr, "failed to attach "
+				"blk_account_io_merge_bio: %s\n",
+				strerror(-err));
+			goto cleanup;
+		}
+	}
+	obj->links.blk_account_io_done =
+		bpf_program__attach(obj->progs.blk_account_io_done);
+	err = libbpf_get_error(obj->links.blk_account_io_done);
+	if (err) {
+		fprintf(stderr, "failed to attach blk_account_io_done: %s\n",
+			strerror(-err));
+		goto cleanup;
+	}
+
+	signal(SIGINT, sig_handler);
+
+	printf("Tracing block I/O with init stacks. Hit Ctrl-C to end.\n");
+	sleep(env.duration);
+	print_map(ksyms, partitions, bpf_map__fd(obj->maps.hists));
+
+cleanup:
+	biostacks_bpf__destroy(obj);
+	ksyms__free(ksyms);
+	partitions__free(partitions);
+
+	return err != 0;
+}
diff --git a/libbpf-tools/biostacks.h b/libbpf-tools/biostacks.h
new file mode 100644
index 000000000000..fdb5999ef4ef
--- /dev/null
+++ b/libbpf-tools/biostacks.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BIOSTACKS_H
+#define __BIOSTACKS_H
+
+#define DISK_NAME_LEN	32
+#define TASK_COMM_LEN	16
+#define MAX_SLOTS	20
+#define MAX_STACK	20
+
+#define MINORBITS	20
+#define MINORMASK	((1U << MINORBITS) - 1)
+
+#define MKDEV(ma,mi)	(((ma) << MINORBITS) | (mi))
+
+/* Histogram key: one entry per distinct task / kernel stack / device. */
+struct rqinfo {
+	__u32 pid;
+	int kern_stack_size;	/* bytes, as returned by bpf_get_stack() */
+	__u64 kern_stack[MAX_STACK];
+	char comm[TASK_COMM_LEN];
+	__u32 dev;
+};
+
+/* log2 latency buckets */
+struct hist {
+	__u32 slots[MAX_SLOTS];
+};
+
+#endif /* __BIOSTACKS_H */
diff --git a/libbpf-tools/trace_helpers.c b/libbpf-tools/trace_helpers.c
index a1bbf81e0ef3..9452d208b6bf 100644
--- a/libbpf-tools/trace_helpers.c
+++ b/libbpf-tools/trace_helpers.c
@@ -13,6 +13,13 @@
 	(void) (&_min1 == &_min2);		\
 	_min1 < _min2 ? _min1 : _min2; })
 
+#define DISK_NAME_LEN	32
+
+#define MINORBITS	20
+#define MINORMASK	((1U << MINORBITS) - 1)
+
+#define MKDEV(ma,mi)	(((ma) << MINORBITS) | (mi))
+
 struct ksyms {
 	struct ksym *syms;
 	int syms_sz;
@@ -159,6 +166,120 @@ const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms,
 	return NULL;
 }
 
+/* Growable array of the entries parsed from /proc/partitions. */
+struct partitions {
+	struct partition *items;
+	int sz;
+};
+
+/*
+ * Append one entry holding a private copy of name.
+ * Returns 0 on success, -1 if memory cannot be allocated.
+ */
+static int partitions__add_partition(struct partitions *partitions,
+				const char *name, unsigned int dev)
+{
+	struct partition *partition;
+	void *tmp;
+
+	tmp = realloc(partitions->items, (partitions->sz + 1) *
+		sizeof(*partitions->items));
+	if (!tmp)
+		return -1;
+	partitions->items = tmp;
+	partition = &partitions->items[partitions->sz];
+	partition->name = strdup(name);
+	if (!partition->name)
+		return -1;
+	partition->dev = dev;
+	partitions->sz++;
+
+	return 0;
+}
+
+/*
+ * Parse /proc/partitions into a lookup table. Returns NULL if the file
+ * cannot be opened, a line is malformed or memory runs out; release the
+ * result with partitions__free().
+ */
+struct partitions *partitions__load(void)
+{
+	char part_name[DISK_NAME_LEN];
+	unsigned int devmaj, devmin;
+	unsigned long long nop;
+	struct partitions *partitions;
+	char buf[64];
+	FILE *f;
+
+	f = fopen("/proc/partitions", "r");
+	if (!f)
+		return NULL;
+
+	partitions = calloc(1, sizeof(*partitions));
+	if (!partitions)
+		goto err_out;
+
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		/* skip heading */
+		if (buf[0] != ' ' || buf[0] == '\n')
+			continue;
+		/* %31s: leave room for the NUL in part_name[DISK_NAME_LEN] */
+		if (sscanf(buf, "%u %u %llu %31s", &devmaj, &devmin, &nop,
+				part_name) != 4)
+			goto err_out;
+		if (partitions__add_partition(partitions, part_name,
+						MKDEV(devmaj, devmin)))
+			goto err_out;
+	}
+
+	fclose(f);
+	return partitions;
+
+err_out:
+	partitions__free(partitions);
+	fclose(f);
+	return NULL;
+}
+
+void partitions__free(struct partitions *partitions)
+{
+	int i;
+
+	if (!partitions)
+		return;
+
+	for (i = 0; i < partitions->sz; i++)
+		free(partitions->items[i].name);
+	free(partitions->items);
+	free(partitions);
+}
+
+const struct partition *
+partitions__get_by_dev(const struct partitions *partitions, unsigned int dev)
+{
+	int i;
+
+	for (i = 0; i < partitions->sz; i++) {
+		if (partitions->items[i].dev == dev)
+			return &partitions->items[i];
+	}
+
+	return NULL;
+}
+
+const struct partition *
+partitions__get_by_name(const struct partitions *partitions, const char *name)
+{
+	int i;
+
+	for (i = 0; i < partitions->sz; i++) {
+		if (strcmp(partitions->items[i].name, name) == 0)
+			return &partitions->items[i];
+	}
+
+	return NULL;
+}
+
 static void print_stars(unsigned int val, unsigned int val_max, int width)
 {
 	int num_stars, num_spaces, i;
diff --git a/libbpf-tools/trace_helpers.h b/libbpf-tools/trace_helpers.h
index 8d9510441ba3..eba6bc1c532c 100644
--- a/libbpf-tools/trace_helpers.h
+++ b/libbpf-tools/trace_helpers.h
@@ -18,6 +18,21 @@ const struct ksym *ksyms__map_addr(const struct ksyms *ksyms,
 const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms,
 				     const char *name);
 
+/* One /proc/partitions entry: device name and MKDEV-encoded number. */
+struct partition {
+	char *name;
+	unsigned int dev;
+};
+
+struct partitions;
+
+struct partitions *partitions__load(void);
+void partitions__free(struct partitions *partitions);
+const struct partition *
+partitions__get_by_dev(const struct partitions *partitions, unsigned int dev);
+const struct partition *
+partitions__get_by_name(const struct partitions *partitions, const char *name);
+
 void print_log2_hist(unsigned int *vals, int vals_size, char *val_type);
 unsigned long long get_ktime_ns(void);
 