From 69ceaca09a992a59dced4130a1321335b64c2ea2 Mon Sep 17 00:00:00 2001 From: Sasha Goldshtein Date: Sat, 8 Apr 2017 12:35:09 +0300 Subject: [PATCH] softirqs: Migrate to kernel tracepoints instead of kprobes (#1091) This commit migrates softirqs to use kernel tracepoints instead of kprobes. Because tracepoints only provide the vector number and not the function name, we use a conversion table, which is borrowed from kernel/softirq.c, to translate the vector number to a display name. This table is expected to be fairly stable. Notably, new names have not been added since approximately 2009, and the last rename (without adding or removing a name) was in 2014. Resolves #1031. --- man/man8/softirqs.8 | 12 +-- tests/python/test_tools_smoke.py | 6 +- tools/old/softirqs.py | 146 +++++++++++++++++++++++++++++++ tools/softirqs.py | 68 +++++++------- 4 files changed, 190 insertions(+), 42 deletions(-) create mode 100755 tools/old/softirqs.py diff --git a/man/man8/softirqs.8 b/man/man8/softirqs.8 index 218d92815e78..a9a14414c237 100644 --- a/man/man8/softirqs.8 +++ b/man/man8/softirqs.8 @@ -9,10 +9,10 @@ show this time as either totals or histogram distributions. A system-wide summary of this time is shown by the %soft column of mpstat(1), and soft IRQ event counts (but not times) are available in /proc/softirqs. -WARNING: This currently uses dynamic tracing of various soft interrupt -functions, and can easily not work with different kernel versions. Check and -adjust the code as necessary. Also try in a test environment and ensure this -tool is safe before use. Future versions should switch to tracepoints. +This tool uses the irq:softirq_enter and irq:softirq_exit kernel tracepoints, +which is a stable tracing mechanism. BPF programs can attach to tracepoints +from Linux 4.7 only. An older version of this tool is available in tools/old, +and uses kprobes instead of tracepoints. Since this uses BPF, only the root user can use this tool. .SH REQUIREMENTS @@ -87,7 +87,7 @@ example usage, output, and commentary for this tool. Linux .SH STABILITY Unstable - in development. -.SH AUTHOR -Brendan Gregg +.SH AUTHORS +Brendan Gregg, Sasha Goldshtein .SH SEE ALSO hardirqs(8) diff --git a/tests/python/test_tools_smoke.py b/tests/python/test_tools_smoke.py index dce0daab43fd..e437354a07de 100755 --- a/tests/python/test_tools_smoke.py +++ b/tests/python/test_tools_smoke.py @@ -242,11 +242,9 @@ def test_runqlen(self): def test_slabratetop(self): self.run_with_duration("slabratetop.py 1 1") + @skipUnless(kernel_version_ge(4,7), "requires kernel >= 4.7") def test_softirqs(self): - # TODO Temporary disabled as softirqs.py doesn't work on recent - # kernels (can't find some of its attach targets). Need to revisit - # it to use the softirq tracepoints. Tracked in bcc#1031. - # self.run_with_duration("softirqs.py 1 1") + self.run_with_duration("softirqs.py 1 1") pass @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4") diff --git a/tools/old/softirqs.py b/tools/old/softirqs.py new file mode 100755 index 000000000000..0b2e50c7b72b --- /dev/null +++ b/tools/old/softirqs.py @@ -0,0 +1,146 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# softirqs Summarize soft IRQ (interrupt) event time. +# For Linux, uses BCC, eBPF. +# +# USAGE: softirqs [-h] [-T] [-N] [-d] [interval] [count] +# +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 20-Oct-2015 Brendan Gregg Created this. + +from __future__ import print_function +from bcc import BPF +from time import sleep, strftime +import argparse + +# arguments +examples = """examples: + ./softirqs # sum soft irq event time + ./softirqs -d # show soft irq event time as histograms + ./softirqs 1 10 # print 1 second summaries, 10 times + ./softirqs -NT 1 # 1s summaries, nanoseconds, and timestamps +""" +parser = argparse.ArgumentParser( + description="Summarize soft irq event time as histograms", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-T", "--timestamp", action="store_true", + help="include timestamp on output") +parser.add_argument("-N", "--nanoseconds", action="store_true", + help="output in nanoseconds") +parser.add_argument("-d", "--dist", action="store_true", + help="show distributions as histograms") +parser.add_argument("interval", nargs="?", default=99999999, + help="output interval, in seconds") +parser.add_argument("count", nargs="?", default=99999999, + help="number of outputs") +args = parser.parse_args() +countdown = int(args.count) +if args.nanoseconds: + factor = 1 + label = "nsecs" +else: + factor = 1000 + label = "usecs" +debug = 0 + +# define BPF program +bpf_text = """ +#include + +typedef struct irq_key { + u64 ip; + u64 slot; +} irq_key_t; +BPF_HASH(start, u32); +BPF_HASH(iptr, u32); +BPF_HISTOGRAM(dist, irq_key_t); + +// time IRQ +int trace_start(struct pt_regs *ctx) +{ + u32 pid = bpf_get_current_pid_tgid(); + u64 ip = PT_REGS_IP(ctx), ts = bpf_ktime_get_ns(); + start.update(&pid, &ts); + iptr.update(&pid, &ip); + return 0; +} + +int trace_completion(struct pt_regs *ctx) +{ + u64 *tsp, delta, ip, *ipp; + u32 pid = bpf_get_current_pid_tgid(); + + // fetch timestamp and calculate delta + tsp = start.lookup(&pid); + ipp = iptr.lookup(&pid); + if (tsp == 0 || ipp == 0) { + return 0; // missed start + } + delta = bpf_ktime_get_ns() - *tsp; + ip = *ipp; + + // store as sum or histogram + STORE + + start.delete(&pid); + iptr.delete(&pid); + return 0; +} +""" + +# code substitutions +if args.dist: + bpf_text = bpf_text.replace('STORE', + 'irq_key_t key = {.ip = ip, .slot = bpf_log2l(delta)};' + + 'dist.increment(key);') +else: + bpf_text = bpf_text.replace('STORE', + 'irq_key_t key = {.ip = ip, .slot = 0 /* ignore */};' + + 'u64 zero = 0, *vp = dist.lookup_or_init(&key, &zero);' + + '(*vp) += delta;') +if debug: + print(bpf_text) + +# load BPF program +b = BPF(text=bpf_text) + +# this should really use irq:softirq_entry/exit tracepoints; for now the +# soft irq functions are individually traced (search your kernel for +# open_softirq() calls, and adjust the following list as needed). +for softirqfunc in ("blk_iopoll_softirq", "blk_done_softirq", + "rcu_process_callbacks", "run_rebalance_domains", "tasklet_action", + "tasklet_hi_action", "run_timer_softirq", "net_tx_action", + "net_rx_action"): + b.attach_kprobe(event=softirqfunc, fn_name="trace_start") + b.attach_kretprobe(event=softirqfunc, fn_name="trace_completion") + +print("Tracing soft irq event time... Hit Ctrl-C to end.") + +# output +exiting = 0 if args.interval else 1 +dist = b.get_table("dist") +while (1): + try: + sleep(int(args.interval)) + except KeyboardInterrupt: + exiting = 1 + + print() + if args.timestamp: + print("%-8s\n" % strftime("%H:%M:%S"), end="") + + if args.dist: + dist.print_log2_hist(label, "softirq", section_print_fn=b.ksym) + else: + print("%-26s %11s" % ("SOFTIRQ", "TOTAL_" + label)) + for k, v in sorted(dist.items(), key=lambda dist: dist[1].value): + print("%-26s %11d" % (b.ksym(k.ip), v.value / factor)) + dist.clear() + + countdown -= 1 + if exiting or countdown == 0: + exit() diff --git a/tools/softirqs.py b/tools/softirqs.py index 0b2e50c7b72b..94413329fde7 100755 --- a/tools/softirqs.py +++ b/tools/softirqs.py @@ -9,7 +9,8 @@ # Copyright (c) 2015 Brendan Gregg. # Licensed under the Apache License, Version 2.0 (the "License") # -# 20-Oct-2015 Brendan Gregg Created this. +# 20-Oct-2015 Brendan Gregg Created this. +# 03-Apr-2017 Sasha Goldshtein Migrated to kernel tracepoints. from __future__ import print_function from bcc import BPF @@ -24,7 +25,7 @@ ./softirqs -NT 1 # 1s summaries, nanoseconds, and timestamps """ parser = argparse.ArgumentParser( - description="Summarize soft irq event time as histograms", + description="Summarize soft irq event time as histograms.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples) parser.add_argument("-T", "--timestamp", action="store_true", @@ -52,42 +53,49 @@ #include typedef struct irq_key { - u64 ip; + u32 vec; u64 slot; } irq_key_t; -BPF_HASH(start, u32); + +typedef struct account_val { + u64 ts; + u32 vec; +} account_val_t; + +BPF_HASH(start, u32, account_val_t); BPF_HASH(iptr, u32); BPF_HISTOGRAM(dist, irq_key_t); -// time IRQ -int trace_start(struct pt_regs *ctx) +TRACEPOINT_PROBE(irq, softirq_entry) { u32 pid = bpf_get_current_pid_tgid(); - u64 ip = PT_REGS_IP(ctx), ts = bpf_ktime_get_ns(); - start.update(&pid, &ts); - iptr.update(&pid, &ip); + account_val_t val = {}; + val.ts = bpf_ktime_get_ns(); + val.vec = args->vec; + start.update(&pid, &val); return 0; } -int trace_completion(struct pt_regs *ctx) +TRACEPOINT_PROBE(irq, softirq_exit) { - u64 *tsp, delta, ip, *ipp; + u64 delta; + u32 vec; u32 pid = bpf_get_current_pid_tgid(); + account_val_t *valp; + irq_key_t key = {0}; // fetch timestamp and calculate delta - tsp = start.lookup(&pid); - ipp = iptr.lookup(&pid); - if (tsp == 0 || ipp == 0) { + valp = start.lookup(&pid); + if (valp == 0) { return 0; // missed start } - delta = bpf_ktime_get_ns() - *tsp; - ip = *ipp; + delta = bpf_ktime_get_ns() - valp->ts; + vec = valp->vec; // store as sum or histogram STORE start.delete(&pid); - iptr.delete(&pid); return 0; } """ @@ -95,12 +103,12 @@ # code substitutions if args.dist: bpf_text = bpf_text.replace('STORE', - 'irq_key_t key = {.ip = ip, .slot = bpf_log2l(delta)};' + + 'key.vec = vec; key.slot = bpf_log2l(delta); ' + 'dist.increment(key);') else: bpf_text = bpf_text.replace('STORE', - 'irq_key_t key = {.ip = ip, .slot = 0 /* ignore */};' + - 'u64 zero = 0, *vp = dist.lookup_or_init(&key, &zero);' + + 'key.vec = valp->vec; ' + + 'u64 zero = 0, *vp = dist.lookup_or_init(&key, &zero); ' + '(*vp) += delta;') if debug: print(bpf_text) @@ -108,15 +116,11 @@ # load BPF program b = BPF(text=bpf_text) -# this should really use irq:softirq_entry/exit tracepoints; for now the -# soft irq functions are individually traced (search your kernel for -# open_softirq() calls, and adjust the following list as needed). -for softirqfunc in ("blk_iopoll_softirq", "blk_done_softirq", - "rcu_process_callbacks", "run_rebalance_domains", "tasklet_action", - "tasklet_hi_action", "run_timer_softirq", "net_tx_action", - "net_rx_action"): - b.attach_kprobe(event=softirqfunc, fn_name="trace_start") - b.attach_kretprobe(event=softirqfunc, fn_name="trace_completion") +def vec_to_name(vec): + # copied from softirq_to_name() in kernel/softirq.c + # may need updates if new softirq handlers are added + return ["hi", "timer", "net_tx", "net_rx", "block", "irq_poll", + "tasklet", "sched", "hrtimer", "rcu"][vec] print("Tracing soft irq event time... Hit Ctrl-C to end.") @@ -134,11 +138,11 @@ print("%-8s\n" % strftime("%H:%M:%S"), end="") if args.dist: - dist.print_log2_hist(label, "softirq", section_print_fn=b.ksym) + dist.print_log2_hist(label, "softirq", section_print_fn=vec_to_name) else: - print("%-26s %11s" % ("SOFTIRQ", "TOTAL_" + label)) + print("%-16s %11s" % ("SOFTIRQ", "TOTAL_" + label)) for k, v in sorted(dist.items(), key=lambda dist: dist[1].value): - print("%-26s %11d" % (b.ksym(k.ip), v.value / factor)) + print("%-16s %11d" % (vec_to_name(k.vec), v.value / factor)) dist.clear() countdown -= 1