From ac00ac5d7fec86524ff815360d0cda2ec583cf24 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Nov 2019 12:45:59 +0100 Subject: [PATCH] Add lockstat tool Adding lockstat tool to trace kernel mutex lock events and display locks statistics and displays following data: Caller Avg Spin Count Max spin Total spin psi_avgs_work+0x2e 3675 5 5468 18379 flush_to_ldisc+0x22 2833 2 4210 5667 n_tty_write+0x30c 3914 1 3914 3914 isig+0x5d 2390 1 2390 2390 tty_buffer_flush+0x2a 1604 1 1604 1604 commit_echoes+0x22 1400 1 1400 1400 n_tty_receive_buf_common+0x3b9 1399 1 1399 1399 Caller Avg Hold Count Max hold Total hold flush_to_ldisc+0x22 42558 2 76135 85116 psi_avgs_work+0x2e 14821 5 20446 74106 n_tty_receive_buf_common+0x3b9 12300 1 12300 12300 n_tty_write+0x30c 10712 1 10712 10712 isig+0x5d 3362 1 3362 3362 tty_buffer_flush+0x2a 3078 1 3078 3078 commit_echoes+0x22 3017 1 3017 3017 Every caller to using kernel's mutex is displayed on every line. First portion of lines show the lock acquiring data, showing the amount of time it took to acquired given lock. 'Caller' - symbol acquiring the mutex 'Average Spin' - average time to acquire the mutex 'Count' - number of times mutex was acquired 'Max spin' - maximum time to acquire the mutex 'Total spin' - total time spent in acquiring the mutex Second portion of lines show the lock holding data, showing the amount of time it took to hold given lock. 'Caller' - symbol holding the mutex 'Average Hold' - average time mutex was held 'Count' - number of times mutex was held 'Max hold' - maximum time mutex was held 'Total hold' - total time spent in holding the mutex This works by tracing mutex_lock/unlock kprobes, udating the lock stats in maps and processing them in the python part. Examples: lockstats # trace system wide lockstats -d 5 # trace for 5 seconds only lockstats -i 5 # display stats every 5 seconds lockstats -p 123 # trace locks for PID 123 lockstats -t 321 # trace locks for PID 321 lockstats -c pipe_ # display stats only for lock callers with 'pipe_' substring lockstats -S acq_count # sort lock acquired results on acquired count lockstats -S hld_total # sort lock held results on total held time lockstats -S acq_count,hld_total # combination of above lockstats -n 3 # display 3 locks lockstats -s 3 # display 3 levels of stack Signed-off-by: Jiri Olsa --- README.md | 1 + man/man8/lockstat.8 | 190 +++++++++++++ snapcraft/snapcraft.yaml | 2 + tests/python/test_tools_smoke.py | 4 + tools/lockstat.py | 452 +++++++++++++++++++++++++++++++ tools/lockstat_example.txt | 167 ++++++++++++ tools/offwaketime.py | 6 +- 7 files changed, 819 insertions(+), 3 deletions(-) create mode 100644 man/man8/lockstat.8 create mode 100755 tools/lockstat.py create mode 100644 tools/lockstat_example.txt diff --git a/README.md b/README.md index 66e167dae01e..8ff346413253 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,7 @@ pair of .c and .py files, and some are directories of files. - tools/[hardirqs](tools/hardirqs.py): Measure hard IRQ (hard interrupt) event time. [Examples](tools/hardirqs_example.txt). - tools/[inject](tools/inject.py): Targeted error injection with call chain and predicates [Examples](tools/inject_example.txt). - tools/[killsnoop](tools/killsnoop.py): Trace signals issued by the kill() syscall. [Examples](tools/killsnoop_example.txt). +- tools/[lockstat](tools/lockstat.py): Traces kernel mutex lock events and display locks statistics. [Examples](tools/lockstat_example.txt). - tools/[llcstat](tools/llcstat.py): Summarize CPU cache references and misses by process. [Examples](tools/llcstat_example.txt). - tools/[mdflush](tools/mdflush.py): Trace md flush events. [Examples](tools/mdflush_example.txt). - tools/[memleak](tools/memleak.py): Display outstanding memory allocations to find memory leaks. [Examples](tools/memleak_example.txt). diff --git a/man/man8/lockstat.8 b/man/man8/lockstat.8 new file mode 100644 index 000000000000..6d871f147b9c --- /dev/null +++ b/man/man8/lockstat.8 @@ -0,0 +1,190 @@ +.TH lockstat 8 "2019-10-22" "USER COMMANDS" +.SH NAME +lockstat \- Traces kernel mutex lock events and display locks statistics. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B lockstat [\-h] [\-i] [\-n] [\-s] [\-c] [\-S FIELDS] [\-p] [\-t] [\-d DURATION] +.SH DESCRIPTION +lockstats traces kernel mutex lock events and display locks statistics +and displays following data: + + Caller Avg Spin Count Max spin Total spin + psi_avgs_work+0x2e 3675 5 5468 18379 + flush_to_ldisc+0x22 2833 2 4210 5667 + n_tty_write+0x30c 3914 1 3914 3914 + isig+0x5d 2390 1 2390 2390 + tty_buffer_flush+0x2a 1604 1 1604 1604 + commit_echoes+0x22 1400 1 1400 1400 + n_tty_receive_buf_common+0x3b9 1399 1 1399 1399 + + Caller Avg Hold Count Max hold Total hold + flush_to_ldisc+0x22 42558 2 76135 85116 + psi_avgs_work+0x2e 14821 5 20446 74106 + n_tty_receive_buf_common+0x3b9 12300 1 12300 12300 + n_tty_write+0x30c 10712 1 10712 10712 + isig+0x5d 3362 1 3362 3362 + tty_buffer_flush+0x2a 3078 1 3078 3078 + commit_echoes+0x22 3017 1 3017 3017 + + +Every caller to using kernel's mutex is displayed on every line. + +First portion of lines show the lock acquiring data, showing the +amount of time it took to acquired given lock. + + 'Caller' - symbol acquiring the mutex + 'Average Spin' - average time to acquire the mutex + 'Count' - number of times mutex was acquired + 'Max spin' - maximum time to acquire the mutex + 'Total spin' - total time spent in acquiring the mutex + +Second portion of lines show the lock holding data, showing the +amount of time it took to hold given lock. + + 'Caller' - symbol holding the mutex + 'Average Hold' - average time mutex was held + 'Count' - number of times mutex was held + 'Max hold' - maximum time mutex was held + 'Total hold' - total time spent in holding the mutex + +This works by tracing mutex_lock/unlock kprobes, updating the +lock stats in maps and processing them in the python part. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH OPTIONS +.TP +\-h +Print usage message. +.TP +\-i SEC +print summary at this interval (seconds) +.TP +\-c CALLER +print locks taken by given caller +.TP +\-S FIELDS +sort data on specific columns defined by FIELDS string (by default the data is sorted by Max columns) + +FIELDS string contains 1 or 2 fields describing columns to sort on +for both acquiring and holding data. Following fields are available: + + acq_max for 'Max spin' column + acq_total for 'Total spin' column + acq_count for 'Count' column + + hld_max for 'Max hold' column + hld_total for 'Total hold' column + hld_count for 'Count' column + +See EXAMPLES. + +.TP +\-n COUNT +print COUNT number of locks +.TP +\-s COUNT +print COUNT number of stack entries +.TP +\-p PID +Trace this process ID only (filtered in-kernel). +.TP +\-t TID +Trace this thread ID only (filtered in-kernel). +.TP +\-d DURATION +Total duration of trace in seconds. +.TP +\-\-stack-storage-size STACK_STORAGE_SIZE +Change the number of unique stack traces that can be stored and displayed. +.SH EXAMPLES +.TP +Sort lock acquired results on acquired count: +# +.B lockstats -S acq_count + +.TP +Sort lock held results on total held time: +# +.B lockstats -S hld_total + +.TP +Combination of above: +# +.B lockstats -S acq_count,hld_total + +.TP +Trace system wide: +# +.B lockstats + +.TP +Trace for 5 seconds only: +# +.B lockstats -d 5 + +.TP +Display stats every 5 seconds +# +.B lockstats -i 5 + +.TP +Trace locks for PID 123: +# +.B lockstats -p 123 + +.TP +Trace locks for PID 321: +# +.B lockstats -t 321 + +.TP +Display stats only for lock callers with 'pipe_' substring +# +.B lockstats -c pipe_ + +.TP +Display 3 locks: +# +.B lockstats -n 3 + +.TP +Display 10 levels of stack for the most expensive lock: +# +.B lockstats -n 1 -s 10 + +Tracing lock events... Hit Ctrl-C to end. +^C + Caller Avg Spin Count Max spin Total spin + pipe_wait+0xa9 670 397691 17273 266775437 + pipe_wait+0xa9 + pipe_read+0x206 + new_sync_read+0x12a + vfs_read+0x9d + ksys_read+0x5f + do_syscall_64+0x5b + entry_SYSCALL_64_after_hwframe+0x44 + + Caller Avg Hold Count Max hold Total hold + flush_to_ldisc+0x22 28381 3 65484 85144 + flush_to_ldisc+0x22 + process_one_work+0x1b0 + worker_thread+0x50 + kthread+0xfb + ret_from_fork+0x35 + +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH CREDITS +This tool is based on work of David Valin and his script. +.SH AUTHOR +Jiri Olsa diff --git a/snapcraft/snapcraft.yaml b/snapcraft/snapcraft.yaml index ee815b9572d7..09d71564a525 100644 --- a/snapcraft/snapcraft.yaml +++ b/snapcraft/snapcraft.yaml @@ -151,6 +151,8 @@ apps: command: bcc-wrapper javathreads killsnoop: command: bcc-wrapper killsnoop + lockstat: + command: bcc-wrapper lockstat llcstat: command: bcc-wrapper llcstat mdflush: diff --git a/tests/python/test_tools_smoke.py b/tests/python/test_tools_smoke.py index e1b10ae2810c..d220004c5449 100755 --- a/tests/python/test_tools_smoke.py +++ b/tests/python/test_tools_smoke.py @@ -199,6 +199,10 @@ def test_killsnoop(self): # a traceback but will not exit. self.run_with_int("killsnoop.py", kill=True) + @skipUnless(kernel_version_ge(4,18), "requires kernel >= 4.18") + def test_lockstat(self): + self.run_with_int("lockstat.py") + @skipUnless(kernel_version_ge(4,9), "requires kernel >= 4.9") def test_llcstat(self): # Requires PMU, which is not available in virtual machines. diff --git a/tools/lockstat.py b/tools/lockstat.py new file mode 100755 index 000000000000..07bff3ee5f38 --- /dev/null +++ b/tools/lockstat.py @@ -0,0 +1,452 @@ +#!/usr/bin/python +# +# lockstats traces lock events and display locks statistics. +# +# USAGE: lockstats +# + +from __future__ import print_function +from bcc import BPF, USDT +import argparse +import subprocess +import ctypes as ct +from time import sleep, strftime +from datetime import datetime, timedelta +import errno +from sys import stderr + +examples = """ + lockstats # trace system wide + lockstats -d 5 # trace for 5 seconds only + lockstats -i 5 # display stats every 5 seconds + lockstats -p 123 # trace locks for PID 123 + lockstats -t 321 # trace locks for PID 321 + lockstats -c pipe_ # display stats only for lock callers with 'pipe_' substring + lockstats -S acq_count # sort lock acquired results on acquired count + lockstats -S hld_total # sort lock held results on total held time + lockstats -S acq_count,hld_total # combination of above + lockstats -n 3 # display 3 locks + lockstats -s 3 # display 3 levels of stack +""" + +# arg validation +def positive_int(val): + try: + ival = int(val) + except ValueError: + raise argparse.ArgumentTypeError("must be an integer") + + if ival < 0: + raise argparse.ArgumentTypeError("must be positive") + return ival + +def positive_nonzero_int(val): + ival = positive_int(val) + if ival == 0: + raise argparse.ArgumentTypeError("must be nonzero") + return ival + +def stack_id_err(stack_id): + # -EFAULT in get_stackid normally means the stack-trace is not availible, + # Such as getting kernel stack trace in userspace code + return (stack_id < 0) and (stack_id != -errno.EFAULT) + +parser = argparse.ArgumentParser( + description="", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) + +time_group = parser.add_mutually_exclusive_group() +time_group.add_argument("-d", "--duration", type=int, + help="total duration of trace in seconds") +time_group.add_argument("-i", "--interval", type=int, + help="print summary at this interval (seconds)") +parser.add_argument("-n", "--locks", type=int, default=999999999, + help="print given number of locks") +parser.add_argument("-s", "--stacks", type=int, default=1, + help="print given number of stack entries") +parser.add_argument("-c", "--caller", + help="print locks taken by given caller") +parser.add_argument("-S", "--sort", + help="sort data on , fields: acq_[max|total|count] hld_[max|total|count]") +parser.add_argument("-p", "--pid", + help="trace this PID only") +parser.add_argument("-t", "--tid", + help="trace this TID only") +parser.add_argument("--stack-storage-size", default=16384, + type=positive_nonzero_int, + help="the number of unique stack traces that can be stored and " + "displayed (default 16384)") + +args = parser.parse_args() + +program = """ +#include + +struct depth_id { + u64 id; + u64 depth; +}; + +BPF_ARRAY(enabled, u64, 1); +BPF_HASH(track, u64, u64); +BPF_HASH(time_aq, u64, u64); +BPF_HASH(lock_depth, u64, u64); +BPF_HASH(time_held, struct depth_id, u64); +BPF_HASH(stack, struct depth_id, int); + +BPF_HASH(aq_report_count, int, u64); +BPF_HASH(aq_report_max, int, u64); +BPF_HASH(aq_report_total, int, u64); + +BPF_HASH(hl_report_count, int, u64); +BPF_HASH(hl_report_max, int, u64); +BPF_HASH(hl_report_total, int, u64); + +BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE); + +static bool is_enabled(void) +{ + int key = 0; + u64 *ret; + + ret = enabled.lookup(&key); + return ret && *ret == 1; +} + +static bool allow_pid(u64 id) +{ + u32 pid = id >> 32; // PID is higher part + u32 tid = id; // Cast and get the lower part + + FILTER + + return 1; +} + +int mutex_lock_enter(struct pt_regs *ctx) +{ + if (!is_enabled()) + return 0; + + u64 id = bpf_get_current_pid_tgid(); + + if (!allow_pid(id)) + return 0; + + u64 one = 1, zero = 0; + + track.update(&id, &one); + + u64 *depth = lock_depth.lookup(&id); + + if (!depth) { + lock_depth.update(&id, &zero); + + depth = lock_depth.lookup(&id); + /* something is wrong.. */ + if (!depth) + return 0; + } + + int stackid = stack_traces.get_stackid(ctx, 0); + struct depth_id did = { + .id = id, + .depth = *depth, + }; + stack.update(&did, &stackid); + + u64 ts = bpf_ktime_get_ns(); + time_aq.update(&id, &ts); + + *depth += 1; + return 0; +} + +static void update_aq_report_count(int *stackid) +{ + u64 *count, one = 1; + + count = aq_report_count.lookup(stackid); + if (!count) { + aq_report_count.update(stackid, &one); + } else { + *count += 1; + } +} + +static void update_hl_report_count(int *stackid) +{ + u64 *count, one = 1; + + count = hl_report_count.lookup(stackid); + if (!count) { + hl_report_count.update(stackid, &one); + } else { + *count += 1; + } +} + +static void update_aq_report_max(int *stackid, u64 time) +{ + u64 *max; + + max = aq_report_max.lookup(stackid); + if (!max || *max < time) + aq_report_max.update(stackid, &time); +} + +static void update_hl_report_max(int *stackid, u64 time) +{ + u64 *max; + + max = hl_report_max.lookup(stackid); + if (!max || *max < time) + hl_report_max.update(stackid, &time); +} + +static void update_aq_report_total(int *stackid, u64 delta) +{ + u64 *count, *time; + + count = aq_report_count.lookup(stackid); + if (!count) + return; + + time = aq_report_total.lookup(stackid); + if (!time) { + aq_report_total.update(stackid, &delta); + } else { + *time = *time + delta; + } +} + +static void update_hl_report_total(int *stackid, u64 delta) +{ + u64 *count, *time; + + count = hl_report_count.lookup(stackid); + if (!count) + return; + + time = hl_report_total.lookup(stackid); + if (!time) { + hl_report_total.update(stackid, &delta); + } else { + *time = *time + delta; + } +} + +int mutex_lock_return(struct pt_regs *ctx) +{ + if (!is_enabled()) + return 0; + + u64 id = bpf_get_current_pid_tgid(); + + if (!allow_pid(id)) + return 0; + + u64 *one = track.lookup(&id); + + if (!one) + return 0; + + track.delete(&id); + + u64 *depth = lock_depth.lookup(&id); + if (!depth) + return 0; + + struct depth_id did = { + .id = id, + .depth = *depth - 1, + }; + + u64 *aq = time_aq.lookup(&id); + if (!aq) + return 0; + + int *stackid = stack.lookup(&did); + if (!stackid) + return 0; + + u64 cur = bpf_ktime_get_ns(); + + if (cur > *aq) { + int val = cur - *aq; + update_aq_report_count(stackid); + update_aq_report_max(stackid, val); + update_aq_report_total(stackid, val); + } + + time_held.update(&did, &cur); + return 0; +} + +int mutex_unlock_enter(struct pt_regs *ctx) +{ + if (!is_enabled()) + return 0; + + u64 id = bpf_get_current_pid_tgid(); + + if (!allow_pid(id)) + return 0; + + u64 *depth = lock_depth.lookup(&id); + + if (!depth || *depth == 0) + return 0; + + *depth -= 1; + + struct depth_id did = { + .id = id, + .depth = *depth, + }; + + u64 *held = time_held.lookup(&did); + if (!held) + return 0; + + int *stackid = stack.lookup(&did); + if (!stackid) + return 0; + + u64 cur = bpf_ktime_get_ns(); + + if (cur > *held) { + u64 val = cur - *held; + update_hl_report_count(stackid); + update_hl_report_max(stackid, val); + update_hl_report_total(stackid, val); + } + + stack.delete(&did); + time_held.delete(&did); + return 0; +} + +""" + +def sort_list(maxs, totals, counts): + if (not args.sort): + return maxs; + + for field in args.sort.split(','): + if (field == "acq_max" or field == "hld_max"): + return maxs + if (field == "acq_total" or field == "hld_total"): + return totals + if (field == "acq_count" or field == "hld_count"): + return counts + + print("Wrong sort argument: %s", args.sort) + exit(-1) + +def display(sort, maxs, totals, counts): + global missing_stacks + global has_enomem + + for k, v in sorted(sort.items(), key=lambda sort: sort[1].value, reverse=True)[:args.locks]: + missing_stacks += int(stack_id_err(k.value)) + has_enomem = has_enomem or (k.value == -errno.ENOMEM) + + caller = "[Missed Kernel Stack]" + stack = [] + + if (k.value >= 0): + stack = list(stack_traces.walk(k.value)) + caller = b.ksym(stack[1], show_offset=True) + + if (args.caller and caller.find(args.caller)): + continue + + avg = totals[k].value / counts[k].value + + print("%40s %10lu %6lu %10lu %10lu" % (caller, avg, counts[k].value, maxs[k].value, totals[k].value)) + + for addr in stack[1:args.stacks]: + print("%40s" % b.ksym(addr, show_offset=True)) + + +if args.tid: # TID trumps PID + program = program.replace('FILTER', + 'if (tid != %s) { return 0; }' % args.tid) +elif args.pid: + program = program.replace('FILTER', + 'if (pid != %s) { return 0; }' % args.pid) +else: + program = program.replace('FILTER', '') + +program = program.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size)) + +b = BPF(text=program) + +b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter") +b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return") +b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter") + +enabled = b.get_table("enabled"); + +stack_traces = b.get_table("stack_traces") +aq_counts = b.get_table("aq_report_count") +aq_maxs = b.get_table("aq_report_max") +aq_totals = b.get_table("aq_report_total") + +hl_counts = b.get_table("hl_report_count") +hl_maxs = b.get_table("hl_report_max") +hl_totals = b.get_table("hl_report_total") + +aq_sort = sort_list(aq_maxs, aq_totals, aq_counts) +hl_sort = sort_list(hl_maxs, hl_totals, hl_counts) + +print("Tracing lock events... Hit Ctrl-C to end.") + +# duration and interval are mutualy exclusive +exiting = 0 if args.interval else 1 +exiting = 1 if args.duration else 0 + +seconds = 999999999 +if args.interval: + seconds = args.interval +if args.duration: + seconds = args.duration + +missing_stacks = 0 +has_enomem = False + +while (1): + enabled[ct.c_int(0)] = ct.c_int(1) + + try: + sleep(seconds) + except KeyboardInterrupt: + exiting = 1 + + enabled[ct.c_int(0)] = ct.c_int(0) + + print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Spin", "Count", "Max spin", "Total spin")) + display(aq_sort, aq_maxs, aq_totals, aq_counts) + + + print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Hold", "Count", "Max hold", "Total hold")) + display(hl_sort, hl_maxs, hl_totals, hl_counts) + + if exiting: + break; + + stack_traces.clear() + aq_counts.clear() + aq_maxs.clear() + aq_totals.clear() + hl_counts.clear() + hl_maxs.clear() + hl_totals.clear() + +if missing_stacks > 0: + enomem_str = " Consider increasing --stack-storage-size." + print("WARNING: %d stack traces lost and could not be displayed.%s" % + (missing_stacks, (enomem_str if has_enomem else "")), + file=stderr) diff --git a/tools/lockstat_example.txt b/tools/lockstat_example.txt new file mode 100644 index 000000000000..3f51e31fa095 --- /dev/null +++ b/tools/lockstat_example.txt @@ -0,0 +1,167 @@ +Demonstrations of lockstat, the Linux eBPF/bcc version. + +lockstats traces kernel mutex lock events and display locks statistics + +# lockstat.py +Tracing lock events... Hit Ctrl-C to end. +^C + Caller Avg Spin Count Max spin Total spin + psi_avgs_work+0x2e 3675 5 5468 18379 + flush_to_ldisc+0x22 2833 2 4210 5667 + n_tty_write+0x30c 3914 1 3914 3914 + isig+0x5d 2390 1 2390 2390 + tty_buffer_flush+0x2a 1604 1 1604 1604 + commit_echoes+0x22 1400 1 1400 1400 + n_tty_receive_buf_common+0x3b9 1399 1 1399 1399 + + Caller Avg Hold Count Max hold Total hold + flush_to_ldisc+0x22 42558 2 76135 85116 + psi_avgs_work+0x2e 14821 5 20446 74106 + n_tty_receive_buf_common+0x3b9 12300 1 12300 12300 + n_tty_write+0x30c 10712 1 10712 10712 + isig+0x5d 3362 1 3362 3362 + tty_buffer_flush+0x2a 3078 1 3078 3078 + commit_echoes+0x22 3017 1 3017 3017 + + +Every caller to using kernel's mutex is displayed on every line. + +First portion of lines show the lock acquiring data, showing the +amount of time it took to acquired given lock. + + 'Caller' - symbol acquiring the mutex + 'Average Spin' - average time to acquire the mutex + 'Count' - number of times mutex was acquired + 'Max spin' - maximum time to acquire the mutex + 'Total spin' - total time spent in acquiring the mutex + +Second portion of lines show the lock holding data, showing the +amount of time it took to hold given lock. + + 'Caller' - symbol holding the mutex + 'Average Hold' - average time mutex was held + 'Count' - number of times mutex was held + 'Max hold' - maximum time mutex was held + 'Total hold' - total time spent in holding the mutex + +This works by tracing mutex_lock/unlock kprobes, updating the +lock stats in maps and processing them in the python part. + + +An -i option can be used to display stats in interval (5 seconds in example below): + +# lockstat.py -i 5 +Tracing lock events... Hit Ctrl-C to end. + + Caller Avg Spin Count Max spin Total spin + psi_avgs_work+0x2e 3822 15 5650 57338 + flush_to_ldisc+0x22 4630 1 4630 4630 + work_fn+0x4f 4185 1 4185 4185 + + Caller Avg Hold Count Max hold Total hold + psi_avgs_work+0x2e 12155 15 15739 182329 + flush_to_ldisc+0x22 13809 1 13809 13809 + work_fn+0x4f 5274 1 5274 5274 + + Caller Avg Spin Count Max spin Total spin + psi_avgs_work+0x2e 3715 17 4374 63163 + + Caller Avg Hold Count Max hold Total hold + psi_avgs_work+0x2e 13141 17 19510 223399 +^C + + +A -p option can be used to trace only selected process: + +# lockstat.py -p 883 +Tracing lock events... Hit Ctrl-C to end. +^C + Caller Avg Spin Count Max spin Total spin + pipe_wait+0xa9 625 412686 16930 258277958 + pipe_read+0x3f 420 413425 16872 174017649 + pipe_write+0x35 471 413425 16765 194792253 + + Caller Avg Hold Count Max hold Total hold + pipe_read+0x3f 473 413425 20063 195773647 + pipe_wait+0xa9 604 412686 16972 249598153 + pipe_write+0x35 481 413425 16944 199008064 + + +A -c option can be used to display only callers with specific substring: + +# lockstat.py -c pipe_ +Tracing lock events... Hit Ctrl-C to end. +^C + Caller Avg Spin Count Max spin Total spin + pipe_read+0x3f 422 469554 18665 198354705 + pipe_wait+0xa9 679 469536 17196 319017069 + pipe_write+0x35 469 469554 17057 220338525 + + Caller Avg Hold Count Max hold Total hold + pipe_write+0x35 638 469554 17330 299857180 + pipe_wait+0xa9 779 469535 16912 366047392 + pipe_read+0x3f 575 469554 13251 270005394 + + +An -n option can be used to display only specific number of callers: + +# lockstat.py -n 3 +Tracing lock events... Hit Ctrl-C to end. +^C + Caller Avg Spin Count Max spin Total spin + pipe_read+0x3f 420 334120 16964 140632284 + pipe_wait+0xa9 688 334116 16876 229957062 + pipe_write+0x35 463 334120 16791 154981747 + + Caller Avg Hold Count Max hold Total hold + flush_to_ldisc+0x22 27754 3 63270 83264 + pipe_read+0x3f 571 334120 17123 190976463 + pipe_wait+0xa9 759 334115 17068 253747213 + + +An -s option can be used to display number of callers backtrace entries: + +# lockstat.py -n 1 -s 3 +Tracing lock events... Hit Ctrl-C to end. +^C + Caller Avg Spin Count Max spin Total spin + pipe_wait+0xa9 685 811947 17376 556542328 + pipe_wait+0xa9 + pipe_read+0x206 + + Caller Avg Hold Count Max hold Total hold + flush_to_ldisc+0x22 28145 3 63872 84437 + flush_to_ldisc+0x22 + process_one_work+0x1b0 + + +Output can be sorted by using -S option on various +fields, the acq_total will force the acquired table to be +sorted on 'Total spin' column: + +# lockstat.py -S acq_total +Tracing lock events... Hit Ctrl-C to end. +^C + Caller Avg Spin Count Max spin Total spin + pipe_wait+0xa9 691 269343 17190 186263983 + pipe_write+0x35 464 269351 11730 125205417 + pipe_read+0x3f 422 269351 17107 113724697 + psi_avgs_work+0x2e 2499 11 4454 27494 + flush_to_ldisc+0x22 3111 3 5096 9334 + n_tty_write+0x30c 2764 1 2764 2764 + isig+0x5d 1287 1 1287 1287 + tty_buffer_flush+0x2a 961 1 961 961 + commit_echoes+0x22 892 1 892 892 + n_tty_receive_buf_common+0x3b9 868 1 868 868 + + Caller Avg Hold Count Max hold Total hold + pipe_wait+0xa9 788 269343 17128 212496240 + pipe_write+0x35 637 269351 17209 171596811 + pipe_read+0x3f 585 269351 11834 157606323 + psi_avgs_work+0x2e 8726 11 19177 95996 + flush_to_ldisc+0x22 22158 3 43731 66474 + n_tty_write+0x30c 9770 1 9770 9770 + n_tty_receive_buf_common+0x3b9 6830 1 6830 6830 + isig+0x5d 3114 1 3114 3114 + tty_buffer_flush+0x2a 2032 1 2032 2032 + commit_echoes+0x22 1616 1 1616 1616 diff --git a/tools/offwaketime.py b/tools/offwaketime.py index b46e9e1b4bb0..4809a3b65e1d 100755 --- a/tools/offwaketime.py +++ b/tools/offwaketime.py @@ -140,7 +140,7 @@ def signal_ignore(signal, frame): // of the Process who wakes it BPF_HASH(wokeby, u32, struct wokeby_t); -BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE); +BPF_STACK_TRACE(stack_traces, 2); int waker(struct pt_regs *ctx, struct task_struct *p) { // PID and TGID of the target Process to be waken @@ -321,7 +321,7 @@ def signal_ignore(signal, frame): line = [k.target.decode('utf-8', 'replace')] if not args.kernel_stacks_only: if stack_id_err(k.t_u_stack_id): - line.append("[Missed User Stack]") + line.append("[Missed User Stack] %d" % k.t_u_stack_id) else: line.extend([b.sym(addr, k.t_tgid).decode('utf-8', 'replace') for addr in reversed(list(target_user_stack)[1:])]) @@ -353,7 +353,7 @@ def signal_ignore(signal, frame): print(" %-16s %s %s" % ("waker:", k.waker.decode('utf-8', 'replace'), k.w_pid)) if not args.kernel_stacks_only: if stack_id_err(k.w_u_stack_id): - print(" [Missed User Stack]") + print(" [Missed User Stack] %d" % k.w_u_stack_id) else: for addr in waker_user_stack: print(" %s" % b.sym(addr, k.w_tgid))