From ac00ac5d7fec86524ff815360d0cda2ec583cf24 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 15 Nov 2019 12:45:59 +0100
Subject: [PATCH] Add lockstat tool

Adding lockstat tool to trace kernel mutex lock events and
display locks statistics and displays following data:

                                  Caller   Avg Spin  Count   Max spin Total spin
                      psi_avgs_work+0x2e       3675      5       5468      18379
                     flush_to_ldisc+0x22       2833      2       4210       5667
                       n_tty_write+0x30c       3914      1       3914       3914
                               isig+0x5d       2390      1       2390       2390
                   tty_buffer_flush+0x2a       1604      1       1604       1604
                      commit_echoes+0x22       1400      1       1400       1400
          n_tty_receive_buf_common+0x3b9       1399      1       1399       1399

                                  Caller   Avg Hold  Count   Max hold Total hold
                     flush_to_ldisc+0x22      42558      2      76135      85116
                      psi_avgs_work+0x2e      14821      5      20446      74106
          n_tty_receive_buf_common+0x3b9      12300      1      12300      12300
                       n_tty_write+0x30c      10712      1      10712      10712
                               isig+0x5d       3362      1       3362       3362
                   tty_buffer_flush+0x2a       3078      1       3078       3078
                      commit_echoes+0x22       3017      1       3017       3017

Every caller to using kernel's mutex is displayed on every line.

First portion of lines show the lock acquiring data, showing the amount
of time it took to acquired given lock.

  'Caller'       - symbol acquiring the mutex
  'Average Spin' - average time to acquire the mutex
  'Count'        - number of times mutex was acquired
  'Max spin'     - maximum time to acquire the mutex
  'Total spin'   - total time spent in acquiring the mutex

Second portion of lines show the lock holding data, showing the amount
of time it took to hold given lock.

  'Caller'       - symbol holding the mutex
  'Average Hold' - average time mutex was held
  'Count'        - number of times mutex was held
  'Max hold'     - maximum time mutex was held
  'Total hold'   - total time spent in holding the mutex

This works by tracing mutex_lock/unlock kprobes, udating the lock stats
in maps and processing them in the python part.

Examples:
    lockstats                           # trace system wide
    lockstats -d 5                      # trace for 5 seconds only
    lockstats -i 5                      # display stats every 5 seconds
    lockstats -p 123                    # trace locks for PID 123
    lockstats -t 321                    # trace locks for PID 321
    lockstats -c pipe_                  # display stats only for lock callers with 'pipe_' substring
    lockstats -S acq_count              # sort lock acquired results on acquired count
    lockstats -S hld_total              # sort lock held results on total held time
    lockstats -S acq_count,hld_total    # combination of above
    lockstats -n 3                      # display 3 locks
    lockstats -s 3                      # display 3 levels of stack

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 README.md                        |   1 +
 man/man8/lockstat.8              | 190 +++++++++++++
 snapcraft/snapcraft.yaml         |   2 +
 tests/python/test_tools_smoke.py |   4 +
 tools/lockstat.py                | 452 +++++++++++++++++++++++++++++++
 tools/lockstat_example.txt       | 167 ++++++++++++
 tools/offwaketime.py             |   6 +-
 7 files changed, 819 insertions(+), 3 deletions(-)
 create mode 100644 man/man8/lockstat.8
 create mode 100755 tools/lockstat.py
 create mode 100644 tools/lockstat_example.txt

diff --git a/README.md b/README.md
index 66e167dae01e..8ff346413253 100644
--- a/README.md
+++ b/README.md
@@ -118,6 +118,7 @@ pair of .c and .py files, and some are directories of files.
 - tools/[hardirqs](tools/hardirqs.py):  Measure hard IRQ (hard interrupt) event time. [Examples](tools/hardirqs_example.txt).
 - tools/[inject](tools/inject.py): Targeted error injection with call chain and predicates [Examples](tools/inject_example.txt).
 - tools/[killsnoop](tools/killsnoop.py): Trace signals issued by the kill() syscall. [Examples](tools/killsnoop_example.txt).
+- tools/[lockstat](tools/lockstat.py): Traces kernel mutex lock events and display locks statistics. [Examples](tools/lockstat_example.txt).
 - tools/[llcstat](tools/llcstat.py): Summarize CPU cache references and misses by process. [Examples](tools/llcstat_example.txt).
 - tools/[mdflush](tools/mdflush.py): Trace md flush events. [Examples](tools/mdflush_example.txt).
 - tools/[memleak](tools/memleak.py): Display outstanding memory allocations to find memory leaks. [Examples](tools/memleak_example.txt).
diff --git a/man/man8/lockstat.8 b/man/man8/lockstat.8
new file mode 100644
index 000000000000..6d871f147b9c
--- /dev/null
+++ b/man/man8/lockstat.8
@@ -0,0 +1,190 @@
+.TH lockstat 8  "2019-10-22" "USER COMMANDS"
+.SH NAME
+lockstat \- Traces kernel mutex lock events and display locks statistics. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B lockstat [\-h] [\-i] [\-n] [\-s] [\-c] [\-S FIELDS] [\-p] [\-t] [\-d DURATION]
+.SH DESCRIPTION
+lockstats traces kernel mutex lock events and display locks statistics
+and displays following data:
+
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                      psi_avgs_work+0x2e       3675      5       5468      18379
+                     flush_to_ldisc+0x22       2833      2       4210       5667
+                       n_tty_write+0x30c       3914      1       3914       3914
+                               isig+0x5d       2390      1       2390       2390
+                   tty_buffer_flush+0x2a       1604      1       1604       1604
+                      commit_echoes+0x22       1400      1       1400       1400
+          n_tty_receive_buf_common+0x3b9       1399      1       1399       1399
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                     flush_to_ldisc+0x22      42558      2      76135      85116
+                      psi_avgs_work+0x2e      14821      5      20446      74106
+          n_tty_receive_buf_common+0x3b9      12300      1      12300      12300
+                       n_tty_write+0x30c      10712      1      10712      10712
+                               isig+0x5d       3362      1       3362       3362
+                   tty_buffer_flush+0x2a       3078      1       3078       3078
+                      commit_echoes+0x22       3017      1       3017       3017
+
+
+Every caller to using kernel's mutex is displayed on every line.
+
+First portion of lines show the lock acquiring data, showing the
+amount of time it took to acquired given lock.
+
+  'Caller'       - symbol acquiring the mutex
+  'Average Spin' - average time to acquire the mutex
+  'Count'        - number of times mutex was acquired
+  'Max spin'     - maximum time to acquire the mutex
+  'Total spin'   - total time spent in acquiring the mutex
+
+Second portion of lines show the lock holding data, showing the
+amount of time it took to hold given lock.
+
+  'Caller'       - symbol holding the mutex
+  'Average Hold' - average time mutex was held
+  'Count'        - number of times mutex was held
+  'Max hold'     - maximum time mutex was held
+  'Total hold'   - total time spent in holding the mutex
+
+This works by tracing mutex_lock/unlock kprobes, updating the
+lock stats in maps and processing them in the python part.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-i SEC
+print summary at this interval (seconds)
+.TP
+\-c CALLER
+print locks taken by given caller
+.TP
+\-S FIELDS
+sort data on specific columns defined by FIELDS string (by default the data is sorted by Max columns)
+
+FIELDS string contains 1 or 2 fields describing columns to sort on
+for both acquiring and holding data. Following fields are available:
+
+  acq_max      for 'Max spin' column
+  acq_total    for 'Total spin' column
+  acq_count    for 'Count' column
+
+  hld_max      for 'Max hold' column
+  hld_total    for 'Total hold' column
+  hld_count    for 'Count' column
+
+See EXAMPLES.
+
+.TP
+\-n COUNT
+print COUNT number of locks
+.TP
+\-s COUNT
+print COUNT number of stack entries
+.TP
+\-p PID
+Trace this process ID only (filtered in-kernel).
+.TP
+\-t TID
+Trace this thread ID only (filtered in-kernel).
+.TP
+\-d DURATION
+Total duration of trace in seconds.
+.TP
+\-\-stack-storage-size STACK_STORAGE_SIZE
+Change the number of unique stack traces that can be stored and displayed.
+.SH EXAMPLES
+.TP
+Sort lock acquired results on acquired count:
+#
+.B lockstats -S acq_count
+
+.TP
+Sort lock held results on total held time:
+#
+.B lockstats -S hld_total
+
+.TP
+Combination of above:
+#
+.B lockstats -S acq_count,hld_total
+
+.TP
+Trace system wide:
+#
+.B lockstats
+
+.TP
+Trace for 5 seconds only:
+#
+.B lockstats -d 5
+
+.TP
+Display stats every 5 seconds
+#
+.B lockstats -i 5
+
+.TP
+Trace locks for PID 123:
+#
+.B lockstats -p 123
+
+.TP
+Trace locks for PID 321:
+#
+.B lockstats -t 321
+
+.TP
+Display stats only for lock callers with 'pipe_' substring
+#
+.B lockstats -c pipe_
+
+.TP
+Display 3 locks:
+#
+.B lockstats -n 3
+
+.TP
+Display 10 levels of stack for the most expensive lock:
+#
+.B lockstats -n 1 -s 10
+
+Tracing lock events... Hit Ctrl-C to end.
+^C
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                          pipe_wait+0xa9        670 397691      17273  266775437
+                          pipe_wait+0xa9
+                         pipe_read+0x206
+                     new_sync_read+0x12a
+                           vfs_read+0x9d
+                          ksys_read+0x5f
+                      do_syscall_64+0x5b
+     entry_SYSCALL_64_after_hwframe+0x44
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                     flush_to_ldisc+0x22      28381      3      65484      85144
+                     flush_to_ldisc+0x22
+                  process_one_work+0x1b0
+                      worker_thread+0x50
+                            kthread+0xfb
+                      ret_from_fork+0x35
+
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH CREDITS
+This tool is based on work of David Valin <dvalin@redhat.com> and his script.
+.SH AUTHOR
+Jiri Olsa
diff --git a/snapcraft/snapcraft.yaml b/snapcraft/snapcraft.yaml
index ee815b9572d7..09d71564a525 100644
--- a/snapcraft/snapcraft.yaml
+++ b/snapcraft/snapcraft.yaml
@@ -151,6 +151,8 @@ apps:
         command: bcc-wrapper javathreads
     killsnoop:
         command: bcc-wrapper killsnoop
+    lockstat:
+        command: bcc-wrapper lockstat
     llcstat:
         command: bcc-wrapper llcstat
     mdflush:
diff --git a/tests/python/test_tools_smoke.py b/tests/python/test_tools_smoke.py
index e1b10ae2810c..d220004c5449 100755
--- a/tests/python/test_tools_smoke.py
+++ b/tests/python/test_tools_smoke.py
@@ -199,6 +199,10 @@ def test_killsnoop(self):
         # a traceback but will not exit.
         self.run_with_int("killsnoop.py", kill=True)
 
+    @skipUnless(kernel_version_ge(4,18), "requires kernel >= 4.18")
+    def test_lockstat(self):
+        self.run_with_int("lockstat.py")
+
     @skipUnless(kernel_version_ge(4,9), "requires kernel >= 4.9")
     def test_llcstat(self):
         # Requires PMU, which is not available in virtual machines.
diff --git a/tools/lockstat.py b/tools/lockstat.py
new file mode 100755
index 000000000000..07bff3ee5f38
--- /dev/null
+++ b/tools/lockstat.py
@@ -0,0 +1,452 @@
+#!/usr/bin/python
+#
+# lockstats traces lock events and display locks statistics.
+#
+# USAGE: lockstats
+#
+
+from __future__ import print_function
+from bcc import BPF, USDT
+import argparse
+import subprocess
+import ctypes as ct
+from time import sleep, strftime
+from datetime import datetime, timedelta
+import errno
+from sys import stderr
+
+examples = """
+    lockstats                           # trace system wide
+    lockstats -d 5                      # trace for 5 seconds only
+    lockstats -i 5                      # display stats every 5 seconds
+    lockstats -p 123                    # trace locks for PID 123
+    lockstats -t 321                    # trace locks for PID 321
+    lockstats -c pipe_                  # display stats only for lock callers with 'pipe_' substring
+    lockstats -S acq_count              # sort lock acquired results on acquired count
+    lockstats -S hld_total              # sort lock held results on total held time
+    lockstats -S acq_count,hld_total    # combination of above
+    lockstats -n 3                      # display 3 locks
+    lockstats -s 3                      # display 3 levels of stack
+"""
+
+# arg validation
+def positive_int(val):
+    try:
+        ival = int(val)
+    except ValueError:
+        raise argparse.ArgumentTypeError("must be an integer")
+
+    if ival < 0:
+        raise argparse.ArgumentTypeError("must be positive")
+    return ival
+
+def positive_nonzero_int(val):
+    ival = positive_int(val)
+    if ival == 0:
+        raise argparse.ArgumentTypeError("must be nonzero")
+    return ival
+
+def stack_id_err(stack_id):
+    # -EFAULT in get_stackid normally means the stack-trace is not availible,
+    # Such as getting kernel stack trace in userspace code
+    return (stack_id < 0) and (stack_id != -errno.EFAULT)
+
+parser = argparse.ArgumentParser(
+    description="",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+
+time_group = parser.add_mutually_exclusive_group()
+time_group.add_argument("-d", "--duration", type=int,
+    help="total duration of trace in seconds")
+time_group.add_argument("-i", "--interval", type=int,
+    help="print summary at this interval (seconds)")
+parser.add_argument("-n", "--locks", type=int, default=999999999,
+    help="print given number of locks")
+parser.add_argument("-s", "--stacks", type=int, default=1,
+    help="print given number of stack entries")
+parser.add_argument("-c", "--caller",
+    help="print locks taken by given caller")
+parser.add_argument("-S", "--sort",
+    help="sort data on <aq_field,hd_field>, fields: acq_[max|total|count] hld_[max|total|count]")
+parser.add_argument("-p", "--pid",
+    help="trace this PID only")
+parser.add_argument("-t", "--tid",
+    help="trace this TID only")
+parser.add_argument("--stack-storage-size", default=16384,
+    type=positive_nonzero_int,
+    help="the number of unique stack traces that can be stored and "
+         "displayed (default 16384)")
+
+args = parser.parse_args()
+
+program = """
+#include <uapi/linux/ptrace.h>
+
+struct depth_id {
+  u64 id;
+  u64 depth;
+};
+
+BPF_ARRAY(enabled,   u64, 1);
+BPF_HASH(track,      u64, u64);
+BPF_HASH(time_aq,    u64, u64);
+BPF_HASH(lock_depth, u64, u64);
+BPF_HASH(time_held,  struct depth_id, u64);
+BPF_HASH(stack,      struct depth_id, int);
+
+BPF_HASH(aq_report_count, int, u64);
+BPF_HASH(aq_report_max,   int, u64);
+BPF_HASH(aq_report_total, int, u64);
+
+BPF_HASH(hl_report_count, int, u64);
+BPF_HASH(hl_report_max,   int, u64);
+BPF_HASH(hl_report_total, int, u64);
+
+BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
+
+static bool is_enabled(void)
+{
+    int key = 0;
+    u64 *ret;
+
+    ret = enabled.lookup(&key);
+    return ret && *ret == 1;
+}
+
+static bool allow_pid(u64 id)
+{
+    u32 pid = id >> 32; // PID is higher part
+    u32 tid = id;       // Cast and get the lower part
+
+    FILTER
+
+    return 1;
+}
+
+int mutex_lock_enter(struct pt_regs *ctx)
+{
+    if (!is_enabled())
+        return 0;
+
+    u64 id = bpf_get_current_pid_tgid();
+
+    if (!allow_pid(id))
+        return 0;
+
+    u64 one = 1, zero = 0;
+
+    track.update(&id, &one);
+
+    u64 *depth = lock_depth.lookup(&id);
+
+    if (!depth) {
+        lock_depth.update(&id, &zero);
+
+        depth = lock_depth.lookup(&id);
+        /* something is wrong.. */
+        if (!depth)
+            return 0;
+    }
+
+    int stackid = stack_traces.get_stackid(ctx, 0);
+    struct depth_id did = {
+      .id    = id,
+      .depth = *depth,
+    };
+    stack.update(&did, &stackid);
+
+    u64 ts = bpf_ktime_get_ns();
+    time_aq.update(&id, &ts);
+
+    *depth += 1;
+    return 0;
+}
+
+static void update_aq_report_count(int *stackid)
+{
+    u64 *count, one = 1;
+
+    count = aq_report_count.lookup(stackid);
+    if (!count) {
+        aq_report_count.update(stackid, &one);
+    } else {
+        *count += 1;
+    }
+}
+
+static void update_hl_report_count(int *stackid)
+{
+    u64 *count, one = 1;
+
+    count = hl_report_count.lookup(stackid);
+    if (!count) {
+        hl_report_count.update(stackid, &one);
+    } else {
+        *count += 1;
+    }
+}
+
+static void update_aq_report_max(int *stackid, u64 time)
+{
+    u64 *max;
+
+    max = aq_report_max.lookup(stackid);
+    if (!max || *max < time)
+        aq_report_max.update(stackid, &time);
+}
+
+static void update_hl_report_max(int *stackid, u64 time)
+{
+    u64 *max;
+
+    max = hl_report_max.lookup(stackid);
+    if (!max || *max < time)
+        hl_report_max.update(stackid, &time);
+}
+
+static void update_aq_report_total(int *stackid, u64 delta)
+{
+    u64 *count, *time;
+
+    count = aq_report_count.lookup(stackid);
+    if (!count)
+        return;
+
+    time = aq_report_total.lookup(stackid);
+    if (!time) {
+        aq_report_total.update(stackid, &delta);
+    } else {
+        *time = *time + delta;
+    }
+}
+
+static void update_hl_report_total(int *stackid, u64 delta)
+{
+    u64 *count, *time;
+
+    count = hl_report_count.lookup(stackid);
+    if (!count)
+        return;
+
+    time = hl_report_total.lookup(stackid);
+    if (!time) {
+        hl_report_total.update(stackid, &delta);
+    } else {
+        *time = *time + delta;
+    }
+}
+
+int mutex_lock_return(struct pt_regs *ctx)
+{
+    if (!is_enabled())
+        return 0;
+
+    u64 id = bpf_get_current_pid_tgid();
+
+    if (!allow_pid(id))
+        return 0;
+
+    u64 *one = track.lookup(&id);
+
+    if (!one)
+        return 0;
+
+    track.delete(&id);
+
+    u64 *depth = lock_depth.lookup(&id);
+    if (!depth)
+        return 0;
+
+    struct depth_id did = {
+      .id    = id,
+      .depth = *depth - 1,
+    };
+
+    u64 *aq = time_aq.lookup(&id);
+    if (!aq)
+        return 0;
+
+    int *stackid = stack.lookup(&did);
+    if (!stackid)
+        return 0;
+
+    u64 cur = bpf_ktime_get_ns();
+
+    if (cur > *aq) {
+        int val = cur - *aq;
+        update_aq_report_count(stackid);
+        update_aq_report_max(stackid, val);
+        update_aq_report_total(stackid, val);
+    }
+
+    time_held.update(&did, &cur);
+    return 0;
+}
+
+int mutex_unlock_enter(struct pt_regs *ctx)
+{
+    if (!is_enabled())
+        return 0;
+
+    u64 id = bpf_get_current_pid_tgid();
+
+    if (!allow_pid(id))
+        return 0;
+
+    u64 *depth = lock_depth.lookup(&id);
+
+    if (!depth || *depth == 0)
+        return 0;
+
+    *depth -= 1;
+
+    struct depth_id did = {
+      .id    = id,
+      .depth = *depth,
+    };
+
+    u64 *held = time_held.lookup(&did);
+    if (!held)
+        return 0;
+
+    int *stackid = stack.lookup(&did);
+    if (!stackid)
+        return 0;
+
+    u64 cur = bpf_ktime_get_ns();
+
+    if (cur > *held) {
+        u64 val = cur - *held;
+        update_hl_report_count(stackid);
+        update_hl_report_max(stackid, val);
+        update_hl_report_total(stackid, val);
+    }
+
+    stack.delete(&did);
+    time_held.delete(&did);
+    return 0;
+}
+
+"""
+
+def sort_list(maxs, totals, counts):
+    if (not args.sort):
+        return maxs;
+
+    for field in args.sort.split(','):
+        if (field == "acq_max" or field == "hld_max"):
+            return maxs
+        if (field == "acq_total" or field == "hld_total"):
+            return totals
+        if (field == "acq_count" or field == "hld_count"):
+            return counts
+
+    print("Wrong sort argument: %s", args.sort)
+    exit(-1)
+
+def display(sort, maxs, totals, counts):
+    global missing_stacks
+    global has_enomem
+
+    for k, v in sorted(sort.items(), key=lambda sort: sort[1].value, reverse=True)[:args.locks]:
+        missing_stacks += int(stack_id_err(k.value))
+        has_enomem      = has_enomem or (k.value == -errno.ENOMEM)
+
+        caller = "[Missed Kernel Stack]"
+        stack  = []
+
+        if (k.value >= 0):
+            stack  = list(stack_traces.walk(k.value))
+            caller = b.ksym(stack[1], show_offset=True)
+
+            if (args.caller and caller.find(args.caller)):
+                continue
+
+        avg = totals[k].value / counts[k].value
+
+        print("%40s %10lu %6lu %10lu %10lu" % (caller, avg, counts[k].value, maxs[k].value, totals[k].value))
+
+        for addr in stack[1:args.stacks]:
+            print("%40s" %  b.ksym(addr, show_offset=True))
+
+
+if args.tid:  # TID trumps PID
+    program = program.replace('FILTER',
+        'if (tid != %s) { return 0; }' % args.tid)
+elif args.pid:
+    program = program.replace('FILTER',
+        'if (pid != %s) { return 0; }' % args.pid)
+else:
+    program = program.replace('FILTER', '')
+
+program = program.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))
+
+b = BPF(text=program)
+
+b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter")
+b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return")
+b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter")
+
+enabled = b.get_table("enabled");
+
+stack_traces = b.get_table("stack_traces")
+aq_counts = b.get_table("aq_report_count")
+aq_maxs   = b.get_table("aq_report_max")
+aq_totals = b.get_table("aq_report_total")
+
+hl_counts = b.get_table("hl_report_count")
+hl_maxs   = b.get_table("hl_report_max")
+hl_totals = b.get_table("hl_report_total")
+
+aq_sort = sort_list(aq_maxs, aq_totals, aq_counts)
+hl_sort = sort_list(hl_maxs, hl_totals, hl_counts)
+
+print("Tracing lock events... Hit Ctrl-C to end.")
+
+# duration and interval are mutualy exclusive
+exiting = 0 if args.interval else 1
+exiting = 1 if args.duration else 0
+
+seconds = 999999999
+if args.interval:
+    seconds = args.interval
+if args.duration:
+    seconds = args.duration
+
+missing_stacks = 0
+has_enomem     = False
+
+while (1):
+    enabled[ct.c_int(0)] = ct.c_int(1)
+
+    try:
+        sleep(seconds)
+    except KeyboardInterrupt:
+        exiting = 1
+
+    enabled[ct.c_int(0)] = ct.c_int(0)
+
+    print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Spin", "Count", "Max spin", "Total spin"))
+    display(aq_sort, aq_maxs, aq_totals, aq_counts)
+
+
+    print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Hold", "Count", "Max hold", "Total hold"))
+    display(hl_sort, hl_maxs, hl_totals, hl_counts)
+
+    if exiting:
+        break;
+
+    stack_traces.clear()
+    aq_counts.clear()
+    aq_maxs.clear()
+    aq_totals.clear()
+    hl_counts.clear()
+    hl_maxs.clear()
+    hl_totals.clear()
+
+if missing_stacks > 0:
+    enomem_str = " Consider increasing --stack-storage-size."
+    print("WARNING: %d stack traces lost and could not be displayed.%s" %
+        (missing_stacks, (enomem_str if has_enomem else "")),
+        file=stderr)
diff --git a/tools/lockstat_example.txt b/tools/lockstat_example.txt
new file mode 100644
index 000000000000..3f51e31fa095
--- /dev/null
+++ b/tools/lockstat_example.txt
@@ -0,0 +1,167 @@
+Demonstrations of lockstat, the Linux eBPF/bcc version.
+
+lockstats traces kernel mutex lock events and display locks statistics
+
+# lockstat.py
+Tracing lock events... Hit Ctrl-C to end.
+^C
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                      psi_avgs_work+0x2e       3675      5       5468      18379
+                     flush_to_ldisc+0x22       2833      2       4210       5667
+                       n_tty_write+0x30c       3914      1       3914       3914
+                               isig+0x5d       2390      1       2390       2390
+                   tty_buffer_flush+0x2a       1604      1       1604       1604
+                      commit_echoes+0x22       1400      1       1400       1400
+          n_tty_receive_buf_common+0x3b9       1399      1       1399       1399
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                     flush_to_ldisc+0x22      42558      2      76135      85116
+                      psi_avgs_work+0x2e      14821      5      20446      74106
+          n_tty_receive_buf_common+0x3b9      12300      1      12300      12300
+                       n_tty_write+0x30c      10712      1      10712      10712
+                               isig+0x5d       3362      1       3362       3362
+                   tty_buffer_flush+0x2a       3078      1       3078       3078
+                      commit_echoes+0x22       3017      1       3017       3017
+
+
+Every caller to using kernel's mutex is displayed on every line.
+
+First portion of lines show the lock acquiring data, showing the
+amount of time it took to acquired given lock.
+
+  'Caller'       - symbol acquiring the mutex
+  'Average Spin' - average time to acquire the mutex
+  'Count'        - number of times mutex was acquired
+  'Max spin'     - maximum time to acquire the mutex
+  'Total spin'   - total time spent in acquiring the mutex
+
+Second portion of lines show the lock holding data, showing the
+amount of time it took to hold given lock.
+
+  'Caller'       - symbol holding the mutex
+  'Average Hold' - average time mutex was held
+  'Count'        - number of times mutex was held
+  'Max hold'     - maximum time mutex was held
+  'Total hold'   - total time spent in holding the mutex
+
+This works by tracing mutex_lock/unlock kprobes, updating the
+lock stats in maps and processing them in the python part.
+
+
+An -i option can be used to display stats in interval (5 seconds in example below):
+
+# lockstat.py -i 5
+Tracing lock events... Hit Ctrl-C to end.
+
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                      psi_avgs_work+0x2e       3822     15       5650      57338
+                     flush_to_ldisc+0x22       4630      1       4630       4630
+                            work_fn+0x4f       4185      1       4185       4185
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                      psi_avgs_work+0x2e      12155     15      15739     182329
+                     flush_to_ldisc+0x22      13809      1      13809      13809
+                            work_fn+0x4f       5274      1       5274       5274
+
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                      psi_avgs_work+0x2e       3715     17       4374      63163
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                      psi_avgs_work+0x2e      13141     17      19510     223399
+^C
+
+
+A -p option can be used to trace only selected process:
+
+# lockstat.py -p 883
+Tracing lock events... Hit Ctrl-C to end.
+^C
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                          pipe_wait+0xa9        625 412686      16930  258277958
+                          pipe_read+0x3f        420 413425      16872  174017649
+                         pipe_write+0x35        471 413425      16765  194792253
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                          pipe_read+0x3f        473 413425      20063  195773647
+                          pipe_wait+0xa9        604 412686      16972  249598153
+                         pipe_write+0x35        481 413425      16944  199008064
+
+
+A -c option can be used to display only callers with specific substring:
+
+# lockstat.py  -c pipe_
+Tracing lock events... Hit Ctrl-C to end.
+^C
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                          pipe_read+0x3f        422 469554      18665  198354705
+                          pipe_wait+0xa9        679 469536      17196  319017069
+                         pipe_write+0x35        469 469554      17057  220338525
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                         pipe_write+0x35        638 469554      17330  299857180
+                          pipe_wait+0xa9        779 469535      16912  366047392
+                          pipe_read+0x3f        575 469554      13251  270005394
+
+
+An -n option can be used to display only specific number of callers:
+
+# lockstat.py -n 3
+Tracing lock events... Hit Ctrl-C to end.
+^C
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                          pipe_read+0x3f        420 334120      16964  140632284
+                          pipe_wait+0xa9        688 334116      16876  229957062
+                         pipe_write+0x35        463 334120      16791  154981747
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                     flush_to_ldisc+0x22      27754      3      63270      83264
+                          pipe_read+0x3f        571 334120      17123  190976463
+                          pipe_wait+0xa9        759 334115      17068  253747213
+
+
+An -s option can be used to display number of callers backtrace entries:
+
+# lockstat.py -n 1 -s 3
+Tracing lock events... Hit Ctrl-C to end.
+^C
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                          pipe_wait+0xa9        685 811947      17376  556542328
+                          pipe_wait+0xa9
+                         pipe_read+0x206
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                     flush_to_ldisc+0x22      28145      3      63872      84437
+                     flush_to_ldisc+0x22
+                  process_one_work+0x1b0
+
+
+Output can be sorted by using -S <fields> option on various
+fields, the acq_total will force the acquired table to be
+sorted on 'Total spin' column:
+
+# lockstat.py -S acq_total
+Tracing lock events... Hit Ctrl-C to end.
+^C
+                                  Caller   Avg Spin  Count   Max spin Total spin
+                          pipe_wait+0xa9        691 269343      17190  186263983
+                         pipe_write+0x35        464 269351      11730  125205417
+                          pipe_read+0x3f        422 269351      17107  113724697
+                      psi_avgs_work+0x2e       2499     11       4454      27494
+                     flush_to_ldisc+0x22       3111      3       5096       9334
+                       n_tty_write+0x30c       2764      1       2764       2764
+                               isig+0x5d       1287      1       1287       1287
+                   tty_buffer_flush+0x2a        961      1        961        961
+                      commit_echoes+0x22        892      1        892        892
+          n_tty_receive_buf_common+0x3b9        868      1        868        868
+
+                                  Caller   Avg Hold  Count   Max hold Total hold
+                          pipe_wait+0xa9        788 269343      17128  212496240
+                         pipe_write+0x35        637 269351      17209  171596811
+                          pipe_read+0x3f        585 269351      11834  157606323
+                      psi_avgs_work+0x2e       8726     11      19177      95996
+                     flush_to_ldisc+0x22      22158      3      43731      66474
+                       n_tty_write+0x30c       9770      1       9770       9770
+          n_tty_receive_buf_common+0x3b9       6830      1       6830       6830
+                               isig+0x5d       3114      1       3114       3114
+                   tty_buffer_flush+0x2a       2032      1       2032       2032
+                      commit_echoes+0x22       1616      1       1616       1616
diff --git a/tools/offwaketime.py b/tools/offwaketime.py
index b46e9e1b4bb0..4809a3b65e1d 100755
--- a/tools/offwaketime.py
+++ b/tools/offwaketime.py
@@ -140,7 +140,7 @@ def signal_ignore(signal, frame):
 // of the Process who wakes it
 BPF_HASH(wokeby, u32, struct wokeby_t);
 
-BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
+BPF_STACK_TRACE(stack_traces, 2);
 
 int waker(struct pt_regs *ctx, struct task_struct *p) {
     // PID and TGID of the target Process to be waken
@@ -321,7 +321,7 @@ def signal_ignore(signal, frame):
         line = [k.target.decode('utf-8', 'replace')]
         if not args.kernel_stacks_only:
             if stack_id_err(k.t_u_stack_id):
-                line.append("[Missed User Stack]")
+                line.append("[Missed User Stack] %d" % k.t_u_stack_id)
             else:
                 line.extend([b.sym(addr, k.t_tgid).decode('utf-8', 'replace')
                     for addr in reversed(list(target_user_stack)[1:])])
@@ -353,7 +353,7 @@ def signal_ignore(signal, frame):
         print("    %-16s %s %s" % ("waker:", k.waker.decode('utf-8', 'replace'), k.w_pid))
         if not args.kernel_stacks_only:
             if stack_id_err(k.w_u_stack_id):
-                print("    [Missed User Stack]")
+                print("    [Missed User Stack] %d" % k.w_u_stack_id)
             else:
                 for addr in waker_user_stack:
                     print("    %s" % b.sym(addr, k.w_tgid))