From 3a391c2a27b2de4b2529a369d849334a05f201ba Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Mon, 8 Feb 2016 01:20:31 -0800 Subject: [PATCH] runqlat --- README.md | 1 + man/man8/runqlat.8 | 105 ++++++++ tools/runqlat.py | 167 +++++++++++++ tools/runqlat_example.txt | 486 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 759 insertions(+) create mode 100644 man/man8/runqlat.8 create mode 100755 tools/runqlat.py create mode 100644 tools/runqlat_example.txt diff --git a/README.md b/README.md index 6eb247435685..9eb3ea0519b9 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ Tools: - tools/[offwaketime](tools/offwaketime.py): Summarize blocked time by kernel off-CPU stack and waker stack. [Examples](tools/offwaketime_example.txt). - tools/[opensnoop](tools/opensnoop.py): Trace open() syscalls. [Examples](tools/opensnoop_example.txt). - tools/[pidpersec](tools/pidpersec.py): Count new processes (via fork). [Examples](tools/pidpersec_example.txt). +- tools/[runqlat](tools/runqlat.py): Run queue (scheduler) latency as a histogram. [Examples](tools/runqlat_example.txt). - tools/[softirqs](tools/softirqs.py): Measure soft IRQ (soft interrupt) event time. [Examples](tools/softirqs_example.txt). - tools/[stackcount](tools/stackcount.py): Count kernel function calls and their stack traces. [Examples](tools/stackcount_example.txt). - tools/[stacksnoop](tools/stacksnoop.py): Trace a kernel function and print all kernel stack traces. [Examples](tools/stacksnoop_example.txt). diff --git a/man/man8/runqlat.8 b/man/man8/runqlat.8 new file mode 100644 index 000000000000..62ab0a64f5c6 --- /dev/null +++ b/man/man8/runqlat.8 @@ -0,0 +1,105 @@ +.TH runqlat 8 "2016-02-07" "USER COMMANDS" +.SH NAME +runqlat \- Run queue (scheduler) latency as a histogram. 
+.SH SYNOPSIS +.B runqlat [\-h] [\-T] [\-m] [\-P] [\-p PID] [interval] [count] +.SH DESCRIPTION +This measures the time a task spends waiting on a run queue (or equivalent +scheduler data structure) for a turn on-CPU, and shows this time as a +histogram. This time should be small, but a task may need to wait its turn due +to CPU load. The higher the CPU load, the longer a task will generally need to +wait its turn. + +This tool measures two types of run queue latency: + +1. The time from a task being enqueued on a run queue to its context switch +and execution. This traces enqueue_task_*() -> finish_task_switch(), +and instruments the run queue latency after a voluntary context switch. + +2. The time from when a task was involuntary context switched and still +in the runnable state, to when it next executed. This is instrumented +from finish_task_switch() alone. + +This tool uses in-kernel eBPF maps for storing timestamps and the histogram, +for efficiency. Despite this, the overhead of this tool may become significant +for some workloads: see the OVERHEAD section. + +This works by tracing various kernel scheduler functions using dynamic tracing, +and will need updating to match any changes to these functions. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH OPTIONS +.TP +\-h +Print usage message. +.TP +\-T +Include timestamps on output. +.TP +\-m +Output histogram in milliseconds. +.TP +\-P +Print a histogram for each PID. +.TP +\-p PID +Only show this PID (filtered in kernel for efficiency). +.TP +interval +Output interval, in seconds. +.TP +count +Number of outputs. 
+.SH EXAMPLES +.TP +Summarize run queue latency as a histogram: +# +.B runqlat +.TP +Print 1 second summaries, 10 times: +# +.B runqlat 1 10 +.TP +Print 1 second summaries, using milliseconds as units for the histogram, and include timestamps on output: +# +.B runqlat \-mT 1 +.TP +Trace PID 186 only, 1 second summaries: +# +.B runqlat \-p 186 1 +.SH FIELDS +.TP +usecs +Microsecond range +.TP +msecs +Millisecond range +.TP +count +How many times a task event fell into this range +.TP +distribution +An ASCII bar chart to visualize the distribution (count column) +.SH OVERHEAD +This traces scheduler functions, which can become very frequent. While eBPF +has very low overhead, and this tool uses in-kernel maps for efficiency, the +frequency of scheduler events for some workloads may be high enough that the +overhead of this tool becomes significant. Measure in a lab environment +to quantify the overhead before use. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Brendan Gregg +.SH SEE ALSO +pidstat(1) diff --git a/tools/runqlat.py b/tools/runqlat.py new file mode 100755 index 000000000000..558981dae852 --- /dev/null +++ b/tools/runqlat.py @@ -0,0 +1,167 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# runqlat Run queue (scheduler) latency as a histogram. +# For Linux, uses BCC, eBPF. +# +# USAGE: runqlat [-h] [-T] [-m] [-P] [-p PID] [interval] [count] +# +# This measures the time a task spends waiting on a run queue for a turn +# on-CPU, and shows this time as a histogram. This time should be small, but a +# task may need to wait its turn due to CPU load. +# +# This measures two types of run queue latency: +# 1. The time from a task being enqueued on a run queue to its context switch +# and execution. 
This traces enqueue_task_*() -> finish_task_switch(), +# and instruments the run queue latency after a voluntary context switch. +# 2. The time from when a task was involuntarily context switched and still +# in the runnable state, to when it next executed. This is instrumented +# from finish_task_switch() alone. +# +# Copyright 2016 Netflix, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 07-Feb-2016 Brendan Gregg Created this. + +from __future__ import print_function +from bcc import BPF +from time import sleep, strftime +import argparse + +# arguments +examples = """examples: + ./runqlat # summarize run queue latency as a histogram + ./runqlat 1 10 # print 1 second summaries, 10 times + ./runqlat -mT 1 # 1s summaries, milliseconds, and timestamps + ./runqlat -P # show each PID separately + ./runqlat -p 185 # trace PID 185 only +""" +parser = argparse.ArgumentParser( + description="Summarize run queue (scheduler) latency as a histogram", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-T", "--timestamp", action="store_true", + help="include timestamp on output") +parser.add_argument("-m", "--milliseconds", action="store_true", + help="millisecond histogram") +parser.add_argument("-P", "--pids", action="store_true", + help="print a histogram per process ID") +parser.add_argument("-p", "--pid", + help="trace this PID only") +parser.add_argument("interval", nargs="?", default=99999999, + help="output interval, in seconds") +parser.add_argument("count", nargs="?", default=99999999, + help="number of outputs") +args = parser.parse_args() +countdown = int(args.count) +debug = 0 + +# define BPF program +bpf_text = """ +#include <uapi/linux/ptrace.h> +#include "kernel/sched/sched.h" + +typedef struct pid_key { + u64 pid; // work around + u64 slot; +} pid_key_t; +BPF_HASH(start, u32); +STORAGE + +// record enqueue timestamp +int trace_enqueue(struct pt_regs *ctx, struct rq *rq, struct task_struct *p, + int flags) +{ + 
u32 pid = p->pid; + if (FILTER) + return 0; + u64 ts = bpf_ktime_get_ns(); + start.update(&pid, &ts); + return 0; +} + +// calculate latency +int trace_run(struct pt_regs *ctx, struct task_struct *prev) +{ + u32 pid; + + // ivcsw: treat like an enqueue event and store timestamp + if (prev->state == TASK_RUNNING) { + pid = prev->pid; + if (!(FILTER)) { + u64 ts = bpf_ktime_get_ns(); + start.update(&pid, &ts); + } + } + + pid = bpf_get_current_pid_tgid(); + if (FILTER) + return 0; + u64 *tsp, delta; + + // fetch timestamp and calculate delta + tsp = start.lookup(&pid); + if (tsp == 0) { + return 0; // missed enqueue + } + delta = bpf_ktime_get_ns() - *tsp; + FACTOR + + // store as histogram + STORE + + start.delete(&pid); + return 0; +} +""" + +# code substitutions +if args.pid: + bpf_text = bpf_text.replace('FILTER', 'pid != %s' % args.pid) +else: + bpf_text = bpf_text.replace('FILTER', '0') +if args.milliseconds: + bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;') + label = "msecs" +else: + bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;') + label = "usecs" +if args.pids: + bpf_text = bpf_text.replace('STORAGE', + 'BPF_HISTOGRAM(dist, pid_key_t);') + bpf_text = bpf_text.replace('STORE', + 'pid_key_t key = {.pid = pid, .slot = bpf_log2l(delta)}; ' + + 'dist.increment(key);') +else: + bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);') + bpf_text = bpf_text.replace('STORE', + 'dist.increment(bpf_log2l(delta));') +if debug: + print(bpf_text) + +# load BPF program +b = BPF(text=bpf_text) +b.attach_kprobe(event_re="enqueue_task_*", fn_name="trace_enqueue") +b.attach_kprobe(event="finish_task_switch", fn_name="trace_run") + +print("Tracing run queue latency... 
Hit Ctrl-C to end.") + +# output +exiting = 0 if args.interval else 1 +dist = b.get_table("dist") +while (1): + try: + sleep(int(args.interval)) + except KeyboardInterrupt: + exiting = 1 + + print() + if args.timestamp: + print("%-8s\n" % strftime("%H:%M:%S"), end="") + + dist.print_log2_hist(label, "pid", section_print_fn=int) + dist.clear() + + countdown -= 1 + if exiting or countdown == 0: + exit() diff --git a/tools/runqlat_example.txt b/tools/runqlat_example.txt new file mode 100644 index 000000000000..9999dd306b3f --- /dev/null +++ b/tools/runqlat_example.txt @@ -0,0 +1,486 @@ +Demonstrations of runqlat, the Linux eBPF/bcc version. + + +This program summarizes scheduler run queue latency as a histogram, showing +how long tasks spent waiting their turn to run on-CPU. + +Here is a heavily loaded system: + +# ./runqlat +Tracing run queue latency... Hit Ctrl-C to end. +^C + usecs : count distribution + 0 -> 1 : 233 |*********** | + 2 -> 3 : 742 |************************************ | + 4 -> 7 : 203 |********** | + 8 -> 15 : 173 |******** | + 16 -> 31 : 24 |* | + 32 -> 63 : 0 | | + 64 -> 127 : 30 |* | + 128 -> 255 : 6 | | + 256 -> 511 : 3 | | + 512 -> 1023 : 5 | | + 1024 -> 2047 : 27 |* | + 2048 -> 4095 : 30 |* | + 4096 -> 8191 : 20 | | + 8192 -> 16383 : 29 |* | + 16384 -> 32767 : 809 |****************************************| + 32768 -> 65535 : 64 |*** | + +The distribution is bimodal, with one mode between 0 and 15 microseconds, +and another between 16 and 65 milliseconds. These modes are visible as the +spikes in the ASCII distribution (which is merely a visual representation +of the "count" column). As an example of reading one line: 809 events fell +into the 16384 to 32767 microsecond range (16 to 32 ms) while tracing. + +I would expect the two modes to be due the workload: 16 hot CPU-bound threads, +and many other mostly idle threads doing occasional work. 
I suspect the mostly +idle threads will run with a higher priority when they wake up, and are +the reason for the low latency mode. The high latency mode will be the +CPU-bound threads. More analysis with this and other tools can confirm. + + +A -m option can be used to show milliseconds instead, as well as an interval +and a count. For example, showing three x five second summary in milliseconds: + +# ./runqlat -m 5 3 +Tracing run queue latency... Hit Ctrl-C to end. + + msecs : count distribution + 0 -> 1 : 3818 |****************************************| + 2 -> 3 : 39 | | + 4 -> 7 : 39 | | + 8 -> 15 : 62 | | + 16 -> 31 : 2214 |*********************** | + 32 -> 63 : 226 |** | + + msecs : count distribution + 0 -> 1 : 3775 |****************************************| + 2 -> 3 : 52 | | + 4 -> 7 : 37 | | + 8 -> 15 : 65 | | + 16 -> 31 : 2230 |*********************** | + 32 -> 63 : 212 |** | + + msecs : count distribution + 0 -> 1 : 3816 |****************************************| + 2 -> 3 : 49 | | + 4 -> 7 : 40 | | + 8 -> 15 : 53 | | + 16 -> 31 : 2228 |*********************** | + 32 -> 63 : 221 |** | + +This shows a similar distribution across the three summaries. + + +A -p option can be used to show one PID only, which is filtered in kernel for +efficiency. For example, PID 4505, and one second summaries: + +# ./runqlat -mp 4505 1 +Tracing run queue latency... Hit Ctrl-C to end. 
+ + msecs : count distribution + 0 -> 1 : 1 |* | + 2 -> 3 : 2 |*** | + 4 -> 7 : 1 |* | + 8 -> 15 : 0 | | + 16 -> 31 : 25 |****************************************| + 32 -> 63 : 3 |**** | + + msecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 2 |** | + 4 -> 7 : 0 | | + 8 -> 15 : 1 |* | + 16 -> 31 : 30 |****************************************| + 32 -> 63 : 1 |* | + + msecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 28 |****************************************| + 32 -> 63 : 2 |** | + + msecs : count distribution + 0 -> 1 : 1 |* | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 27 |****************************************| + 32 -> 63 : 4 |***** | +[...] + +For comparison, here is pidstat(1) for that process: + +# pidstat -p 4505 1 +Linux 4.4.0-virtual (bgregg-xxxxxxxx) 02/08/2016 _x86_64_ (8 CPU) + +08:56:11 AM UID PID %usr %system %guest %CPU CPU Command +08:56:12 AM 0 4505 9.00 3.00 0.00 12.00 0 bash +08:56:13 AM 0 4505 7.00 5.00 0.00 12.00 0 bash +08:56:14 AM 0 4505 10.00 2.00 0.00 12.00 0 bash +08:56:15 AM 0 4505 11.00 2.00 0.00 13.00 0 bash +08:56:16 AM 0 4505 9.00 3.00 0.00 12.00 0 bash +[...] + +This is a synthetic workload that is CPU bound. It's only spending 12% on-CPU +each second because of high CPU demand on this server: the remaining time +is spent waiting on a run queue, as visualized by runqlat. + + +Here is the same system, but when it is CPU idle: + +# ./runqlat 5 1 +Tracing run queue latency... Hit Ctrl-C to end. 
+ + usecs : count distribution + 0 -> 1 : 2250 |******************************** | + 2 -> 3 : 2340 |********************************** | + 4 -> 7 : 2746 |****************************************| + 8 -> 15 : 418 |****** | + 16 -> 31 : 93 |* | + 32 -> 63 : 28 | | + 64 -> 127 : 119 |* | + 128 -> 255 : 9 | | + 256 -> 511 : 4 | | + 512 -> 1023 : 20 | | + 1024 -> 2047 : 22 | | + 2048 -> 4095 : 5 | | + 4096 -> 8191 : 2 | | + +Back to a microsecond scale, this time there is little run queue latency past 1 +millisecond, as would be expected. + + +Now 16 threads are performing heavy disk I/O: + +# ./runqlat 5 1 +Tracing run queue latency... Hit Ctrl-C to end. + + usecs : count distribution + 0 -> 1 : 204 | | + 2 -> 3 : 944 |* | + 4 -> 7 : 16315 |********************* | + 8 -> 15 : 29897 |****************************************| + 16 -> 31 : 1044 |* | + 32 -> 63 : 23 | | + 64 -> 127 : 128 | | + 128 -> 255 : 24 | | + 256 -> 511 : 5 | | + 512 -> 1023 : 13 | | + 1024 -> 2047 : 15 | | + 2048 -> 4095 : 13 | | + 4096 -> 8191 : 10 | | + +The distribution hasn't changed too much. While the disks are 100% busy, there +is still plenty of CPU headroom, and threads still don't spend much time +waiting their turn. + + +A -P option will print a distribution for each PID: + +# ./runqlat -P +Tracing run queue latency... Hit Ctrl-C to end. 
+^C + +pid = 0 + usecs : count distribution + 0 -> 1 : 351 |******************************** | + 2 -> 3 : 96 |******** | + 4 -> 7 : 437 |****************************************| + 8 -> 15 : 12 |* | + 16 -> 31 : 10 | | + 32 -> 63 : 0 | | + 64 -> 127 : 16 |* | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 0 | | + 1024 -> 2047 : 0 | | + 2048 -> 4095 : 0 | | + 4096 -> 8191 : 0 | | + 8192 -> 16383 : 1 | | + +pid = 12929 + usecs : count distribution + 0 -> 1 : 1 |****************************************| + 2 -> 3 : 0 | | + 4 -> 7 : 1 |****************************************| + +pid = 12930 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 1 |****************************************| + 32 -> 63 : 0 | | + 64 -> 127 : 1 |****************************************| + +pid = 12931 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 1 |******************** | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 2 |****************************************| + +pid = 12932 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 1 |****************************************| + 256 -> 511 : 0 | | + 512 -> 1023 : 1 |****************************************| + +pid = 7 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 426 |************************************* | + 4 -> 7 : 457 |****************************************| + 8 -> 15 : 16 |* | + +pid = 9 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 425 |****************************************| + 8 -> 15 : 16 |* | + +pid = 11 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 10 |****************************************| + +pid = 14 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 8 
|****************************************| + 4 -> 7 : 2 |********** | + +pid = 18 + usecs : count distribution + 0 -> 1 : 414 |****************************************| + 2 -> 3 : 0 | | + 4 -> 7 : 20 |* | + 8 -> 15 : 8 | | + +pid = 12928 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 1 |****************************************| + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 1 |****************************************| + +pid = 1867 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 15 |****************************************| + 16 -> 31 : 1 |** | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 4 |********** | + +pid = 1871 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 2 |****************************************| + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 1 |******************** | + +pid = 1876 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 1 |****************************************| + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 1 |****************************************| + +pid = 1878 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 3 |****************************************| + +pid = 1880 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 3 |****************************************| + +pid = 9307 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 1 |****************************************| + +pid = 1886 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 1 |******************** | + 8 -> 15 : 2 |****************************************| + +pid = 1888 + usecs : count distribution + 0 -> 1 : 0 | 
| + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 3 |****************************************| + +pid = 3297 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 1 |****************************************| + +pid = 1892 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 1 |******************** | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 2 |****************************************| + +pid = 7024 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 4 |****************************************| + +pid = 16468 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 3 |****************************************| + +pid = 12922 + usecs : count distribution + 0 -> 1 : 1 |****************************************| + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 1 |****************************************| + 16 -> 31 : 1 |****************************************| + 32 -> 63 : 0 | | + 64 -> 127 : 1 |****************************************| + +pid = 12923 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 1 |******************** | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 2 |****************************************| + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 1 |******************** | + 1024 -> 2047 : 1 |******************** | + +pid = 12924 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 2 |******************** | + 8 -> 15 : 4 |****************************************| + 16 -> 31 : 1 |********** | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 0 | | + 1024 -> 2047 : 1 |********** | + +pid = 12925 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 
-> 63 : 0 | | + 64 -> 127 : 1 |****************************************| + +pid = 12926 + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 1 |****************************************| + 4 -> 7 : 0 | | + 8 -> 15 : 1 |****************************************| + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 1 |****************************************| + +pid = 12927 + usecs : count distribution + 0 -> 1 : 1 |****************************************| + 2 -> 3 : 0 | | + 4 -> 7 : 1 |****************************************| + + +USAGE message: + +# ./runqlat -h +usage: runqlat [-h] [-T] [-m] [-P] [-p PID] [interval] [count] + +Summarize run queue (schedular) latency as a histogram + +positional arguments: + interval output interval, in seconds + count number of outputs + +optional arguments: + -h, --help show this help message and exit + -T, --timestamp include timestamp on output + -m, --milliseconds millisecond histogram + -P, --pids print a histogram per process ID + -p PID, --pid PID trace this PID only + +examples: + ./runqlat # summarize run queue latency as a histogram + ./runqlat 1 10 # print 1 second summaries, 10 times + ./runqlat -mT 1 # 1s summaries, milliseconds, and timestamps + ./runqlat -P # show each PID separately + ./runqlat -p 185 # trace PID 185 only