forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
75d3e9d
commit 3a391c2
Showing
4 changed files
with
759 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
.TH runqlat 8 "2016-02-07" "USER COMMANDS" | ||
.SH NAME | ||
runqlat \- Run queue (scheduler) latency as a histogram. | ||
.SH SYNOPSIS | ||
.B runqlat [\-h] [\-T] [\-m] [\-P] [\-p PID] [interval] [count] | ||
.SH DESCRIPTION | ||
This measures the time a task spends waiting on a run queue (or equivalent | ||
scheduler data structure) for a turn on-CPU, and shows this time as a | ||
histogram. This time should be small, but a task may need to wait its turn due | ||
to CPU load. The higher the CPU load, the longer a task will generally need to | ||
wait its turn. | ||
|
||
This tool measures two types of run queue latency: | ||
|
||
1. The time from a task being enqueued on a run queue to its context switch | ||
and execution. This traces enqueue_task_*() -> finish_task_switch(), | ||
and instruments the run queue latency after a voluntary context switch. | ||
|
||
2. The time from when a task was involuntary context switched and still | ||
in the runnable state, to when it next executed. This is instrumented | ||
from finish_task_switch() alone. | ||
|
||
This tool uses in-kernel eBPF maps for storing timestamps and the histogram, | ||
for efficiency. Despite this, the overhead of this tool may become significant | ||
for some workloads: see the OVERHEAD section. | ||
|
||
This works by tracing various kernel scheduler functions using dynamic tracing, | ||
and will need updating to match any changes to these functions. | ||
|
||
Since this uses BPF, only the root user can use this tool. | ||
.SH REQUIREMENTS | ||
CONFIG_BPF and bcc. | ||
.SH OPTIONS | ||
.TP | ||
\-h | ||
Print usage message. | ||
.TP | ||
\-T | ||
Include timestamps on output. | ||
.TP | ||
\-m | ||
Output histogram in milliseconds. | ||
.TP | ||
\-P | ||
Print a histogram for each PID. | ||
.TP | ||
\-p PID | ||
Only show this PID (filtered in kernel for efficiency). | ||
.TP | ||
interval | ||
Output interval, in seconds. | ||
.TP | ||
count | ||
Number of outputs. | ||
.SH EXAMPLES | ||
.TP | ||
Summarize run queue latency as a histogram: | ||
# | ||
.B runqlat | ||
.TP | ||
Print 1 second summaries, 10 times: | ||
# | ||
.B runqlat 1 10 | ||
.TP | ||
Print 1 second summaries, using milliseconds as units for the histogram, and include timestamps on output: | ||
# | ||
.B runqlat \-mT 1 | ||
.TP | ||
Trace PID 186 only, 1 second summaries: | ||
# | ||
.B runqlat -P 185 1 | ||
.SH FIELDS | ||
.TP | ||
usecs | ||
Microsecond range | ||
.TP | ||
mecs | ||
Millisecond range | ||
.TP | ||
count | ||
How many times a task event fell into this range | ||
.TP | ||
distribution | ||
An ASCII bar chart to visualize the distribution (count column) | ||
.SH OVERHEAD | ||
This traces scheduler functions, which can become very frequent. While eBPF | ||
has very low overhead, and this tool uses in-kernel maps for efficiency, the | ||
frequency of scheduler events for some workloads may be high enough that the | ||
overhead of this tool becomes significant. Measure in a lab environment | ||
to quantify the overhead before use. | ||
.SH SOURCE | ||
This is from bcc. | ||
.IP | ||
https://github.com/iovisor/bcc | ||
.PP | ||
Also look in the bcc distribution for a companion _examples.txt file containing | ||
example usage, output, and commentary for this tool. | ||
.SH OS | ||
Linux | ||
.SH STABILITY | ||
Unstable - in development. | ||
.SH AUTHOR | ||
Brendan Gregg | ||
.SH SEE ALSO | ||
pidstat(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
#!/usr/bin/python | ||
# @lint-avoid-python-3-compatibility-imports | ||
# | ||
# runqlat Run queue (scheduler) latency as a histogram. | ||
# For Linux, uses BCC, eBPF. | ||
# | ||
# USAGE: runqlat [-h] [-T] [-m] [-P] [-p PID] [interval] [count] | ||
# | ||
# This measures the time a task spends waiting on a run queue for a turn | ||
# on-CPU, and shows this time as a histogram. This time should be small, but a | ||
# task may need to wait its turn due to CPU load. | ||
# | ||
# This measures two types of run queue latency: | ||
# 1. The time from a task being enqueued on a run queue to its context switch | ||
# and execution. This traces enqueue_task_*() -> finish_task_switch(), | ||
# and instruments the run queue latency after a voluntary context switch. | ||
# 2. The time from when a task was involuntary context switched and still | ||
# in the runnable state, to when it next executed. This is instrumented | ||
# from finish_task_switch() alone. | ||
# | ||
# Copyright 2016 Netflix, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 07-Feb-2016 Brendan Gregg Created this. | ||
|
||
from __future__ import print_function | ||
from bcc import BPF | ||
from time import sleep, strftime | ||
import argparse | ||
|
||
# arguments | ||
examples = """examples: | ||
./runqlat # summarize run queue latency as a histogram | ||
./runqlat 1 10 # print 1 second summaries, 10 times | ||
./runqlat -mT 1 # 1s summaries, milliseconds, and timestamps | ||
./runqlat -P # show each PID separately | ||
./runqlat -p 185 # trace PID 185 only | ||
""" | ||
parser = argparse.ArgumentParser( | ||
description="Summarize run queue (schedular) latency as a histogram", | ||
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
epilog=examples) | ||
parser.add_argument("-T", "--timestamp", action="store_true", | ||
help="include timestamp on output") | ||
parser.add_argument("-m", "--milliseconds", action="store_true", | ||
help="millisecond histogram") | ||
parser.add_argument("-P", "--pids", action="store_true", | ||
help="print a histogram per process ID") | ||
parser.add_argument("-p", "--pid", | ||
help="trace this PID only") | ||
parser.add_argument("interval", nargs="?", default=99999999, | ||
help="output interval, in seconds") | ||
parser.add_argument("count", nargs="?", default=99999999, | ||
help="number of outputs") | ||
args = parser.parse_args() | ||
countdown = int(args.count) | ||
debug = 0 | ||
|
||
# define BPF program | ||
bpf_text = """ | ||
#include <uapi/linux/ptrace.h> | ||
#include "kernel/sched/sched.h" | ||
typedef struct pid_key { | ||
u64 pid; // work around | ||
u64 slot; | ||
} pid_key_t; | ||
BPF_HASH(start, u32); | ||
STORAGE | ||
// record enqueue timestamp | ||
int trace_enqueue(struct pt_regs *ctx, struct rq *rq, struct task_struct *p, | ||
int flags) | ||
{ | ||
u32 pid = p->pid; | ||
if (FILTER) | ||
return 0; | ||
u64 ts = bpf_ktime_get_ns(); | ||
start.update(&pid, &ts); | ||
return 0; | ||
} | ||
// calculate latency | ||
int trace_run(struct pt_regs *ctx, struct task_struct *prev) | ||
{ | ||
u32 pid; | ||
// ivcsw: treat like an enqueue event and store timestamp | ||
if (prev->state == TASK_RUNNING) { | ||
pid = prev->pid; | ||
if (!(FILTER)) { | ||
u64 ts = bpf_ktime_get_ns(); | ||
start.update(&pid, &ts); | ||
} | ||
} | ||
pid = bpf_get_current_pid_tgid(); | ||
if (FILTER) | ||
return 0; | ||
u64 *tsp, delta; | ||
// fetch timestamp and calculate delta | ||
tsp = start.lookup(&pid); | ||
if (tsp == 0) { | ||
return 0; // missed enqueue | ||
} | ||
delta = bpf_ktime_get_ns() - *tsp; | ||
FACTOR | ||
// store as histogram | ||
STORE | ||
start.delete(&pid); | ||
return 0; | ||
} | ||
""" | ||
|
||
# code substitutions | ||
if args.pid: | ||
bpf_text = bpf_text.replace('FILTER', 'pid != %s' % args.pid) | ||
else: | ||
bpf_text = bpf_text.replace('FILTER', '0') | ||
if args.milliseconds: | ||
bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;') | ||
label = "msecs" | ||
else: | ||
bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;') | ||
label = "usecs" | ||
if args.pids: | ||
bpf_text = bpf_text.replace('STORAGE', | ||
'BPF_HISTOGRAM(dist, pid_key_t);') | ||
bpf_text = bpf_text.replace('STORE', | ||
'pid_key_t key = {.pid = pid, .slot = bpf_log2l(delta)}; ' + | ||
'dist.increment(key);') | ||
else: | ||
bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);') | ||
bpf_text = bpf_text.replace('STORE', | ||
'dist.increment(bpf_log2l(delta));') | ||
if debug: | ||
print(bpf_text) | ||
|
||
# load BPF program | ||
b = BPF(text=bpf_text) | ||
b.attach_kprobe(event_re="enqueue_task_*", fn_name="trace_enqueue") | ||
b.attach_kprobe(event="finish_task_switch", fn_name="trace_run") | ||
|
||
print("Tracing run queue latency... Hit Ctrl-C to end.") | ||
|
||
# output | ||
exiting = 0 if args.interval else 1 | ||
dist = b.get_table("dist") | ||
while (1): | ||
try: | ||
sleep(int(args.interval)) | ||
except KeyboardInterrupt: | ||
exiting = 1 | ||
|
||
print() | ||
if args.timestamp: | ||
print("%-8s\n" % strftime("%H:%M:%S"), end="") | ||
|
||
dist.print_log2_hist(label, "pid", section_print_fn=int) | ||
dist.clear() | ||
|
||
countdown -= 1 | ||
if exiting or countdown == 0: | ||
exit() |
Oops, something went wrong.