forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
39cece4
commit 251823a
Showing
5 changed files
with
568 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -102,4 +102,4 @@ Unstable - in development. | |
.SH AUTHOR | ||
Brendan Gregg | ||
.SH SEE ALSO | ||
pidstat(1) | ||
runqlen(8), pidstat(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
.TH runqlen 8 "2016-12-12" "USER COMMANDS" | ||
.SH NAME | ||
runqlen \- Scheduler run queue length as a histogram. | ||
.SH SYNOPSIS | ||
.B runqlen [\-h] [\-T] [\-O] [\-C] [interval] [count] | ||
.SH DESCRIPTION | ||
This program summarizes scheduler queue length as a histogram, and can also | ||
show run queue occupancy. It works by sampling the run queue length on all | ||
CPUs at 99 Hertz. | ||
|
||
This tool can be used to identify imbalances, eg, when processes are bound | ||
to CPUs causing queueing, or interrupt mappings causing the same. | ||
|
||
Since this uses BPF, only the root user can use this tool. | ||
.SH REQUIREMENTS | ||
CONFIG_BPF and bcc. | ||
.SH OPTIONS | ||
.TP | ||
\-h | ||
Print usage message. | ||
.TP | ||
\-T | ||
Include timestamps on output. | ||
.TP | ||
\-O | ||
Report run queue occupancy. | ||
.TP | ||
\-C | ||
Report for each CPU. | ||
.TP | ||
interval | ||
Output interval, in seconds. | ||
.TP | ||
count | ||
Number of outputs. | ||
.SH EXAMPLES | ||
.TP | ||
Summarize run queue length as a histogram: | ||
# | ||
.B runqlen | ||
.TP | ||
Print 1 second summaries, 10 times: | ||
# | ||
.B runqlen 1 10 | ||
.TP | ||
Print output every second, with timestamps, and show each CPU separately: | ||
# | ||
.B runqlen \-CT 1 | ||
.TP | ||
Print run queue occupancy every second: | ||
# | ||
.B runqlen \-O 1 | ||
.TP | ||
Print run queue occupancy, with timetamps, for each CPU: | ||
# | ||
.B runqlen \-COT 1 | ||
.SH FIELDS | ||
.TP | ||
runqlen | ||
Scheduler run queue length: the number of threads (tasks) waiting to run, | ||
(excluding including the currently running task). | ||
.TP | ||
count | ||
Number of samples at this queue length. | ||
.TP | ||
distribution | ||
An ASCII bar chart to visualize the distribution (count column) | ||
.SH OVERHEAD | ||
This uses sampling at 99 Hertz (on all CPUs), and in-kernel summaries, which | ||
should make overhead negligible. This does not trace scheduler events, like | ||
runqlen does, which comes at a much higher overhead cost. | ||
.SH SOURCE | ||
This is from bcc. | ||
.IP | ||
https://github.com/iovisor/bcc | ||
.PP | ||
Also look in the bcc distribution for a companion _examples.txt file containing | ||
example usage, output, and commentary for this tool. | ||
.SH OS | ||
Linux | ||
.SH STABILITY | ||
Unstable - in development. | ||
.SH AUTHOR | ||
Brendan Gregg | ||
.SH SEE ALSO | ||
runqlat(8), pidstat(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
#!/usr/bin/python | ||
# @lint-avoid-python-3-compatibility-imports | ||
# | ||
# runqlen Summarize scheduler run queue length as a histogram. | ||
# For Linux, uses BCC, eBPF. | ||
# | ||
# This counts the length of the run queue, excluding the currently running | ||
# thread, and shows it as a histogram. | ||
# | ||
# Also answers run queue occupancy. | ||
# | ||
# USAGE: runqlen [-h] [-T] [-Q] [-m] [-D] [interval] [count] | ||
# | ||
# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is | ||
# a version of this tool that may work on Linux 4.6 - 4.8. | ||
# | ||
# Copyright 2016 Netflix, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 12-Dec-2016 Brendan Gregg Created this. | ||
|
||
from __future__ import print_function | ||
from bcc import BPF, PerfType, PerfSWConfig | ||
from time import sleep, strftime | ||
import argparse | ||
|
||
# arguments | ||
examples = """examples: | ||
./runqlen # summarize run queue length as a histogram | ||
./runqlen 1 10 # print 1 second summaries, 10 times | ||
./runqlen -T 1 # 1s summaries and timestamps | ||
./runqlen -O # report run queue occupancy | ||
./runqlen -C # show each CPU separately | ||
""" | ||
parser = argparse.ArgumentParser( | ||
description="Summarize scheduler run queue length as a histogram", | ||
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
epilog=examples) | ||
parser.add_argument("-T", "--timestamp", action="store_true", | ||
help="include timestamp on output") | ||
parser.add_argument("-O", "--runqocc", action="store_true", | ||
help="report run queue occupancy") | ||
parser.add_argument("-C", "--cpus", action="store_true", | ||
help="print output for each CPU separately") | ||
parser.add_argument("interval", nargs="?", default=99999999, | ||
help="output interval, in seconds") | ||
parser.add_argument("count", nargs="?", default=99999999, | ||
help="number of outputs") | ||
args = parser.parse_args() | ||
countdown = int(args.count) | ||
debug = 0 | ||
frequency = 99 | ||
|
||
# define BPF program | ||
bpf_text = """ | ||
#include <uapi/linux/ptrace.h> | ||
#include <linux/sched.h> | ||
// Declare enough of cfs_rq to find nr_running, since we can't #import the | ||
// header. This will need maintenance. It is from kernel/sched/sched.h: | ||
struct cfs_rq_partial { | ||
struct load_weight load; | ||
unsigned int nr_running, h_nr_running; | ||
}; | ||
typedef struct cpu_key { | ||
int cpu; | ||
unsigned int slot; | ||
} cpu_key_t; | ||
STORAGE | ||
int do_perf_event() | ||
{ | ||
unsigned int len = 0; | ||
pid_t pid = 0; | ||
struct task_struct *task = NULL; | ||
struct cfs_rq_partial *my_q = NULL; | ||
// Fetch the run queue length from task->se.cfs_rq->nr_running. This is an | ||
// unstable interface and may need maintenance. Perhaps a future version | ||
// of BPF will support task_rq(p) or something similar as a more reliable | ||
// interface. | ||
task = (struct task_struct *)bpf_get_current_task(); | ||
bpf_probe_read(&my_q, sizeof(my_q), &task->se.cfs_rq); | ||
bpf_probe_read(&len, sizeof(len), &my_q->nr_running); | ||
// Decrement idle thread by dropping the run queue by one. We could do | ||
// this other ways if needed, like matching on task->pid. | ||
if (len > 0) | ||
len--; | ||
STORE | ||
return 0; | ||
} | ||
""" | ||
|
||
if args.cpus: | ||
bpf_text = bpf_text.replace('STORAGE', | ||
'BPF_HISTOGRAM(dist, cpu_key_t);') | ||
bpf_text = bpf_text.replace('STORE', 'cpu_key_t key = {.slot = len}; ' + | ||
'bpf_probe_read(&key.cpu, sizeof(key.cpu), &task->wake_cpu);' + | ||
'dist.increment(key);') | ||
else: | ||
bpf_text = bpf_text.replace('STORAGE', | ||
'BPF_HISTOGRAM(dist, unsigned int);') | ||
bpf_text = bpf_text.replace('STORE', 'dist.increment(len);') | ||
|
||
# code substitutions | ||
if debug: | ||
print(bpf_text) | ||
|
||
# load BPF program | ||
b = BPF(text=bpf_text) | ||
# initialize BPF & perf_events | ||
b = BPF(text=bpf_text) | ||
b.attach_perf_event(ev_type=PerfType.SOFTWARE, | ||
ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event", | ||
sample_period=0, sample_freq=frequency) | ||
|
||
print("Sampling run queue length... Hit Ctrl-C to end.") | ||
|
||
# output | ||
exiting = 0 if args.interval else 1 | ||
dist = b.get_table("dist") | ||
while (1): | ||
try: | ||
sleep(int(args.interval)) | ||
except KeyboardInterrupt: | ||
exiting = 1 | ||
|
||
print() | ||
if args.timestamp: | ||
print("%-8s\n" % strftime("%H:%M:%S"), end="") | ||
|
||
if args.runqocc: | ||
if args.cpus: | ||
# run queue occupancy, per-CPU summary | ||
idle = {} | ||
queued = {} | ||
cpumax = 0 | ||
for k, v in dist.items(): | ||
if k.cpu > cpumax: | ||
cpumax = k.cpu | ||
for c in range(0, cpumax + 1): | ||
idle[c] = 0 | ||
queued[c] = 0 | ||
for k, v in dist.items(): | ||
if k.slot == 0: | ||
idle[k.cpu] += v.value | ||
else: | ||
queued[k.cpu] += v.value | ||
for c in range(0, cpumax + 1): | ||
samples = idle[c] + queued[c] | ||
if samples: | ||
runqocc = float(queued[c]) / samples | ||
else: | ||
runqocc = 0 | ||
print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc)) | ||
|
||
else: | ||
# run queue occupancy, system-wide summary | ||
idle = 0 | ||
queued = 0 | ||
for k, v in dist.items(): | ||
if k.value == 0: | ||
idle += v.value | ||
else: | ||
queued += v.value | ||
samples = idle + queued | ||
if samples: | ||
runqocc = float(queued) / samples | ||
else: | ||
runqocc = 0 | ||
print("runqocc: %0.2f%%" % (100 * runqocc)) | ||
|
||
else: | ||
# run queue length histograms | ||
dist.print_linear_hist("runqlen", "cpu") | ||
|
||
dist.clear() | ||
|
||
countdown -= 1 | ||
if exiting or countdown == 0: | ||
exit() |
Oops, something went wrong.