Skip to content

Commit

Permalink
Merge pull request iovisor#150 from brendangregg/master
Browse files Browse the repository at this point in the history
some bcc examples and tools
  • Loading branch information
drzaeus77 committed Aug 19, 2015
2 parents 5484bad + ebdb242 commit 307ac0b
Show file tree
Hide file tree
Showing 24 changed files with 1,254 additions and 0 deletions.
48 changes: 48 additions & 0 deletions examples/bitehist.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* bitehist.c Block I/O size histogram.
* For Linux, uses BCC, eBPF. See .py file.
*
* Based on eBPF sample tracex2 by Alexi Starovoitov.
* Copyright (c) 2013-2015 PLUMgrid, http:https://plumgrid.com
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* 15-Aug-2015 Brendan Gregg Created this.
*/

#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>

BPF_TABLE("array", int, u64, dist, 64);

static unsigned int log2(unsigned int v)
{
unsigned int r;
unsigned int shift;

r = (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
shift = (v > 0xF) << 2; v >>= shift; r |= shift;
shift = (v > 0x3) << 1; v >>= shift; r |= shift;
r |= (v >> 1);
return r;
}

static unsigned int log2l(unsigned long v)
{
unsigned int hi = v >> 32;
if (hi)
return log2(hi) + 32 + 1;
else
return log2(v) + 1;
}

int do_request(struct pt_regs *ctx, struct request *req)
{
int index = log2l(req->__data_len / 1024);
u64 *leaf = dist.lookup(&index);
if (leaf) (*leaf)++;

return 0;
}
109 changes: 109 additions & 0 deletions examples/bitehist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/python
#
# bitehist.py Block I/O size histogram.
# For Linux, uses BCC, eBPF. See .c file.
#
# Written as a basic example of using a histogram to show a distribution.
#
# USAGE: bitehist.py [interval [count]]
#
# The default interval is 5 seconds. A Ctrl-C will print the partially
# gathered histogram then exit.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 15-Aug-2015 Brendan Gregg Created this.

from bpf import BPF
from ctypes import c_ushort, c_int, c_ulonglong
from time import sleep
from sys import argv

def usage():
print("USAGE: %s [interval [count]]" % argv[0])
exit()

# arguments
interval = 5
count = -1
if len(argv) > 1:
try:
interval = int(argv[1])
if interval == 0:
raise
if len(argv) > 2:
count = int(argv[2])
except: # also catches -h, --help
usage()

# load BPF program
b = BPF(src_file = "bitehist.c")
BPF.attach_kprobe(b.load_func("do_request", BPF.KPROBE), "blk_start_request")
dist = b.get_table("dist")
dist_max = 64

# header
print("Tracing... Hit Ctrl-C to end.")
last = {}
for i in range(1, dist_max + 1):
last[i] = 0

# functions
stars_max = 38
def stars(val, val_max, width):
i = 0
text = ""
while (1):
if (i > (width * val / val_max) - 1) or (i > width - 1):
break
text += "*"
i += 1
if val > val_max:
text = text[:-1] + "+"
return text

def print_log2_hist(d, val_type):
idx_max = -1
val_max = 0
for i in range(1, dist_max + 1):
try:
val = dist[c_int(i)].value - last[i]
if (val > 0):
idx_max = i
if (val > val_max):
val_max = val
except:
break
if idx_max > 0:
print(" %-15s : count distribution" % val_type);
for i in range(1, idx_max + 1):
low = (1 << i) >> 1
high = (1 << i) - 1
if (low == high):
low -= 1
try:
val = dist[c_int(i)].value - last[i]
print("%8d -> %-8d : %-8d |%-*s|" % (low, high, val,
stars_max, stars(val, val_max, stars_max)))
last[i] = dist[c_int(i)].value
except:
break

# output
loop = 0
do_exit = 0
while (1):
if count > 0:
loop += 1
if loop > count:
exit()
try:
sleep(interval)
except KeyboardInterrupt:
pass; do_exit = 1

print
print_log2_hist(dist, "kbytes")
if do_exit:
exit()
70 changes: 70 additions & 0 deletions examples/bitehist_example.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
Demonstrations of bitehist.py, the Linux eBPF/bcc version.

This prints a power-of-2 histogram to show the block I/O size distribution.
By default, a summary is printed every five seconds:

# ./bitehist.py
Tracing... Hit Ctrl-C to end.

kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 26 |************* |
8 -> 15 : 3 |* |
16 -> 31 : 5 |** |
32 -> 63 : 6 |*** |
64 -> 127 : 7 |*** |
128 -> 255 : 75 |**************************************|

kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 83 |**************************************|
8 -> 15 : 2 | |
16 -> 31 : 6 |** |
32 -> 63 : 6 |** |
64 -> 127 : 5 |** |
128 -> 255 : 21 |********* |
^C
kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 8 |**************************************|

The first output shows a bimodal distribution. The largest mode of 75 I/O were
between 128 and 255 Kbytes in size, and another mode of 26 I/O were between 4
and 7 Kbytes in size.

The next output summary shows the workload is doing more 4 - 7 Kbyte I/O.

The final output is partial, showing what was measured until Ctrl-C was hit.


For an output interval of one second, and three summaries:

# ./bitehist.py 1 3
Tracing... Hit Ctrl-C to end.

kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 4 |**************************************|

kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 5 |**************************************|
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 1 |******* |

kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 4 |**************************************|


Full usage:

# ./bitehist.py -h
USAGE: ./bitehist.py [interval [count]]
48 changes: 48 additions & 0 deletions examples/disksnoop.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* disksnoop.c Trace block device I/O: basic version of iosnoop.
* For Linux, uses BCC, eBPF. See .py file.
*
* Copyright (c) 2015 Brendan Gregg.
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* 11-Aug-2015 Brendan Gregg Created this.
*/

#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>

struct key_t {
struct request *req;
};
BPF_TABLE("hash", struct key_t, u64, start, 10240);

int do_request(struct pt_regs *ctx, struct request *req) {
struct key_t key = {};
u64 ts;

// stash start timestamp by request ptr
ts = bpf_ktime_get_ns();
key.req = req;
start.update(&key, &ts);

return 0;
}

int do_completion(struct pt_regs *ctx, struct request *req) {
struct key_t key = {};
u64 *tsp, delta;

key.req = req;
tsp = start.lookup(&key);

if (tsp != 0) {
delta = bpf_ktime_get_ns() - *tsp;
bpf_trace_printk("%d %x %d\n", req->__data_len,
req->cmd_flags, delta / 1000);
start.delete(&key);
}

return 0;
}
53 changes: 53 additions & 0 deletions examples/disksnoop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/python
#
# disksnoop.py Trace block device I/O: basic version of iosnoop.
# For Linux, uses BCC, eBPF. See .c file.
#
# Written as a basic example of tracing latency.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 11-Aug-2015 Brendan Gregg Created this.

from __future__ import print_function
from bpf import BPF
import sys

REQ_WRITE = 1 # from include/linux/blk_types.h

# load BPF program
b = BPF(src_file="disksnoop.c")
BPF.attach_kprobe(b.load_func("do_request", BPF.KPROBE), "blk_start_request")
BPF.attach_kprobe(b.load_func("do_completion", BPF.KPROBE), "blk_update_request")

# header
print("%-18s %-2s %-7s %8s" % ("TIME(s)", "T", "BYTES", "LAT(ms)"))

# open trace pipe
try:
trace = open("/sys/kernel/debug/tracing/trace_pipe", "r")
except:
print("ERROR: opening trace_pipe", file=sys.stderr)
exit(1)

# format output
while 1:
try:
line = trace.readline().rstrip()
except KeyboardInterrupt:
pass; exit()
prolog, time_s, colon, bytes_s, flags_s, us_s = \
line.rsplit(" ", 5)

time_s = time_s[:-1] # strip trailing ":"
flags = int(flags_s, 16)
if flags & REQ_WRITE:
type_s = "W"
elif bytes_s == "0": # see blk_fill_rwbs() for logic
type_s = "M"
else:
type_s = "R"
ms = float(int(us_s, 10)) / 1000

print("%-18s %-2s %-7s %8.2f" % (time_s, type_s, bytes_s, ms))
40 changes: 40 additions & 0 deletions examples/disksnoop_example.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
Demonstrations of disksnoop.py, the Linux eBPF/bcc version.


This traces block I/O, a prints a line to summarize each I/O completed:

# ./disksnoop.py
TIME(s) T BYTES LAT(ms)
16458043.435457 W 4096 2.73
16458043.435981 W 4096 3.24
16458043.436012 W 4096 3.13
16458043.437326 W 4096 4.44
16458044.126545 R 4096 42.82
16458044.129872 R 4096 3.24
16458044.130705 R 4096 0.73
16458044.142813 R 4096 12.01
16458044.147302 R 4096 4.33
16458044.148117 R 4096 0.71
16458044.148950 R 4096 0.70
16458044.164332 R 4096 15.29
16458044.168003 R 4096 3.58
16458044.171676 R 4096 3.59
16458044.172453 R 4096 0.72
16458044.173213 R 4096 0.71
16458044.173989 R 4096 0.72
16458044.174739 R 4096 0.70
16458044.190334 R 4096 15.52
16458044.196608 R 4096 6.17
16458044.203091 R 4096 6.35

The output includes a basic timestamp (in seconds), the type of I/O (W == write,
R == read, M == metadata), the size of the I/O in bytes, and the latency (or
duration) of the I/O in milliseconds.

The latency is measured from I/O request to the device, to the device
completion. This excludes latency spent queued in the OS.

Most of the I/O in this example were 0.7 and 4 milliseconds in duration. There
was an outlier of 42.82 milliseconds, a read which followed many writes (the
high latency may have been caused by the writes still being serviced on the
storage device).
Loading

0 comments on commit 307ac0b

Please sign in to comment.