diff --git a/examples/bitehist.c b/examples/bitehist.c new file mode 100644 index 000000000000..e4e6772da219 --- /dev/null +++ b/examples/bitehist.c @@ -0,0 +1,48 @@ +/* + * bitehist.c Block I/O size histogram. + * For Linux, uses BCC, eBPF. See .py file. + * + * Based on eBPF sample tracex2 by Alexi Starovoitov. + * Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * 15-Aug-2015 Brendan Gregg Created this. + */ + +#include +#include "../../linux-4.2-rc5/include/linux/blkdev.h" + +BPF_TABLE("array", int, u64, dist, 64); + +static unsigned int log2(unsigned int v) +{ + unsigned int r; + unsigned int shift; + + r = (v > 0xFFFF) << 4; v >>= r; + shift = (v > 0xFF) << 3; v >>= shift; r |= shift; + shift = (v > 0xF) << 2; v >>= shift; r |= shift; + shift = (v > 0x3) << 1; v >>= shift; r |= shift; + r |= (v >> 1); + return r; +} + +static unsigned int log2l(unsigned long v) +{ + unsigned int hi = v >> 32; + if (hi) + return log2(hi) + 32 + 1; + else + return log2(v) + 1; +} + +int do_request(struct pt_regs *ctx, struct request *req) +{ + int index = log2l(req->__data_len / 1024); + u64 *leaf = dist.lookup(&index); + if (leaf) (*leaf)++; + + return 0; +} diff --git a/examples/bitehist.py b/examples/bitehist.py new file mode 100755 index 000000000000..ae2112b69057 --- /dev/null +++ b/examples/bitehist.py @@ -0,0 +1,109 @@ +#!/usr/bin/python +# +# bitehist.py Block I/O size histogram. +# For Linux, uses BCC, eBPF. See .c file. +# +# Written as a basic example of using a histogram to show a distribution. +# +# USAGE: bitehist.py [interval [count]] +# +# The default interval is 5 seconds. A Ctrl-C will print the partially +# gathered histogram then exit. +# +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 15-Aug-2015 Brendan Gregg Created this. + +from bpf import BPF +from ctypes import c_ushort, c_int, c_ulonglong +from time import sleep +from sys import argv + +def usage(): + print("USAGE: %s [interval [count]]" % argv[0]) + exit() + +# arguments +interval = 5 +count = -1 +if len(argv) > 1: + try: + interval = int(argv[1]) + if interval == 0: + raise + if len(argv) > 2: + count = int(argv[2]) + except: # also catches -h, --help + usage() + +# load BPF program +b = BPF(src_file = "bitehist.c") +BPF.attach_kprobe(b.load_func("do_request", BPF.KPROBE), "blk_start_request") +dist = b.get_table("dist", c_int, c_ulonglong) +dist_max = 64 + +# header +print("Tracing... Hit Ctrl-C to end.") +last = {} +for i in range(1, dist_max + 1): + last[i] = 0 + +# functions +stars_max = 38 +def stars(val, val_max, width): + i = 0 + text = "" + while (1): + if (i > (width * val / val_max) - 1) or (i > width - 1): + break + text += "*" + i += 1 + if val > val_max: + text = text[:-1] + "+" + return text + +def print_log2_hist(d, val_type): + idx_max = -1 + val_max = 0 + for i in range(1, dist_max + 1): + try: + val = dist[c_int(i)].value - last[i] + if (val > 0): + idx_max = i + if (val > val_max): + val_max = val + except: + break + if idx_max > 0: + print(" %-15s : count distribution" % val_type); + for i in range(1, idx_max + 1): + low = (1 << i) >> 1 + high = (1 << i) - 1 + if (low == high): + low -= 1 + try: + val = dist[c_int(i)].value - last[i] + print("%8d -> %-8d : %-8d |%-*s|" % (low, high, val, + stars_max, stars(val, val_max, stars_max))) + last[i] = dist[c_int(i)].value + except: + break + +# output +loop = 0 +do_exit = 0 +while (1): + if count > 0: + loop += 1 + if loop > count: + exit() + try: + sleep(interval) + except KeyboardInterrupt: + pass; do_exit = 1 + + print + print_log2_hist(dist, "kbytes") + if do_exit: + exit() diff --git a/examples/bitehist_example.txt b/examples/bitehist_example.txt new file mode 100644 index 000000000000..5af66e191d45 --- /dev/null +++ b/examples/bitehist_example.txt @@ -0,0 +1,70 @@ +Demonstrations of bitehist.py, the Linux eBPF/bcc version. + +This prints a power-of-2 histogram to show the block I/O size distribution. +By default, a summary is printed every five seconds: + +# ./bitehist.py +Tracing... Hit Ctrl-C to end. + + kbytes : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 26 |************* | + 8 -> 15 : 3 |* | + 16 -> 31 : 5 |** | + 32 -> 63 : 6 |*** | + 64 -> 127 : 7 |*** | + 128 -> 255 : 75 |**************************************| + + kbytes : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 83 |**************************************| + 8 -> 15 : 2 | | + 16 -> 31 : 6 |** | + 32 -> 63 : 6 |** | + 64 -> 127 : 5 |** | + 128 -> 255 : 21 |********* | +^C + kbytes : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 8 |**************************************| + +The first output shows a bimodal distribution. The largest mode of 75 I/O were +between 128 and 255 Kbytes in size, and another mode of 26 I/O were between 4 +and 7 Kbytes in size. + +The next output summary shows the workload is doing more 4 - 7 Kbyte I/O. + +The final output is partial, showing what was measured until Ctrl-C was hit. + + +For an output interval of one second, and three summaries: + +# ./bitehist.py 1 3 +Tracing... Hit Ctrl-C to end. + + kbytes : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 4 |**************************************| + + kbytes : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 5 |**************************************| + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 1 |******* | + + kbytes : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 4 |**************************************| + + +Full usage: + +# ./bitehist.py -h +USAGE: ./bitehist.py [interval [count]] diff --git a/examples/disksnoop.c b/examples/disksnoop.c new file mode 100644 index 000000000000..cd24583fdf10 --- /dev/null +++ b/examples/disksnoop.c @@ -0,0 +1,48 @@ +/* + * disksnoop.c Trace block device I/O: basic version of iosnoop. + * For Linux, uses BCC, eBPF. See .py file. + * + * Copyright (c) 2015 Brendan Gregg. + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * 11-Aug-2015 Brendan Gregg Created this. + */ + +#include +#include "../../linux-4.2-rc5/include/linux/blkdev.h" + +struct key_t { + struct request *req; +}; +BPF_TABLE("hash", struct key_t, u64, start, 10240); + +int do_request(struct pt_regs *ctx, struct request *req) { + struct key_t key = {}; + u64 ts; + + // stash start timestamp by request ptr + ts = bpf_ktime_get_ns(); + key.req = req; + start.update(&key, &ts); + + return 0; +} + +int do_completion(struct pt_regs *ctx, struct request *req) { + struct key_t key = {}; + u64 *tsp, delta; + + key.req = req; + tsp = start.lookup(&key); + + if (tsp != 0) { + delta = bpf_ktime_get_ns() - *tsp; + bpf_trace_printk("%d %x %d\n", req->__data_len, + req->cmd_flags, delta / 1000); + start.delete(&key); + } + + return 0; +} diff --git a/examples/disksnoop.py b/examples/disksnoop.py new file mode 100755 index 000000000000..11e5ab615879 --- /dev/null +++ b/examples/disksnoop.py @@ -0,0 +1,52 @@ +#!/usr/bin/python +# +# disksnoop.py Trace block device I/O: basic version of iosnoop. +# For Linux, uses BCC, eBPF. See .c file. +# +# Written as a basic example of tracing latency. +# +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 11-Aug-2015 Brendan Gregg Created this. + +from bpf import BPF +import sys + +REQ_WRITE = 1 # from include/linux/blk_types.h + +# load BPF program +b = BPF(src_file="disksnoop.c") +BPF.attach_kprobe(b.load_func("do_request", BPF.KPROBE), "blk_start_request") +BPF.attach_kprobe(b.load_func("do_completion", BPF.KPROBE), "blk_update_request") + +# header +print "%-18s %-2s %-7s %8s" % ("TIME(s)", "T", "BYTES", "LAT(ms)") + +# open trace pipe +try: + trace = open("/sys/kernel/debug/tracing/trace_pipe", "r") +except: + print >> sys.stderr, "ERROR: opening trace_pipe" + exit(1) + +# format output +while 1: + try: + line = trace.readline().rstrip() + except KeyboardInterrupt: + pass; exit() + prolog, time_s, colon, bytes_s, flags_s, us_s = \ + line.rsplit(" ", 5) + + time_s = time_s[:-1] # strip trailing ":" + flags = int(flags_s, 16) + if flags & REQ_WRITE: + type_s = "W" + elif bytes_s == "0": # see blk_fill_rwbs() for logic + type_s = "M" + else: + type_s = "R" + ms = float(int(us_s, 10)) / 1000 + + print "%-18s %-2s %-7s %8.2f" % (time_s, type_s, bytes_s, ms) diff --git a/examples/disksnoop_example.txt b/examples/disksnoop_example.txt new file mode 100644 index 000000000000..835291223de2 --- /dev/null +++ b/examples/disksnoop_example.txt @@ -0,0 +1,40 @@ +Demonstrations of disksnoop.py, the Linux eBPF/bcc version. + + +This traces block I/O, a prints a line to summarize each I/O completed: + +# ./disksnoop.py +TIME(s) T BYTES LAT(ms) +16458043.435457 W 4096 2.73 +16458043.435981 W 4096 3.24 +16458043.436012 W 4096 3.13 +16458043.437326 W 4096 4.44 +16458044.126545 R 4096 42.82 +16458044.129872 R 4096 3.24 +16458044.130705 R 4096 0.73 +16458044.142813 R 4096 12.01 +16458044.147302 R 4096 4.33 +16458044.148117 R 4096 0.71 +16458044.148950 R 4096 0.70 +16458044.164332 R 4096 15.29 +16458044.168003 R 4096 3.58 +16458044.171676 R 4096 3.59 +16458044.172453 R 4096 0.72 +16458044.173213 R 4096 0.71 +16458044.173989 R 4096 0.72 +16458044.174739 R 4096 0.70 +16458044.190334 R 4096 15.52 +16458044.196608 R 4096 6.17 +16458044.203091 R 4096 6.35 + +The output includes a basic timestamp (in seconds), the type of I/O (W == write, +R == read, M == metadata), the size of the I/O in bytes, and the latency (or +duration) of the I/O in milliseconds. + +The latency is measured from I/O request to the device, to the device +completion. This excludes latency spent queued in the OS. + +Most of the I/O in this example were 0.7 and 4 milliseconds in duration. There +was an outlier of 42.82 milliseconds, a read which followed many writes (the +high latency may have been caused by the writes still being serviced on the +storage device). diff --git a/examples/vfsreadlat.c b/examples/vfsreadlat.c new file mode 100644 index 000000000000..2d3141c4f551 --- /dev/null +++ b/examples/vfsreadlat.c @@ -0,0 +1,73 @@ +/* + * vfsreadlat.c VFS read latency distribution. + * For Linux, uses BCC, eBPF. See .py file. + * + * Based on eBPF sample tracex2 by Alexi Starovoitov. + * Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * 15-Aug-2015 Brendan Gregg Created this. + */ + +#include + +struct key_t { + u32 pid; +}; + +BPF_TABLE("hash", struct key_t, u64, start, 10240); +BPF_TABLE("array", int, u64, dist, 64); + +static unsigned int log2(unsigned int v) +{ + unsigned int r; + unsigned int shift; + + r = (v > 0xFFFF) << 4; v >>= r; + shift = (v > 0xFF) << 3; v >>= shift; r |= shift; + shift = (v > 0xF) << 2; v >>= shift; r |= shift; + shift = (v > 0x3) << 1; v >>= shift; r |= shift; + r |= (v >> 1); + return r; +} + +static unsigned int log2l(unsigned long v) +{ + unsigned int hi = v >> 32; + if (hi) + return log2(hi) + 32 + 1; + else + return log2(v) + 1; +} + +int do_entry(struct pt_regs *ctx) +{ + struct key_t key = {}; + u64 ts, *val, zero = 0; + + key.pid = bpf_get_current_pid_tgid(); + ts = bpf_ktime_get_ns(); + start.update(&key, &ts); + return 0; +} + +int do_return(struct pt_regs *ctx) +{ + struct key_t key = {}; + u64 *tsp, delta; + + key.pid = bpf_get_current_pid_tgid(); + tsp = start.lookup(&key); + + if (tsp != 0) { + delta = bpf_ktime_get_ns() - *tsp; + int index = log2l(delta / 1000); + u64 *leaf = dist.lookup(&index); + if (leaf) (*leaf)++; + start.delete(&key); + } + + return 0; +} diff --git a/examples/vfsreadlat.py b/examples/vfsreadlat.py new file mode 100755 index 000000000000..798ccf1abed9 --- /dev/null +++ b/examples/vfsreadlat.py @@ -0,0 +1,110 @@ +#!/usr/bin/python +# +# vfsreadlat.py VFS read latency distribution. +# For Linux, uses BCC, eBPF. See .c file. +# +# Written as a basic example of a function latency distribution histogram. +# +# USAGE: vfsreadlat.py [interval [count]] +# +# The default interval is 5 seconds. A Ctrl-C will print the partially +# gathered histogram then exit. +# +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 15-Aug-2015 Brendan Gregg Created this. + +from bpf import BPF +from ctypes import c_ushort, c_int, c_ulonglong +from time import sleep +from sys import argv + +def usage(): + print("USAGE: %s [interval [count]]" % argv[0]) + exit() + +# arguments +interval = 5 +count = -1 +if len(argv) > 1: + try: + interval = int(argv[1]) + if interval == 0: + raise + if len(argv) > 2: + count = int(argv[2]) + except: # also catches -h, --help + usage() + +# load BPF program +b = BPF(src_file = "vfsreadlat.c") +BPF.attach_kprobe(b.load_func("do_entry", BPF.KPROBE), "vfs_read") +BPF.attach_kretprobe(b.load_func("do_return", BPF.KPROBE), "vfs_read") +dist = b.get_table("dist", c_int, c_ulonglong) +dist_max = 64 + +# header +print("Tracing... Hit Ctrl-C to end.") +last = {} +for i in range(1, dist_max + 1): + last[i] = 0 + +# functions +stars_max = 38 +def stars(val, val_max, width): + i = 0 + text = "" + while (1): + if (i > (width * val / val_max) - 1) or (i > width - 1): + break + text += "*" + i += 1 + if val > val_max: + text = text[:-1] + "+" + return text + +def print_log2_hist(d, val_type): + idx_max = -1 + val_max = 0 + for i in range(1, dist_max + 1): + try: + val = dist[c_int(i)].value - last[i] + if (val > 0): + idx_max = i + if (val > val_max): + val_max = val + except: + break + if idx_max > 0: + print(" %-15s : count distribution" % val_type); + for i in range(1, idx_max + 1): + low = (1 << i) >> 1 + high = (1 << i) - 1 + if (low == high): + low -= 1 + try: + val = dist[c_int(i)].value - last[i] + print("%8d -> %-8d : %-8d |%-*s|" % (low, high, val, + stars_max, stars(val, val_max, stars_max))) + last[i] = dist[c_int(i)].value + except: + break + +# output +loop = 0 +do_exit = 0 +while (1): + if count > 0: + loop += 1 + if loop > count: + exit() + try: + sleep(interval) + except KeyboardInterrupt: + pass; do_exit = 1 + + print + print_log2_hist(dist, "usecs") + if do_exit: + exit() diff --git a/examples/vfsreadlat_example.txt b/examples/vfsreadlat_example.txt new file mode 100644 index 000000000000..1d95f6a576e4 --- /dev/null +++ b/examples/vfsreadlat_example.txt @@ -0,0 +1,63 @@ +Demonstrations of vfsreadlat.py, the Linux eBPF/bcc version. + + +This example traces the latency of vfs_read (time from call to return), printing +it as a histogram distribution. By default, output is every five seconds: + +# ./vfsreadlat.py +Tracing... Hit Ctrl-C to end. + + usecs : count distribution + 0 -> 1 : 4457 |*************************************+| + 2 -> 3 : 447 |*** | + 4 -> 7 : 2059 |***************** | + 8 -> 15 : 1179 |********** | + 16 -> 31 : 63 | | + 32 -> 63 : 0 | | + 64 -> 127 : 2 | | + 128 -> 255 : 0 | | + 256 -> 511 : 3 | | + 512 -> 1023 : 1 | | + 1024 -> 2047 : 3 | | + 2048 -> 4095 : 2 | | + 4096 -> 8191 : 0 | | + 8192 -> 16383 : 0 | | + 16384 -> 32767 : 0 | | + 32768 -> 65535 : 0 | | + 65536 -> 131071 : 4 | | + 131072 -> 262143 : 2 | | + 262144 -> 524287 : 0 | | + 524288 -> 1048575 : 4 | | +^C + usecs : count distribution + 0 -> 1 : 241 |*************************************+| + 2 -> 3 : 17 |** | + 4 -> 7 : 2 | | + 8 -> 15 : 4 | | + 16 -> 31 : 2 | | + 32 -> 63 : 0 | | + 64 -> 127 : 1 | | + 128 -> 255 : 0 | | + 256 -> 511 : 1 | | + 512 -> 1023 : 1 | | + 1024 -> 2047 : 0 | | + 2048 -> 4095 : 1 | | + 4096 -> 8191 : 0 | | + 8192 -> 16383 : 0 | | + 16384 -> 32767 : 0 | | + 32768 -> 65535 : 0 | | + 65536 -> 131071 : 0 | | + 131072 -> 262143 : 0 | | + 262144 -> 524287 : 0 | | + 524288 -> 1048575 : 1 | | + +These examples show outliers in the 524 - 1048 milliseconds range. Since +vfs_read() will catch many types of events, this could be anything including +keystroke latency on ssh sessions. Further drilling with bcc will be necessary +to identify more details. + + +Full usage: + +# ./vfsreadlat.py -h +USAGE: ./vfsreadlat.py [interval [count]] diff --git a/man/man8/pidpersec.8 b/man/man8/pidpersec.8 new file mode 100644 index 000000000000..2164ffafdb99 --- /dev/null +++ b/man/man8/pidpersec.8 @@ -0,0 +1,41 @@ +.TH pidpersec 8 "2015-08-18" "USER COMMANDS" +.SH NAME +pidpersec \- Count new processes (via fork()). Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B pidpersec +.SH DESCRIPTION +pidpersec shows how many new processes were created each second. There +can be performance issues caused by many short-lived processes, which may not +be visible in sampling tools like top(1). pidpersec provides one way to +investigate this behavior. + +This works by tracing the kernel sched_fork() function using dynamic tracing, +and will need updating to match any changes to this function. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH EXAMPLES +.TP +Count new processes created each second: +# +.B pidpersec +.SH OVERHEAD +This traces the kernel fork function, and maintains an in-kernel count which is +read asynchronously from user-space. As the rate of this is generally expected to +be low (<< 1000/s), the overhead is also expected to be negligible. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Brendan Gregg +.SH SEE ALSO +top(1) diff --git a/man/man8/syncsnoop.8 b/man/man8/syncsnoop.8 new file mode 100644 index 000000000000..eb641d66372f --- /dev/null +++ b/man/man8/syncsnoop.8 @@ -0,0 +1,49 @@ +.TH syncsnoop 8 "2015-08-18" "USER COMMANDS" +.SH NAME +syncsnoop \- Trace sync() syscall. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B syncsnoop +.SH DESCRIPTION +syncsnoop traces calls to sync(), which flushes file system buffers to +storage devices. These calls can cause performance perturbations, and it can +be useful to know if they are happening and how frequently. + +This works by tracing the kernel sys_sync() function using dynamic tracing, and +will need updating to match any changes to this function. + +This program is also a basic example of eBPF/bcc. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH EXAMPLES +.TP +Trace calls to sync(): +# +.B syncsnoop +.SH FIELDS +.TP +TIME(s) +Time of the call, in seconds. +.TP +CALL +Call traced. +.SH OVERHEAD +This traces the kernel sync function and prints output for each event. As the +rate of this is generally expected to be low (<< 100/s), the overhead is also +expected to be negligible. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Brendan Gregg +.SH SEE ALSO +iostat(1) diff --git a/man/man8/vfscount.8 b/man/man8/vfscount.8 new file mode 100644 index 000000000000..af5459b8421c --- /dev/null +++ b/man/man8/vfscount.8 @@ -0,0 +1,52 @@ +.TH vfscount 8 "2015-08-18" "USER COMMANDS" +.SH NAME +vfscount \- Count some common VFS calls. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B vfscount +.SH DESCRIPTION +This counts common VFS calls. This can be useful for general workload +characterization of these operations. + +This works by tracing some kernel vfs functions using dynamic tracing, and will +need updating to match any changes to these functions. Edit the script to +customize and add functions to trace, which is easy to do. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH EXAMPLES +.TP +Count some VFS calls until Ctrl-C is hit: +# +.B vfscount +.SH FIELDS +.TP +ADDR +Address of the instruction pointer that was traced (only useful if the FUNC column is suspicious and you would like to double check the translation). +.TP +FUNC +Kernel function name +.TP +COUNT +Number of calls while tracing +.SH OVERHEAD +This traces various kernel vfs functions and maintains in-kernel counts, which +are asynchronously copied to user-space. While the rate of VFS operations can +be very high (>1M/sec), this is a relatively efficient way to trace these +events, and so the overhead is expected to be small for normal workloads. +Measure in a test environment. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Brendan Gregg +.SH SEE ALSO +vfsstat(8) diff --git a/man/man8/vfsstat.8 b/man/man8/vfsstat.8 new file mode 100644 index 000000000000..929e6b638111 --- /dev/null +++ b/man/man8/vfsstat.8 @@ -0,0 +1,65 @@ +.TH vfsstat 8 "2015-08-18" "USER COMMANDS" +.SH NAME +vfsstat \- Statistics for some common VFS calls. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B vfsstat +[interval [count]] +.SH DESCRIPTION +This traces some common VFS calls and prints per-second summaries. This can +be useful for general workload characterization, and looking for patterns +in operation usage over time. + +This works by tracing some kernel vfs functions using dynamic tracing, and will +need updating to match any changes to these functions. Edit the script to +customize which functions are traced. Also see vfscount, which is more +easily customized to trace multiple functions. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH EXAMPLES +.TP +Print summaries each second: +# +.B vfsstat +.TP +Print output every five seconds, three times: +# +.B vfsstat 5 3 +.SH FIELDS +.TP +READ/s +Number of vfs_read() calls as a per-second average. +.TP +WRITE/s +Number of vfs_write() calls as a per-second average. +.TP +CREATE/s +Number of vfs_create() calls as a per-second average. +.TP +OPEN/s +Number of vfs_open() calls as a per-second average. +.TP +FSYNC/s +Number of vfs_fsync() calls as a per-second average. +.SH OVERHEAD +This traces various kernel vfs functions and maintains in-kernel counts, which +are asynchronously copied to user-space. While the rate of VFS operations can +be very high (>1M/sec), this is a relatively efficient way to trace these +events, and so the overhead is expected to be small for normal workloads. +Measure in a test environment. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Brendan Gregg +.SH SEE ALSO +vfscount(8) diff --git a/tools/pidpersec b/tools/pidpersec new file mode 100755 index 000000000000..b84fbe672199 --- /dev/null +++ b/tools/pidpersec @@ -0,0 +1,40 @@ +#!/usr/bin/python +# +# pidpersec Count new processes (via fork). +# For Linux, uses BCC, eBPF. See .c file. +# +# USAGE: pidpersec +# +# Written as a basic example of counting an event. +# +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 11-Aug-2015 Brendan Gregg Created this. + +from bpf import BPF +from ctypes import c_ushort, c_int, c_ulonglong +from time import sleep, strftime + +# load BPF program +b = BPF(src_file = "pidpersec.c") +BPF.attach_kprobe(b.load_func("do_count", BPF.KPROBE), "sched_fork") +stats = b.get_table("stats", c_int, c_ulonglong) + +# stat indexes +S_COUNT = 1 + +# header +print "Tracing... Ctrl-C to end." + +# output +last = 0 +while (1): + try: + sleep(1) + except KeyboardInterrupt: + pass; exit() + + print "%s: PIDs/sec: %d" % (strftime("%H:%M:%S"), + (stats[c_int(S_COUNT)].value - last)) + last = stats[c_int(S_COUNT)].value diff --git a/tools/pidpersec.c b/tools/pidpersec.c new file mode 100644 index 000000000000..512d6ca629b6 --- /dev/null +++ b/tools/pidpersec.c @@ -0,0 +1,29 @@ +/* + * pidpersec.c Count new processes (via fork). + * For Linux, uses BCC, eBPF. See the Python front-end. + * + * USAGE: pidpersec.py + * + * Copyright (c) 2015 Brendan Gregg. + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * 11-Aug-2015 Brendan Gregg Created this. + */ + +#include + +enum stat_types { + S_COUNT = 1, + S_MAXSTAT +}; + +BPF_TABLE("array", int, u64, stats, S_MAXSTAT + 1); + +void stats_increment(int key) { + u64 *leaf = stats.lookup(&key); + if (leaf) (*leaf)++; +} + +void do_count(struct pt_regs *ctx) { stats_increment(S_COUNT); } diff --git a/tools/pidpersec_example.txt b/tools/pidpersec_example.txt new file mode 100644 index 000000000000..b274dd4e6dc0 --- /dev/null +++ b/tools/pidpersec_example.txt @@ -0,0 +1,22 @@ +Demonstrations of pidpersec, the Linux eBPF/bcc version. + + +This shows the number of new processes created per second, measured by tracing +the kernel fork() routine: + +# ./pidpersec +Tracing... Ctrl-C to end. +18:33:06: PIDs/sec: 4 +18:33:07: PIDs/sec: 5 +18:33:08: PIDs/sec: 4 +18:33:09: PIDs/sec: 4 +18:33:10: PIDs/sec: 21 +18:33:11: PIDs/sec: 5 +18:33:12: PIDs/sec: 4 +18:33:13: PIDs/sec: 4 + +Each second there are four new processes (this happens to be caused by a +launcher script that is retrying in a loop, and encountering errors). + +At 18:33:10, I typed "man ls" in another server session, which caused an +increase in the number of new processes as the necessary commands were run. diff --git a/tools/syncsnoop b/tools/syncsnoop new file mode 100755 index 000000000000..bc2447d126cc --- /dev/null +++ b/tools/syncsnoop @@ -0,0 +1,46 @@ +#!/usr/bin/python +# +# syncsnoop Trace sync() syscall. +# For Linux, uses BCC, eBPF. Embedded C. +# +# Written as a basic example of BCC trace & reformat. See +# examples/hello_world.py for a BCC trace with default output example. +# +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 13-Aug-2015 Brendan Gregg Created this. + +from bpf import BPF +import sys + +# load BPF program +b = BPF(text = """ +int do_sync(void *ctx) { + bpf_trace_printk("sync()\\n"); + return 0; +}; +""") +BPF.attach_kprobe(b.load_func("do_sync", BPF.KPROBE), "sys_sync") + +# header +print "%-18s %s" % ("TIME(s)", "CALL") + +# open trace pipe +try: + trace = open("/sys/kernel/debug/tracing/trace_pipe", "r") +except: + print >> sys.stderr, "ERROR: opening trace_pipe" + exit(1) + +# format output +while 1: + try: + line = trace.readline().rstrip() + except KeyboardInterrupt: + pass; exit() + + prolog, time_s, colon, word = line.rsplit(" ", 3) + time_s = time_s[:-1] # strip trailing ":" + + print "%-18s %s" % (time_s, word) diff --git a/tools/syncsnoop_example.txt b/tools/syncsnoop_example.txt new file mode 100644 index 000000000000..56cc21e18feb --- /dev/null +++ b/tools/syncsnoop_example.txt @@ -0,0 +1,14 @@ +Demonstrations of syncsnoop, the Linux eBPF/bcc version. + + +This program traces calls to the kernel sync() routine, with basic timestamps: + +# ./syncsnoop +TIME(s) CALL +16458148.611952 sync() +16458151.533709 sync() +^C + +While tracing, the "sync" command was executed in another server session. + +This can be useful to identify that sync() is being called, and its frequency. diff --git a/tools/vfscount b/tools/vfscount new file mode 100755 index 000000000000..60adcb314097 --- /dev/null +++ b/tools/vfscount @@ -0,0 +1,73 @@ +#!/usr/bin/python +# +# vfscount Count some VFS calls. +# For Linux, uses BCC, eBPF. See .c file. +# +# Written as a basic example of counting functions. +# +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 14-Aug-2015 Brendan Gregg Created this. + +from bpf import BPF +from ctypes import c_ushort, c_int, c_ulonglong +from time import sleep, strftime +from sys import stderr + +# kernel symbol translation +ksym_addrs = [] # addresses for binary search +ksym_names = [] # same index as ksym_addrs +def load_kallsyms(): + symfile = "/proc/kallsyms" + try: + syms = open(symfile, "r") + except: + print >> stderr, "ERROR: reading " + symfile + exit() + line = syms.readline() + for line in iter(syms): + cols = line.split() + name = cols[2] + if name[:4] != "vfs_": # perf optimization + continue + addr = int(cols[0], 16) + ksym_addrs.append(addr) + ksym_names.append(name) + syms.close() +def ksym(addr): + start = -1 + end = len(ksym_addrs) + while end != start + 1: + mid = (start + end) / 2 + if addr < ksym_addrs[mid]: + end = mid + else: + start = mid + if start == -1: + return "[unknown]" + return ksym_names[start] +load_kallsyms() + +# load BPF program +b = BPF(src_file = "vfscount.c") +fn = b.load_func("do_count", BPF.KPROBE) +BPF.attach_kprobe(fn, "vfs_read") +BPF.attach_kprobe(fn, "vfs_write") +BPF.attach_kprobe(fn, "vfs_fsync") +BPF.attach_kprobe(fn, "vfs_open") +BPF.attach_kprobe(fn, "vfs_create") +counts = b.get_table("counts") + +# header +print "Tracing... Ctrl-C to end." + +# output +try: + sleep(99999999) +except KeyboardInterrupt: + pass + +print "\n%-16s %-12s %8s" % ("ADDR", "FUNC", "COUNT") +for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): + print "%-16x %-12s %8d" % (k.ip, ksym(k.ip), v.value) diff --git a/tools/vfscount.c b/tools/vfscount.c new file mode 100644 index 000000000000..1920c25a934a --- /dev/null +++ b/tools/vfscount.c @@ -0,0 +1,30 @@ +/* + * vfscount.c Count some VFS calls. + * For Linux, uses BCC, eBPF. See the Python front-end. + * + * USAGE: vfscount.py + * + * Copyright (c) 2015 Brendan Gregg. + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * 14-Aug-2015 Brendan Gregg Created this. + */ + +#include + +struct key_t { + u64 ip; +}; + +BPF_TABLE("hash", struct key_t, u64, counts, 256); + +int do_count(struct pt_regs *ctx) { + struct key_t key = {}; + u64 zero = 0, *val; + key.ip = ctx->ip; + val = counts.lookup_or_init(&key, &zero); + (*val)++; + return 0; +} diff --git a/tools/vfscount_example.txt b/tools/vfscount_example.txt new file mode 100644 index 000000000000..fc5865e6d56d --- /dev/null +++ b/tools/vfscount_example.txt @@ -0,0 +1,17 @@ +Demonstrations of vfscount, the Linux eBPF/bcc version. + + +This counts VFS calls, by tracing various kernel calls beginning with "vfs_" +(edit the script to customize): + +# ./vfscount +Tracing... Ctrl-C to end. +^C +ADDR FUNC COUNT +ffffffff811f2cc1 vfs_create 24 +ffffffff811e71c1 vfs_write 203 +ffffffff811e6061 vfs_open 765 +ffffffff811e7091 vfs_read 1852 + +This can be useful for workload characterization, to see what types of +operations are in use. diff --git a/tools/vfsstat b/tools/vfsstat new file mode 100755 index 000000000000..3c5ee997661c --- /dev/null +++ b/tools/vfsstat @@ -0,0 +1,87 @@ +#!/usr/bin/python +# +# vfsstat Count some VFS calls. +# For Linux, uses BCC, eBPF. See .c file. +# +# Written as a basic example of counting multiple events as a stat tool. +# +# USAGE: vfsstat [interval [count]] +# +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 14-Aug-2015 Brendan Gregg Created this. + +from __future__ import print_function +from bpf import BPF +from ctypes import c_ushort, c_int, c_ulonglong +from time import sleep, strftime +from sys import argv + +def usage(): + print("USAGE: %s [interval [count]]" % argv[0]) + exit() + +# arguments +interval = 1 +count = -1 +if len(argv) > 1: + try: + interval = int(argv[1]) + if interval == 0: + raise + if len(argv) > 2: + count = int(argv[2]) + except: # also catches -h, --help + usage() + +# load BPF program +b = BPF(src_file = "vfsstat.c") +BPF.attach_kprobe(b.load_func("do_read", BPF.KPROBE), "vfs_read") +BPF.attach_kprobe(b.load_func("do_write", BPF.KPROBE), "vfs_write") +BPF.attach_kprobe(b.load_func("do_fsync", BPF.KPROBE), "vfs_fsync") +BPF.attach_kprobe(b.load_func("do_open", BPF.KPROBE), "vfs_open") +BPF.attach_kprobe(b.load_func("do_create", BPF.KPROBE), "vfs_create") +stats = b.get_table("stats", c_int, c_ulonglong) + +# stat column labels and indexes +stat_types = { + "READ" : 1, + "WRITE" : 2, + "FSYNC" : 3, + "OPEN" : 4, + "CREATE" : 5 +} + +# header +print("%-8s " % "TIME", end="") +last = {} +for stype in stat_types.keys(): + print(" %8s" % (stype + "/s"), end="") + idx = stat_types[stype] + last[idx] = 0 +print("") + +# output +i = 0 +while (1): + if count > 0: + i += 1 + if i > count: + exit() + try: + sleep(interval) + except KeyboardInterrupt: + pass; exit() + + print("%-8s: " % strftime("%H:%M:%S"), end="") + # print each statistic as a column + for stype in stat_types.keys(): + idx = stat_types[stype] + try: + delta = stats[c_int(idx)].value - last[idx] + print(" %8d" % (delta / interval), end="") + last[idx] = stats[c_int(idx)].value + except: + print(" %8d" % 0, end="") + print("") diff --git a/tools/vfsstat.c b/tools/vfsstat.c new file mode 100644 index 000000000000..d72047e34650 --- /dev/null +++ b/tools/vfsstat.c @@ -0,0 +1,37 @@ +/* + * vfsstat.c Count some VFS calls. + * For Linux, uses BCC, eBPF. See the Python front-end. + * + * USAGE: vfsstat.py [interval [count]] + * + * Copyright (c) 2015 Brendan Gregg. + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * 14-Aug-2015 Brendan Gregg Created this. + */ + +#include + +enum stat_types { + S_READ = 1, + S_WRITE, + S_FSYNC, + S_OPEN, + S_CREATE, + S_MAXSTAT +}; + +BPF_TABLE("array", int, u64, stats, S_MAXSTAT + 1); + +void stats_increment(int key) { + u64 *leaf = stats.lookup(&key); + if (leaf) (*leaf)++; +} + +void do_read(struct pt_regs *ctx) { stats_increment(S_READ); } +void do_write(struct pt_regs *ctx) { stats_increment(S_WRITE); } +void do_fsync(struct pt_regs *ctx) { stats_increment(S_FSYNC); } +void do_open(struct pt_regs *ctx) { stats_increment(S_OPEN); } +void do_create(struct pt_regs *ctx) { stats_increment(S_CREATE); } diff --git a/tools/vfsstat_example.txt b/tools/vfsstat_example.txt new file mode 100644 index 000000000000..48ef514d7654 --- /dev/null +++ b/tools/vfsstat_example.txt @@ -0,0 +1,36 @@ +Demonstrations of vfsstat, the Linux eBPF/bcc version. + + +This traces some common VFS calls and prints per-second summaries. By default, +the output interval is one second: + +# ./vfsstat +TIME READ/s WRITE/s CREATE/s OPEN/s FSYNC/s +18:35:32: 231 12 4 98 0 +18:35:33: 274 13 4 106 0 +18:35:34: 586 86 4 251 0 +18:35:35: 241 15 4 99 0 +18:35:36: 232 10 4 98 0 +18:35:37: 244 10 4 107 0 +18:35:38: 235 13 4 97 0 +18:35:39: 6749 2633 4 1446 0 +18:35:40: 277 31 4 115 0 +18:35:41: 238 16 6 102 0 +18:35:42: 284 50 8 114 0 +^C + + +Here we are using an output interval of five seconds, and printing three output +lines: + +# ./vfsstat 5 3 +TIME READ/s WRITE/s CREATE/s OPEN/s FSYNC/s +18:35:55: 238 8 3 101 0 +18:36:00: 962 233 4 247 0 +18:36:05: 241 8 3 100 0 + + +Full usage: + +# ./vfsstat -h +USAGE: ./vfsstat [interval [count]]